#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user-generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (e.g., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
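/*
 * Example (illustrative, not part of this file): a user process typically
 * starts such a stream via the ktrace(2) system call, e.g.
 *
 *	ktrace("ktrace.out", KTROP_SET | KTRFLAG_DESCEND,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, getpid());
 *
 * after which each traced event appears in ktrace.out as a struct
 * ktr_header followed by a type-specific payload.
 */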
static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef KTRACE_REQUEST_POOL
#define KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0,					/* KTR_USER */
	0,					/* KTR_STRUCT */
	0,					/* KTR_SYSCTL */
	sizeof(struct ktr_proc_ctor),		/* KTR_PROCCTOR */
	0,					/* KTR_PROCDTOR */
	sizeof(struct ktr_cap_fail),		/* KTR_CAPFAIL */
};

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *, struct proc *);
static int ktrsetchildren(struct thread *, struct proc *, int, int,
    struct vnode *);
static int ktrops(struct thread *, struct proc *, int, int, struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
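/*
 * Sketch of the bracketing protocol (illustrative): any path that may
 * generate trace records wraps its work as
 *
 *	ktrace_enter(td);
 *	...allocate, fill, and submit or queue a request...
 *	ktrace_exit(td);
 *
 * so that events generated while servicing a trace record are themselves
 * suppressed.
 */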
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}
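/*
 * Example (from userland, illustrative): the request pool can be resized
 * at runtime with
 *
 *	sysctl kern.ktrace.request_pool=200
 *
 * or preset at boot via the kern.ktrace.request_pool loader tunable.
 */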
/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under
 * these circumstances, queue a request to the thread to be written
 * asynchronously later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.
 * This is used both internally before committing other records, and also
 * on system call return.  We drain all the ones we can find at the time
 * when drain is requested, but don't keep draining after that as those
 * events may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}
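/*
 * Usage sketch (illustrative): event generators allocate a request and
 * then either commit it synchronously or queue it, depending on whether
 * the current context may sleep:
 *
 *	req = ktr_getrequest(KTR_CSW);
 *	if (req == NULL)
 *		return;
 *	...fill in req->ktr_data.ktr_csw...
 *	ktr_enqueuerequest(td, req);
 *	ktrace_exit(td);
 *
 * whereas ktr_submitrequest() below drains, writes, and calls
 * ktrace_exit() on the caller's behalf.
 */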
/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}

void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;		/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}
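/*
 * Example (illustrative): for a call such as write(fd, buf, len),
 * ktrsyscall() emits one KTR_SYSCALL record carrying the code, the
 * argument count, and the raw register_t arguments, and ktrsysret()
 * later emits the matching KTR_SYSRET record; as noted at the top of
 * this file, the two may be interleaved with records from other threads.
 */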
/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}

/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;
	int vfslocked;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}

static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_entered(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}
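/*
 * KTR_SYSCTL records log the dotted-string name of the MIB being
 * accessed rather than its numeric OID.  The lookup below goes through
 * the sysctl "name" node {0, 1}, which translates a numeric OID back
 * into its string form, e.g. the OID for kern.ostype yields the string
 * "kern.ostype".
 */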
void
ktrsysctl(name, namelen)
	int *name;
	u_int namelen;
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Look up the name of the MIB. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname,
	    &mibnamelen, NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = imin(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_psig *kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrcsw(out, user)
	int out, user;
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

void
ktrstruct(name, data, datalen)
	const char *name;
	void *data;
	size_t datalen;
{
	struct ktr_request *req;
	char *buf = NULL;
	size_t buflen;

	if (!data)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + strlen(name) + 1, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}
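/*
 * Example (illustrative): a wrapper such as ktrstat() calls
 * ktrstruct("stat", &sb, sizeof(sb)), producing a KTR_STRUCT record
 * whose payload is the bytes "stat\0" followed by the raw structure;
 * kdump(1) uses the leading name to select a decoder.
 */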
void
ktrcapfail(needed, held)
	cap_rights_t needed;
	cap_rights_t held;
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_cap_fail *kcf;

	req = ktr_getrequest(KTR_CAPFAIL);
	if (req == NULL)
		return;
	kcf = &req->ktr_data.ktr_cap_fail;
	kcf->cap_needed = needed;
	kcf->cap_held = held;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
sys_ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0, vfslocked;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
		    uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		vfslocked = NDHASGIANT(&nd);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			VFS_UNLOCK_GIANT(vfslocked);
			ktrace_exit(td);
			return (EACCES);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			while (vrele_count-- > 0)
				vrele(vp);
			VFS_UNLOCK_GIANT(vfslocked);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele().  Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
901 */ 902 PGRP_UNLOCK(pg); 903 nfound = 0; 904 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 905 PROC_LOCK(p); 906 if (p->p_state == PRS_NEW || 907 p_cansee(td, p) != 0) { 908 PROC_UNLOCK(p); 909 continue; 910 } 911 nfound++; 912 if (descend) 913 ret |= ktrsetchildren(td, p, ops, facs, vp); 914 else 915 ret |= ktrops(td, p, ops, facs, vp); 916 } 917 if (nfound == 0) { 918 sx_sunlock(&proctree_lock); 919 error = ESRCH; 920 goto done; 921 } 922 } else { 923 /* 924 * by pid 925 */ 926 p = pfind(uap->pid); 927 if (p == NULL) 928 error = ESRCH; 929 else 930 error = p_cansee(td, p); 931 if (error) { 932 if (p != NULL) 933 PROC_UNLOCK(p); 934 sx_sunlock(&proctree_lock); 935 goto done; 936 } 937 if (descend) 938 ret |= ktrsetchildren(td, p, ops, facs, vp); 939 else 940 ret |= ktrops(td, p, ops, facs, vp); 941 } 942 sx_sunlock(&proctree_lock); 943 if (!ret) 944 error = EPERM; 945done: 946 if (vp != NULL) { 947 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 948 (void) vn_close(vp, FWRITE, td->td_ucred, td); 949 VFS_UNLOCK_GIANT(vfslocked); 950 } 951 ktrace_exit(td); 952 return (error); 953#else /* !KTRACE */ 954 return (ENOSYS); 955#endif /* KTRACE */ 956} 957 958/* ARGSUSED */ 959int 960sys_utrace(td, uap) 961 struct thread *td; 962 register struct utrace_args *uap; 963{ 964 965#ifdef KTRACE 966 struct ktr_request *req; 967 void *cp; 968 int error; 969 970 if (!KTRPOINT(td, KTR_USER)) 971 return (0); 972 if (uap->len > KTR_USER_MAXLEN) 973 return (EINVAL); 974 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 975 error = copyin(uap->addr, cp, uap->len); 976 if (error) { 977 free(cp, M_KTRACE); 978 return (error); 979 } 980 req = ktr_getrequest(KTR_USER); 981 if (req == NULL) { 982 free(cp, M_KTRACE); 983 return (ENOMEM); 984 } 985 req->ktr_buffer = cp; 986 req->ktr_header.ktr_len = uap->len; 987 ktr_submitrequest(td, req); 988 return (0); 989#else /* !KTRACE */ 990 return (ENOSYS); 991#endif /* KTRACE */ 992} 993 994#ifdef KTRACE 995static int 996ktrops(td, p, ops, facs, vp) 997 struct thread *td; 998 struct proc *p; 999 int ops, facs; 1000 struct vnode *vp; 1001{ 1002 struct vnode *tracevp = NULL; 1003 struct ucred *tracecred = NULL; 1004 1005 PROC_LOCK_ASSERT(p, MA_OWNED); 1006 if (!ktrcanset(td, p)) { 1007 PROC_UNLOCK(p); 1008 return (0); 1009 } 1010 if (p->p_flag & P_WEXIT) { 1011 /* If the process is exiting, just ignore it. 
#ifdef KTRACE
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	if (tracevp != NULL) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}
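/*
 * On-disk record layout assembled below, using up to three iovecs: the
 * fixed struct ktr_header, then the type-specific payload from ktr_data
 * (whose length comes from data_lengths[]), then any variable-length
 * buffer such as syscall arguments, a path name, or genio data.
 */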
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error, vfslocked;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with.  Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	while (vrele_count-- > 0)
		vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process,
 * and so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */