kern_ktrace.c revision 225617
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. 4 * Copyright (c) 2005 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)kern_ktrace.c 8.2 (Berkeley) 9/23/93 32 */ 33 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 225617 2011-09-16 13:58:51Z kmacy $"); 36 37#include "opt_ktrace.h" 38 39#include <sys/param.h> 40#include <sys/systm.h> 41#include <sys/fcntl.h> 42#include <sys/kernel.h> 43#include <sys/kthread.h> 44#include <sys/lock.h> 45#include <sys/mutex.h> 46#include <sys/malloc.h> 47#include <sys/mount.h> 48#include <sys/namei.h> 49#include <sys/priv.h> 50#include <sys/proc.h> 51#include <sys/unistd.h> 52#include <sys/vnode.h> 53#include <sys/socket.h> 54#include <sys/stat.h> 55#include <sys/ktrace.h> 56#include <sys/sx.h> 57#include <sys/sysctl.h> 58#include <sys/sysent.h> 59#include <sys/syslog.h> 60#include <sys/sysproto.h> 61 62#include <security/mac/mac_framework.h> 63 64/* 65 * The ktrace facility allows the tracing of certain key events in user space 66 * processes, such as system calls, signal delivery, context switches, and 67 * user generated events using utrace(2). It works by streaming event 68 * records and data to a vnode associated with the process using the 69 * ktrace(2) system call. In general, records can be written directly from 70 * the context that generates the event. One important exception to this is 71 * during a context switch, where sleeping is not permitted. To handle this 72 * case, trace events are generated using in-kernel ktr_request records, and 73 * then delivered to disk at a convenient moment -- either immediately, the 74 * next traceable event, at system call return, or at process exit. 75 * 76 * When dealing with multiple threads or processes writing to the same event 77 * log, ordering guarantees are weak: specifically, if an event has multiple 78 * records (i.e., system call enter and return), they may be interlaced with 79 * records from another event. Process and thread ID information is provided 80 * in the record, and user applications can de-interlace events if required. 81 */ 82 83static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE"); 84 85#ifdef KTRACE 86 87FEATURE(ktrace, "Kernel support for system-call tracing"); 88 89#ifndef KTRACE_REQUEST_POOL 90#define KTRACE_REQUEST_POOL 100 91#endif 92 93struct ktr_request { 94 struct ktr_header ktr_header; 95 void *ktr_buffer; 96 union { 97 struct ktr_proc_ctor ktr_proc_ctor; 98 struct ktr_syscall ktr_syscall; 99 struct ktr_sysret ktr_sysret; 100 struct ktr_genio ktr_genio; 101 struct ktr_psig ktr_psig; 102 struct ktr_csw ktr_csw; 103 } ktr_data; 104 STAILQ_ENTRY(ktr_request) ktr_list; 105}; 106 107static int data_lengths[] = { 108 0, /* none */ 109 offsetof(struct ktr_syscall, ktr_args), /* KTR_SYSCALL */ 110 sizeof(struct ktr_sysret), /* KTR_SYSRET */ 111 0, /* KTR_NAMEI */ 112 sizeof(struct ktr_genio), /* KTR_GENIO */ 113 sizeof(struct ktr_psig), /* KTR_PSIG */ 114 sizeof(struct ktr_csw), /* KTR_CSW */ 115 0, /* KTR_USER */ 116 0, /* KTR_STRUCT */ 117 0, /* KTR_SYSCTL */ 118 sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */ 119 0, /* KTR_PROCDTOR */ 120}; 121 122static STAILQ_HEAD(, ktr_request) ktr_free; 123 124static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options"); 125 126static u_int ktr_requestpool = KTRACE_REQUEST_POOL; 127TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool); 128 129static u_int ktr_geniosize = PAGE_SIZE; 130TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize); 131SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize, 132 0, "Maximum size of genio event payload"); 133 134static int print_message = 1; 135static struct mtx ktrace_mtx; 136static struct sx ktrace_sx; 137 138static void ktrace_init(void *dummy); 139static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS); 140static u_int ktrace_resize_pool(u_int oldsize, u_int newsize); 141static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type); 142static struct ktr_request *ktr_getrequest(int type); 143static void ktr_submitrequest(struct thread *td, struct ktr_request *req); 144static void ktr_freeproc(struct proc *p, struct ucred **uc, 145 struct vnode **vp); 146static void ktr_freerequest(struct ktr_request *req); 147static void ktr_freerequest_locked(struct ktr_request *req); 148static void ktr_writerequest(struct thread *td, struct ktr_request *req); 149static int ktrcanset(struct thread *,struct proc *); 150static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *); 151static int ktrops(struct thread *,struct proc *,int,int,struct vnode *); 152static void ktrprocctor_entered(struct thread *, struct proc *); 153 154/* 155 * ktrace itself generates events, such as context switches, which we do not 156 * wish to trace. Maintain a flag, TDP_INKTRACE, on each thread to determine 157 * whether or not it is in a region where tracing of events should be 158 * suppressed. 159 */ 160static void 161ktrace_enter(struct thread *td) 162{ 163 164 KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set")); 165 td->td_pflags |= TDP_INKTRACE; 166} 167 168static void 169ktrace_exit(struct thread *td) 170{ 171 172 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set")); 173 td->td_pflags &= ~TDP_INKTRACE; 174} 175 176static void 177ktrace_assert(struct thread *td) 178{ 179 180 KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set")); 181} 182 183static void 184ktrace_init(void *dummy) 185{ 186 struct ktr_request *req; 187 int i; 188 189 mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET); 190 sx_init(&ktrace_sx, "ktrace_sx"); 191 STAILQ_INIT(&ktr_free); 192 for (i = 0; i < ktr_requestpool; i++) { 193 req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK); 194 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 195 } 196} 197SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL); 198 199static int 200sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS) 201{ 202 struct thread *td; 203 u_int newsize, oldsize, wantsize; 204 int error; 205 206 /* Handle easy read-only case first to avoid warnings from GCC. */ 207 if (!req->newptr) { 208 oldsize = ktr_requestpool; 209 return (SYSCTL_OUT(req, &oldsize, sizeof(u_int))); 210 } 211 212 error = SYSCTL_IN(req, &wantsize, sizeof(u_int)); 213 if (error) 214 return (error); 215 td = curthread; 216 ktrace_enter(td); 217 oldsize = ktr_requestpool; 218 newsize = ktrace_resize_pool(oldsize, wantsize); 219 ktrace_exit(td); 220 error = SYSCTL_OUT(req, &oldsize, sizeof(u_int)); 221 if (error) 222 return (error); 223 if (wantsize > oldsize && newsize < wantsize) 224 return (ENOSPC); 225 return (0); 226} 227SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW, 228 &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", 229 "Pool buffer size for ktrace(1)"); 230 231static u_int 232ktrace_resize_pool(u_int oldsize, u_int newsize) 233{ 234 STAILQ_HEAD(, ktr_request) ktr_new; 235 struct ktr_request *req; 236 int bound; 237 238 print_message = 1; 239 bound = newsize - oldsize; 240 if (bound == 0) 241 return (ktr_requestpool); 242 if (bound < 0) { 243 mtx_lock(&ktrace_mtx); 244 /* Shrink pool down to newsize if possible. */ 245 while (bound++ < 0) { 246 req = STAILQ_FIRST(&ktr_free); 247 if (req == NULL) 248 break; 249 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 250 ktr_requestpool--; 251 free(req, M_KTRACE); 252 } 253 } else { 254 /* Grow pool up to newsize. */ 255 STAILQ_INIT(&ktr_new); 256 while (bound-- > 0) { 257 req = malloc(sizeof(struct ktr_request), M_KTRACE, 258 M_WAITOK); 259 STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list); 260 } 261 mtx_lock(&ktrace_mtx); 262 STAILQ_CONCAT(&ktr_free, &ktr_new); 263 ktr_requestpool += (newsize - oldsize); 264 } 265 mtx_unlock(&ktrace_mtx); 266 return (ktr_requestpool); 267} 268 269/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */ 270CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) == 271 (sizeof((struct thread *)NULL)->td_name)); 272 273static struct ktr_request * 274ktr_getrequest_entered(struct thread *td, int type) 275{ 276 struct ktr_request *req; 277 struct proc *p = td->td_proc; 278 int pm; 279 280 mtx_lock(&ktrace_mtx); 281 if (!KTRCHECK(td, type)) { 282 mtx_unlock(&ktrace_mtx); 283 return (NULL); 284 } 285 req = STAILQ_FIRST(&ktr_free); 286 if (req != NULL) { 287 STAILQ_REMOVE_HEAD(&ktr_free, ktr_list); 288 req->ktr_header.ktr_type = type; 289 if (p->p_traceflag & KTRFAC_DROP) { 290 req->ktr_header.ktr_type |= KTR_DROP; 291 p->p_traceflag &= ~KTRFAC_DROP; 292 } 293 mtx_unlock(&ktrace_mtx); 294 microtime(&req->ktr_header.ktr_time); 295 req->ktr_header.ktr_pid = p->p_pid; 296 req->ktr_header.ktr_tid = td->td_tid; 297 bcopy(td->td_name, req->ktr_header.ktr_comm, 298 sizeof(req->ktr_header.ktr_comm)); 299 req->ktr_buffer = NULL; 300 req->ktr_header.ktr_len = 0; 301 } else { 302 p->p_traceflag |= KTRFAC_DROP; 303 pm = print_message; 304 print_message = 0; 305 mtx_unlock(&ktrace_mtx); 306 if (pm) 307 printf("Out of ktrace request objects.\n"); 308 } 309 return (req); 310} 311 312static struct ktr_request * 313ktr_getrequest(int type) 314{ 315 struct thread *td = curthread; 316 struct ktr_request *req; 317 318 ktrace_enter(td); 319 req = ktr_getrequest_entered(td, type); 320 if (req == NULL) 321 ktrace_exit(td); 322 323 return (req); 324} 325 326/* 327 * Some trace generation environments don't permit direct access to VFS, 328 * such as during a context switch where sleeping is not allowed. Under these 329 * circumstances, queue a request to the thread to be written asynchronously 330 * later. 331 */ 332static void 333ktr_enqueuerequest(struct thread *td, struct ktr_request *req) 334{ 335 336 mtx_lock(&ktrace_mtx); 337 STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list); 338 mtx_unlock(&ktrace_mtx); 339} 340 341/* 342 * Drain any pending ktrace records from the per-thread queue to disk. This 343 * is used both internally before committing other records, and also on 344 * system call return. We drain all the ones we can find at the time when 345 * drain is requested, but don't keep draining after that as those events 346 * may be approximately "after" the current event. 347 */ 348static void 349ktr_drain(struct thread *td) 350{ 351 struct ktr_request *queued_req; 352 STAILQ_HEAD(, ktr_request) local_queue; 353 354 ktrace_assert(td); 355 sx_assert(&ktrace_sx, SX_XLOCKED); 356 357 STAILQ_INIT(&local_queue); 358 359 if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) { 360 mtx_lock(&ktrace_mtx); 361 STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr); 362 mtx_unlock(&ktrace_mtx); 363 364 while ((queued_req = STAILQ_FIRST(&local_queue))) { 365 STAILQ_REMOVE_HEAD(&local_queue, ktr_list); 366 ktr_writerequest(td, queued_req); 367 ktr_freerequest(queued_req); 368 } 369 } 370} 371 372/* 373 * Submit a trace record for immediate commit to disk -- to be used only 374 * where entering VFS is OK. First drain any pending records that may have 375 * been cached in the thread. 376 */ 377static void 378ktr_submitrequest(struct thread *td, struct ktr_request *req) 379{ 380 381 ktrace_assert(td); 382 383 sx_xlock(&ktrace_sx); 384 ktr_drain(td); 385 ktr_writerequest(td, req); 386 ktr_freerequest(req); 387 sx_xunlock(&ktrace_sx); 388 ktrace_exit(td); 389} 390 391static void 392ktr_freerequest(struct ktr_request *req) 393{ 394 395 mtx_lock(&ktrace_mtx); 396 ktr_freerequest_locked(req); 397 mtx_unlock(&ktrace_mtx); 398} 399 400static void 401ktr_freerequest_locked(struct ktr_request *req) 402{ 403 404 mtx_assert(&ktrace_mtx, MA_OWNED); 405 if (req->ktr_buffer != NULL) 406 free(req->ktr_buffer, M_KTRACE); 407 STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list); 408} 409 410/* 411 * Disable tracing for a process and release all associated resources. 412 * The caller is responsible for releasing a reference on the returned 413 * vnode and credentials. 414 */ 415static void 416ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp) 417{ 418 struct ktr_request *req; 419 420 PROC_LOCK_ASSERT(p, MA_OWNED); 421 mtx_assert(&ktrace_mtx, MA_OWNED); 422 *uc = p->p_tracecred; 423 p->p_tracecred = NULL; 424 if (vp != NULL) 425 *vp = p->p_tracevp; 426 p->p_tracevp = NULL; 427 p->p_traceflag = 0; 428 while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) { 429 STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list); 430 ktr_freerequest_locked(req); 431 } 432} 433 434void 435ktrsyscall(code, narg, args) 436 int code, narg; 437 register_t args[]; 438{ 439 struct ktr_request *req; 440 struct ktr_syscall *ktp; 441 size_t buflen; 442 char *buf = NULL; 443 444 buflen = sizeof(register_t) * narg; 445 if (buflen > 0) { 446 buf = malloc(buflen, M_KTRACE, M_WAITOK); 447 bcopy(args, buf, buflen); 448 } 449 req = ktr_getrequest(KTR_SYSCALL); 450 if (req == NULL) { 451 if (buf != NULL) 452 free(buf, M_KTRACE); 453 return; 454 } 455 ktp = &req->ktr_data.ktr_syscall; 456 ktp->ktr_code = code; 457 ktp->ktr_narg = narg; 458 if (buflen > 0) { 459 req->ktr_header.ktr_len = buflen; 460 req->ktr_buffer = buf; 461 } 462 ktr_submitrequest(curthread, req); 463} 464 465void 466ktrsysret(code, error, retval) 467 int code, error; 468 register_t retval; 469{ 470 struct ktr_request *req; 471 struct ktr_sysret *ktp; 472 473 req = ktr_getrequest(KTR_SYSRET); 474 if (req == NULL) 475 return; 476 ktp = &req->ktr_data.ktr_sysret; 477 ktp->ktr_code = code; 478 ktp->ktr_error = error; 479 ktp->ktr_retval = retval; /* what about val2 ? */ 480 ktr_submitrequest(curthread, req); 481} 482 483/* 484 * When a setuid process execs, disable tracing. 485 * 486 * XXX: We toss any pending asynchronous records. 487 */ 488void 489ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp) 490{ 491 492 PROC_LOCK_ASSERT(p, MA_OWNED); 493 mtx_lock(&ktrace_mtx); 494 ktr_freeproc(p, uc, vp); 495 mtx_unlock(&ktrace_mtx); 496} 497 498/* 499 * When a process exits, drain per-process asynchronous trace records 500 * and disable tracing. 501 */ 502void 503ktrprocexit(struct thread *td) 504{ 505 struct ktr_request *req; 506 struct proc *p; 507 struct ucred *cred; 508 struct vnode *vp; 509 int vfslocked; 510 511 p = td->td_proc; 512 if (p->p_traceflag == 0) 513 return; 514 515 ktrace_enter(td); 516 req = ktr_getrequest_entered(td, KTR_PROCDTOR); 517 if (req != NULL) 518 ktr_enqueuerequest(td, req); 519 sx_xlock(&ktrace_sx); 520 ktr_drain(td); 521 sx_xunlock(&ktrace_sx); 522 PROC_LOCK(p); 523 mtx_lock(&ktrace_mtx); 524 ktr_freeproc(p, &cred, &vp); 525 mtx_unlock(&ktrace_mtx); 526 PROC_UNLOCK(p); 527 if (vp != NULL) { 528 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 529 vrele(vp); 530 VFS_UNLOCK_GIANT(vfslocked); 531 } 532 if (cred != NULL) 533 crfree(cred); 534 ktrace_exit(td); 535} 536 537static void 538ktrprocctor_entered(struct thread *td, struct proc *p) 539{ 540 struct ktr_proc_ctor *ktp; 541 struct ktr_request *req; 542 struct thread *td2; 543 544 ktrace_assert(td); 545 td2 = FIRST_THREAD_IN_PROC(p); 546 req = ktr_getrequest_entered(td2, KTR_PROCCTOR); 547 if (req == NULL) 548 return; 549 ktp = &req->ktr_data.ktr_proc_ctor; 550 ktp->sv_flags = p->p_sysent->sv_flags; 551 ktr_enqueuerequest(td2, req); 552} 553 554void 555ktrprocctor(struct proc *p) 556{ 557 struct thread *td = curthread; 558 559 if ((p->p_traceflag & KTRFAC_MASK) == 0) 560 return; 561 562 ktrace_enter(td); 563 ktrprocctor_entered(td, p); 564 ktrace_exit(td); 565} 566 567/* 568 * When a process forks, enable tracing in the new process if needed. 569 */ 570void 571ktrprocfork(struct proc *p1, struct proc *p2) 572{ 573 574 PROC_LOCK(p1); 575 mtx_lock(&ktrace_mtx); 576 KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode")); 577 if (p1->p_traceflag & KTRFAC_INHERIT) { 578 p2->p_traceflag = p1->p_traceflag; 579 if ((p2->p_tracevp = p1->p_tracevp) != NULL) { 580 VREF(p2->p_tracevp); 581 KASSERT(p1->p_tracecred != NULL, 582 ("ktrace vnode with no cred")); 583 p2->p_tracecred = crhold(p1->p_tracecred); 584 } 585 } 586 mtx_unlock(&ktrace_mtx); 587 PROC_UNLOCK(p1); 588 589 ktrprocctor(p2); 590} 591 592/* 593 * When a thread returns, drain any asynchronous records generated by the 594 * system call. 595 */ 596void 597ktruserret(struct thread *td) 598{ 599 600 ktrace_enter(td); 601 sx_xlock(&ktrace_sx); 602 ktr_drain(td); 603 sx_xunlock(&ktrace_sx); 604 ktrace_exit(td); 605} 606 607void 608ktrnamei(path) 609 char *path; 610{ 611 struct ktr_request *req; 612 int namelen; 613 char *buf = NULL; 614 615 namelen = strlen(path); 616 if (namelen > 0) { 617 buf = malloc(namelen, M_KTRACE, M_WAITOK); 618 bcopy(path, buf, namelen); 619 } 620 req = ktr_getrequest(KTR_NAMEI); 621 if (req == NULL) { 622 if (buf != NULL) 623 free(buf, M_KTRACE); 624 return; 625 } 626 if (namelen > 0) { 627 req->ktr_header.ktr_len = namelen; 628 req->ktr_buffer = buf; 629 } 630 ktr_submitrequest(curthread, req); 631} 632 633void 634ktrsysctl(name, namelen) 635 int *name; 636 u_int namelen; 637{ 638 struct ktr_request *req; 639 u_int mib[CTL_MAXNAME + 2]; 640 char *mibname; 641 size_t mibnamelen; 642 int error; 643 644 /* Lookup name of mib. */ 645 KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long")); 646 mib[0] = 0; 647 mib[1] = 1; 648 bcopy(name, mib + 2, namelen * sizeof(*name)); 649 mibnamelen = 128; 650 mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK); 651 error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen, 652 NULL, 0, &mibnamelen, 0); 653 if (error) { 654 free(mibname, M_KTRACE); 655 return; 656 } 657 req = ktr_getrequest(KTR_SYSCTL); 658 if (req == NULL) { 659 free(mibname, M_KTRACE); 660 return; 661 } 662 req->ktr_header.ktr_len = mibnamelen; 663 req->ktr_buffer = mibname; 664 ktr_submitrequest(curthread, req); 665} 666 667void 668ktrgenio(fd, rw, uio, error) 669 int fd; 670 enum uio_rw rw; 671 struct uio *uio; 672 int error; 673{ 674 struct ktr_request *req; 675 struct ktr_genio *ktg; 676 int datalen; 677 char *buf; 678 679 if (error) { 680 free(uio, M_IOV); 681 return; 682 } 683 uio->uio_offset = 0; 684 uio->uio_rw = UIO_WRITE; 685 datalen = imin(uio->uio_resid, ktr_geniosize); 686 buf = malloc(datalen, M_KTRACE, M_WAITOK); 687 error = uiomove(buf, datalen, uio); 688 free(uio, M_IOV); 689 if (error) { 690 free(buf, M_KTRACE); 691 return; 692 } 693 req = ktr_getrequest(KTR_GENIO); 694 if (req == NULL) { 695 free(buf, M_KTRACE); 696 return; 697 } 698 ktg = &req->ktr_data.ktr_genio; 699 ktg->ktr_fd = fd; 700 ktg->ktr_rw = rw; 701 req->ktr_header.ktr_len = datalen; 702 req->ktr_buffer = buf; 703 ktr_submitrequest(curthread, req); 704} 705 706void 707ktrpsig(sig, action, mask, code) 708 int sig; 709 sig_t action; 710 sigset_t *mask; 711 int code; 712{ 713 struct thread *td = curthread; 714 struct ktr_request *req; 715 struct ktr_psig *kp; 716 717 req = ktr_getrequest(KTR_PSIG); 718 if (req == NULL) 719 return; 720 kp = &req->ktr_data.ktr_psig; 721 kp->signo = (char)sig; 722 kp->action = action; 723 kp->mask = *mask; 724 kp->code = code; 725 ktr_enqueuerequest(td, req); 726 ktrace_exit(td); 727} 728 729void 730ktrcsw(out, user) 731 int out, user; 732{ 733 struct thread *td = curthread; 734 struct ktr_request *req; 735 struct ktr_csw *kc; 736 737 req = ktr_getrequest(KTR_CSW); 738 if (req == NULL) 739 return; 740 kc = &req->ktr_data.ktr_csw; 741 kc->out = out; 742 kc->user = user; 743 ktr_enqueuerequest(td, req); 744 ktrace_exit(td); 745} 746 747void 748ktrstruct(name, data, datalen) 749 const char *name; 750 void *data; 751 size_t datalen; 752{ 753 struct ktr_request *req; 754 char *buf = NULL; 755 size_t buflen; 756 757 if (!data) 758 datalen = 0; 759 buflen = strlen(name) + 1 + datalen; 760 buf = malloc(buflen, M_KTRACE, M_WAITOK); 761 strcpy(buf, name); 762 bcopy(data, buf + strlen(name) + 1, datalen); 763 if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) { 764 free(buf, M_KTRACE); 765 return; 766 } 767 req->ktr_buffer = buf; 768 req->ktr_header.ktr_len = buflen; 769 ktr_submitrequest(curthread, req); 770} 771#endif /* KTRACE */ 772 773/* Interface and common routines */ 774 775#ifndef _SYS_SYSPROTO_H_ 776struct ktrace_args { 777 char *fname; 778 int ops; 779 int facs; 780 int pid; 781}; 782#endif 783/* ARGSUSED */ 784int 785sys_ktrace(td, uap) 786 struct thread *td; 787 register struct ktrace_args *uap; 788{ 789#ifdef KTRACE 790 register struct vnode *vp = NULL; 791 register struct proc *p; 792 struct pgrp *pg; 793 int facs = uap->facs & ~KTRFAC_ROOT; 794 int ops = KTROP(uap->ops); 795 int descend = uap->ops & KTRFLAG_DESCEND; 796 int nfound, ret = 0; 797 int flags, error = 0, vfslocked; 798 struct nameidata nd; 799 struct ucred *cred; 800 801 /* 802 * Need something to (un)trace. 803 */ 804 if (ops != KTROP_CLEARFILE && facs == 0) 805 return (EINVAL); 806 807 ktrace_enter(td); 808 if (ops != KTROP_CLEAR) { 809 /* 810 * an operation which requires a file argument. 811 */ 812 NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, 813 uap->fname, td); 814 flags = FREAD | FWRITE | O_NOFOLLOW; 815 error = vn_open(&nd, &flags, 0, NULL); 816 if (error) { 817 ktrace_exit(td); 818 return (error); 819 } 820 vfslocked = NDHASGIANT(&nd); 821 NDFREE(&nd, NDF_ONLY_PNBUF); 822 vp = nd.ni_vp; 823 VOP_UNLOCK(vp, 0); 824 if (vp->v_type != VREG) { 825 (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td); 826 VFS_UNLOCK_GIANT(vfslocked); 827 ktrace_exit(td); 828 return (EACCES); 829 } 830 VFS_UNLOCK_GIANT(vfslocked); 831 } 832 /* 833 * Clear all uses of the tracefile. 834 */ 835 if (ops == KTROP_CLEARFILE) { 836 int vrele_count; 837 838 vrele_count = 0; 839 sx_slock(&allproc_lock); 840 FOREACH_PROC_IN_SYSTEM(p) { 841 PROC_LOCK(p); 842 if (p->p_tracevp == vp) { 843 if (ktrcanset(td, p)) { 844 mtx_lock(&ktrace_mtx); 845 ktr_freeproc(p, &cred, NULL); 846 mtx_unlock(&ktrace_mtx); 847 vrele_count++; 848 crfree(cred); 849 } else 850 error = EPERM; 851 } 852 PROC_UNLOCK(p); 853 } 854 sx_sunlock(&allproc_lock); 855 if (vrele_count > 0) { 856 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 857 while (vrele_count-- > 0) 858 vrele(vp); 859 VFS_UNLOCK_GIANT(vfslocked); 860 } 861 goto done; 862 } 863 /* 864 * do it 865 */ 866 sx_slock(&proctree_lock); 867 if (uap->pid < 0) { 868 /* 869 * by process group 870 */ 871 pg = pgfind(-uap->pid); 872 if (pg == NULL) { 873 sx_sunlock(&proctree_lock); 874 error = ESRCH; 875 goto done; 876 } 877 /* 878 * ktrops() may call vrele(). Lock pg_members 879 * by the proctree_lock rather than pg_mtx. 880 */ 881 PGRP_UNLOCK(pg); 882 nfound = 0; 883 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 884 PROC_LOCK(p); 885 if (p->p_state == PRS_NEW || 886 p_cansee(td, p) != 0) { 887 PROC_UNLOCK(p); 888 continue; 889 } 890 nfound++; 891 if (descend) 892 ret |= ktrsetchildren(td, p, ops, facs, vp); 893 else 894 ret |= ktrops(td, p, ops, facs, vp); 895 } 896 if (nfound == 0) { 897 sx_sunlock(&proctree_lock); 898 error = ESRCH; 899 goto done; 900 } 901 } else { 902 /* 903 * by pid 904 */ 905 p = pfind(uap->pid); 906 if (p == NULL) 907 error = ESRCH; 908 else 909 error = p_cansee(td, p); 910 if (error) { 911 if (p != NULL) 912 PROC_UNLOCK(p); 913 sx_sunlock(&proctree_lock); 914 goto done; 915 } 916 if (descend) 917 ret |= ktrsetchildren(td, p, ops, facs, vp); 918 else 919 ret |= ktrops(td, p, ops, facs, vp); 920 } 921 sx_sunlock(&proctree_lock); 922 if (!ret) 923 error = EPERM; 924done: 925 if (vp != NULL) { 926 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 927 (void) vn_close(vp, FWRITE, td->td_ucred, td); 928 VFS_UNLOCK_GIANT(vfslocked); 929 } 930 ktrace_exit(td); 931 return (error); 932#else /* !KTRACE */ 933 return (ENOSYS); 934#endif /* KTRACE */ 935} 936 937/* ARGSUSED */ 938int 939sys_utrace(td, uap) 940 struct thread *td; 941 register struct utrace_args *uap; 942{ 943 944#ifdef KTRACE 945 struct ktr_request *req; 946 void *cp; 947 int error; 948 949 if (!KTRPOINT(td, KTR_USER)) 950 return (0); 951 if (uap->len > KTR_USER_MAXLEN) 952 return (EINVAL); 953 cp = malloc(uap->len, M_KTRACE, M_WAITOK); 954 error = copyin(uap->addr, cp, uap->len); 955 if (error) { 956 free(cp, M_KTRACE); 957 return (error); 958 } 959 req = ktr_getrequest(KTR_USER); 960 if (req == NULL) { 961 free(cp, M_KTRACE); 962 return (ENOMEM); 963 } 964 req->ktr_buffer = cp; 965 req->ktr_header.ktr_len = uap->len; 966 ktr_submitrequest(td, req); 967 return (0); 968#else /* !KTRACE */ 969 return (ENOSYS); 970#endif /* KTRACE */ 971} 972 973#ifdef KTRACE 974static int 975ktrops(td, p, ops, facs, vp) 976 struct thread *td; 977 struct proc *p; 978 int ops, facs; 979 struct vnode *vp; 980{ 981 struct vnode *tracevp = NULL; 982 struct ucred *tracecred = NULL; 983 984 PROC_LOCK_ASSERT(p, MA_OWNED); 985 if (!ktrcanset(td, p)) { 986 PROC_UNLOCK(p); 987 return (0); 988 } 989 if (p->p_flag & P_WEXIT) { 990 /* If the process is exiting, just ignore it. */ 991 PROC_UNLOCK(p); 992 return (1); 993 } 994 mtx_lock(&ktrace_mtx); 995 if (ops == KTROP_SET) { 996 if (p->p_tracevp != vp) { 997 /* 998 * if trace file already in use, relinquish below 999 */ 1000 tracevp = p->p_tracevp; 1001 VREF(vp); 1002 p->p_tracevp = vp; 1003 } 1004 if (p->p_tracecred != td->td_ucred) { 1005 tracecred = p->p_tracecred; 1006 p->p_tracecred = crhold(td->td_ucred); 1007 } 1008 p->p_traceflag |= facs; 1009 if (priv_check(td, PRIV_KTRACE) == 0) 1010 p->p_traceflag |= KTRFAC_ROOT; 1011 } else { 1012 /* KTROP_CLEAR */ 1013 if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) 1014 /* no more tracing */ 1015 ktr_freeproc(p, &tracecred, &tracevp); 1016 } 1017 mtx_unlock(&ktrace_mtx); 1018 if ((p->p_traceflag & KTRFAC_MASK) != 0) 1019 ktrprocctor_entered(td, p); 1020 PROC_UNLOCK(p); 1021 if (tracevp != NULL) { 1022 int vfslocked; 1023 1024 vfslocked = VFS_LOCK_GIANT(tracevp->v_mount); 1025 vrele(tracevp); 1026 VFS_UNLOCK_GIANT(vfslocked); 1027 } 1028 if (tracecred != NULL) 1029 crfree(tracecred); 1030 1031 return (1); 1032} 1033 1034static int 1035ktrsetchildren(td, top, ops, facs, vp) 1036 struct thread *td; 1037 struct proc *top; 1038 int ops, facs; 1039 struct vnode *vp; 1040{ 1041 register struct proc *p; 1042 register int ret = 0; 1043 1044 p = top; 1045 PROC_LOCK_ASSERT(p, MA_OWNED); 1046 sx_assert(&proctree_lock, SX_LOCKED); 1047 for (;;) { 1048 ret |= ktrops(td, p, ops, facs, vp); 1049 /* 1050 * If this process has children, descend to them next, 1051 * otherwise do any siblings, and if done with this level, 1052 * follow back up the tree (but not past top). 1053 */ 1054 if (!LIST_EMPTY(&p->p_children)) 1055 p = LIST_FIRST(&p->p_children); 1056 else for (;;) { 1057 if (p == top) 1058 return (ret); 1059 if (LIST_NEXT(p, p_sibling)) { 1060 p = LIST_NEXT(p, p_sibling); 1061 break; 1062 } 1063 p = p->p_pptr; 1064 } 1065 PROC_LOCK(p); 1066 } 1067 /*NOTREACHED*/ 1068} 1069 1070static void 1071ktr_writerequest(struct thread *td, struct ktr_request *req) 1072{ 1073 struct ktr_header *kth; 1074 struct vnode *vp; 1075 struct proc *p; 1076 struct ucred *cred; 1077 struct uio auio; 1078 struct iovec aiov[3]; 1079 struct mount *mp; 1080 int datalen, buflen, vrele_count; 1081 int error, vfslocked; 1082 1083 /* 1084 * We hold the vnode and credential for use in I/O in case ktrace is 1085 * disabled on the process as we write out the request. 1086 * 1087 * XXXRW: This is not ideal: we could end up performing a write after 1088 * the vnode has been closed. 1089 */ 1090 mtx_lock(&ktrace_mtx); 1091 vp = td->td_proc->p_tracevp; 1092 cred = td->td_proc->p_tracecred; 1093 1094 /* 1095 * If vp is NULL, the vp has been cleared out from under this 1096 * request, so just drop it. Make sure the credential and vnode are 1097 * in sync: we should have both or neither. 1098 */ 1099 if (vp == NULL) { 1100 KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL")); 1101 mtx_unlock(&ktrace_mtx); 1102 return; 1103 } 1104 VREF(vp); 1105 KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL")); 1106 crhold(cred); 1107 mtx_unlock(&ktrace_mtx); 1108 1109 kth = &req->ktr_header; 1110 KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) < 1111 sizeof(data_lengths) / sizeof(data_lengths[0]), 1112 ("data_lengths array overflow")); 1113 datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP]; 1114 buflen = kth->ktr_len; 1115 auio.uio_iov = &aiov[0]; 1116 auio.uio_offset = 0; 1117 auio.uio_segflg = UIO_SYSSPACE; 1118 auio.uio_rw = UIO_WRITE; 1119 aiov[0].iov_base = (caddr_t)kth; 1120 aiov[0].iov_len = sizeof(struct ktr_header); 1121 auio.uio_resid = sizeof(struct ktr_header); 1122 auio.uio_iovcnt = 1; 1123 auio.uio_td = td; 1124 if (datalen != 0) { 1125 aiov[1].iov_base = (caddr_t)&req->ktr_data; 1126 aiov[1].iov_len = datalen; 1127 auio.uio_resid += datalen; 1128 auio.uio_iovcnt++; 1129 kth->ktr_len += datalen; 1130 } 1131 if (buflen != 0) { 1132 KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write")); 1133 aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer; 1134 aiov[auio.uio_iovcnt].iov_len = buflen; 1135 auio.uio_resid += buflen; 1136 auio.uio_iovcnt++; 1137 } 1138 1139 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1140 vn_start_write(vp, &mp, V_WAIT); 1141 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1142#ifdef MAC 1143 error = mac_vnode_check_write(cred, NOCRED, vp); 1144 if (error == 0) 1145#endif 1146 error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred); 1147 VOP_UNLOCK(vp, 0); 1148 vn_finished_write(mp); 1149 crfree(cred); 1150 if (!error) { 1151 vrele(vp); 1152 VFS_UNLOCK_GIANT(vfslocked); 1153 return; 1154 } 1155 VFS_UNLOCK_GIANT(vfslocked); 1156 1157 /* 1158 * If error encountered, give up tracing on this vnode. We defer 1159 * all the vrele()'s on the vnode until after we are finished walking 1160 * the various lists to avoid needlessly holding locks. 1161 * NB: at this point we still hold the vnode reference that must 1162 * not go away as we need the valid vnode to compare with. Thus let 1163 * vrele_count start at 1 and the reference will be freed 1164 * by the loop at the end after our last use of vp. 1165 */ 1166 log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n", 1167 error); 1168 vrele_count = 1; 1169 /* 1170 * First, clear this vnode from being used by any processes in the 1171 * system. 1172 * XXX - If one process gets an EPERM writing to the vnode, should 1173 * we really do this? Other processes might have suitable 1174 * credentials for the operation. 1175 */ 1176 cred = NULL; 1177 sx_slock(&allproc_lock); 1178 FOREACH_PROC_IN_SYSTEM(p) { 1179 PROC_LOCK(p); 1180 if (p->p_tracevp == vp) { 1181 mtx_lock(&ktrace_mtx); 1182 ktr_freeproc(p, &cred, NULL); 1183 mtx_unlock(&ktrace_mtx); 1184 vrele_count++; 1185 } 1186 PROC_UNLOCK(p); 1187 if (cred != NULL) { 1188 crfree(cred); 1189 cred = NULL; 1190 } 1191 } 1192 sx_sunlock(&allproc_lock); 1193 1194 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1195 while (vrele_count-- > 0) 1196 vrele(vp); 1197 VFS_UNLOCK_GIANT(vfslocked); 1198} 1199 1200/* 1201 * Return true if caller has permission to set the ktracing state 1202 * of target. Essentially, the target can't possess any 1203 * more permissions than the caller. KTRFAC_ROOT signifies that 1204 * root previously set the tracing status on the target process, and 1205 * so, only root may further change it. 1206 */ 1207static int 1208ktrcanset(td, targetp) 1209 struct thread *td; 1210 struct proc *targetp; 1211{ 1212 1213 PROC_LOCK_ASSERT(targetp, MA_OWNED); 1214 if (targetp->p_traceflag & KTRFAC_ROOT && 1215 priv_check(td, PRIV_KTRACE)) 1216 return (0); 1217 1218 if (p_candebug(td, targetp) != 0) 1219 return (0); 1220 1221 return (1); 1222} 1223 1224#endif /* KTRACE */ 1225