1139804Simp/*-
21541Srgrimes * Copyright (c) 1989, 1993
3152376Srwatson *	The Regents of the University of California.
4152376Srwatson * Copyright (c) 2005 Robert N. M. Watson
5152376Srwatson * All rights reserved.
61541Srgrimes *
71541Srgrimes * Redistribution and use in source and binary forms, with or without
81541Srgrimes * modification, are permitted provided that the following conditions
91541Srgrimes * are met:
101541Srgrimes * 1. Redistributions of source code must retain the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer.
121541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
131541Srgrimes *    notice, this list of conditions and the following disclaimer in the
141541Srgrimes *    documentation and/or other materials provided with the distribution.
151541Srgrimes * 4. Neither the name of the University nor the names of its contributors
161541Srgrimes *    may be used to endorse or promote products derived from this software
171541Srgrimes *    without specific prior written permission.
181541Srgrimes *
191541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
201541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
211541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
221541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
231541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
241541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
251541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
261541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
271541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
281541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
291541Srgrimes * SUCH DAMAGE.
301541Srgrimes *
311541Srgrimes *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
321541Srgrimes */
331541Srgrimes
34116182Sobrien#include <sys/cdefs.h>
35116182Sobrien__FBSDID("$FreeBSD$");
36116182Sobrien
3713203Swollman#include "opt_ktrace.h"
381541Srgrimes
391541Srgrimes#include <sys/param.h>
40255677Spjd#include <sys/capability.h>
412112Swollman#include <sys/systm.h>
4297993Sjhb#include <sys/fcntl.h>
4397993Sjhb#include <sys/kernel.h>
4497993Sjhb#include <sys/kthread.h>
4576166Smarkm#include <sys/lock.h>
4676166Smarkm#include <sys/mutex.h>
4797993Sjhb#include <sys/malloc.h>
48155031Sjeff#include <sys/mount.h>
4997993Sjhb#include <sys/namei.h>
50164033Srwatson#include <sys/priv.h>
511541Srgrimes#include <sys/proc.h>
5297993Sjhb#include <sys/unistd.h>
531541Srgrimes#include <sys/vnode.h>
54176471Sdes#include <sys/socket.h>
55176471Sdes#include <sys/stat.h>
561541Srgrimes#include <sys/ktrace.h>
5774927Sjhb#include <sys/sx.h>
5897993Sjhb#include <sys/sysctl.h>
59219042Sdchagin#include <sys/sysent.h>
601541Srgrimes#include <sys/syslog.h>
6197993Sjhb#include <sys/sysproto.h>
621541Srgrimes
63163606Srwatson#include <security/mac/mac_framework.h>
64163606Srwatson
65152376Srwatson/*
66152376Srwatson * The ktrace facility allows the tracing of certain key events in user space
67152376Srwatson * processes, such as system calls, signal delivery, context switches, and
68152376Srwatson * user generated events using utrace(2).  It works by streaming event
69152376Srwatson * records and data to a vnode associated with the process using the
70152376Srwatson * ktrace(2) system call.  In general, records can be written directly from
71152376Srwatson * the context that generates the event.  One important exception to this is
72152376Srwatson * during a context switch, where sleeping is not permitted.  To handle this
73152376Srwatson * case, trace events are generated using in-kernel ktr_request records, and
74152376Srwatson * then delivered to disk at a convenient moment -- either immediately, the
75152376Srwatson * next traceable event, at system call return, or at process exit.
76152376Srwatson *
77152376Srwatson * When dealing with multiple threads or processes writing to the same event
78152376Srwatson * log, ordering guarantees are weak: specifically, if an event has multiple
79152376Srwatson * records (i.e., system call enter and return), they may be interlaced with
80152376Srwatson * records from another event.  Process and thread ID information is provided
81152376Srwatson * in the record, and user applications can de-interlace events if required.
82152376Srwatson */
83152376Srwatson
8430354Sphkstatic MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
8530309Sphk
8613203Swollman#ifdef KTRACE
8712577Sbde
88219028SnetchildFEATURE(ktrace, "Kernel support for system-call tracing");
89219028Snetchild
9097993Sjhb#ifndef KTRACE_REQUEST_POOL
9197993Sjhb#define	KTRACE_REQUEST_POOL	100
9297993Sjhb#endif
9312819Sphk
/*
 * One in-kernel trace record.  A common header is followed by a union
 * of the fixed-size per-type payloads; variable-length data (syscall
 * arguments, I/O bytes, pathnames, ...) hangs off ktr_buffer.  Records
 * are linked through ktr_list onto either the global free pool
 * (ktr_free) or a per-process pending queue (p_ktr).
 */
struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;	/* malloc'd variable-length payload or NULL */
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
		struct	ktr_fault ktr_fault;
		struct	ktr_faultend ktr_faultend;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};
11097993Sjhb
/*
 * Fixed payload length recorded for each KTR_* record type, indexed by
 * type via designated initializers.  A zero entry means the type
 * carries only variable-length data (in ktr_buffer), if any.
 */
static int data_lengths[] = {
	[KTR_SYSCALL] = offsetof(struct ktr_syscall, ktr_args),
	[KTR_SYSRET] = sizeof(struct ktr_sysret),
	[KTR_NAMEI] = 0,
	[KTR_GENIO] = sizeof(struct ktr_genio),
	[KTR_PSIG] = sizeof(struct ktr_psig),
	[KTR_CSW] = sizeof(struct ktr_csw),
	[KTR_USER] = 0,
	[KTR_STRUCT] = 0,
	[KTR_SYSCTL] = 0,
	[KTR_PROCCTOR] = sizeof(struct ktr_proc_ctor),
	[KTR_PROCDTOR] = 0,
	[KTR_CAPFAIL] = sizeof(struct ktr_cap_fail),
	[KTR_FAULT] = sizeof(struct ktr_fault),
	[KTR_FAULTEND] = sizeof(struct ktr_faultend),
};
12797993Sjhb
12897993Sjhbstatic STAILQ_HEAD(, ktr_request) ktr_free;
12997993Sjhb
130141633Sphkstatic SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");
131103234Sjhb
132118607Sjhbstatic u_int ktr_requestpool = KTRACE_REQUEST_POOL;
133103234SjhbTUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);
13497993Sjhb
135118607Sjhbstatic u_int ktr_geniosize = PAGE_SIZE;
136103234SjhbTUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
137103234SjhbSYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
138103234Sjhb    0, "Maximum size of genio event payload");
139103234Sjhb
14097993Sjhbstatic int print_message = 1;
141214158Sjhbstatic struct mtx ktrace_mtx;
142152376Srwatsonstatic struct sx ktrace_sx;
14397993Sjhb
14497993Sjhbstatic void ktrace_init(void *dummy);
14597993Sjhbstatic int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
146219041Sdchaginstatic u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
147219311Sdchaginstatic struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
14897993Sjhbstatic struct ktr_request *ktr_getrequest(int type);
149152376Srwatsonstatic void ktr_submitrequest(struct thread *td, struct ktr_request *req);
150214158Sjhbstatic void ktr_freeproc(struct proc *p, struct ucred **uc,
151214158Sjhb    struct vnode **vp);
15297993Sjhbstatic void ktr_freerequest(struct ktr_request *req);
153214158Sjhbstatic void ktr_freerequest_locked(struct ktr_request *req);
154152376Srwatsonstatic void ktr_writerequest(struct thread *td, struct ktr_request *req);
15597993Sjhbstatic int ktrcanset(struct thread *,struct proc *);
15697993Sjhbstatic int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
15797993Sjhbstatic int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
158219311Sdchaginstatic void ktrprocctor_entered(struct thread *, struct proc *);
15997993Sjhb
160152376Srwatson/*
161152376Srwatson * ktrace itself generates events, such as context switches, which we do not
162152376Srwatson * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
163152376Srwatson * whether or not it is in a region where tracing of events should be
164152376Srwatson * suppressed.
165152376Srwatson */
16697993Sjhbstatic void
167152376Srwatsonktrace_enter(struct thread *td)
168152376Srwatson{
169152376Srwatson
170152376Srwatson	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
171152376Srwatson	td->td_pflags |= TDP_INKTRACE;
172152376Srwatson}
173152376Srwatson
174152376Srwatsonstatic void
175152376Srwatsonktrace_exit(struct thread *td)
176152376Srwatson{
177152376Srwatson
178152376Srwatson	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
179152376Srwatson	td->td_pflags &= ~TDP_INKTRACE;
180152376Srwatson}
181152376Srwatson
182152376Srwatsonstatic void
183152376Srwatsonktrace_assert(struct thread *td)
184152376Srwatson{
185152376Srwatson
186152376Srwatson	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
187152376Srwatson}
188152376Srwatson
/*
 * Boot-time initialization: set up the mutex protecting the request
 * pool and per-process queues, the sx lock serializing vnode writes,
 * and pre-allocate ktr_requestpool (tunable) request structures onto
 * the free list.
 */
static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	/* MTX_QUIET: don't generate KTR lock events for our own mutex. */
	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
20397993SjhbSYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
2041541Srgrimes
/*
 * Sysctl handler for kern.ktrace.request_pool: read returns the
 * current pool size; write resizes the pool via ktrace_resize_pool()
 * and reports the old size back to the caller.
 */
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	/* Keep our own malloc/free activity from being traced. */
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	/* Report ENOSPC only when a requested grow fell short. */
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
233103234SjhbSYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
234211102Sgavin    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
235211102Sgavin    "Pool buffer size for ktrace(1)");
23697993Sjhb
/*
 * Grow or shrink the request free pool from oldsize to newsize and
 * return the resulting pool size (ktr_requestpool).  For a grow, the
 * M_WAITOK allocations are done on a private list outside ktrace_mtx
 * (malloc may sleep) and concatenated under the lock; a shrink stops
 * early once the free list is exhausted.
 */
static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	/* Re-arm the "out of objects" console message. */
	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}
27497993Sjhb
275198411Sjhb/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
276198411SjhbCTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
277198411Sjhb    (sizeof((struct thread *)NULL)->td_name));
278198411Sjhb
/*
 * Allocate a trace request of the given type from the free pool and
 * fill in the common header (type, timestamp, pid, tid, comm).
 * Returns NULL if this type is not being traced for the process or if
 * the pool is empty; in the latter case KTRFAC_DROP is set so the next
 * successful record is flagged KTR_DROP.  Caller must already be in a
 * ktrace region (ktrace_enter).
 */
static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		/* Note any records dropped since the last success. */
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		/* Print the warning only once until the pool recovers. */
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}
31797993Sjhb
318219042Sdchaginstatic struct ktr_request *
319219042Sdchaginktr_getrequest(int type)
320219042Sdchagin{
321219042Sdchagin	struct thread *td = curthread;
322219042Sdchagin	struct ktr_request *req;
323219042Sdchagin
324219042Sdchagin	ktrace_enter(td);
325219311Sdchagin	req = ktr_getrequest_entered(td, type);
326219042Sdchagin	if (req == NULL)
327219042Sdchagin		ktrace_exit(td);
328219042Sdchagin
329219042Sdchagin	return (req);
330219042Sdchagin}
331219042Sdchagin
332152376Srwatson/*
333152376Srwatson * Some trace generation environments don't permit direct access to VFS,
334152376Srwatson * such as during a context switch where sleeping is not allowed.  Under these
335152376Srwatson * circumstances, queue a request to the thread to be written asynchronously
336152376Srwatson * later.
337152376Srwatson */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	/* ktrace_mtx protects the per-process p_ktr pending queue. */
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}
34697993Sjhb
347152376Srwatson/*
348152376Srwatson * Drain any pending ktrace records from the per-thread queue to disk.  This
349152376Srwatson * is used both internally before committing other records, and also on
350152376Srwatson * system call return.  We drain all the ones we can find at the time when
351152376Srwatson * drain is requested, but don't keep draining after that as those events
352189707Sjhb * may be approximately "after" the current event.
353152376Srwatson */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		/* Steal the entire pending queue in one lock hold. */
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		/* Write and recycle each record without ktrace_mtx held. */
		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}
377152376Srwatson
378152376Srwatson/*
379152376Srwatson * Submit a trace record for immediate commit to disk -- to be used only
380152376Srwatson * where entering VFS is OK.  First drain any pending records that may have
381152376Srwatson * been cached in the thread.
382152376Srwatson */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	/*
	 * Serialize vnode writes; flush older queued records first so
	 * this record lands after them in the output file.  Exits the
	 * ktrace region entered by ktr_getrequest().
	 */
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}
396152376Srwatson
/*
 * Return a request (and any attached variable-length buffer) to the
 * free pool; unlocked wrapper around ktr_freerequest_locked().
 */
static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}
405214158Sjhb
/*
 * Free a request's payload buffer and put the request back on the free
 * list.  Caller must hold ktrace_mtx.
 */
static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}
41597993Sjhb
416214158Sjhb/*
417214158Sjhb * Disable tracing for a process and release all associated resources.
418214158Sjhb * The caller is responsible for releasing a reference on the returned
419214158Sjhb * vnode and credentials.
420214158Sjhb */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	/*
	 * Hand the credential (and vnode, if the caller asked for it)
	 * back; the caller drops those references after unlocking,
	 * since vrele()/crfree() must not run under these locks.
	 */
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	/* Discard any asynchronous records still queued on the process. */
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}
439214158Sjhb
4401549Srgrimesvoid
44197993Sjhbktrsyscall(code, narg, args)
44247955Sdt	int code, narg;
44347955Sdt	register_t args[];
4441541Srgrimes{
44597993Sjhb	struct ktr_request *req;
44697993Sjhb	struct ktr_syscall *ktp;
44797993Sjhb	size_t buflen;
448103233Sjhb	char *buf = NULL;
4491541Srgrimes
450103233Sjhb	buflen = sizeof(register_t) * narg;
451103233Sjhb	if (buflen > 0) {
452111119Simp		buf = malloc(buflen, M_KTRACE, M_WAITOK);
453103233Sjhb		bcopy(args, buf, buflen);
454103233Sjhb	}
45597993Sjhb	req = ktr_getrequest(KTR_SYSCALL);
456104230Sphk	if (req == NULL) {
457104230Sphk		if (buf != NULL)
458104230Sphk			free(buf, M_KTRACE);
45997993Sjhb		return;
460104230Sphk	}
46197993Sjhb	ktp = &req->ktr_data.ktr_syscall;
4621541Srgrimes	ktp->ktr_code = code;
4631541Srgrimes	ktp->ktr_narg = narg;
46497993Sjhb	if (buflen > 0) {
46597993Sjhb		req->ktr_header.ktr_len = buflen;
466151927Srwatson		req->ktr_buffer = buf;
46797993Sjhb	}
468152376Srwatson	ktr_submitrequest(curthread, req);
4691541Srgrimes}
4701541Srgrimes
4711549Srgrimesvoid
47297993Sjhbktrsysret(code, error, retval)
47347955Sdt	int code, error;
47447955Sdt	register_t retval;
4751541Srgrimes{
47697993Sjhb	struct ktr_request *req;
47797993Sjhb	struct ktr_sysret *ktp;
4781541Srgrimes
47997993Sjhb	req = ktr_getrequest(KTR_SYSRET);
48097993Sjhb	if (req == NULL)
48197993Sjhb		return;
48297993Sjhb	ktp = &req->ktr_data.ktr_sysret;
48397993Sjhb	ktp->ktr_code = code;
48497993Sjhb	ktp->ktr_error = error;
485228343Seadler	ktp->ktr_retval = ((error == 0) ? retval: 0);		/* what about val2 ? */
486152376Srwatson	ktr_submitrequest(curthread, req);
4871541Srgrimes}
4881541Srgrimes
489152376Srwatson/*
490214158Sjhb * When a setuid process execs, disable tracing.
491214158Sjhb *
492214158Sjhb * XXX: We toss any pending asynchronous records.
493152376Srwatson */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	/* Caller must drop the returned vnode/credential references. */
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}
503214158Sjhb
504214158Sjhb/*
505214158Sjhb * When a process exits, drain per-process asynchronous trace records
506214158Sjhb * and disable tracing.
507214158Sjhb */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	/* Queue a final PROCDTOR record, then flush everything pending. */
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	/* Drop the references handed back by ktr_freeproc(). */
	if (vp != NULL)
		vrele(vp);
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}
538152376Srwatson
/*
 * Queue a KTR_PROCCTOR record describing the process ABI (sv_flags).
 * The record is attributed to the first thread of the target process,
 * which may differ from the calling thread.  Caller must already be in
 * a ktrace region.
 */
static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}
555219042Sdchagin
556219042Sdchaginvoid
557219042Sdchaginktrprocctor(struct proc *p)
558219042Sdchagin{
559219042Sdchagin	struct thread *td = curthread;
560219042Sdchagin
561219042Sdchagin	if ((p->p_traceflag & KTRFAC_MASK) == 0)
562219042Sdchagin		return;
563219042Sdchagin
564219042Sdchagin	ktrace_enter(td);
565219311Sdchagin	ktrprocctor_entered(td, p);
566219042Sdchagin	ktrace_exit(td);
567219042Sdchagin}
568219042Sdchagin
569152376Srwatson/*
570214158Sjhb * When a process forks, enable tracing in the new process if needed.
571214158Sjhb */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		/* Child inherits trace flags, vnode, and credential. */
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	/* Emit the initial PROCCTOR record for the child, if traced. */
	ktrprocctor(p2);
}
593214158Sjhb
594214158Sjhb/*
595152376Srwatson * When a thread returns, drain any asynchronous records generated by the
596152376Srwatson * system call.
597152376Srwatson */
void
ktruserret(struct thread *td)
{

	/* Flush per-process queued records on return to user mode. */
	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}
608152376Srwatson
609152376Srwatsonvoid
61097993Sjhbktrnamei(path)
6111541Srgrimes	char *path;
6121541Srgrimes{
61397993Sjhb	struct ktr_request *req;
61497993Sjhb	int namelen;
615103233Sjhb	char *buf = NULL;
6161541Srgrimes
617103233Sjhb	namelen = strlen(path);
618103233Sjhb	if (namelen > 0) {
619111119Simp		buf = malloc(namelen, M_KTRACE, M_WAITOK);
620103233Sjhb		bcopy(path, buf, namelen);
621103233Sjhb	}
62297993Sjhb	req = ktr_getrequest(KTR_NAMEI);
623104230Sphk	if (req == NULL) {
624104230Sphk		if (buf != NULL)
625104230Sphk			free(buf, M_KTRACE);
62697993Sjhb		return;
627104230Sphk	}
62897993Sjhb	if (namelen > 0) {
62997993Sjhb		req->ktr_header.ktr_len = namelen;
630151927Srwatson		req->ktr_buffer = buf;
63197993Sjhb	}
632152376Srwatson	ktr_submitrequest(curthread, req);
6331541Srgrimes}
6341541Srgrimes
/*
 * Record the string name of a sysctl MIB being accessed.  The numeric
 * OID is resolved to its name by querying the sysctl {0,1} meta-node;
 * kernel_sysctl() updates mibnamelen to the actual returned length.
 */
void
ktrsysctl(name, namelen)
	int *name;
	u_int namelen;
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
	    NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}
668189707Sjhb
/*
 * Record data transferred on a file descriptor.  Consumes (frees) the
 * caller-allocated uio on every path.  Nothing is logged when the I/O
 * itself failed; otherwise up to ktr_geniosize bytes are captured.
 */
void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	/*
	 * Rewind the uio and set UIO_WRITE so uiomove() copies the I/O
	 * data out of it into our log buffer.
	 */
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = MIN(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}
7071541Srgrimes
7081549Srgrimesvoid
70997993Sjhbktrpsig(sig, action, mask, code)
71051941Smarcel	int sig;
7111541Srgrimes	sig_t action;
71251791Smarcel	sigset_t *mask;
71351941Smarcel	int code;
7141541Srgrimes{
715219311Sdchagin	struct thread *td = curthread;
71697993Sjhb	struct ktr_request *req;
71797993Sjhb	struct ktr_psig	*kp;
7181541Srgrimes
71997993Sjhb	req = ktr_getrequest(KTR_PSIG);
72097993Sjhb	if (req == NULL)
72197993Sjhb		return;
72297993Sjhb	kp = &req->ktr_data.ktr_psig;
72397993Sjhb	kp->signo = (char)sig;
72497993Sjhb	kp->action = action;
72597993Sjhb	kp->mask = *mask;
72697993Sjhb	kp->code = code;
727219311Sdchagin	ktr_enqueuerequest(td, req);
728219311Sdchagin	ktrace_exit(td);
7291541Srgrimes}
7301541Srgrimes
7311549Srgrimesvoid
732234494Sjhbktrcsw(out, user, wmesg)
7331541Srgrimes	int out, user;
734234494Sjhb	const char *wmesg;
7351541Srgrimes{
736219311Sdchagin	struct thread *td = curthread;
73797993Sjhb	struct ktr_request *req;
73897993Sjhb	struct ktr_csw *kc;
7391541Srgrimes
74097993Sjhb	req = ktr_getrequest(KTR_CSW);
74197993Sjhb	if (req == NULL)
74297993Sjhb		return;
74397993Sjhb	kc = &req->ktr_data.ktr_csw;
74497993Sjhb	kc->out = out;
74597993Sjhb	kc->user = user;
746234494Sjhb	if (wmesg != NULL)
747234494Sjhb		strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
748234494Sjhb	else
749234494Sjhb		bzero(kc->wmesg, sizeof(kc->wmesg));
750219311Sdchagin	ktr_enqueuerequest(td, req);
751219311Sdchagin	ktrace_exit(td);
7521541Srgrimes}
753176471Sdes
754176471Sdesvoid
755210064Sjhbktrstruct(name, data, datalen)
756176471Sdes	const char *name;
757176471Sdes	void *data;
758176471Sdes	size_t datalen;
759176471Sdes{
760176471Sdes	struct ktr_request *req;
761176471Sdes	char *buf = NULL;
762176471Sdes	size_t buflen;
763176471Sdes
764176471Sdes	if (!data)
765176471Sdes		datalen = 0;
766210064Sjhb	buflen = strlen(name) + 1 + datalen;
767176471Sdes	buf = malloc(buflen, M_KTRACE, M_WAITOK);
768210064Sjhb	strcpy(buf, name);
769210064Sjhb	bcopy(data, buf + strlen(name) + 1, datalen);
770176471Sdes	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
771176471Sdes		free(buf, M_KTRACE);
772176471Sdes		return;
773176471Sdes	}
774176471Sdes	req->ktr_buffer = buf;
775176471Sdes	req->ktr_header.ktr_len = buflen;
776176471Sdes	ktr_submitrequest(curthread, req);
777176471Sdes}
778226269Sdes
779226269Sdesvoid
780226495Sdesktrcapfail(type, needed, held)
781226495Sdes	enum ktr_cap_fail_type type;
782255219Spjd	const cap_rights_t *needed;
783255219Spjd	const cap_rights_t *held;
784226269Sdes{
785226269Sdes	struct thread *td = curthread;
786226269Sdes	struct ktr_request *req;
787226269Sdes	struct ktr_cap_fail *kcf;
788226269Sdes
789226269Sdes	req = ktr_getrequest(KTR_CAPFAIL);
790226269Sdes	if (req == NULL)
791226269Sdes		return;
792226269Sdes	kcf = &req->ktr_data.ktr_cap_fail;
793226495Sdes	kcf->cap_type = type;
794255677Spjd	if (needed != NULL)
795255677Spjd		kcf->cap_needed = *needed;
796255677Spjd	else
797255677Spjd		cap_rights_init(&kcf->cap_needed);
798255677Spjd	if (held != NULL)
799255677Spjd		kcf->cap_held = *held;
800255677Spjd	else
801255677Spjd		cap_rights_init(&kcf->cap_held);
802226269Sdes	ktr_enqueuerequest(td, req);
803226269Sdes	ktrace_exit(td);
804226269Sdes}
805233925Sjhb
806233925Sjhbvoid
807233925Sjhbktrfault(vaddr, type)
808233925Sjhb	vm_offset_t vaddr;
809233925Sjhb	int type;
810233925Sjhb{
811233925Sjhb	struct thread *td = curthread;
812233925Sjhb	struct ktr_request *req;
813233925Sjhb	struct ktr_fault *kf;
814233925Sjhb
815233925Sjhb	req = ktr_getrequest(KTR_FAULT);
816233925Sjhb	if (req == NULL)
817233925Sjhb		return;
818233925Sjhb	kf = &req->ktr_data.ktr_fault;
819233925Sjhb	kf->vaddr = vaddr;
820233925Sjhb	kf->type = type;
821233925Sjhb	ktr_enqueuerequest(td, req);
822233925Sjhb	ktrace_exit(td);
823233925Sjhb}
824233925Sjhb
825233925Sjhbvoid
826233925Sjhbktrfaultend(result)
827233925Sjhb	int result;
828233925Sjhb{
829233925Sjhb	struct thread *td = curthread;
830233925Sjhb	struct ktr_request *req;
831233925Sjhb	struct ktr_faultend *kf;
832233925Sjhb
833233925Sjhb	req = ktr_getrequest(KTR_FAULTEND);
834233925Sjhb	if (req == NULL)
835233925Sjhb		return;
836233925Sjhb	kf = &req->ktr_data.ktr_faultend;
837233925Sjhb	kf->result = result;
838233925Sjhb	ktr_enqueuerequest(td, req);
839233925Sjhb	ktrace_exit(td);
840233925Sjhb}
841114026Sjhb#endif /* KTRACE */
8421541Srgrimes
8431541Srgrimes/* Interface and common routines */
8441541Srgrimes
#ifndef _SYS_SYSPROTO_H_
/* Argument layout for ktrace(2) when <sys/sysproto.h> is unavailable. */
struct ktrace_args {
	char	*fname;		/* userspace path of the trace file */
	int	ops;		/* KTROP(ops) operation, may have KTRFLAG_DESCEND set */
	int	facs;		/* bitmask of trace facilities to change */
	int	pid;		/* target pid, or negated process group id */
};
#endif
/*
 * ktrace(2) system call: enable, disable, or clear kernel tracing.
 *
 * Operations (KTROP of uap->ops):
 *   KTROP_SET       - attach facilities "facs" and trace file "fname" to
 *                     the target pid (or pgrp if uap->pid < 0); with
 *                     KTRFLAG_DESCEND also to all descendants.
 *   KTROP_CLEAR     - remove facilities from the target(s).
 *   KTROP_CLEARFILE - detach the named trace file from every process that
 *                     writes to it.
 *
 * Returns 0 on success, or EINVAL/EACCES/ESRCH/EPERM or a vn_open(9)
 * error.  Compiled to ENOSYS when the kernel lacks "options KTRACE".
 */
/* ARGSUSED */
int
sys_ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;	/* callers may not forge KTRFAC_ROOT */
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	/* NOTE(review): ktrace_enter() presumably marks td as inside the
	 * ktrace machinery to suppress recursive trace events -- confirm
	 * against its definition earlier in this file. */
	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		/* Drop the vnode lock from vn_open(); writes relock later. */
		VOP_UNLOCK(vp, 0);
		if (vp->v_type != VREG) {
			/* Only regular files may be trace targets. */
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			ktrace_exit(td);
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					/* Defer vrele() past allproc_lock. */
					vrele_count++;
					crfree(cred);
				} else
					/* Remember the failure but keep going. */
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			while (vrele_count-- > 0)
				vrele(vp);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			/* Skip embryonic or invisible processes. */
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			/* ktrops()/ktrsetchildren() unlock p. */
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);		/* returns p locked on success */
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		/* ktrops()/ktrsetchildren() unlock p. */
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	/* No target accepted the change: permission failure. */
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}
9971541Srgrimes
99818398Sphk/* ARGSUSED */
99918398Sphkint
1000225617Skmacysys_utrace(td, uap)
100183366Sjulian	struct thread *td;
100218398Sphk	register struct utrace_args *uap;
100318398Sphk{
100483366Sjulian
100513203Swollman#ifdef KTRACE
100697993Sjhb	struct ktr_request *req;
100799009Salfred	void *cp;
1008103237Sjhb	int error;
100918398Sphk
1010103237Sjhb	if (!KTRPOINT(td, KTR_USER))
1011103237Sjhb		return (0);
101270792Salfred	if (uap->len > KTR_USER_MAXLEN)
101370707Salfred		return (EINVAL);
1014111119Simp	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
1015103237Sjhb	error = copyin(uap->addr, cp, uap->len);
1016104230Sphk	if (error) {
1017104230Sphk		free(cp, M_KTRACE);
1018103237Sjhb		return (error);
1019104230Sphk	}
102097993Sjhb	req = ktr_getrequest(KTR_USER);
1021104230Sphk	if (req == NULL) {
1022104230Sphk		free(cp, M_KTRACE);
1023122457Sjkoshy		return (ENOMEM);
1024104230Sphk	}
1025151927Srwatson	req->ktr_buffer = cp;
1026103237Sjhb	req->ktr_header.ktr_len = uap->len;
1027152376Srwatson	ktr_submitrequest(td, req);
102818398Sphk	return (0);
1029114026Sjhb#else /* !KTRACE */
103018398Sphk	return (ENOSYS);
1031114026Sjhb#endif /* KTRACE */
103218398Sphk}
103318398Sphk
103418398Sphk#ifdef KTRACE
103512819Sphkstatic int
103694618Sjhbktrops(td, p, ops, facs, vp)
103794618Sjhb	struct thread *td;
103894618Sjhb	struct proc *p;
10391541Srgrimes	int ops, facs;
10401541Srgrimes	struct vnode *vp;
10411541Srgrimes{
104297993Sjhb	struct vnode *tracevp = NULL;
1043112198Sjhb	struct ucred *tracecred = NULL;
10441541Srgrimes
1045211439Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
104694618Sjhb	if (!ktrcanset(td, p)) {
104794618Sjhb		PROC_UNLOCK(p);
10481541Srgrimes		return (0);
104994618Sjhb	}
1050211439Sjhb	if (p->p_flag & P_WEXIT) {
1051211439Sjhb		/* If the process is exiting, just ignore it. */
1052211439Sjhb		PROC_UNLOCK(p);
1053211439Sjhb		return (1);
1054211439Sjhb	}
105597993Sjhb	mtx_lock(&ktrace_mtx);
10561541Srgrimes	if (ops == KTROP_SET) {
1057112198Sjhb		if (p->p_tracevp != vp) {
10581541Srgrimes			/*
105994618Sjhb			 * if trace file already in use, relinquish below
10601541Srgrimes			 */
1061112198Sjhb			tracevp = p->p_tracevp;
106297993Sjhb			VREF(vp);
1063112198Sjhb			p->p_tracevp = vp;
10641541Srgrimes		}
1065112198Sjhb		if (p->p_tracecred != td->td_ucred) {
1066112198Sjhb			tracecred = p->p_tracecred;
1067112198Sjhb			p->p_tracecred = crhold(td->td_ucred);
1068112198Sjhb		}
10691541Srgrimes		p->p_traceflag |= facs;
1070170587Srwatson		if (priv_check(td, PRIV_KTRACE) == 0)
10711541Srgrimes			p->p_traceflag |= KTRFAC_ROOT;
10728876Srgrimes	} else {
10731541Srgrimes		/* KTROP_CLEAR */
1074214158Sjhb		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
10751541Srgrimes			/* no more tracing */
1076214158Sjhb			ktr_freeproc(p, &tracecred, &tracevp);
10771541Srgrimes	}
107897993Sjhb	mtx_unlock(&ktrace_mtx);
1079219311Sdchagin	if ((p->p_traceflag & KTRFAC_MASK) != 0)
1080219311Sdchagin		ktrprocctor_entered(td, p);
108194618Sjhb	PROC_UNLOCK(p);
1082241896Skib	if (tracevp != NULL)
108397993Sjhb		vrele(tracevp);
1084112198Sjhb	if (tracecred != NULL)
1085112198Sjhb		crfree(tracecred);
10861541Srgrimes
10871541Srgrimes	return (1);
10881541Srgrimes}
10891541Srgrimes
109012819Sphkstatic int
109194618Sjhbktrsetchildren(td, top, ops, facs, vp)
109294618Sjhb	struct thread *td;
109394618Sjhb	struct proc *top;
10941541Srgrimes	int ops, facs;
10951541Srgrimes	struct vnode *vp;
10961541Srgrimes{
10971541Srgrimes	register struct proc *p;
10981541Srgrimes	register int ret = 0;
10991541Srgrimes
11001541Srgrimes	p = top;
1101211439Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
1102114026Sjhb	sx_assert(&proctree_lock, SX_LOCKED);
11031541Srgrimes	for (;;) {
110494618Sjhb		ret |= ktrops(td, p, ops, facs, vp);
11051541Srgrimes		/*
11061541Srgrimes		 * If this process has children, descend to them next,
11071541Srgrimes		 * otherwise do any siblings, and if done with this level,
11081541Srgrimes		 * follow back up the tree (but not past top).
11091541Srgrimes		 */
111053212Sphk		if (!LIST_EMPTY(&p->p_children))
111153212Sphk			p = LIST_FIRST(&p->p_children);
11121541Srgrimes		else for (;;) {
1113114026Sjhb			if (p == top)
11141541Srgrimes				return (ret);
111553212Sphk			if (LIST_NEXT(p, p_sibling)) {
111653212Sphk				p = LIST_NEXT(p, p_sibling);
11171541Srgrimes				break;
11181541Srgrimes			}
111914529Shsu			p = p->p_pptr;
11201541Srgrimes		}
1121211439Sjhb		PROC_LOCK(p);
11221541Srgrimes	}
11231541Srgrimes	/*NOTREACHED*/
11241541Srgrimes}
11251541Srgrimes
/*
 * Write a completed trace request to the tracing process's trace vnode.
 * The record is emitted as a single appending VOP_WRITE() of up to three
 * iovecs: the ktr_header, the fixed-size in-request data (if any for this
 * record type), and the variable-length ktr_buffer (if any).
 *
 * On a write error, tracing is torn down for every process in the system
 * that was tracing to this vnode.
 */
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	/* Take our own references under ktrace_mtx so neither can vanish. */
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	/* Per-type fixed payload size; buflen is the variable payload. */
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		/* Header's length field covers the fixed payload too. */
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		return;
	}

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with. Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			/* Detach tracing; each detach owes one vrele(). */
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		/* Drop the displaced credential outside the proc lock. */
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	while (vrele_count-- > 0)
		vrele(vp);
}
12501541Srgrimes
12511541Srgrimes/*
12521541Srgrimes * Return true if caller has permission to set the ktracing state
12531541Srgrimes * of target.  Essentially, the target can't possess any
12541541Srgrimes * more permissions than the caller.  KTRFAC_ROOT signifies that
12558876Srgrimes * root previously set the tracing status on the target process, and
12561541Srgrimes * so, only root may further change it.
12571541Srgrimes */
125812819Sphkstatic int
125994618Sjhbktrcanset(td, targetp)
126094618Sjhb	struct thread *td;
126194618Sjhb	struct proc *targetp;
12621541Srgrimes{
12631541Srgrimes
126494618Sjhb	PROC_LOCK_ASSERT(targetp, MA_OWNED);
126579335Srwatson	if (targetp->p_traceflag & KTRFAC_ROOT &&
1266170587Srwatson	    priv_check(td, PRIV_KTRACE))
126746155Sphk		return (0);
12681541Srgrimes
126996886Sjhb	if (p_candebug(td, targetp) != 0)
127079335Srwatson		return (0);
127179335Srwatson
127279335Srwatson	return (1);
12731541Srgrimes}
12741541Srgrimes
127513203Swollman#endif /* KTRACE */
1276