1139804Simp/*-
21541Srgrimes * Copyright (c) 1989, 1993
3152376Srwatson *	The Regents of the University of California.
4152376Srwatson * Copyright (c) 2005 Robert N. M. Watson
5152376Srwatson * All rights reserved.
61541Srgrimes *
71541Srgrimes * Redistribution and use in source and binary forms, with or without
81541Srgrimes * modification, are permitted provided that the following conditions
91541Srgrimes * are met:
101541Srgrimes * 1. Redistributions of source code must retain the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer.
121541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
131541Srgrimes *    notice, this list of conditions and the following disclaimer in the
141541Srgrimes *    documentation and/or other materials provided with the distribution.
151541Srgrimes * 4. Neither the name of the University nor the names of its contributors
161541Srgrimes *    may be used to endorse or promote products derived from this software
171541Srgrimes *    without specific prior written permission.
181541Srgrimes *
191541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
201541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
211541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
221541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
231541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
241541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
251541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
261541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
271541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
281541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
291541Srgrimes * SUCH DAMAGE.
301541Srgrimes *
311541Srgrimes *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
321541Srgrimes */
331541Srgrimes
34116182Sobrien#include <sys/cdefs.h>
35116182Sobrien__FBSDID("$FreeBSD$");
36116182Sobrien
3713203Swollman#include "opt_ktrace.h"
381541Srgrimes
391541Srgrimes#include <sys/param.h>
402112Swollman#include <sys/systm.h>
4197993Sjhb#include <sys/fcntl.h>
4297993Sjhb#include <sys/kernel.h>
4397993Sjhb#include <sys/kthread.h>
4476166Smarkm#include <sys/lock.h>
4576166Smarkm#include <sys/mutex.h>
4697993Sjhb#include <sys/malloc.h>
47155031Sjeff#include <sys/mount.h>
4897993Sjhb#include <sys/namei.h>
49164033Srwatson#include <sys/priv.h>
501541Srgrimes#include <sys/proc.h>
5197993Sjhb#include <sys/unistd.h>
521541Srgrimes#include <sys/vnode.h>
53176471Sdes#include <sys/socket.h>
54176471Sdes#include <sys/stat.h>
551541Srgrimes#include <sys/ktrace.h>
5674927Sjhb#include <sys/sx.h>
5797993Sjhb#include <sys/sysctl.h>
58219042Sdchagin#include <sys/sysent.h>
591541Srgrimes#include <sys/syslog.h>
6097993Sjhb#include <sys/sysproto.h>
611541Srgrimes
62163606Srwatson#include <security/mac/mac_framework.h>
63163606Srwatson
64152376Srwatson/*
65152376Srwatson * The ktrace facility allows the tracing of certain key events in user space
66152376Srwatson * processes, such as system calls, signal delivery, context switches, and
67152376Srwatson * user generated events using utrace(2).  It works by streaming event
68152376Srwatson * records and data to a vnode associated with the process using the
69152376Srwatson * ktrace(2) system call.  In general, records can be written directly from
70152376Srwatson * the context that generates the event.  One important exception to this is
71152376Srwatson * during a context switch, where sleeping is not permitted.  To handle this
72152376Srwatson * case, trace events are generated using in-kernel ktr_request records, and
73152376Srwatson * then delivered to disk at a convenient moment -- either immediately, the
74152376Srwatson * next traceable event, at system call return, or at process exit.
75152376Srwatson *
76152376Srwatson * When dealing with multiple threads or processes writing to the same event
77152376Srwatson * log, ordering guarantees are weak: specifically, if an event has multiple
78152376Srwatson * records (i.e., system call enter and return), they may be interlaced with
79152376Srwatson * records from another event.  Process and thread ID information is provided
80152376Srwatson * in the record, and user applications can de-interlace events if required.
81152376Srwatson */
82152376Srwatson
/* Malloc type used for all ktrace allocations (requests and payload buffers). */
static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

/* Default number of preallocated ktr_request structures (boot-tunable). */
#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif
9212819Sphk
/*
 * In-kernel representation of one pending trace record.  The header is
 * filled in when the request is allocated; a fixed-size payload lives in
 * the ktr_data union, and any variable-length payload in the separately
 * malloc'd ktr_buffer.
 */
struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;		/* variable-length payload, or NULL */
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
		struct	ktr_fault ktr_fault;
		struct	ktr_faultend ktr_faultend;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;	/* free list or per-proc queue */
};
10897993Sjhb
10997993Sjhbstatic int data_lengths[] = {
11097993Sjhb	0,					/* none */
11197993Sjhb	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
11297993Sjhb	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
11397993Sjhb	0,					/* KTR_NAMEI */
11497993Sjhb	sizeof(struct ktr_genio),		/* KTR_GENIO */
11597993Sjhb	sizeof(struct ktr_psig),		/* KTR_PSIG */
116219312Sdchagin	sizeof(struct ktr_csw),			/* KTR_CSW */
117176471Sdes	0,					/* KTR_USER */
118176471Sdes	0,					/* KTR_STRUCT */
119189707Sjhb	0,					/* KTR_SYSCTL */
120219042Sdchagin	sizeof(struct ktr_proc_ctor),		/* KTR_PROCCTOR */
121219042Sdchagin	0,					/* KTR_PROCDTOR */
122267015Sdelphij	0,					/* unused */
123237663Sjhb	sizeof(struct ktr_fault),		/* KTR_FAULT */
124237663Sjhb	sizeof(struct ktr_faultend),		/* KTR_FAULTEND */
12597993Sjhb};
12697993Sjhb
/* Free list of preallocated request structures; protected by ktrace_mtx. */
static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

/* Current size of the request pool; boot-tunable and sysctl-adjustable. */
static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

/* Cap on the payload captured for a single genio (I/O) event. */
static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

/* Rate-limits the "Out of ktrace request objects" console message. */
static int print_message = 1;
/* Protects ktr_free, per-process request queues, and per-proc trace state. */
static struct mtx ktrace_mtx;
/* Serializes committing trace records to their vnodes. */
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);
15897993Sjhb
159152376Srwatson/*
160152376Srwatson * ktrace itself generates events, such as context switches, which we do not
161152376Srwatson * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
162152376Srwatson * whether or not it is in a region where tracing of events should be
163152376Srwatson * suppressed.
164152376Srwatson */
16597993Sjhbstatic void
166152376Srwatsonktrace_enter(struct thread *td)
167152376Srwatson{
168152376Srwatson
169152376Srwatson	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
170152376Srwatson	td->td_pflags |= TDP_INKTRACE;
171152376Srwatson}
172152376Srwatson
173152376Srwatsonstatic void
174152376Srwatsonktrace_exit(struct thread *td)
175152376Srwatson{
176152376Srwatson
177152376Srwatson	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
178152376Srwatson	td->td_pflags &= ~TDP_INKTRACE;
179152376Srwatson}
180152376Srwatson
181152376Srwatsonstatic void
182152376Srwatsonktrace_assert(struct thread *td)
183152376Srwatson{
184152376Srwatson
185152376Srwatson	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
186152376Srwatson}
187152376Srwatson
188152376Srwatsonstatic void
18997993Sjhbktrace_init(void *dummy)
1901541Srgrimes{
19197993Sjhb	struct ktr_request *req;
19297993Sjhb	int i;
1931541Srgrimes
19497993Sjhb	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
195152376Srwatson	sx_init(&ktrace_sx, "ktrace_sx");
19697993Sjhb	STAILQ_INIT(&ktr_free);
19797993Sjhb	for (i = 0; i < ktr_requestpool; i++) {
198111119Simp		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
19997993Sjhb		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
20097993Sjhb	}
2011541Srgrimes}
20297993SjhbSYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
2031541Srgrimes
/*
 * Sysctl handler for kern.ktrace.request_pool: report the pool size and,
 * on write, resize it.  Returns ENOSPC if a requested grow could not be
 * fully satisfied.
 */
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	/* Suppress tracing of our own allocations while resizing. */
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	/* Report the size the pool had before the resize. */
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");
23597993Sjhb
/*
 * Grow or shrink the request pool toward newsize.  Returns the resulting
 * pool size, which may be larger than newsize if the free list did not
 * hold enough entries to shrink all the way.  Note the lock discipline:
 * when growing, all malloc(M_WAITOK) calls happen on a private list
 * before ktrace_mtx is taken; both branches leave the mutex held for the
 * single unlock at the end.
 */
static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;	/* free list exhausted; stop early */
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}
27397993Sjhb
/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

/*
 * Allocate a trace request from the free pool and pre-fill its header.
 * The caller must already be inside a ktrace_enter() section.  Returns
 * NULL if this event type is not being traced or the pool is empty; in
 * the latter case KTRFAC_DROP is set on the process so the next record
 * that is written carries the KTR_DROP marker.
 */
static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		/* Report (and clear) any earlier record drops. */
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		/* Pool exhausted: mark the drop and rate-limit the message. */
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}
31697993Sjhb
317219042Sdchaginstatic struct ktr_request *
318219042Sdchaginktr_getrequest(int type)
319219042Sdchagin{
320219042Sdchagin	struct thread *td = curthread;
321219042Sdchagin	struct ktr_request *req;
322219042Sdchagin
323219042Sdchagin	ktrace_enter(td);
324219311Sdchagin	req = ktr_getrequest_entered(td, type);
325219042Sdchagin	if (req == NULL)
326219042Sdchagin		ktrace_exit(td);
327219042Sdchagin
328219042Sdchagin	return (req);
329219042Sdchagin}
330219042Sdchagin
331152376Srwatson/*
332152376Srwatson * Some trace generation environments don't permit direct access to VFS,
333152376Srwatson * such as during a context switch where sleeping is not allowed.  Under these
334152376Srwatson * circumstances, queue a request to the thread to be written asynchronously
335152376Srwatson * later.
336152376Srwatson */
33797993Sjhbstatic void
338152376Srwatsonktr_enqueuerequest(struct thread *td, struct ktr_request *req)
33997993Sjhb{
34097993Sjhb
34197993Sjhb	mtx_lock(&ktrace_mtx);
342152376Srwatson	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
343118599Sjhb	mtx_unlock(&ktrace_mtx);
34497993Sjhb}
34597993Sjhb
346152376Srwatson/*
347152376Srwatson * Drain any pending ktrace records from the per-thread queue to disk.  This
348152376Srwatson * is used both internally before committing other records, and also on
349152376Srwatson * system call return.  We drain all the ones we can find at the time when
350152376Srwatson * drain is requested, but don't keep draining after that as those events
351189707Sjhb * may be approximately "after" the current event.
352152376Srwatson */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		/*
		 * Move the whole per-process queue onto a local list under
		 * ktrace_mtx, then write the records out with only the sx
		 * lock held (VFS writes may sleep).
		 */
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}
376152376Srwatson
377152376Srwatson/*
378152376Srwatson * Submit a trace record for immediate commit to disk -- to be used only
379152376Srwatson * where entering VFS is OK.  First drain any pending records that may have
380152376Srwatson * been cached in the thread.
381152376Srwatson */
382152376Srwatsonstatic void
383219311Sdchaginktr_submitrequest(struct thread *td, struct ktr_request *req)
384152376Srwatson{
385152376Srwatson
386152376Srwatson	ktrace_assert(td);
387152376Srwatson
388152376Srwatson	sx_xlock(&ktrace_sx);
389152376Srwatson	ktr_drain(td);
390152376Srwatson	ktr_writerequest(td, req);
391152376Srwatson	ktr_freerequest(req);
392152376Srwatson	sx_xunlock(&ktrace_sx);
393152376Srwatson	ktrace_exit(td);
394152376Srwatson}
395152376Srwatson
/* Return a request (and any payload buffer) to the free pool. */
static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}
404214158Sjhb
/* As ktr_freerequest(), but with ktrace_mtx already held by the caller. */
static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}
41497993Sjhb
415214158Sjhb/*
416214158Sjhb * Disable tracing for a process and release all associated resources.
417214158Sjhb * The caller is responsible for releasing a reference on the returned
418214158Sjhb * vnode and credentials.
419214158Sjhb */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	/* Hand the old cred/vnode back to the caller for release later,
	 * since dropping references here (locks held) would be unsafe. */
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	/* Discard any queued-but-unwritten async records. */
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}
438214158Sjhb
4391549Srgrimesvoid
44097993Sjhbktrsyscall(code, narg, args)
44147955Sdt	int code, narg;
44247955Sdt	register_t args[];
4431541Srgrimes{
44497993Sjhb	struct ktr_request *req;
44597993Sjhb	struct ktr_syscall *ktp;
44697993Sjhb	size_t buflen;
447103233Sjhb	char *buf = NULL;
4481541Srgrimes
449103233Sjhb	buflen = sizeof(register_t) * narg;
450103233Sjhb	if (buflen > 0) {
451111119Simp		buf = malloc(buflen, M_KTRACE, M_WAITOK);
452103233Sjhb		bcopy(args, buf, buflen);
453103233Sjhb	}
45497993Sjhb	req = ktr_getrequest(KTR_SYSCALL);
455104230Sphk	if (req == NULL) {
456104230Sphk		if (buf != NULL)
457104230Sphk			free(buf, M_KTRACE);
45897993Sjhb		return;
459104230Sphk	}
46097993Sjhb	ktp = &req->ktr_data.ktr_syscall;
4611541Srgrimes	ktp->ktr_code = code;
4621541Srgrimes	ktp->ktr_narg = narg;
46397993Sjhb	if (buflen > 0) {
46497993Sjhb		req->ktr_header.ktr_len = buflen;
465151927Srwatson		req->ktr_buffer = buf;
46697993Sjhb	}
467152376Srwatson	ktr_submitrequest(curthread, req);
4681541Srgrimes}
4691541Srgrimes
4701549Srgrimesvoid
47197993Sjhbktrsysret(code, error, retval)
47247955Sdt	int code, error;
47347955Sdt	register_t retval;
4741541Srgrimes{
47597993Sjhb	struct ktr_request *req;
47697993Sjhb	struct ktr_sysret *ktp;
4771541Srgrimes
47897993Sjhb	req = ktr_getrequest(KTR_SYSRET);
47997993Sjhb	if (req == NULL)
48097993Sjhb		return;
48197993Sjhb	ktp = &req->ktr_data.ktr_sysret;
48297993Sjhb	ktp->ktr_code = code;
48397993Sjhb	ktp->ktr_error = error;
484230160Seadler	ktp->ktr_retval = ((error == 0) ? retval: 0);		/* what about val2 ? */
485152376Srwatson	ktr_submitrequest(curthread, req);
4861541Srgrimes}
4871541Srgrimes
488152376Srwatson/*
489214158Sjhb * When a setuid process execs, disable tracing.
490214158Sjhb *
491214158Sjhb * XXX: We toss any pending asynchronous records.
492152376Srwatson */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	/* Caller releases the returned cred/vnode once locks are dropped. */
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}
502214158Sjhb
503214158Sjhb/*
504214158Sjhb * When a process exits, drain per-process asynchronous trace records
505214158Sjhb * and disable tracing.
506214158Sjhb */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;
	int vfslocked;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	/* Emit a final destructor record before draining the queue. */
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	/* Release the trace vnode and credentials with no locks held. */
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}
541152376Srwatson
/*
 * Queue a process-constructor record carrying the ABI flags of p.  The
 * record is attributed to p's first thread, not to td.  Caller must be
 * inside a ktrace_enter() section.
 */
static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}
558219042Sdchagin
559219042Sdchaginvoid
560219042Sdchaginktrprocctor(struct proc *p)
561219042Sdchagin{
562219042Sdchagin	struct thread *td = curthread;
563219042Sdchagin
564219042Sdchagin	if ((p->p_traceflag & KTRFAC_MASK) == 0)
565219042Sdchagin		return;
566219042Sdchagin
567219042Sdchagin	ktrace_enter(td);
568219311Sdchagin	ktrprocctor_entered(td, p);
569219042Sdchagin	ktrace_exit(td);
570219042Sdchagin}
571219042Sdchagin
572152376Srwatson/*
573214158Sjhb * When a process forks, enable tracing in the new process if needed.
574214158Sjhb */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		/* Child inherits the parent's trace flags, vnode, and cred. */
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}
596214158Sjhb
597214158Sjhb/*
598152376Srwatson * When a thread returns, drain any asynchronous records generated by the
599152376Srwatson * system call.
600152376Srwatson */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}
611152376Srwatson
612152376Srwatsonvoid
61397993Sjhbktrnamei(path)
6141541Srgrimes	char *path;
6151541Srgrimes{
61697993Sjhb	struct ktr_request *req;
61797993Sjhb	int namelen;
618103233Sjhb	char *buf = NULL;
6191541Srgrimes
620103233Sjhb	namelen = strlen(path);
621103233Sjhb	if (namelen > 0) {
622111119Simp		buf = malloc(namelen, M_KTRACE, M_WAITOK);
623103233Sjhb		bcopy(path, buf, namelen);
624103233Sjhb	}
62597993Sjhb	req = ktr_getrequest(KTR_NAMEI);
626104230Sphk	if (req == NULL) {
627104230Sphk		if (buf != NULL)
628104230Sphk			free(buf, M_KTRACE);
62997993Sjhb		return;
630104230Sphk	}
63197993Sjhb	if (namelen > 0) {
63297993Sjhb		req->ktr_header.ktr_len = namelen;
633151927Srwatson		req->ktr_buffer = buf;
63497993Sjhb	}
635152376Srwatson	ktr_submitrequest(curthread, req);
6361541Srgrimes}
6371541Srgrimes
6381549Srgrimesvoid
639189707Sjhbktrsysctl(name, namelen)
640189707Sjhb	int *name;
641189707Sjhb	u_int namelen;
642189707Sjhb{
643189707Sjhb	struct ktr_request *req;
644189707Sjhb	u_int mib[CTL_MAXNAME + 2];
645189707Sjhb	char *mibname;
646189707Sjhb	size_t mibnamelen;
647189707Sjhb	int error;
648189707Sjhb
649189707Sjhb	/* Lookup name of mib. */
650189707Sjhb	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
651189707Sjhb	mib[0] = 0;
652189707Sjhb	mib[1] = 1;
653189707Sjhb	bcopy(name, mib + 2, namelen * sizeof(*name));
654189707Sjhb	mibnamelen = 128;
655189707Sjhb	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
656189707Sjhb	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
657189707Sjhb	    NULL, 0, &mibnamelen, 0);
658189707Sjhb	if (error) {
659189707Sjhb		free(mibname, M_KTRACE);
660189707Sjhb		return;
661189707Sjhb	}
662189707Sjhb	req = ktr_getrequest(KTR_SYSCTL);
663189707Sjhb	if (req == NULL) {
664189707Sjhb		free(mibname, M_KTRACE);
665189707Sjhb		return;
666189707Sjhb	}
667189707Sjhb	req->ktr_header.ktr_len = mibnamelen;
668189707Sjhb	req->ktr_buffer = mibname;
669189707Sjhb	ktr_submitrequest(curthread, req);
670189707Sjhb}
671189707Sjhb
672189707Sjhbvoid
67397993Sjhbktrgenio(fd, rw, uio, error)
6741541Srgrimes	int fd;
6751541Srgrimes	enum uio_rw rw;
67662378Sgreen	struct uio *uio;
67762378Sgreen	int error;
6781541Srgrimes{
67997993Sjhb	struct ktr_request *req;
68097993Sjhb	struct ktr_genio *ktg;
681103235Sjhb	int datalen;
682103235Sjhb	char *buf;
6838876Srgrimes
684131897Sphk	if (error) {
685131897Sphk		free(uio, M_IOV);
6861541Srgrimes		return;
687131897Sphk	}
688103235Sjhb	uio->uio_offset = 0;
689103235Sjhb	uio->uio_rw = UIO_WRITE;
690233353Skib	datalen = MIN(uio->uio_resid, ktr_geniosize);
691111119Simp	buf = malloc(datalen, M_KTRACE, M_WAITOK);
692131897Sphk	error = uiomove(buf, datalen, uio);
693131897Sphk	free(uio, M_IOV);
694131897Sphk	if (error) {
695103235Sjhb		free(buf, M_KTRACE);
696103235Sjhb		return;
697103235Sjhb	}
69897993Sjhb	req = ktr_getrequest(KTR_GENIO);
699103235Sjhb	if (req == NULL) {
700103235Sjhb		free(buf, M_KTRACE);
70197993Sjhb		return;
702103235Sjhb	}
70397993Sjhb	ktg = &req->ktr_data.ktr_genio;
70497993Sjhb	ktg->ktr_fd = fd;
70597993Sjhb	ktg->ktr_rw = rw;
706103235Sjhb	req->ktr_header.ktr_len = datalen;
707151927Srwatson	req->ktr_buffer = buf;
708152376Srwatson	ktr_submitrequest(curthread, req);
7091541Srgrimes}
7101541Srgrimes
7111549Srgrimesvoid
71297993Sjhbktrpsig(sig, action, mask, code)
71351941Smarcel	int sig;
7141541Srgrimes	sig_t action;
71551791Smarcel	sigset_t *mask;
71651941Smarcel	int code;
7171541Srgrimes{
718219311Sdchagin	struct thread *td = curthread;
71997993Sjhb	struct ktr_request *req;
72097993Sjhb	struct ktr_psig	*kp;
7211541Srgrimes
72297993Sjhb	req = ktr_getrequest(KTR_PSIG);
72397993Sjhb	if (req == NULL)
72497993Sjhb		return;
72597993Sjhb	kp = &req->ktr_data.ktr_psig;
72697993Sjhb	kp->signo = (char)sig;
72797993Sjhb	kp->action = action;
72897993Sjhb	kp->mask = *mask;
72997993Sjhb	kp->code = code;
730219311Sdchagin	ktr_enqueuerequest(td, req);
731219311Sdchagin	ktrace_exit(td);
7321541Srgrimes}
7331541Srgrimes
7341549Srgrimesvoid
735237719Sjhbktrcsw(out, user, wmesg)
7361541Srgrimes	int out, user;
737237719Sjhb	const char *wmesg;
7381541Srgrimes{
739219311Sdchagin	struct thread *td = curthread;
74097993Sjhb	struct ktr_request *req;
74197993Sjhb	struct ktr_csw *kc;
7421541Srgrimes
74397993Sjhb	req = ktr_getrequest(KTR_CSW);
74497993Sjhb	if (req == NULL)
74597993Sjhb		return;
74697993Sjhb	kc = &req->ktr_data.ktr_csw;
74797993Sjhb	kc->out = out;
74897993Sjhb	kc->user = user;
749237719Sjhb	if (wmesg != NULL)
750237719Sjhb		strlcpy(kc->wmesg, wmesg, sizeof(kc->wmesg));
751237719Sjhb	else
752237719Sjhb		bzero(kc->wmesg, sizeof(kc->wmesg));
753219311Sdchagin	ktr_enqueuerequest(td, req);
754219311Sdchagin	ktrace_exit(td);
7551541Srgrimes}
756176471Sdes
757176471Sdesvoid
758210064Sjhbktrstruct(name, data, datalen)
759176471Sdes	const char *name;
760176471Sdes	void *data;
761176471Sdes	size_t datalen;
762176471Sdes{
763176471Sdes	struct ktr_request *req;
764176471Sdes	char *buf = NULL;
765176471Sdes	size_t buflen;
766176471Sdes
767176471Sdes	if (!data)
768176471Sdes		datalen = 0;
769210064Sjhb	buflen = strlen(name) + 1 + datalen;
770176471Sdes	buf = malloc(buflen, M_KTRACE, M_WAITOK);
771210064Sjhb	strcpy(buf, name);
772210064Sjhb	bcopy(data, buf + strlen(name) + 1, datalen);
773176471Sdes	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
774176471Sdes		free(buf, M_KTRACE);
775176471Sdes		return;
776176471Sdes	}
777176471Sdes	req->ktr_buffer = buf;
778176471Sdes	req->ktr_header.ktr_len = buflen;
779176471Sdes	ktr_submitrequest(curthread, req);
780176471Sdes}
781237663Sjhb
782237663Sjhbvoid
783237663Sjhbktrfault(vaddr, type)
784237663Sjhb	vm_offset_t vaddr;
785237663Sjhb	int type;
786237663Sjhb{
787237663Sjhb	struct thread *td = curthread;
788237663Sjhb	struct ktr_request *req;
789237663Sjhb	struct ktr_fault *kf;
790237663Sjhb
791237663Sjhb	req = ktr_getrequest(KTR_FAULT);
792237663Sjhb	if (req == NULL)
793237663Sjhb		return;
794237663Sjhb	kf = &req->ktr_data.ktr_fault;
795237663Sjhb	kf->vaddr = vaddr;
796237663Sjhb	kf->type = type;
797237663Sjhb	ktr_enqueuerequest(td, req);
798237663Sjhb	ktrace_exit(td);
799237663Sjhb}
800237663Sjhb
801237663Sjhbvoid
802237663Sjhbktrfaultend(result)
803237663Sjhb	int result;
804237663Sjhb{
805237663Sjhb	struct thread *td = curthread;
806237663Sjhb	struct ktr_request *req;
807237663Sjhb	struct ktr_faultend *kf;
808237663Sjhb
809237663Sjhb	req = ktr_getrequest(KTR_FAULTEND);
810237663Sjhb	if (req == NULL)
811237663Sjhb		return;
812237663Sjhb	kf = &req->ktr_data.ktr_faultend;
813237663Sjhb	kf->result = result;
814237663Sjhb	ktr_enqueuerequest(td, req);
815237663Sjhb	ktrace_exit(td);
816237663Sjhb}
817114026Sjhb#endif /* KTRACE */
8181541Srgrimes
8191541Srgrimes/* Interface and common routines */
8201541Srgrimes
#ifndef _SYS_SYSPROTO_H_
/* Argument structure for the ktrace(2) system call. */
struct ktrace_args {
	char	*fname;		/* trace file path (or NULL to clear) */
	int	ops;		/* KTROP_* operation | KTRFLAG_* flags */
	int	facs;		/* KTRFAC_* event facilities to trace */
	int	pid;		/* target process or (negated) process group */
};
#endif
8291541Srgrimes/* ARGSUSED */
8301549Srgrimesint
831225617Skmacysys_ktrace(td, uap)
83283366Sjulian	struct thread *td;
8331541Srgrimes	register struct ktrace_args *uap;
8341541Srgrimes{
83513203Swollman#ifdef KTRACE
8361541Srgrimes	register struct vnode *vp = NULL;
8371541Srgrimes	register struct proc *p;
8381541Srgrimes	struct pgrp *pg;
8391541Srgrimes	int facs = uap->facs & ~KTRFAC_ROOT;
8401541Srgrimes	int ops = KTROP(uap->ops);
8411541Srgrimes	int descend = uap->ops & KTRFLAG_DESCEND;
842147576Spjd	int nfound, ret = 0;
843157233Sjhb	int flags, error = 0, vfslocked;
8441541Srgrimes	struct nameidata nd;
845112198Sjhb	struct ucred *cred;
8461541Srgrimes
847114026Sjhb	/*
848114026Sjhb	 * Need something to (un)trace.
849114026Sjhb	 */
850114026Sjhb	if (ops != KTROP_CLEARFILE && facs == 0)
851114026Sjhb		return (EINVAL);
852114026Sjhb
853152376Srwatson	ktrace_enter(td);
8541541Srgrimes	if (ops != KTROP_CLEAR) {
8551541Srgrimes		/*
8561541Srgrimes		 * an operation which requires a file argument.
8571541Srgrimes		 */
858157233Sjhb		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
859157233Sjhb		    uap->fname, td);
86062550Smckusick		flags = FREAD | FWRITE | O_NOFOLLOW;
861170152Skib		error = vn_open(&nd, &flags, 0, NULL);
8623308Sphk		if (error) {
863152376Srwatson			ktrace_exit(td);
8641541Srgrimes			return (error);
8651541Srgrimes		}
866157233Sjhb		vfslocked = NDHASGIANT(&nd);
86754655Seivind		NDFREE(&nd, NDF_ONLY_PNBUF);
8681541Srgrimes		vp = nd.ni_vp;
869175294Sattilio		VOP_UNLOCK(vp, 0);
8701541Srgrimes		if (vp->v_type != VREG) {
87191406Sjhb			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
872157233Sjhb			VFS_UNLOCK_GIANT(vfslocked);
873152376Srwatson			ktrace_exit(td);
8741541Srgrimes			return (EACCES);
8751541Srgrimes		}
876157233Sjhb		VFS_UNLOCK_GIANT(vfslocked);
8771541Srgrimes	}
8781541Srgrimes	/*
87985397Sdillon	 * Clear all uses of the tracefile.
8801541Srgrimes	 */
8811541Srgrimes	if (ops == KTROP_CLEARFILE) {
882166678Smpp		int vrele_count;
883166678Smpp
884166678Smpp		vrele_count = 0;
88574927Sjhb		sx_slock(&allproc_lock);
886166073Sdelphij		FOREACH_PROC_IN_SYSTEM(p) {
88794618Sjhb			PROC_LOCK(p);
888112198Sjhb			if (p->p_tracevp == vp) {
88997993Sjhb				if (ktrcanset(td, p)) {
89097993Sjhb					mtx_lock(&ktrace_mtx);
891214158Sjhb					ktr_freeproc(p, &cred, NULL);
89297993Sjhb					mtx_unlock(&ktrace_mtx);
893166678Smpp					vrele_count++;
894112198Sjhb					crfree(cred);
895166678Smpp				} else
8961541Srgrimes					error = EPERM;
897166678Smpp			}
898166678Smpp			PROC_UNLOCK(p);
8991541Srgrimes		}
90074927Sjhb		sx_sunlock(&allproc_lock);
901166678Smpp		if (vrele_count > 0) {
902166678Smpp			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
903166678Smpp			while (vrele_count-- > 0)
904166678Smpp				vrele(vp);
905166678Smpp			VFS_UNLOCK_GIANT(vfslocked);
906166678Smpp		}
9071541Srgrimes		goto done;
9081541Srgrimes	}
9091541Srgrimes	/*
9101541Srgrimes	 * do it
9111541Srgrimes	 */
912114026Sjhb	sx_slock(&proctree_lock);
9131541Srgrimes	if (uap->pid < 0) {
9141541Srgrimes		/*
9151541Srgrimes		 * by process group
9161541Srgrimes		 */
9171541Srgrimes		pg = pgfind(-uap->pid);
9181541Srgrimes		if (pg == NULL) {
91994861Sjhb			sx_sunlock(&proctree_lock);
9201541Srgrimes			error = ESRCH;
9211541Srgrimes			goto done;
9221541Srgrimes		}
92391140Stanimura		/*
92491140Stanimura		 * ktrops() may call vrele(). Lock pg_members
92594861Sjhb		 * by the proctree_lock rather than pg_mtx.
92691140Stanimura		 */
92791140Stanimura		PGRP_UNLOCK(pg);
928147576Spjd		nfound = 0;
929147576Spjd		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
930147576Spjd			PROC_LOCK(p);
931220390Sjhb			if (p->p_state == PRS_NEW ||
932220390Sjhb			    p_cansee(td, p) != 0) {
933147576Spjd				PROC_UNLOCK(p);
934147576Spjd				continue;
935147576Spjd			}
936147576Spjd			nfound++;
9371541Srgrimes			if (descend)
93894618Sjhb				ret |= ktrsetchildren(td, p, ops, facs, vp);
9398876Srgrimes			else
94094618Sjhb				ret |= ktrops(td, p, ops, facs, vp);
941147576Spjd		}
942147576Spjd		if (nfound == 0) {
943147576Spjd			sx_sunlock(&proctree_lock);
944147576Spjd			error = ESRCH;
945147576Spjd			goto done;
946147576Spjd		}
9471541Srgrimes	} else {
9481541Srgrimes		/*
9491541Srgrimes		 * by pid
9501541Srgrimes		 */
9511541Srgrimes		p = pfind(uap->pid);
952211439Sjhb		if (p == NULL)
9531541Srgrimes			error = ESRCH;
954211439Sjhb		else
955211439Sjhb			error = p_cansee(td, p);
956147520Spjd		if (error) {
957211439Sjhb			if (p != NULL)
958211439Sjhb				PROC_UNLOCK(p);
959147520Spjd			sx_sunlock(&proctree_lock);
960147183Spjd			goto done;
961147520Spjd		}
9621541Srgrimes		if (descend)
96394618Sjhb			ret |= ktrsetchildren(td, p, ops, facs, vp);
9641541Srgrimes		else
96594618Sjhb			ret |= ktrops(td, p, ops, facs, vp);
9661541Srgrimes	}
967114026Sjhb	sx_sunlock(&proctree_lock);
9681541Srgrimes	if (!ret)
9691541Srgrimes		error = EPERM;
9701541Srgrimesdone:
971114026Sjhb	if (vp != NULL) {
972157233Sjhb		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
97391406Sjhb		(void) vn_close(vp, FWRITE, td->td_ucred, td);
974157233Sjhb		VFS_UNLOCK_GIANT(vfslocked);
975114026Sjhb	}
976152376Srwatson	ktrace_exit(td);
9771541Srgrimes	return (error);
978114026Sjhb#else /* !KTRACE */
979114026Sjhb	return (ENOSYS);
980114026Sjhb#endif /* KTRACE */
9811541Srgrimes}
9821541Srgrimes
98318398Sphk/* ARGSUSED */
98418398Sphkint
985225617Skmacysys_utrace(td, uap)
98683366Sjulian	struct thread *td;
98718398Sphk	register struct utrace_args *uap;
98818398Sphk{
98983366Sjulian
99013203Swollman#ifdef KTRACE
99197993Sjhb	struct ktr_request *req;
99299009Salfred	void *cp;
993103237Sjhb	int error;
99418398Sphk
995103237Sjhb	if (!KTRPOINT(td, KTR_USER))
996103237Sjhb		return (0);
99770792Salfred	if (uap->len > KTR_USER_MAXLEN)
99870707Salfred		return (EINVAL);
999111119Simp	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
1000103237Sjhb	error = copyin(uap->addr, cp, uap->len);
1001104230Sphk	if (error) {
1002104230Sphk		free(cp, M_KTRACE);
1003103237Sjhb		return (error);
1004104230Sphk	}
100597993Sjhb	req = ktr_getrequest(KTR_USER);
1006104230Sphk	if (req == NULL) {
1007104230Sphk		free(cp, M_KTRACE);
1008122457Sjkoshy		return (ENOMEM);
1009104230Sphk	}
1010151927Srwatson	req->ktr_buffer = cp;
1011103237Sjhb	req->ktr_header.ktr_len = uap->len;
1012152376Srwatson	ktr_submitrequest(td, req);
101318398Sphk	return (0);
1014114026Sjhb#else /* !KTRACE */
101518398Sphk	return (ENOSYS);
1016114026Sjhb#endif /* KTRACE */
101718398Sphk}
101818398Sphk
101918398Sphk#ifdef KTRACE
102012819Sphkstatic int
102194618Sjhbktrops(td, p, ops, facs, vp)
102294618Sjhb	struct thread *td;
102394618Sjhb	struct proc *p;
10241541Srgrimes	int ops, facs;
10251541Srgrimes	struct vnode *vp;
10261541Srgrimes{
102797993Sjhb	struct vnode *tracevp = NULL;
1028112198Sjhb	struct ucred *tracecred = NULL;
10291541Srgrimes
1030211439Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
103194618Sjhb	if (!ktrcanset(td, p)) {
103294618Sjhb		PROC_UNLOCK(p);
10331541Srgrimes		return (0);
103494618Sjhb	}
1035211439Sjhb	if (p->p_flag & P_WEXIT) {
1036211439Sjhb		/* If the process is exiting, just ignore it. */
1037211439Sjhb		PROC_UNLOCK(p);
1038211439Sjhb		return (1);
1039211439Sjhb	}
104097993Sjhb	mtx_lock(&ktrace_mtx);
10411541Srgrimes	if (ops == KTROP_SET) {
1042112198Sjhb		if (p->p_tracevp != vp) {
10431541Srgrimes			/*
104494618Sjhb			 * if trace file already in use, relinquish below
10451541Srgrimes			 */
1046112198Sjhb			tracevp = p->p_tracevp;
104797993Sjhb			VREF(vp);
1048112198Sjhb			p->p_tracevp = vp;
10491541Srgrimes		}
1050112198Sjhb		if (p->p_tracecred != td->td_ucred) {
1051112198Sjhb			tracecred = p->p_tracecred;
1052112198Sjhb			p->p_tracecred = crhold(td->td_ucred);
1053112198Sjhb		}
10541541Srgrimes		p->p_traceflag |= facs;
1055170587Srwatson		if (priv_check(td, PRIV_KTRACE) == 0)
10561541Srgrimes			p->p_traceflag |= KTRFAC_ROOT;
10578876Srgrimes	} else {
10581541Srgrimes		/* KTROP_CLEAR */
1059214158Sjhb		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
10601541Srgrimes			/* no more tracing */
1061214158Sjhb			ktr_freeproc(p, &tracecred, &tracevp);
10621541Srgrimes	}
106397993Sjhb	mtx_unlock(&ktrace_mtx);
1064219311Sdchagin	if ((p->p_traceflag & KTRFAC_MASK) != 0)
1065219311Sdchagin		ktrprocctor_entered(td, p);
106694618Sjhb	PROC_UNLOCK(p);
1067114026Sjhb	if (tracevp != NULL) {
1068155031Sjeff		int vfslocked;
1069155031Sjeff
1070155031Sjeff		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
107197993Sjhb		vrele(tracevp);
1072155031Sjeff		VFS_UNLOCK_GIANT(vfslocked);
1073114026Sjhb	}
1074112198Sjhb	if (tracecred != NULL)
1075112198Sjhb		crfree(tracecred);
10761541Srgrimes
10771541Srgrimes	return (1);
10781541Srgrimes}
10791541Srgrimes
108012819Sphkstatic int
108194618Sjhbktrsetchildren(td, top, ops, facs, vp)
108294618Sjhb	struct thread *td;
108394618Sjhb	struct proc *top;
10841541Srgrimes	int ops, facs;
10851541Srgrimes	struct vnode *vp;
10861541Srgrimes{
10871541Srgrimes	register struct proc *p;
10881541Srgrimes	register int ret = 0;
10891541Srgrimes
10901541Srgrimes	p = top;
1091211439Sjhb	PROC_LOCK_ASSERT(p, MA_OWNED);
1092114026Sjhb	sx_assert(&proctree_lock, SX_LOCKED);
10931541Srgrimes	for (;;) {
109494618Sjhb		ret |= ktrops(td, p, ops, facs, vp);
10951541Srgrimes		/*
10961541Srgrimes		 * If this process has children, descend to them next,
10971541Srgrimes		 * otherwise do any siblings, and if done with this level,
10981541Srgrimes		 * follow back up the tree (but not past top).
10991541Srgrimes		 */
110053212Sphk		if (!LIST_EMPTY(&p->p_children))
110153212Sphk			p = LIST_FIRST(&p->p_children);
11021541Srgrimes		else for (;;) {
1103114026Sjhb			if (p == top)
11041541Srgrimes				return (ret);
110553212Sphk			if (LIST_NEXT(p, p_sibling)) {
110653212Sphk				p = LIST_NEXT(p, p_sibling);
11071541Srgrimes				break;
11081541Srgrimes			}
110914529Shsu			p = p->p_pptr;
11101541Srgrimes		}
1111211439Sjhb		PROC_LOCK(p);
11121541Srgrimes	}
11131541Srgrimes	/*NOTREACHED*/
11141541Srgrimes}
11151541Srgrimes
111612819Sphkstatic void
1117152376Srwatsonktr_writerequest(struct thread *td, struct ktr_request *req)
111897993Sjhb{
111997993Sjhb	struct ktr_header *kth;
11201541Srgrimes	struct vnode *vp;
112197993Sjhb	struct proc *p;
112297993Sjhb	struct ucred *cred;
11231541Srgrimes	struct uio auio;
112497993Sjhb	struct iovec aiov[3];
112562976Smckusick	struct mount *mp;
112697993Sjhb	int datalen, buflen, vrele_count;
1127157233Sjhb	int error, vfslocked;
11281541Srgrimes
112997993Sjhb	/*
1130152376Srwatson	 * We hold the vnode and credential for use in I/O in case ktrace is
1131152376Srwatson	 * disabled on the process as we write out the request.
1132152376Srwatson	 *
1133152376Srwatson	 * XXXRW: This is not ideal: we could end up performing a write after
1134152376Srwatson	 * the vnode has been closed.
1135152376Srwatson	 */
1136152376Srwatson	mtx_lock(&ktrace_mtx);
1137152376Srwatson	vp = td->td_proc->p_tracevp;
1138152376Srwatson	cred = td->td_proc->p_tracecred;
1139152376Srwatson
1140152376Srwatson	/*
114197993Sjhb	 * If vp is NULL, the vp has been cleared out from under this
1142152376Srwatson	 * request, so just drop it.  Make sure the credential and vnode are
1143152376Srwatson	 * in sync: we should have both or neither.
114497993Sjhb	 */
1145152376Srwatson	if (vp == NULL) {
1146152376Srwatson		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
1147185583Sbz		mtx_unlock(&ktrace_mtx);
11481541Srgrimes		return;
1149152376Srwatson	}
1150185583Sbz	VREF(vp);
1151152376Srwatson	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
1152185583Sbz	crhold(cred);
1153185583Sbz	mtx_unlock(&ktrace_mtx);
1154152376Srwatson
115597993Sjhb	kth = &req->ktr_header;
1156189707Sjhb	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
1157189707Sjhb	    sizeof(data_lengths) / sizeof(data_lengths[0]),
1158189707Sjhb	    ("data_lengths array overflow"));
1159118607Sjhb	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
116097993Sjhb	buflen = kth->ktr_len;
11611541Srgrimes	auio.uio_iov = &aiov[0];
11621541Srgrimes	auio.uio_offset = 0;
11631541Srgrimes	auio.uio_segflg = UIO_SYSSPACE;
11641541Srgrimes	auio.uio_rw = UIO_WRITE;
11651541Srgrimes	aiov[0].iov_base = (caddr_t)kth;
11661541Srgrimes	aiov[0].iov_len = sizeof(struct ktr_header);
11671541Srgrimes	auio.uio_resid = sizeof(struct ktr_header);
11681541Srgrimes	auio.uio_iovcnt = 1;
116997993Sjhb	auio.uio_td = td;
117097993Sjhb	if (datalen != 0) {
117197993Sjhb		aiov[1].iov_base = (caddr_t)&req->ktr_data;
117297993Sjhb		aiov[1].iov_len = datalen;
117397993Sjhb		auio.uio_resid += datalen;
11741541Srgrimes		auio.uio_iovcnt++;
117597993Sjhb		kth->ktr_len += datalen;
11761541Srgrimes	}
117797993Sjhb	if (buflen != 0) {
1178151927Srwatson		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
1179151927Srwatson		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
118097993Sjhb		aiov[auio.uio_iovcnt].iov_len = buflen;
118197993Sjhb		auio.uio_resid += buflen;
118297993Sjhb		auio.uio_iovcnt++;
1183103235Sjhb	}
1184152376Srwatson
1185157233Sjhb	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
118662976Smckusick	vn_start_write(vp, &mp, V_WAIT);
1187175202Sattilio	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
1188101123Srwatson#ifdef MAC
1189172930Srwatson	error = mac_vnode_check_write(cred, NOCRED, vp);
1190101123Srwatson	if (error == 0)
1191101123Srwatson#endif
1192101123Srwatson		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
1193175294Sattilio	VOP_UNLOCK(vp, 0);
119462976Smckusick	vn_finished_write(mp);
1195185583Sbz	crfree(cred);
1196185583Sbz	if (!error) {
1197185583Sbz		vrele(vp);
1198185583Sbz		VFS_UNLOCK_GIANT(vfslocked);
1199185583Sbz		return;
1200185583Sbz	}
1201157233Sjhb	VFS_UNLOCK_GIANT(vfslocked);
1202185583Sbz
12031541Srgrimes	/*
120497993Sjhb	 * If error encountered, give up tracing on this vnode.  We defer
120597993Sjhb	 * all the vrele()'s on the vnode until after we are finished walking
120697993Sjhb	 * the various lists to avoid needlessly holding locks.
1207185583Sbz	 * NB: at this point we still hold the vnode reference that must
1208185583Sbz	 * not go away as we need the valid vnode to compare with. Thus let
1209185583Sbz	 * vrele_count start at 1 and the reference will be freed
1210185583Sbz	 * by the loop at the end after our last use of vp.
12111541Srgrimes	 */
12121541Srgrimes	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
12131541Srgrimes	    error);
1214185583Sbz	vrele_count = 1;
121597993Sjhb	/*
121697993Sjhb	 * First, clear this vnode from being used by any processes in the
121797993Sjhb	 * system.
121897993Sjhb	 * XXX - If one process gets an EPERM writing to the vnode, should
121997993Sjhb	 * we really do this?  Other processes might have suitable
122097993Sjhb	 * credentials for the operation.
122197993Sjhb	 */
1222112198Sjhb	cred = NULL;
122374927Sjhb	sx_slock(&allproc_lock);
1224166073Sdelphij	FOREACH_PROC_IN_SYSTEM(p) {
122597993Sjhb		PROC_LOCK(p);
1226112198Sjhb		if (p->p_tracevp == vp) {
122797993Sjhb			mtx_lock(&ktrace_mtx);
1228214158Sjhb			ktr_freeproc(p, &cred, NULL);
122997993Sjhb			mtx_unlock(&ktrace_mtx);
123097993Sjhb			vrele_count++;
12311541Srgrimes		}
123297993Sjhb		PROC_UNLOCK(p);
1233112198Sjhb		if (cred != NULL) {
1234112198Sjhb			crfree(cred);
1235112198Sjhb			cred = NULL;
1236112198Sjhb		}
12371541Srgrimes	}
123874927Sjhb	sx_sunlock(&allproc_lock);
1239152376Srwatson
1240157233Sjhb	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
124197993Sjhb	while (vrele_count-- > 0)
124297993Sjhb		vrele(vp);
1243157233Sjhb	VFS_UNLOCK_GIANT(vfslocked);
12441541Srgrimes}
12451541Srgrimes
12461541Srgrimes/*
12471541Srgrimes * Return true if caller has permission to set the ktracing state
12481541Srgrimes * of target.  Essentially, the target can't possess any
12491541Srgrimes * more permissions than the caller.  KTRFAC_ROOT signifies that
12508876Srgrimes * root previously set the tracing status on the target process, and
12511541Srgrimes * so, only root may further change it.
12521541Srgrimes */
125312819Sphkstatic int
125494618Sjhbktrcanset(td, targetp)
125594618Sjhb	struct thread *td;
125694618Sjhb	struct proc *targetp;
12571541Srgrimes{
12581541Srgrimes
125994618Sjhb	PROC_LOCK_ASSERT(targetp, MA_OWNED);
126079335Srwatson	if (targetp->p_traceflag & KTRFAC_ROOT &&
1261170587Srwatson	    priv_check(td, PRIV_KTRACE))
126246155Sphk		return (0);
12631541Srgrimes
126496886Sjhb	if (p_candebug(td, targetp) != 0)
126579335Srwatson		return (0);
126679335Srwatson
126779335Srwatson	return (1);
12681541Srgrimes}
12691541Srgrimes
127013203Swollman#endif /* KTRACE */
1271