/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 226495 2011-10-18 07:28:58Z des $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>

/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
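
/*
 * Illustrative sketch only (not part of the original file): from userland,
 * tracing is normally driven through ktrace(2) (see also ktrace(1) and
 * kdump(1)), roughly along the lines of:
 *
 *	ktrace("ktrace.out", KTROP_SET,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_NAMEI, getpid());
 *	...
 *	ktrace("ktrace.out", KTROP_CLEAR,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_NAMEI, getpid());
 */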

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

FEATURE(ktrace, "Kernel support for system-call tracing");

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

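/*
 * Bytes of fixed-size payload kept in ktr_data for each KTR_* record type;
 * any variable-length data travels separately in ktr_buffer.
 */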
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0,					/* KTR_USER */
	0,					/* KTR_STRUCT */
	0,					/* KTR_SYSCTL */
	sizeof(struct ktr_proc_ctor),		/* KTR_PROCCTOR */
	0,					/* KTR_PROCDTOR */
	sizeof(struct ktr_cap_fail),		/* KTR_CAPFAIL */
};

static STAILQ_HEAD(, ktr_request) ktr_free;

static SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static u_int ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static u_int ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
static struct mtx ktrace_mtx;
static struct sx ktrace_sx;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
static struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct thread *td, struct ktr_request *req);
static void ktr_freeproc(struct proc *p, struct ucred **uc,
    struct vnode **vp);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_freerequest_locked(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
static void ktrprocctor_entered(struct thread *, struct proc *);

/*
 * ktrace itself generates events, such as context switches, which we do not
 * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
 * whether or not it is in a region where tracing of events should be
 * suppressed.
 */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}

static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}

static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sx_init(&ktrace_sx, "ktrace_sx");
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
    "Pool buffer size for ktrace(1)");

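/*
 * Resize the request pool to newsize entries: allocations for growth are
 * done outside of ktrace_mtx and spliced in under it, while shrinking
 * frees entries under the mutex.  Returns the resulting pool size.
 */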
static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}

/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
CTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
    (sizeof((struct thread *)NULL)->td_name));

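/*
 * Pull a request from the free pool and fill in the common header fields.
 * Returns NULL if this event type is not being traced for the process; if
 * the pool is exhausted, KTRFAC_DROP is also set so that the loss shows up
 * in a later record.
 */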
static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct thread *td = curthread;
	struct ktr_request *req;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, type);
	if (req == NULL)
		ktrace_exit(td);

	return (req);
}

/*
 * Some trace generation environments don't permit direct access to VFS,
 * such as during a context switch where sleeping is not allowed.  Under these
 * circumstances, queue a request to the thread to be written asynchronously
 * later.
 */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

/*
 * Drain any pending ktrace records from the per-thread queue to disk.  This
 * is used both internally before committing other records, and also on
 * system call return.  We drain all the ones we can find at the time when
 * drain is requested, but don't keep draining after that as those events
 * may be approximately "after" the current event.
 */
static void
ktr_drain(struct thread *td)
{
	struct ktr_request *queued_req;
	STAILQ_HEAD(, ktr_request) local_queue;

	ktrace_assert(td);
	sx_assert(&ktrace_sx, SX_XLOCKED);

	STAILQ_INIT(&local_queue);

	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
		mtx_unlock(&ktrace_mtx);

		while ((queued_req = STAILQ_FIRST(&local_queue))) {
			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
			ktr_writerequest(td, queued_req);
			ktr_freerequest(queued_req);
		}
	}
}

/*
 * Submit a trace record for immediate commit to disk -- to be used only
 * where entering VFS is OK.  First drain any pending records that may have
 * been cached in the thread.
 */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}

/*
 * Disable tracing for a process and release all associated resources.
 * The caller is responsible for releasing a reference on the returned
 * vnode and credentials.
 */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}

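/*
 * Trace a system call entry: record the syscall number and a copy of its
 * arguments.
 */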
void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

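/*
 * Trace a system call return: record the syscall number, the error and the
 * primary return value.
 */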
void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;		/* what about val2 ? */
	ktr_submitrequest(curthread, req);
}

/*
 * When a setuid process execs, disable tracing.
 *
 * XXX: We toss any pending asynchronous records.
 */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}

/*
 * When a process exits, drain per-process asynchronous trace records
 * and disable tracing.
 */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;
	int vfslocked;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}

static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}

void
ktrprocctor(struct proc *p)
{
	struct thread *td = curthread;

	if ((p->p_traceflag & KTRFAC_MASK) == 0)
		return;

	ktrace_enter(td);
	ktrprocctor_entered(td, p);
	ktrace_exit(td);
}

/*
 * When a process forks, enable tracing in the new process if needed.
 */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	ktrprocctor(p2);
}

/*
 * When a thread returns, drain any asynchronous records generated by the
 * system call.
 */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}

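/*
 * Trace a pathname looked up by namei().
 */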
void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_buffer = buf;
	}
	ktr_submitrequest(curthread, req);
}

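/*
 * Trace a sysctl access by logging the dotted name of the MIB involved.
 */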
void
ktrsysctl(name, namelen)
	int *name;
	u_int namelen;
{
	struct ktr_request *req;
	u_int mib[CTL_MAXNAME + 2];
	char *mibname;
	size_t mibnamelen;
	int error;

	/* Lookup name of mib. */
	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
	mib[0] = 0;
	mib[1] = 1;
	bcopy(name, mib + 2, namelen * sizeof(*name));
	mibnamelen = 128;
	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
	    NULL, 0, &mibnamelen, 0);
	if (error) {
		free(mibname, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_SYSCTL);
	if (req == NULL) {
		free(mibname, M_KTRACE);
		return;
	}
	req->ktr_header.ktr_len = mibnamelen;
	req->ktr_buffer = mibname;
	ktr_submitrequest(curthread, req);
}

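/*
 * Trace general file I/O: copy up to ktr_geniosize bytes of the transferred
 * data.  Consumes and frees the uio supplied by the caller.
 */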
void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error) {
		free(uio, M_IOV);
		return;
	}
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = imin(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	error = uiomove(buf, datalen, uio);
	free(uio, M_IOV);
	if (error) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_buffer = buf;
	ktr_submitrequest(curthread, req);
}

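/*
 * Trace the delivery of a signal.  The record is queued on the process and
 * flushed later (for example at system call return) rather than written
 * immediately.
 */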
void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

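/*
 * Trace a context switch.  Sleeping is not permitted here, so the record is
 * queued and written out at the next convenient point.
 */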
void
ktrcsw(out, user)
	int out, user;
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}

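/*
 * Trace a kernel structure (e.g. a struct stat or sockaddr): the payload is
 * the NUL-terminated structure name followed by the raw structure bytes.
 */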
void
ktrstruct(name, data, datalen)
	const char *name;
	void *data;
	size_t datalen;
{
	struct ktr_request *req;
	char *buf = NULL;
	size_t buflen;

	if (!data)
		datalen = 0;
	buflen = strlen(name) + 1 + datalen;
	buf = malloc(buflen, M_KTRACE, M_WAITOK);
	strcpy(buf, name);
	bcopy(data, buf + strlen(name) + 1, datalen);
	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	req->ktr_buffer = buf;
	req->ktr_header.ktr_len = buflen;
	ktr_submitrequest(curthread, req);
}

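/*
 * Trace a Capsicum capability check failure: record the kind of failure
 * along with the rights that were needed and those actually held.
 */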
void
ktrcapfail(type, needed, held)
	enum ktr_cap_fail_type type;
	cap_rights_t needed;
	cap_rights_t held;
{
	struct thread *td = curthread;
	struct ktr_request *req;
	struct ktr_cap_fail *kcf;

	req = ktr_getrequest(KTR_CAPFAIL);
	if (req == NULL)
		return;
	kcf = &req->ktr_data.ktr_cap_fail;
	kcf->cap_type = type;
	kcf->cap_needed = needed;
	kcf->cap_held = held;
	ktr_enqueuerequest(td, req);
	ktrace_exit(td);
}
#endif /* KTRACE */

/* Interface and common routines */

#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
sys_ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0, vfslocked;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
		    uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		vfslocked = NDHASGIANT(&nd);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			VFS_UNLOCK_GIANT(vfslocked);
			ktrace_exit(td);
			return (EACCES);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		if (vrele_count > 0) {
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			while (vrele_count-- > 0)
				vrele(vp);
			VFS_UNLOCK_GIANT(vfslocked);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

/* ARGSUSED */
int
sys_utrace(td, uap)
	struct thread *td;
	register struct utrace_args *uap;
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;
	int error;

	if (!KTRPOINT(td, KTR_USER))
		return (0);
	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	error = copyin(uap->addr, cp, uap->len);
	if (error) {
		free(cp, M_KTRACE);
		return (error);
	}
	req = ktr_getrequest(KTR_USER);
	if (req == NULL) {
		free(cp, M_KTRACE);
		return (ENOMEM);
	}
	req->ktr_buffer = cp;
	req->ktr_header.ktr_len = uap->len;
	ktr_submitrequest(td, req);
	return (0);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}

#ifdef KTRACE
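/*
 * Apply a single KTROP_SET/KTROP_CLEAR operation to one process.  Called
 * with the process locked; drops the lock.  Returns non-zero if the caller
 * was permitted to act on the process.
 */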
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	if (tracevp != NULL) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}

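/*
 * Apply ktrops() to a process and all of its current descendants.
 */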
static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}

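/*
 * Write a single request (header, fixed-size payload and optional buffer)
 * to the process's trace vnode.  On a write error, tracing to that vnode is
 * disabled for every process in the system.
 */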
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error, vfslocked;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with. Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	while (vrele_count-- > 0)
		vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    priv_check(td, PRIV_KTRACE))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */