kern_ktrace.c revision 226269
1139804Simp/*-
21541Srgrimes * Copyright (c) 1989, 1993
3152376Srwatson *	The Regents of the University of California.
4152376Srwatson * Copyright (c) 2005 Robert N. M. Watson
5152376Srwatson * All rights reserved.
61541Srgrimes *
71541Srgrimes * Redistribution and use in source and binary forms, with or without
81541Srgrimes * modification, are permitted provided that the following conditions
91541Srgrimes * are met:
101541Srgrimes * 1. Redistributions of source code must retain the above copyright
111541Srgrimes *    notice, this list of conditions and the following disclaimer.
121541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
131541Srgrimes *    notice, this list of conditions and the following disclaimer in the
141541Srgrimes *    documentation and/or other materials provided with the distribution.
151541Srgrimes * 4. Neither the name of the University nor the names of its contributors
161541Srgrimes *    may be used to endorse or promote products derived from this software
171541Srgrimes *    without specific prior written permission.
181541Srgrimes *
191541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
201541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
211541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
221541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
231541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
241541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
251541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
261541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
271541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
281541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
291541Srgrimes * SUCH DAMAGE.
301541Srgrimes *
311541Srgrimes *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
321541Srgrimes */
331541Srgrimes
34116182Sobrien#include <sys/cdefs.h>
35116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 226269 2011-10-11 20:37:10Z des $");
36116182Sobrien
3713203Swollman#include "opt_ktrace.h"
381541Srgrimes
391541Srgrimes#include <sys/param.h>
402112Swollman#include <sys/systm.h>
4197993Sjhb#include <sys/fcntl.h>
4297993Sjhb#include <sys/kernel.h>
4397993Sjhb#include <sys/kthread.h>
4476166Smarkm#include <sys/lock.h>
4576166Smarkm#include <sys/mutex.h>
4697993Sjhb#include <sys/malloc.h>
47155031Sjeff#include <sys/mount.h>
4897993Sjhb#include <sys/namei.h>
49164033Srwatson#include <sys/priv.h>
501541Srgrimes#include <sys/proc.h>
5197993Sjhb#include <sys/unistd.h>
521541Srgrimes#include <sys/vnode.h>
53176471Sdes#include <sys/socket.h>
54176471Sdes#include <sys/stat.h>
551541Srgrimes#include <sys/ktrace.h>
5674927Sjhb#include <sys/sx.h>
5797993Sjhb#include <sys/sysctl.h>
58219042Sdchagin#include <sys/sysent.h>
591541Srgrimes#include <sys/syslog.h>
6097993Sjhb#include <sys/sysproto.h>
611541Srgrimes
62163606Srwatson#include <security/mac/mac_framework.h>
63163606Srwatson
64152376Srwatson/*
65152376Srwatson * The ktrace facility allows the tracing of certain key events in user space
66152376Srwatson * processes, such as system calls, signal delivery, context switches, and
67152376Srwatson * user generated events using utrace(2).  It works by streaming event
68152376Srwatson * records and data to a vnode associated with the process using the
69152376Srwatson * ktrace(2) system call.  In general, records can be written directly from
70152376Srwatson * the context that generates the event.  One important exception to this is
71152376Srwatson * during a context switch, where sleeping is not permitted.  To handle this
72152376Srwatson * case, trace events are generated using in-kernel ktr_request records, and
73152376Srwatson * then delivered to disk at a convenient moment -- either immediately, the
74152376Srwatson * next traceable event, at system call return, or at process exit.
75152376Srwatson *
76152376Srwatson * When dealing with multiple threads or processes writing to the same event
77152376Srwatson * log, ordering guarantees are weak: specifically, if an event has multiple
78152376Srwatson * records (i.e., system call enter and return), they may be interlaced with
79152376Srwatson * records from another event.  Process and thread ID information is provided
80152376Srwatson * in the record, and user applications can de-interlace events if required.
81152376Srwatson */
82152376Srwatson
8330354Sphkstatic MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
8430309Sphk
8513203Swollman#ifdef KTRACE
8612577Sbde
87219028SnetchildFEATURE(ktrace, "Kernel support for system-call tracing");
88219028Snetchild
8997993Sjhb#ifndef KTRACE_REQUEST_POOL
9097993Sjhb#define	KTRACE_REQUEST_POOL	100
9197993Sjhb#endif
9212819Sphk
/*
 * In-kernel representation of one pending trace record: the on-disk
 * header, an optional malloc'd variable-length payload (ktr_buffer,
 * freed with the request), and a union of the fixed-size per-type
 * payloads.
 */
struct ktr_request {
	struct	ktr_header ktr_header;
	void	*ktr_buffer;		/* optional variable-length data */
	union {
		struct	ktr_proc_ctor ktr_proc_ctor;
		struct	ktr_cap_fail ktr_cap_fail;
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	STAILQ_ENTRY(ktr_request) ktr_list;	/* free list / per-proc queue */
};
10797993Sjhb
/*
 * Size of the fixed-length payload carried by each record type,
 * indexed by the KTR_* record number (see per-entry comments).
 * Types with no fixed payload, or with only variable-length data
 * in ktr_buffer, use 0.
 */
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0,					/* KTR_USER */
	0,					/* KTR_STRUCT */
	0,					/* KTR_SYSCTL */
	sizeof(struct ktr_proc_ctor),		/* KTR_PROCCTOR */
	0,					/* KTR_PROCDTOR */
	sizeof(struct ktr_cap_fail),		/* KTR_CAPFAIL */
};
12397993Sjhb
12497993Sjhbstatic STAILQ_HEAD(, ktr_request) ktr_free;
12597993Sjhb
126141633Sphkstatic SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");
127103234Sjhb
128118607Sjhbstatic u_int ktr_requestpool = KTRACE_REQUEST_POOL;
129103234SjhbTUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);
13097993Sjhb
131118607Sjhbstatic u_int ktr_geniosize = PAGE_SIZE;
132103234SjhbTUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
133103234SjhbSYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
134103234Sjhb    0, "Maximum size of genio event payload");
135103234Sjhb
13697993Sjhbstatic int print_message = 1;
137214158Sjhbstatic struct mtx ktrace_mtx;
138152376Srwatsonstatic struct sx ktrace_sx;
13997993Sjhb
14097993Sjhbstatic void ktrace_init(void *dummy);
14197993Sjhbstatic int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
142219041Sdchaginstatic u_int ktrace_resize_pool(u_int oldsize, u_int newsize);
143219311Sdchaginstatic struct ktr_request *ktr_getrequest_entered(struct thread *td, int type);
14497993Sjhbstatic struct ktr_request *ktr_getrequest(int type);
145152376Srwatsonstatic void ktr_submitrequest(struct thread *td, struct ktr_request *req);
146214158Sjhbstatic void ktr_freeproc(struct proc *p, struct ucred **uc,
147214158Sjhb    struct vnode **vp);
14897993Sjhbstatic void ktr_freerequest(struct ktr_request *req);
149214158Sjhbstatic void ktr_freerequest_locked(struct ktr_request *req);
150152376Srwatsonstatic void ktr_writerequest(struct thread *td, struct ktr_request *req);
15197993Sjhbstatic int ktrcanset(struct thread *,struct proc *);
15297993Sjhbstatic int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
15397993Sjhbstatic int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
154219311Sdchaginstatic void ktrprocctor_entered(struct thread *, struct proc *);
15597993Sjhb
156152376Srwatson/*
157152376Srwatson * ktrace itself generates events, such as context switches, which we do not
158152376Srwatson * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
159152376Srwatson * whether or not it is in a region where tracing of events should be
160152376Srwatson * suppressed.
161152376Srwatson */
/* Mark the thread as inside the ktrace region; must not already be set. */
static void
ktrace_enter(struct thread *td)
{

	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
	td->td_pflags |= TDP_INKTRACE;
}
169152376Srwatson
/* Clear the thread's ktrace-region mark; must currently be set. */
static void
ktrace_exit(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
	td->td_pflags &= ~TDP_INKTRACE;
}
177152376Srwatson
/* Assert that the thread is inside a ktrace region (TDP_INKTRACE set). */
static void
ktrace_assert(struct thread *td)
{

	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
}
184152376Srwatson
185152376Srwatsonstatic void
18697993Sjhbktrace_init(void *dummy)
1871541Srgrimes{
18897993Sjhb	struct ktr_request *req;
18997993Sjhb	int i;
1901541Srgrimes
19197993Sjhb	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
192152376Srwatson	sx_init(&ktrace_sx, "ktrace_sx");
19397993Sjhb	STAILQ_INIT(&ktr_free);
19497993Sjhb	for (i = 0; i < ktr_requestpool; i++) {
195111119Simp		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
19697993Sjhb		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
19797993Sjhb	}
1981541Srgrimes}
19997993SjhbSYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
2001541Srgrimes
/*
 * Sysctl handler for kern.ktrace.request_pool.  Reads report the current
 * pool size; writes attempt to resize the pool to the requested size and
 * return ENOSPC if a requested grow could not be fully satisfied.
 */
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	u_int newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		oldsize = ktr_requestpool;
		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
	if (error)
		return (error);
	td = curthread;
	/* Resize inside a ktrace region so our own events are suppressed. */
	ktrace_enter(td);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(oldsize, wantsize);
	ktrace_exit(td);
	/* Report the pre-resize size back to the caller. */
	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
	if (error)
		return (error);
	if (wantsize > oldsize && newsize < wantsize)
		return (ENOSPC);
	return (0);
}
229103234SjhbSYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
230211102Sgavin    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
231211102Sgavin    "Pool buffer size for ktrace(1)");
23297993Sjhb
/*
 * Resize the free request pool from oldsize to newsize.  A shrink stops
 * early if the free list runs out of entries; a grow allocates the new
 * requests off-lock and splices them in under ktrace_mtx.  Returns the
 * resulting pool size.
 */
static u_int
ktrace_resize_pool(u_int oldsize, u_int newsize)
{
	STAILQ_HEAD(, ktr_request) ktr_new;
	struct ktr_request *req;
	int bound;	/* signed delta: newsize - oldsize */

	print_message = 1;
	bound = newsize - oldsize;
	if (bound == 0)
		return (ktr_requestpool);
	if (bound < 0) {
		mtx_lock(&ktrace_mtx);
		/* Shrink pool down to newsize if possible. */
		while (bound++ < 0) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				break;
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			free(req, M_KTRACE);
		}
	} else {
		/* Grow pool up to newsize. */
		STAILQ_INIT(&ktr_new);
		while (bound-- > 0) {
			/* M_WAITOK malloc is done before taking the mutex. */
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			STAILQ_INSERT_HEAD(&ktr_new, req, ktr_list);
		}
		mtx_lock(&ktrace_mtx);
		STAILQ_CONCAT(&ktr_free, &ktr_new);
		ktr_requestpool += (newsize - oldsize);
	}
	mtx_unlock(&ktrace_mtx);
	return (ktr_requestpool);
}
27097993Sjhb
271198411Sjhb/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
272198411SjhbCTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
273198411Sjhb    (sizeof((struct thread *)NULL)->td_name));
274198411Sjhb
/*
 * Allocate a trace request of the given type from the free pool and fill
 * in its header from td.  Caller must already be inside a ktrace region.
 * Returns NULL if tracing of this type is disabled for the thread or if
 * the pool is exhausted; exhaustion sets KTRFAC_DROP on the process so
 * the next successful record is flagged KTR_DROP.
 */
static struct ktr_request *
ktr_getrequest_entered(struct thread *td, int type)
{
	struct ktr_request *req;
	struct proc *p = td->td_proc;
	int pm;

	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		if (p->p_traceflag & KTRFAC_DROP) {
			/* Tell userspace that earlier records were lost. */
			req->ktr_header.ktr_type |= KTR_DROP;
			p->p_traceflag &= ~KTRFAC_DROP;
		}
		mtx_unlock(&ktrace_mtx);
		/* Header fields below are thread-local; no lock needed. */
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		req->ktr_header.ktr_tid = td->td_tid;
		bcopy(td->td_name, req->ktr_header.ktr_comm,
		    sizeof(req->ktr_header.ktr_comm));
		req->ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
	} else {
		/* Pool exhausted: record the drop and complain only once. */
		p->p_traceflag |= KTRFAC_DROP;
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
	}
	return (req);
}
31397993Sjhb
314219042Sdchaginstatic struct ktr_request *
315219042Sdchaginktr_getrequest(int type)
316219042Sdchagin{
317219042Sdchagin	struct thread *td = curthread;
318219042Sdchagin	struct ktr_request *req;
319219042Sdchagin
320219042Sdchagin	ktrace_enter(td);
321219311Sdchagin	req = ktr_getrequest_entered(td, type);
322219042Sdchagin	if (req == NULL)
323219042Sdchagin		ktrace_exit(td);
324219042Sdchagin
325219042Sdchagin	return (req);
326219042Sdchagin}
327219042Sdchagin
328152376Srwatson/*
329152376Srwatson * Some trace generation environments don't permit direct access to VFS,
330152376Srwatson * such as during a context switch where sleeping is not allowed.  Under these
331152376Srwatson * circumstances, queue a request to the thread to be written asynchronously
332152376Srwatson * later.
333152376Srwatson */
static void
ktr_enqueuerequest(struct thread *td, struct ktr_request *req)
{

	/* Append to the per-process queue; drained later by ktr_drain(). */
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}
34297993Sjhb
343152376Srwatson/*
344152376Srwatson * Drain any pending ktrace records from the per-thread queue to disk.  This
345152376Srwatson * is used both internally before committing other records, and also on
346152376Srwatson * system call return.  We drain all the ones we can find at the time when
347152376Srwatson * drain is requested, but don't keep draining after that as those events
348189707Sjhb * may be approximately "after" the current event.
349152376Srwatson */
35097993Sjhbstatic void
351152376Srwatsonktr_drain(struct thread *td)
352152376Srwatson{
353152376Srwatson	struct ktr_request *queued_req;
354152376Srwatson	STAILQ_HEAD(, ktr_request) local_queue;
355152376Srwatson
356152376Srwatson	ktrace_assert(td);
357152376Srwatson	sx_assert(&ktrace_sx, SX_XLOCKED);
358152376Srwatson
359211512Sjhb	STAILQ_INIT(&local_queue);
360152376Srwatson
361152376Srwatson	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
362152376Srwatson		mtx_lock(&ktrace_mtx);
363152376Srwatson		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
364152376Srwatson		mtx_unlock(&ktrace_mtx);
365152376Srwatson
366152376Srwatson		while ((queued_req = STAILQ_FIRST(&local_queue))) {
367152376Srwatson			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
368152376Srwatson			ktr_writerequest(td, queued_req);
369152376Srwatson			ktr_freerequest(queued_req);
370152376Srwatson		}
371152376Srwatson	}
372152376Srwatson}
373152376Srwatson
374152376Srwatson/*
375152376Srwatson * Submit a trace record for immediate commit to disk -- to be used only
376152376Srwatson * where entering VFS is OK.  First drain any pending records that may have
377152376Srwatson * been cached in the thread.
378152376Srwatson */
static void
ktr_submitrequest(struct thread *td, struct ktr_request *req)
{

	ktrace_assert(td);

	sx_xlock(&ktrace_sx);
	/* Flush queued async records first so this record follows them. */
	ktr_drain(td);
	ktr_writerequest(td, req);
	ktr_freerequest(req);
	sx_xunlock(&ktrace_sx);
	/* Leave the ktrace region entered by ktr_getrequest(). */
	ktrace_exit(td);
}
392152376Srwatson
/* Return a request to the free pool, acquiring ktrace_mtx as needed. */
static void
ktr_freerequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	ktr_freerequest_locked(req);
	mtx_unlock(&ktrace_mtx);
}
401214158Sjhb
/*
 * Return a request to the free pool; caller holds ktrace_mtx.  Any
 * variable-length buffer attached to the request is freed here.
 */
static void
ktr_freerequest_locked(struct ktr_request *req)
{

	mtx_assert(&ktrace_mtx, MA_OWNED);
	if (req->ktr_buffer != NULL)
		free(req->ktr_buffer, M_KTRACE);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
}
41197993Sjhb
412214158Sjhb/*
413214158Sjhb * Disable tracing for a process and release all associated resources.
414214158Sjhb * The caller is responsible for releasing a reference on the returned
415214158Sjhb * vnode and credentials.
416214158Sjhb */
static void
ktr_freeproc(struct proc *p, struct ucred **uc, struct vnode **vp)
{
	struct ktr_request *req;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&ktrace_mtx, MA_OWNED);
	/* Hand the trace credential (and optionally the vnode) to caller. */
	*uc = p->p_tracecred;
	p->p_tracecred = NULL;
	if (vp != NULL)
		*vp = p->p_tracevp;
	p->p_tracevp = NULL;
	p->p_traceflag = 0;
	/* Discard any queued-but-unwritten asynchronous records. */
	while ((req = STAILQ_FIRST(&p->p_ktr)) != NULL) {
		STAILQ_REMOVE_HEAD(&p->p_ktr, ktr_list);
		ktr_freerequest_locked(req);
	}
}
435214158Sjhb
4361549Srgrimesvoid
43797993Sjhbktrsyscall(code, narg, args)
43847955Sdt	int code, narg;
43947955Sdt	register_t args[];
4401541Srgrimes{
44197993Sjhb	struct ktr_request *req;
44297993Sjhb	struct ktr_syscall *ktp;
44397993Sjhb	size_t buflen;
444103233Sjhb	char *buf = NULL;
4451541Srgrimes
446103233Sjhb	buflen = sizeof(register_t) * narg;
447103233Sjhb	if (buflen > 0) {
448111119Simp		buf = malloc(buflen, M_KTRACE, M_WAITOK);
449103233Sjhb		bcopy(args, buf, buflen);
450103233Sjhb	}
45197993Sjhb	req = ktr_getrequest(KTR_SYSCALL);
452104230Sphk	if (req == NULL) {
453104230Sphk		if (buf != NULL)
454104230Sphk			free(buf, M_KTRACE);
45597993Sjhb		return;
456104230Sphk	}
45797993Sjhb	ktp = &req->ktr_data.ktr_syscall;
4581541Srgrimes	ktp->ktr_code = code;
4591541Srgrimes	ktp->ktr_narg = narg;
46097993Sjhb	if (buflen > 0) {
46197993Sjhb		req->ktr_header.ktr_len = buflen;
462151927Srwatson		req->ktr_buffer = buf;
46397993Sjhb	}
464152376Srwatson	ktr_submitrequest(curthread, req);
4651541Srgrimes}
4661541Srgrimes
4671549Srgrimesvoid
46897993Sjhbktrsysret(code, error, retval)
46947955Sdt	int code, error;
47047955Sdt	register_t retval;
4711541Srgrimes{
47297993Sjhb	struct ktr_request *req;
47397993Sjhb	struct ktr_sysret *ktp;
4741541Srgrimes
47597993Sjhb	req = ktr_getrequest(KTR_SYSRET);
47697993Sjhb	if (req == NULL)
47797993Sjhb		return;
47897993Sjhb	ktp = &req->ktr_data.ktr_sysret;
47997993Sjhb	ktp->ktr_code = code;
48097993Sjhb	ktp->ktr_error = error;
48197993Sjhb	ktp->ktr_retval = retval;		/* what about val2 ? */
482152376Srwatson	ktr_submitrequest(curthread, req);
4831541Srgrimes}
4841541Srgrimes
485152376Srwatson/*
486214158Sjhb * When a setuid process execs, disable tracing.
487214158Sjhb *
488214158Sjhb * XXX: We toss any pending asynchronous records.
489152376Srwatson */
void
ktrprocexec(struct proc *p, struct ucred **uc, struct vnode **vp)
{

	PROC_LOCK_ASSERT(p, MA_OWNED);
	/* Caller releases the returned vnode/cred refs (see ktr_freeproc). */
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, uc, vp);
	mtx_unlock(&ktrace_mtx);
}
499214158Sjhb
500214158Sjhb/*
501214158Sjhb * When a process exits, drain per-process asynchronous trace records
502214158Sjhb * and disable tracing.
503214158Sjhb */
void
ktrprocexit(struct thread *td)
{
	struct ktr_request *req;
	struct proc *p;
	struct ucred *cred;
	struct vnode *vp;
	int vfslocked;

	p = td->td_proc;
	if (p->p_traceflag == 0)
		return;

	ktrace_enter(td);
	/* Queue a final KTR_PROCDTOR record before draining. */
	req = ktr_getrequest_entered(td, KTR_PROCDTOR);
	if (req != NULL)
		ktr_enqueuerequest(td, req);
	sx_xlock(&ktrace_sx);
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	/* Detach tracing state under both the proc lock and ktrace_mtx. */
	PROC_LOCK(p);
	mtx_lock(&ktrace_mtx);
	ktr_freeproc(p, &cred, &vp);
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	/* Release the trace vnode and credential outside the locks. */
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (cred != NULL)
		crfree(cred);
	ktrace_exit(td);
}
538152376Srwatson
/*
 * Emit a KTR_PROCCTOR record for process p, capturing its sysentvec
 * flags.  The record is attributed to p's first thread and queued
 * asynchronously.  Caller must be inside a ktrace region.
 */
static void
ktrprocctor_entered(struct thread *td, struct proc *p)
{
	struct ktr_proc_ctor *ktp;
	struct ktr_request *req;
	struct thread *td2;

	ktrace_assert(td);
	td2 = FIRST_THREAD_IN_PROC(p);
	req = ktr_getrequest_entered(td2, KTR_PROCCTOR);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_proc_ctor;
	ktp->sv_flags = p->p_sysent->sv_flags;
	ktr_enqueuerequest(td2, req);
}
555219042Sdchagin
556219042Sdchaginvoid
557219042Sdchaginktrprocctor(struct proc *p)
558219042Sdchagin{
559219042Sdchagin	struct thread *td = curthread;
560219042Sdchagin
561219042Sdchagin	if ((p->p_traceflag & KTRFAC_MASK) == 0)
562219042Sdchagin		return;
563219042Sdchagin
564219042Sdchagin	ktrace_enter(td);
565219311Sdchagin	ktrprocctor_entered(td, p);
566219042Sdchagin	ktrace_exit(td);
567219042Sdchagin}
568219042Sdchagin
569152376Srwatson/*
570214158Sjhb * When a process forks, enable tracing in the new process if needed.
571214158Sjhb */
void
ktrprocfork(struct proc *p1, struct proc *p2)
{

	PROC_LOCK(p1);
	mtx_lock(&ktrace_mtx);
	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		/* Child inherits the parent's trace vnode and credential. */
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
			VREF(p2->p_tracevp);
			KASSERT(p1->p_tracecred != NULL,
			    ("ktrace vnode with no cred"));
			p2->p_tracecred = crhold(p1->p_tracecred);
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p1);

	/* Record the child's creation in its own trace stream. */
	ktrprocctor(p2);
}
593214158Sjhb
594214158Sjhb/*
595152376Srwatson * When a thread returns, drain any asynchronous records generated by the
596152376Srwatson * system call.
597152376Srwatson */
void
ktruserret(struct thread *td)
{

	ktrace_enter(td);
	sx_xlock(&ktrace_sx);
	/* Write out any records queued during the system call. */
	ktr_drain(td);
	sx_xunlock(&ktrace_sx);
	ktrace_exit(td);
}
608152376Srwatson
609152376Srwatsonvoid
61097993Sjhbktrnamei(path)
6111541Srgrimes	char *path;
6121541Srgrimes{
61397993Sjhb	struct ktr_request *req;
61497993Sjhb	int namelen;
615103233Sjhb	char *buf = NULL;
6161541Srgrimes
617103233Sjhb	namelen = strlen(path);
618103233Sjhb	if (namelen > 0) {
619111119Simp		buf = malloc(namelen, M_KTRACE, M_WAITOK);
620103233Sjhb		bcopy(path, buf, namelen);
621103233Sjhb	}
62297993Sjhb	req = ktr_getrequest(KTR_NAMEI);
623104230Sphk	if (req == NULL) {
624104230Sphk		if (buf != NULL)
625104230Sphk			free(buf, M_KTRACE);
62697993Sjhb		return;
627104230Sphk	}
62897993Sjhb	if (namelen > 0) {
62997993Sjhb		req->ktr_header.ktr_len = namelen;
630151927Srwatson		req->ktr_buffer = buf;
63197993Sjhb	}
632152376Srwatson	ktr_submitrequest(curthread, req);
6331541Srgrimes}
6341541Srgrimes
6351549Srgrimesvoid
636189707Sjhbktrsysctl(name, namelen)
637189707Sjhb	int *name;
638189707Sjhb	u_int namelen;
639189707Sjhb{
640189707Sjhb	struct ktr_request *req;
641189707Sjhb	u_int mib[CTL_MAXNAME + 2];
642189707Sjhb	char *mibname;
643189707Sjhb	size_t mibnamelen;
644189707Sjhb	int error;
645189707Sjhb
646189707Sjhb	/* Lookup name of mib. */
647189707Sjhb	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
648189707Sjhb	mib[0] = 0;
649189707Sjhb	mib[1] = 1;
650189707Sjhb	bcopy(name, mib + 2, namelen * sizeof(*name));
651189707Sjhb	mibnamelen = 128;
652189707Sjhb	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
653189707Sjhb	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
654189707Sjhb	    NULL, 0, &mibnamelen, 0);
655189707Sjhb	if (error) {
656189707Sjhb		free(mibname, M_KTRACE);
657189707Sjhb		return;
658189707Sjhb	}
659189707Sjhb	req = ktr_getrequest(KTR_SYSCTL);
660189707Sjhb	if (req == NULL) {
661189707Sjhb		free(mibname, M_KTRACE);
662189707Sjhb		return;
663189707Sjhb	}
664189707Sjhb	req->ktr_header.ktr_len = mibnamelen;
665189707Sjhb	req->ktr_buffer = mibname;
666189707Sjhb	ktr_submitrequest(curthread, req);
667189707Sjhb}
668189707Sjhb
669189707Sjhbvoid
67097993Sjhbktrgenio(fd, rw, uio, error)
6711541Srgrimes	int fd;
6721541Srgrimes	enum uio_rw rw;
67362378Sgreen	struct uio *uio;
67462378Sgreen	int error;
6751541Srgrimes{
67697993Sjhb	struct ktr_request *req;
67797993Sjhb	struct ktr_genio *ktg;
678103235Sjhb	int datalen;
679103235Sjhb	char *buf;
6808876Srgrimes
681131897Sphk	if (error) {
682131897Sphk		free(uio, M_IOV);
6831541Srgrimes		return;
684131897Sphk	}
685103235Sjhb	uio->uio_offset = 0;
686103235Sjhb	uio->uio_rw = UIO_WRITE;
687103235Sjhb	datalen = imin(uio->uio_resid, ktr_geniosize);
688111119Simp	buf = malloc(datalen, M_KTRACE, M_WAITOK);
689131897Sphk	error = uiomove(buf, datalen, uio);
690131897Sphk	free(uio, M_IOV);
691131897Sphk	if (error) {
692103235Sjhb		free(buf, M_KTRACE);
693103235Sjhb		return;
694103235Sjhb	}
69597993Sjhb	req = ktr_getrequest(KTR_GENIO);
696103235Sjhb	if (req == NULL) {
697103235Sjhb		free(buf, M_KTRACE);
69897993Sjhb		return;
699103235Sjhb	}
70097993Sjhb	ktg = &req->ktr_data.ktr_genio;
70197993Sjhb	ktg->ktr_fd = fd;
70297993Sjhb	ktg->ktr_rw = rw;
703103235Sjhb	req->ktr_header.ktr_len = datalen;
704151927Srwatson	req->ktr_buffer = buf;
705152376Srwatson	ktr_submitrequest(curthread, req);
7061541Srgrimes}
7071541Srgrimes
7081549Srgrimesvoid
70997993Sjhbktrpsig(sig, action, mask, code)
71051941Smarcel	int sig;
7111541Srgrimes	sig_t action;
71251791Smarcel	sigset_t *mask;
71351941Smarcel	int code;
7141541Srgrimes{
715219311Sdchagin	struct thread *td = curthread;
71697993Sjhb	struct ktr_request *req;
71797993Sjhb	struct ktr_psig	*kp;
7181541Srgrimes
71997993Sjhb	req = ktr_getrequest(KTR_PSIG);
72097993Sjhb	if (req == NULL)
72197993Sjhb		return;
72297993Sjhb	kp = &req->ktr_data.ktr_psig;
72397993Sjhb	kp->signo = (char)sig;
72497993Sjhb	kp->action = action;
72597993Sjhb	kp->mask = *mask;
72697993Sjhb	kp->code = code;
727219311Sdchagin	ktr_enqueuerequest(td, req);
728219311Sdchagin	ktrace_exit(td);
7291541Srgrimes}
7301541Srgrimes
7311549Srgrimesvoid
73297993Sjhbktrcsw(out, user)
7331541Srgrimes	int out, user;
7341541Srgrimes{
735219311Sdchagin	struct thread *td = curthread;
73697993Sjhb	struct ktr_request *req;
73797993Sjhb	struct ktr_csw *kc;
7381541Srgrimes
73997993Sjhb	req = ktr_getrequest(KTR_CSW);
74097993Sjhb	if (req == NULL)
74197993Sjhb		return;
74297993Sjhb	kc = &req->ktr_data.ktr_csw;
74397993Sjhb	kc->out = out;
74497993Sjhb	kc->user = user;
745219311Sdchagin	ktr_enqueuerequest(td, req);
746219311Sdchagin	ktrace_exit(td);
7471541Srgrimes}
748176471Sdes
749176471Sdesvoid
750210064Sjhbktrstruct(name, data, datalen)
751176471Sdes	const char *name;
752176471Sdes	void *data;
753176471Sdes	size_t datalen;
754176471Sdes{
755176471Sdes	struct ktr_request *req;
756176471Sdes	char *buf = NULL;
757176471Sdes	size_t buflen;
758176471Sdes
759176471Sdes	if (!data)
760176471Sdes		datalen = 0;
761210064Sjhb	buflen = strlen(name) + 1 + datalen;
762176471Sdes	buf = malloc(buflen, M_KTRACE, M_WAITOK);
763210064Sjhb	strcpy(buf, name);
764210064Sjhb	bcopy(data, buf + strlen(name) + 1, datalen);
765176471Sdes	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
766176471Sdes		free(buf, M_KTRACE);
767176471Sdes		return;
768176471Sdes	}
769176471Sdes	req->ktr_buffer = buf;
770176471Sdes	req->ktr_header.ktr_len = buflen;
771176471Sdes	ktr_submitrequest(curthread, req);
772176471Sdes}
773226269Sdes
774226269Sdesvoid
775226269Sdesktrcapfail(needed, held)
776226269Sdes	cap_rights_t needed;
777226269Sdes	cap_rights_t held;
778226269Sdes{
779226269Sdes	struct thread *td = curthread;
780226269Sdes	struct ktr_request *req;
781226269Sdes	struct ktr_cap_fail *kcf;
782226269Sdes
783226269Sdes	req = ktr_getrequest(KTR_CAPFAIL);
784226269Sdes	if (req == NULL)
785226269Sdes		return;
786226269Sdes	kcf = &req->ktr_data.ktr_cap_fail;
787226269Sdes	kcf->cap_needed = needed;
788226269Sdes	kcf->cap_held = held;
789226269Sdes	ktr_enqueuerequest(td, req);
790226269Sdes	ktrace_exit(td);
791226269Sdes}
792114026Sjhb#endif /* KTRACE */
7931541Srgrimes
7941541Srgrimes/* Interface and common routines */
7951541Srgrimes
79612221Sbde#ifndef _SYS_SYSPROTO_H_
7971541Srgrimesstruct ktrace_args {
7981541Srgrimes	char	*fname;
7991541Srgrimes	int	ops;
8001541Srgrimes	int	facs;
8011541Srgrimes	int	pid;
8021541Srgrimes};
80312221Sbde#endif
/*
 * ktrace(2) system call: enable, disable, or redirect tracing for a
 * process, a process tree, or a process group, or detach a trace file
 * from every process using it (KTROP_CLEARFILE).  Returns 0 on success
 * or an errno value.
 */
/* ARGSUSED */
int
sys_ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int nfound, ret = 0;
	int flags, error = 0, vfslocked;
	struct nameidata nd;
	struct ucred *cred;

	/*
	 * Need something to (un)trace.
	 */
	if (ops != KTROP_CLEARFILE && facs == 0)
		return (EINVAL);

	ktrace_enter(td);
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
		    uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0, NULL);
		if (error) {
			ktrace_exit(td);
			return (error);
		}
		vfslocked = NDHASGIANT(&nd);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0);
		/* Only regular files may serve as trace logs. */
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			VFS_UNLOCK_GIANT(vfslocked);
			ktrace_exit(td);
			return (EACCES);
		}
		VFS_UNLOCK_GIANT(vfslocked);
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		int vrele_count;

		vrele_count = 0;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_tracevp == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					ktr_freeproc(p, &cred, NULL);
					mtx_unlock(&ktrace_mtx);
					vrele_count++;
					crfree(cred);
				} else
					error = EPERM;
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		/*
		 * Drop the vnode references accumulated above only after
		 * allproc_lock has been released.
		 */
		if (vrele_count > 0) {
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			while (vrele_count-- > 0)
				vrele(vp);
			VFS_UNLOCK_GIANT(vfslocked);
		}
		goto done;
	}
	/*
	 * do it
	 */
	sx_slock(&proctree_lock);
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		nfound = 0;
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			/* Skip embryonic processes and those we may not see. */
			if (p->p_state == PRS_NEW ||
			    p_cansee(td, p) != 0) {
				PROC_UNLOCK(p);
				continue;
			}
			nfound++;
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		}
		if (nfound == 0) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL)
			error = ESRCH;
		else
			error = p_cansee(td, p);
		if (error) {
			/* pfind() returns the process locked on success. */
			if (p != NULL)
				PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			goto done;
		}
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	sx_sunlock(&proctree_lock);
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL) {
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	ktrace_exit(td);
	return (error);
#else /* !KTRACE */
	return (ENOSYS);
#endif /* KTRACE */
}
9571541Srgrimes
95818398Sphk/* ARGSUSED */
95918398Sphkint
960225617Skmacysys_utrace(td, uap)
96183366Sjulian	struct thread *td;
96218398Sphk	register struct utrace_args *uap;
96318398Sphk{
96483366Sjulian
96513203Swollman#ifdef KTRACE
96697993Sjhb	struct ktr_request *req;
96799009Salfred	void *cp;
968103237Sjhb	int error;
96918398Sphk
970103237Sjhb	if (!KTRPOINT(td, KTR_USER))
971103237Sjhb		return (0);
97270792Salfred	if (uap->len > KTR_USER_MAXLEN)
97370707Salfred		return (EINVAL);
974111119Simp	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
975103237Sjhb	error = copyin(uap->addr, cp, uap->len);
976104230Sphk	if (error) {
977104230Sphk		free(cp, M_KTRACE);
978103237Sjhb		return (error);
979104230Sphk	}
98097993Sjhb	req = ktr_getrequest(KTR_USER);
981104230Sphk	if (req == NULL) {
982104230Sphk		free(cp, M_KTRACE);
983122457Sjkoshy		return (ENOMEM);
984104230Sphk	}
985151927Srwatson	req->ktr_buffer = cp;
986103237Sjhb	req->ktr_header.ktr_len = uap->len;
987152376Srwatson	ktr_submitrequest(td, req);
98818398Sphk	return (0);
989114026Sjhb#else /* !KTRACE */
99018398Sphk	return (ENOSYS);
991114026Sjhb#endif /* KTRACE */
99218398Sphk}
99318398Sphk
99418398Sphk#ifdef KTRACE
/*
 * Apply a single trace operation (KTROP_SET or KTROP_CLEAR) for the
 * facilities in "facs" to one process.  Called with the process locked;
 * the process lock is always dropped before returning.  Returns 1 if the
 * operation was applied (or the process is exiting), 0 if the caller may
 * not change the target's trace state.
 */
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		/* Switch to the caller's credential for future writes. */
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0)
			/* no more tracing */
			ktr_freeproc(p, &tracecred, &tracevp);
	}
	mtx_unlock(&ktrace_mtx);
	if ((p->p_traceflag & KTRFAC_MASK) != 0)
		ktrprocctor_entered(td, p);
	PROC_UNLOCK(p);
	/* Release the displaced vnode/credential outside of the locks. */
	if (tracevp != NULL) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}
10541541Srgrimes
/*
 * Apply a trace operation to "top" and all of its descendants via a
 * depth-first walk of the process tree.  Called with "top" locked and
 * the proctree lock held; each process lock is dropped by ktrops().
 * Returns nonzero if at least one process was successfully updated.
 */
static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		/* Lock the next victim; ktrops() will unlock it. */
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}
10901541Srgrimes
/*
 * Write one ktrace request to the process's trace vnode.  The vnode and
 * credential are sampled under ktrace_mtx and referenced so that the
 * I/O can proceed even if tracing is disabled on the process while we
 * write.  If the write fails, tracing is disabled for every process
 * currently using this vnode.
 */
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error, vfslocked;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	/*
	 * Assemble the record: header, then fixed-size per-type data,
	 * then any variable-length buffer, as a single writev-style uio.
	 */
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	if (!error) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with. Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			ktr_freeproc(p, &cred, NULL);
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		/* Drop the displaced credential outside the proc lock. */
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	while (vrele_count-- > 0)
		vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);
}
12201541Srgrimes
12211541Srgrimes/*
12221541Srgrimes * Return true if caller has permission to set the ktracing state
12231541Srgrimes * of target.  Essentially, the target can't possess any
12241541Srgrimes * more permissions than the caller.  KTRFAC_ROOT signifies that
12258876Srgrimes * root previously set the tracing status on the target process, and
12261541Srgrimes * so, only root may further change it.
12271541Srgrimes */
122812819Sphkstatic int
122994618Sjhbktrcanset(td, targetp)
123094618Sjhb	struct thread *td;
123194618Sjhb	struct proc *targetp;
12321541Srgrimes{
12331541Srgrimes
123494618Sjhb	PROC_LOCK_ASSERT(targetp, MA_OWNED);
123579335Srwatson	if (targetp->p_traceflag & KTRFAC_ROOT &&
1236170587Srwatson	    priv_check(td, PRIV_KTRACE))
123746155Sphk		return (0);
12381541Srgrimes
123996886Sjhb	if (p_candebug(td, targetp) != 0)
124079335Srwatson		return (0);
124179335Srwatson
124279335Srwatson	return (1);
12431541Srgrimes}
12441541Srgrimes
124513203Swollman#endif /* KTRACE */
1246