/* kern_ktrace.c — FreeBSD revision 211512 */
/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.
 * Copyright (c) 2005 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 */
331541Srgrimes
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_ktrace.c 211512 2010-08-19 16:38:58Z jhb $");

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/ktrace.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

#include <security/mac/mac_framework.h>
62163606Srwatson
/*
 * The ktrace facility allows the tracing of certain key events in user space
 * processes, such as system calls, signal delivery, context switches, and
 * user generated events using utrace(2).  It works by streaming event
 * records and data to a vnode associated with the process using the
 * ktrace(2) system call.  In general, records can be written directly from
 * the context that generates the event.  One important exception to this is
 * during a context switch, where sleeping is not permitted.  To handle this
 * case, trace events are generated using in-kernel ktr_request records, and
 * then delivered to disk at a convenient moment -- either immediately, the
 * next traceable event, at system call return, or at process exit.
 *
 * When dealing with multiple threads or processes writing to the same event
 * log, ordering guarantees are weak: specifically, if an event has multiple
 * records (i.e., system call enter and return), they may be interlaced with
 * records from another event.  Process and thread ID information is provided
 * in the record, and user applications can de-interlace events if required.
 */
81152376Srwatson
8230354Sphkstatic MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
8330309Sphk
8413203Swollman#ifdef KTRACE
8512577Sbde
8697993Sjhb#ifndef KTRACE_REQUEST_POOL
8797993Sjhb#define	KTRACE_REQUEST_POOL	100
8897993Sjhb#endif
8912819Sphk
9097993Sjhbstruct ktr_request {
9197993Sjhb	struct	ktr_header ktr_header;
92151927Srwatson	void	*ktr_buffer;
9397993Sjhb	union {
9497993Sjhb		struct	ktr_syscall ktr_syscall;
9597993Sjhb		struct	ktr_sysret ktr_sysret;
9697993Sjhb		struct	ktr_genio ktr_genio;
9797993Sjhb		struct	ktr_psig ktr_psig;
9897993Sjhb		struct	ktr_csw ktr_csw;
9997993Sjhb	} ktr_data;
10097993Sjhb	STAILQ_ENTRY(ktr_request) ktr_list;
10197993Sjhb};
10297993Sjhb
10397993Sjhbstatic int data_lengths[] = {
10497993Sjhb	0,					/* none */
10597993Sjhb	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
10697993Sjhb	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
10797993Sjhb	0,					/* KTR_NAMEI */
10897993Sjhb	sizeof(struct ktr_genio),		/* KTR_GENIO */
10997993Sjhb	sizeof(struct ktr_psig),		/* KTR_PSIG */
110211512Sjhb	sizeof(struct ktr_csw),		/* KTR_CSW */
111176471Sdes	0,					/* KTR_USER */
112176471Sdes	0,					/* KTR_STRUCT */
113189707Sjhb	0,					/* KTR_SYSCTL */
11497993Sjhb};
11597993Sjhb
11697993Sjhbstatic STAILQ_HEAD(, ktr_request) ktr_free;
11797993Sjhb
118141633Sphkstatic SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");
119103234Sjhb
120118607Sjhbstatic u_int ktr_requestpool = KTRACE_REQUEST_POOL;
121103234SjhbTUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);
12297993Sjhb
123118607Sjhbstatic u_int ktr_geniosize = PAGE_SIZE;
124103234SjhbTUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
125103234SjhbSYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
126103234Sjhb    0, "Maximum size of genio event payload");
127103234Sjhb
12897993Sjhbstatic int print_message = 1;
12997993Sjhbstruct mtx ktrace_mtx;
130152376Srwatsonstatic struct sx ktrace_sx;
13197993Sjhb
13297993Sjhbstatic void ktrace_init(void *dummy);
13397993Sjhbstatic int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
134118607Sjhbstatic u_int ktrace_resize_pool(u_int newsize);
13597993Sjhbstatic struct ktr_request *ktr_getrequest(int type);
136152376Srwatsonstatic void ktr_submitrequest(struct thread *td, struct ktr_request *req);
13797993Sjhbstatic void ktr_freerequest(struct ktr_request *req);
138152376Srwatsonstatic void ktr_writerequest(struct thread *td, struct ktr_request *req);
13997993Sjhbstatic int ktrcanset(struct thread *,struct proc *);
14097993Sjhbstatic int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
14197993Sjhbstatic int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
14297993Sjhb
143152376Srwatson/*
144152376Srwatson * ktrace itself generates events, such as context switches, which we do not
145152376Srwatson * wish to trace.  Maintain a flag, TDP_INKTRACE, on each thread to determine
146152376Srwatson * whether or not it is in a region where tracing of events should be
147152376Srwatson * suppressed.
148152376Srwatson */
14997993Sjhbstatic void
150152376Srwatsonktrace_enter(struct thread *td)
151152376Srwatson{
152152376Srwatson
153152376Srwatson	KASSERT(!(td->td_pflags & TDP_INKTRACE), ("ktrace_enter: flag set"));
154152376Srwatson	td->td_pflags |= TDP_INKTRACE;
155152376Srwatson}
156152376Srwatson
157152376Srwatsonstatic void
158152376Srwatsonktrace_exit(struct thread *td)
159152376Srwatson{
160152376Srwatson
161152376Srwatson	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_exit: flag not set"));
162152376Srwatson	td->td_pflags &= ~TDP_INKTRACE;
163152376Srwatson}
164152376Srwatson
165152376Srwatsonstatic void
166152376Srwatsonktrace_assert(struct thread *td)
167152376Srwatson{
168152376Srwatson
169152376Srwatson	KASSERT(td->td_pflags & TDP_INKTRACE, ("ktrace_assert: flag not set"));
170152376Srwatson}
171152376Srwatson
172152376Srwatsonstatic void
17397993Sjhbktrace_init(void *dummy)
1741541Srgrimes{
17597993Sjhb	struct ktr_request *req;
17697993Sjhb	int i;
1771541Srgrimes
17897993Sjhb	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
179152376Srwatson	sx_init(&ktrace_sx, "ktrace_sx");
18097993Sjhb	STAILQ_INIT(&ktr_free);
18197993Sjhb	for (i = 0; i < ktr_requestpool; i++) {
182111119Simp		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
18397993Sjhb		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
18497993Sjhb	}
1851541Srgrimes}
18697993SjhbSYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
1871541Srgrimes
18897993Sjhbstatic int
18997993Sjhbsysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
19097993Sjhb{
19197993Sjhb	struct thread *td;
192118607Sjhb	u_int newsize, oldsize, wantsize;
19397993Sjhb	int error;
19497993Sjhb
19597993Sjhb	/* Handle easy read-only case first to avoid warnings from GCC. */
19697993Sjhb	if (!req->newptr) {
19797993Sjhb		mtx_lock(&ktrace_mtx);
19897993Sjhb		oldsize = ktr_requestpool;
19997993Sjhb		mtx_unlock(&ktrace_mtx);
200118607Sjhb		return (SYSCTL_OUT(req, &oldsize, sizeof(u_int)));
20197993Sjhb	}
20297993Sjhb
203118607Sjhb	error = SYSCTL_IN(req, &wantsize, sizeof(u_int));
20497993Sjhb	if (error)
20597993Sjhb		return (error);
20697993Sjhb	td = curthread;
207152376Srwatson	ktrace_enter(td);
20897993Sjhb	mtx_lock(&ktrace_mtx);
20997993Sjhb	oldsize = ktr_requestpool;
21097993Sjhb	newsize = ktrace_resize_pool(wantsize);
21197993Sjhb	mtx_unlock(&ktrace_mtx);
212152376Srwatson	ktrace_exit(td);
213118607Sjhb	error = SYSCTL_OUT(req, &oldsize, sizeof(u_int));
21497993Sjhb	if (error)
21597993Sjhb		return (error);
216122478Sjkoshy	if (wantsize > oldsize && newsize < wantsize)
21797993Sjhb		return (ENOSPC);
21897993Sjhb	return (0);
21997993Sjhb}
220103234SjhbSYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
221211102Sgavin    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU",
222211102Sgavin    "Pool buffer size for ktrace(1)");
22397993Sjhb
224118607Sjhbstatic u_int
225118607Sjhbktrace_resize_pool(u_int newsize)
22697993Sjhb{
22797993Sjhb	struct ktr_request *req;
228122478Sjkoshy	int bound;
22997993Sjhb
23097993Sjhb	mtx_assert(&ktrace_mtx, MA_OWNED);
23197993Sjhb	print_message = 1;
232122478Sjkoshy	bound = newsize - ktr_requestpool;
233122478Sjkoshy	if (bound == 0)
234122478Sjkoshy		return (ktr_requestpool);
235122478Sjkoshy	if (bound < 0)
23697993Sjhb		/* Shrink pool down to newsize if possible. */
237122478Sjkoshy		while (bound++ < 0) {
23897993Sjhb			req = STAILQ_FIRST(&ktr_free);
23997993Sjhb			if (req == NULL)
24097993Sjhb				return (ktr_requestpool);
24197993Sjhb			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
24297993Sjhb			ktr_requestpool--;
24397993Sjhb			mtx_unlock(&ktrace_mtx);
24497993Sjhb			free(req, M_KTRACE);
24597993Sjhb			mtx_lock(&ktrace_mtx);
24697993Sjhb		}
24797993Sjhb	else
24897993Sjhb		/* Grow pool up to newsize. */
249122478Sjkoshy		while (bound-- > 0) {
25097993Sjhb			mtx_unlock(&ktrace_mtx);
25197993Sjhb			req = malloc(sizeof(struct ktr_request), M_KTRACE,
252111119Simp			    M_WAITOK);
25397993Sjhb			mtx_lock(&ktrace_mtx);
25497993Sjhb			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
25597993Sjhb			ktr_requestpool++;
25697993Sjhb		}
25797993Sjhb	return (ktr_requestpool);
25897993Sjhb}
25997993Sjhb
260198411Sjhb/* ktr_getrequest() assumes that ktr_comm[] is the same size as td_name[]. */
261198411SjhbCTASSERT(sizeof(((struct ktr_header *)NULL)->ktr_comm) ==
262198411Sjhb    (sizeof((struct thread *)NULL)->td_name));
263198411Sjhb
26497993Sjhbstatic struct ktr_request *
26597993Sjhbktr_getrequest(int type)
26697993Sjhb{
26797993Sjhb	struct ktr_request *req;
26897993Sjhb	struct thread *td = curthread;
26997993Sjhb	struct proc *p = td->td_proc;
27097993Sjhb	int pm;
27197993Sjhb
272152376Srwatson	ktrace_enter(td);	/* XXX: In caller instead? */
273152430Srwatson	mtx_lock(&ktrace_mtx);
27497993Sjhb	if (!KTRCHECK(td, type)) {
275152430Srwatson		mtx_unlock(&ktrace_mtx);
276152376Srwatson		ktrace_exit(td);
27797993Sjhb		return (NULL);
27897993Sjhb	}
27997993Sjhb	req = STAILQ_FIRST(&ktr_free);
28097993Sjhb	if (req != NULL) {
28197993Sjhb		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
28297993Sjhb		req->ktr_header.ktr_type = type;
283112199Sjhb		if (p->p_traceflag & KTRFAC_DROP) {
284112199Sjhb			req->ktr_header.ktr_type |= KTR_DROP;
285112199Sjhb			p->p_traceflag &= ~KTRFAC_DROP;
286112199Sjhb		}
287152430Srwatson		mtx_unlock(&ktrace_mtx);
28897993Sjhb		microtime(&req->ktr_header.ktr_time);
28997993Sjhb		req->ktr_header.ktr_pid = p->p_pid;
290151929Srwatson		req->ktr_header.ktr_tid = td->td_tid;
291198411Sjhb		bcopy(td->td_name, req->ktr_header.ktr_comm,
292198411Sjhb		    sizeof(req->ktr_header.ktr_comm));
293151927Srwatson		req->ktr_buffer = NULL;
29497993Sjhb		req->ktr_header.ktr_len = 0;
29597993Sjhb	} else {
296112199Sjhb		p->p_traceflag |= KTRFAC_DROP;
29797993Sjhb		pm = print_message;
29897993Sjhb		print_message = 0;
29997993Sjhb		mtx_unlock(&ktrace_mtx);
30097993Sjhb		if (pm)
30197993Sjhb			printf("Out of ktrace request objects.\n");
302152376Srwatson		ktrace_exit(td);
30397993Sjhb	}
30497993Sjhb	return (req);
30597993Sjhb}
30697993Sjhb
307152376Srwatson/*
308152376Srwatson * Some trace generation environments don't permit direct access to VFS,
309152376Srwatson * such as during a context switch where sleeping is not allowed.  Under these
310152376Srwatson * circumstances, queue a request to the thread to be written asynchronously
311152376Srwatson * later.
312152376Srwatson */
31397993Sjhbstatic void
314152376Srwatsonktr_enqueuerequest(struct thread *td, struct ktr_request *req)
31597993Sjhb{
31697993Sjhb
31797993Sjhb	mtx_lock(&ktrace_mtx);
318152376Srwatson	STAILQ_INSERT_TAIL(&td->td_proc->p_ktr, req, ktr_list);
319118599Sjhb	mtx_unlock(&ktrace_mtx);
320152376Srwatson	ktrace_exit(td);
32197993Sjhb}
32297993Sjhb
323152376Srwatson/*
324152376Srwatson * Drain any pending ktrace records from the per-thread queue to disk.  This
325152376Srwatson * is used both internally before committing other records, and also on
326152376Srwatson * system call return.  We drain all the ones we can find at the time when
327152376Srwatson * drain is requested, but don't keep draining after that as those events
328189707Sjhb * may be approximately "after" the current event.
329152376Srwatson */
33097993Sjhbstatic void
331152376Srwatsonktr_drain(struct thread *td)
332152376Srwatson{
333152376Srwatson	struct ktr_request *queued_req;
334152376Srwatson	STAILQ_HEAD(, ktr_request) local_queue;
335152376Srwatson
336152376Srwatson	ktrace_assert(td);
337152376Srwatson	sx_assert(&ktrace_sx, SX_XLOCKED);
338152376Srwatson
339211512Sjhb	STAILQ_INIT(&local_queue);
340152376Srwatson
341152376Srwatson	if (!STAILQ_EMPTY(&td->td_proc->p_ktr)) {
342152376Srwatson		mtx_lock(&ktrace_mtx);
343152376Srwatson		STAILQ_CONCAT(&local_queue, &td->td_proc->p_ktr);
344152376Srwatson		mtx_unlock(&ktrace_mtx);
345152376Srwatson
346152376Srwatson		while ((queued_req = STAILQ_FIRST(&local_queue))) {
347152376Srwatson			STAILQ_REMOVE_HEAD(&local_queue, ktr_list);
348152376Srwatson			ktr_writerequest(td, queued_req);
349152376Srwatson			ktr_freerequest(queued_req);
350152376Srwatson		}
351152376Srwatson	}
352152376Srwatson}
353152376Srwatson
354152376Srwatson/*
355152376Srwatson * Submit a trace record for immediate commit to disk -- to be used only
356152376Srwatson * where entering VFS is OK.  First drain any pending records that may have
357152376Srwatson * been cached in the thread.
358152376Srwatson */
359152376Srwatsonstatic void
360152376Srwatsonktr_submitrequest(struct thread *td, struct ktr_request *req)
361152376Srwatson{
362152376Srwatson
363152376Srwatson	ktrace_assert(td);
364152376Srwatson
365152376Srwatson	sx_xlock(&ktrace_sx);
366152376Srwatson	ktr_drain(td);
367152376Srwatson	ktr_writerequest(td, req);
368152376Srwatson	ktr_freerequest(req);
369152376Srwatson	sx_xunlock(&ktrace_sx);
370152376Srwatson
371152376Srwatson	ktrace_exit(td);
372152376Srwatson}
373152376Srwatson
374152376Srwatsonstatic void
37597993Sjhbktr_freerequest(struct ktr_request *req)
37697993Sjhb{
37797993Sjhb
378151927Srwatson	if (req->ktr_buffer != NULL)
379151927Srwatson		free(req->ktr_buffer, M_KTRACE);
38097993Sjhb	mtx_lock(&ktrace_mtx);
38197993Sjhb	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
38297993Sjhb	mtx_unlock(&ktrace_mtx);
38397993Sjhb}
38497993Sjhb
3851549Srgrimesvoid
38697993Sjhbktrsyscall(code, narg, args)
38747955Sdt	int code, narg;
38847955Sdt	register_t args[];
3891541Srgrimes{
39097993Sjhb	struct ktr_request *req;
39197993Sjhb	struct ktr_syscall *ktp;
39297993Sjhb	size_t buflen;
393103233Sjhb	char *buf = NULL;
3941541Srgrimes
395103233Sjhb	buflen = sizeof(register_t) * narg;
396103233Sjhb	if (buflen > 0) {
397111119Simp		buf = malloc(buflen, M_KTRACE, M_WAITOK);
398103233Sjhb		bcopy(args, buf, buflen);
399103233Sjhb	}
40097993Sjhb	req = ktr_getrequest(KTR_SYSCALL);
401104230Sphk	if (req == NULL) {
402104230Sphk		if (buf != NULL)
403104230Sphk			free(buf, M_KTRACE);
40497993Sjhb		return;
405104230Sphk	}
40697993Sjhb	ktp = &req->ktr_data.ktr_syscall;
4071541Srgrimes	ktp->ktr_code = code;
4081541Srgrimes	ktp->ktr_narg = narg;
40997993Sjhb	if (buflen > 0) {
41097993Sjhb		req->ktr_header.ktr_len = buflen;
411151927Srwatson		req->ktr_buffer = buf;
41297993Sjhb	}
413152376Srwatson	ktr_submitrequest(curthread, req);
4141541Srgrimes}
4151541Srgrimes
4161549Srgrimesvoid
41797993Sjhbktrsysret(code, error, retval)
41847955Sdt	int code, error;
41947955Sdt	register_t retval;
4201541Srgrimes{
42197993Sjhb	struct ktr_request *req;
42297993Sjhb	struct ktr_sysret *ktp;
4231541Srgrimes
42497993Sjhb	req = ktr_getrequest(KTR_SYSRET);
42597993Sjhb	if (req == NULL)
42697993Sjhb		return;
42797993Sjhb	ktp = &req->ktr_data.ktr_sysret;
42897993Sjhb	ktp->ktr_code = code;
42997993Sjhb	ktp->ktr_error = error;
43097993Sjhb	ktp->ktr_retval = retval;		/* what about val2 ? */
431152376Srwatson	ktr_submitrequest(curthread, req);
4321541Srgrimes}
4331541Srgrimes
434152376Srwatson/*
435152376Srwatson * When a process exits, drain per-process asynchronous trace records.
436152376Srwatson */
4371549Srgrimesvoid
438152376Srwatsonktrprocexit(struct thread *td)
439152376Srwatson{
440152376Srwatson
441152376Srwatson	ktrace_enter(td);
442152376Srwatson	sx_xlock(&ktrace_sx);
443152376Srwatson	ktr_drain(td);
444152376Srwatson	sx_xunlock(&ktrace_sx);
445152376Srwatson	ktrace_exit(td);
446152376Srwatson}
447152376Srwatson
448152376Srwatson/*
449152376Srwatson * When a thread returns, drain any asynchronous records generated by the
450152376Srwatson * system call.
451152376Srwatson */
452152376Srwatsonvoid
453152376Srwatsonktruserret(struct thread *td)
454152376Srwatson{
455152376Srwatson
456152376Srwatson	ktrace_enter(td);
457152376Srwatson	sx_xlock(&ktrace_sx);
458152376Srwatson	ktr_drain(td);
459152376Srwatson	sx_xunlock(&ktrace_sx);
460152376Srwatson	ktrace_exit(td);
461152376Srwatson}
462152376Srwatson
463152376Srwatsonvoid
46497993Sjhbktrnamei(path)
4651541Srgrimes	char *path;
4661541Srgrimes{
46797993Sjhb	struct ktr_request *req;
46897993Sjhb	int namelen;
469103233Sjhb	char *buf = NULL;
4701541Srgrimes
471103233Sjhb	namelen = strlen(path);
472103233Sjhb	if (namelen > 0) {
473111119Simp		buf = malloc(namelen, M_KTRACE, M_WAITOK);
474103233Sjhb		bcopy(path, buf, namelen);
475103233Sjhb	}
47697993Sjhb	req = ktr_getrequest(KTR_NAMEI);
477104230Sphk	if (req == NULL) {
478104230Sphk		if (buf != NULL)
479104230Sphk			free(buf, M_KTRACE);
48097993Sjhb		return;
481104230Sphk	}
48297993Sjhb	if (namelen > 0) {
48397993Sjhb		req->ktr_header.ktr_len = namelen;
484151927Srwatson		req->ktr_buffer = buf;
48597993Sjhb	}
486152376Srwatson	ktr_submitrequest(curthread, req);
4871541Srgrimes}
4881541Srgrimes
4891549Srgrimesvoid
490189707Sjhbktrsysctl(name, namelen)
491189707Sjhb	int *name;
492189707Sjhb	u_int namelen;
493189707Sjhb{
494189707Sjhb	struct ktr_request *req;
495189707Sjhb	u_int mib[CTL_MAXNAME + 2];
496189707Sjhb	char *mibname;
497189707Sjhb	size_t mibnamelen;
498189707Sjhb	int error;
499189707Sjhb
500189707Sjhb	/* Lookup name of mib. */
501189707Sjhb	KASSERT(namelen <= CTL_MAXNAME, ("sysctl MIB too long"));
502189707Sjhb	mib[0] = 0;
503189707Sjhb	mib[1] = 1;
504189707Sjhb	bcopy(name, mib + 2, namelen * sizeof(*name));
505189707Sjhb	mibnamelen = 128;
506189707Sjhb	mibname = malloc(mibnamelen, M_KTRACE, M_WAITOK);
507189707Sjhb	error = kernel_sysctl(curthread, mib, namelen + 2, mibname, &mibnamelen,
508189707Sjhb	    NULL, 0, &mibnamelen, 0);
509189707Sjhb	if (error) {
510189707Sjhb		free(mibname, M_KTRACE);
511189707Sjhb		return;
512189707Sjhb	}
513189707Sjhb	req = ktr_getrequest(KTR_SYSCTL);
514189707Sjhb	if (req == NULL) {
515189707Sjhb		free(mibname, M_KTRACE);
516189707Sjhb		return;
517189707Sjhb	}
518189707Sjhb	req->ktr_header.ktr_len = mibnamelen;
519189707Sjhb	req->ktr_buffer = mibname;
520189707Sjhb	ktr_submitrequest(curthread, req);
521189707Sjhb}
522189707Sjhb
523189707Sjhbvoid
52497993Sjhbktrgenio(fd, rw, uio, error)
5251541Srgrimes	int fd;
5261541Srgrimes	enum uio_rw rw;
52762378Sgreen	struct uio *uio;
52862378Sgreen	int error;
5291541Srgrimes{
53097993Sjhb	struct ktr_request *req;
53197993Sjhb	struct ktr_genio *ktg;
532103235Sjhb	int datalen;
533103235Sjhb	char *buf;
5348876Srgrimes
535131897Sphk	if (error) {
536131897Sphk		free(uio, M_IOV);
5371541Srgrimes		return;
538131897Sphk	}
539103235Sjhb	uio->uio_offset = 0;
540103235Sjhb	uio->uio_rw = UIO_WRITE;
541103235Sjhb	datalen = imin(uio->uio_resid, ktr_geniosize);
542111119Simp	buf = malloc(datalen, M_KTRACE, M_WAITOK);
543131897Sphk	error = uiomove(buf, datalen, uio);
544131897Sphk	free(uio, M_IOV);
545131897Sphk	if (error) {
546103235Sjhb		free(buf, M_KTRACE);
547103235Sjhb		return;
548103235Sjhb	}
54997993Sjhb	req = ktr_getrequest(KTR_GENIO);
550103235Sjhb	if (req == NULL) {
551103235Sjhb		free(buf, M_KTRACE);
55297993Sjhb		return;
553103235Sjhb	}
55497993Sjhb	ktg = &req->ktr_data.ktr_genio;
55597993Sjhb	ktg->ktr_fd = fd;
55697993Sjhb	ktg->ktr_rw = rw;
557103235Sjhb	req->ktr_header.ktr_len = datalen;
558151927Srwatson	req->ktr_buffer = buf;
559152376Srwatson	ktr_submitrequest(curthread, req);
5601541Srgrimes}
5611541Srgrimes
5621549Srgrimesvoid
56397993Sjhbktrpsig(sig, action, mask, code)
56451941Smarcel	int sig;
5651541Srgrimes	sig_t action;
56651791Smarcel	sigset_t *mask;
56751941Smarcel	int code;
5681541Srgrimes{
56997993Sjhb	struct ktr_request *req;
57097993Sjhb	struct ktr_psig	*kp;
5711541Srgrimes
57297993Sjhb	req = ktr_getrequest(KTR_PSIG);
57397993Sjhb	if (req == NULL)
57497993Sjhb		return;
57597993Sjhb	kp = &req->ktr_data.ktr_psig;
57697993Sjhb	kp->signo = (char)sig;
57797993Sjhb	kp->action = action;
57897993Sjhb	kp->mask = *mask;
57997993Sjhb	kp->code = code;
580152376Srwatson	ktr_enqueuerequest(curthread, req);
5811541Srgrimes}
5821541Srgrimes
5831549Srgrimesvoid
58497993Sjhbktrcsw(out, user)
5851541Srgrimes	int out, user;
5861541Srgrimes{
58797993Sjhb	struct ktr_request *req;
58897993Sjhb	struct ktr_csw *kc;
5891541Srgrimes
59097993Sjhb	req = ktr_getrequest(KTR_CSW);
59197993Sjhb	if (req == NULL)
59297993Sjhb		return;
59397993Sjhb	kc = &req->ktr_data.ktr_csw;
59497993Sjhb	kc->out = out;
59597993Sjhb	kc->user = user;
596152376Srwatson	ktr_enqueuerequest(curthread, req);
5971541Srgrimes}
598176471Sdes
599176471Sdesvoid
600210064Sjhbktrstruct(name, data, datalen)
601176471Sdes	const char *name;
602176471Sdes	void *data;
603176471Sdes	size_t datalen;
604176471Sdes{
605176471Sdes	struct ktr_request *req;
606176471Sdes	char *buf = NULL;
607176471Sdes	size_t buflen;
608176471Sdes
609176471Sdes	if (!data)
610176471Sdes		datalen = 0;
611210064Sjhb	buflen = strlen(name) + 1 + datalen;
612176471Sdes	buf = malloc(buflen, M_KTRACE, M_WAITOK);
613210064Sjhb	strcpy(buf, name);
614210064Sjhb	bcopy(data, buf + strlen(name) + 1, datalen);
615176471Sdes	if ((req = ktr_getrequest(KTR_STRUCT)) == NULL) {
616176471Sdes		free(buf, M_KTRACE);
617176471Sdes		return;
618176471Sdes	}
619176471Sdes	req->ktr_buffer = buf;
620176471Sdes	req->ktr_header.ktr_len = buflen;
621176471Sdes	ktr_submitrequest(curthread, req);
622176471Sdes}
623114026Sjhb#endif /* KTRACE */
6241541Srgrimes
/* Interface and common routines */
6261541Srgrimes
#ifndef _SYS_SYSPROTO_H_
/* Argument structure for the ktrace(2) system call. */
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
6351541Srgrimes/* ARGSUSED */
6361549Srgrimesint
63783366Sjulianktrace(td, uap)
63883366Sjulian	struct thread *td;
6391541Srgrimes	register struct ktrace_args *uap;
6401541Srgrimes{
64113203Swollman#ifdef KTRACE
6421541Srgrimes	register struct vnode *vp = NULL;
6431541Srgrimes	register struct proc *p;
6441541Srgrimes	struct pgrp *pg;
6451541Srgrimes	int facs = uap->facs & ~KTRFAC_ROOT;
6461541Srgrimes	int ops = KTROP(uap->ops);
6471541Srgrimes	int descend = uap->ops & KTRFLAG_DESCEND;
648147576Spjd	int nfound, ret = 0;
649157233Sjhb	int flags, error = 0, vfslocked;
6501541Srgrimes	struct nameidata nd;
651112198Sjhb	struct ucred *cred;
6521541Srgrimes
653114026Sjhb	/*
654114026Sjhb	 * Need something to (un)trace.
655114026Sjhb	 */
656114026Sjhb	if (ops != KTROP_CLEARFILE && facs == 0)
657114026Sjhb		return (EINVAL);
658114026Sjhb
659152376Srwatson	ktrace_enter(td);
6601541Srgrimes	if (ops != KTROP_CLEAR) {
6611541Srgrimes		/*
6621541Srgrimes		 * an operation which requires a file argument.
6631541Srgrimes		 */
664157233Sjhb		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
665157233Sjhb		    uap->fname, td);
66662550Smckusick		flags = FREAD | FWRITE | O_NOFOLLOW;
667170152Skib		error = vn_open(&nd, &flags, 0, NULL);
6683308Sphk		if (error) {
669152376Srwatson			ktrace_exit(td);
6701541Srgrimes			return (error);
6711541Srgrimes		}
672157233Sjhb		vfslocked = NDHASGIANT(&nd);
67354655Seivind		NDFREE(&nd, NDF_ONLY_PNBUF);
6741541Srgrimes		vp = nd.ni_vp;
675175294Sattilio		VOP_UNLOCK(vp, 0);
6761541Srgrimes		if (vp->v_type != VREG) {
67791406Sjhb			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
678157233Sjhb			VFS_UNLOCK_GIANT(vfslocked);
679152376Srwatson			ktrace_exit(td);
6801541Srgrimes			return (EACCES);
6811541Srgrimes		}
682157233Sjhb		VFS_UNLOCK_GIANT(vfslocked);
6831541Srgrimes	}
6841541Srgrimes	/*
68585397Sdillon	 * Clear all uses of the tracefile.
6861541Srgrimes	 */
6871541Srgrimes	if (ops == KTROP_CLEARFILE) {
688166678Smpp		int vrele_count;
689166678Smpp
690166678Smpp		vrele_count = 0;
69174927Sjhb		sx_slock(&allproc_lock);
692166073Sdelphij		FOREACH_PROC_IN_SYSTEM(p) {
69394618Sjhb			PROC_LOCK(p);
694112198Sjhb			if (p->p_tracevp == vp) {
69597993Sjhb				if (ktrcanset(td, p)) {
69697993Sjhb					mtx_lock(&ktrace_mtx);
697112198Sjhb					cred = p->p_tracecred;
698112198Sjhb					p->p_tracecred = NULL;
699112198Sjhb					p->p_tracevp = NULL;
7001541Srgrimes					p->p_traceflag = 0;
70197993Sjhb					mtx_unlock(&ktrace_mtx);
702166678Smpp					vrele_count++;
703112198Sjhb					crfree(cred);
704166678Smpp				} else
7051541Srgrimes					error = EPERM;
706166678Smpp			}
707166678Smpp			PROC_UNLOCK(p);
7081541Srgrimes		}
70974927Sjhb		sx_sunlock(&allproc_lock);
710166678Smpp		if (vrele_count > 0) {
711166678Smpp			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
712166678Smpp			while (vrele_count-- > 0)
713166678Smpp				vrele(vp);
714166678Smpp			VFS_UNLOCK_GIANT(vfslocked);
715166678Smpp		}
7161541Srgrimes		goto done;
7171541Srgrimes	}
7181541Srgrimes	/*
7191541Srgrimes	 * do it
7201541Srgrimes	 */
721114026Sjhb	sx_slock(&proctree_lock);
7221541Srgrimes	if (uap->pid < 0) {
7231541Srgrimes		/*
7241541Srgrimes		 * by process group
7251541Srgrimes		 */
7261541Srgrimes		pg = pgfind(-uap->pid);
7271541Srgrimes		if (pg == NULL) {
72894861Sjhb			sx_sunlock(&proctree_lock);
7291541Srgrimes			error = ESRCH;
7301541Srgrimes			goto done;
7311541Srgrimes		}
73291140Stanimura		/*
73391140Stanimura		 * ktrops() may call vrele(). Lock pg_members
73494861Sjhb		 * by the proctree_lock rather than pg_mtx.
73591140Stanimura		 */
73691140Stanimura		PGRP_UNLOCK(pg);
737147576Spjd		nfound = 0;
738147576Spjd		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
739147576Spjd			PROC_LOCK(p);
740147576Spjd			if (p_cansee(td, p) != 0) {
741147576Spjd				PROC_UNLOCK(p);
742147576Spjd				continue;
743147576Spjd			}
744147576Spjd			nfound++;
7451541Srgrimes			if (descend)
74694618Sjhb				ret |= ktrsetchildren(td, p, ops, facs, vp);
7478876Srgrimes			else
74894618Sjhb				ret |= ktrops(td, p, ops, facs, vp);
749147576Spjd		}
750147576Spjd		if (nfound == 0) {
751147576Spjd			sx_sunlock(&proctree_lock);
752147576Spjd			error = ESRCH;
753147576Spjd			goto done;
754147576Spjd		}
7551541Srgrimes	} else {
7561541Srgrimes		/*
7571541Srgrimes		 * by pid
7581541Srgrimes		 */
7591541Srgrimes		p = pfind(uap->pid);
760211439Sjhb		if (p == NULL)
7611541Srgrimes			error = ESRCH;
762211439Sjhb		else
763211439Sjhb			error = p_cansee(td, p);
764147520Spjd		if (error) {
765211439Sjhb			if (p != NULL)
766211439Sjhb				PROC_UNLOCK(p);
767147520Spjd			sx_sunlock(&proctree_lock);
768147183Spjd			goto done;
769147520Spjd		}
7701541Srgrimes		if (descend)
77194618Sjhb			ret |= ktrsetchildren(td, p, ops, facs, vp);
7721541Srgrimes		else
77394618Sjhb			ret |= ktrops(td, p, ops, facs, vp);
7741541Srgrimes	}
775114026Sjhb	sx_sunlock(&proctree_lock);
7761541Srgrimes	if (!ret)
7771541Srgrimes		error = EPERM;
7781541Srgrimesdone:
779114026Sjhb	if (vp != NULL) {
780157233Sjhb		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
78191406Sjhb		(void) vn_close(vp, FWRITE, td->td_ucred, td);
782157233Sjhb		VFS_UNLOCK_GIANT(vfslocked);
783114026Sjhb	}
784152376Srwatson	ktrace_exit(td);
7851541Srgrimes	return (error);
786114026Sjhb#else /* !KTRACE */
787114026Sjhb	return (ENOSYS);
788114026Sjhb#endif /* KTRACE */
7891541Srgrimes}
7901541Srgrimes
79118398Sphk/* ARGSUSED */
79218398Sphkint
79383366Sjulianutrace(td, uap)
79483366Sjulian	struct thread *td;
79518398Sphk	register struct utrace_args *uap;
79618398Sphk{
79783366Sjulian
79813203Swollman#ifdef KTRACE
79997993Sjhb	struct ktr_request *req;
80099009Salfred	void *cp;
801103237Sjhb	int error;
80218398Sphk
803103237Sjhb	if (!KTRPOINT(td, KTR_USER))
804103237Sjhb		return (0);
80570792Salfred	if (uap->len > KTR_USER_MAXLEN)
80670707Salfred		return (EINVAL);
807111119Simp	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
808103237Sjhb	error = copyin(uap->addr, cp, uap->len);
809104230Sphk	if (error) {
810104230Sphk		free(cp, M_KTRACE);
811103237Sjhb		return (error);
812104230Sphk	}
81397993Sjhb	req = ktr_getrequest(KTR_USER);
814104230Sphk	if (req == NULL) {
815104230Sphk		free(cp, M_KTRACE);
816122457Sjkoshy		return (ENOMEM);
817104230Sphk	}
818151927Srwatson	req->ktr_buffer = cp;
819103237Sjhb	req->ktr_header.ktr_len = uap->len;
820152376Srwatson	ktr_submitrequest(td, req);
82118398Sphk	return (0);
822114026Sjhb#else /* !KTRACE */
82318398Sphk	return (ENOSYS);
824114026Sjhb#endif /* KTRACE */
82518398Sphk}
82618398Sphk
82718398Sphk#ifdef KTRACE
/*
 * Apply a single ktrace operation (KTROP_SET or KTROP_CLEAR) to one
 * process.  Called with the proc lock held; always unlocks it before
 * returning.  Returns 1 if the operation was applied (or the process is
 * exiting), 0 if the caller lacks permission to change the target.
 */
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	/*
	 * Old vnode/credential references are stashed here and released
	 * only after all locks are dropped, to avoid sleeping with the
	 * proc lock or ktrace_mtx held.
	 */
	struct vnode *tracevp = NULL;
	struct ucred *tracecred = NULL;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	if (p->p_flag & P_WEXIT) {
		/* If the process is exiting, just ignore it. */
		PROC_UNLOCK(p);
		return (1);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracevp != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracevp;
			VREF(vp);
			p->p_tracevp = vp;
		}
		if (p->p_tracecred != td->td_ucred) {
			tracecred = p->p_tracecred;
			p->p_tracecred = crhold(td->td_ucred);
		}
		p->p_traceflag |= facs;
		/* Mark root-initiated tracing so only root may alter it. */
		if (priv_check(td, PRIV_KTRACE) == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			tracevp = p->p_tracevp;
			p->p_tracevp = NULL;
			tracecred = p->p_tracecred;
			p->p_tracecred = NULL;
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	/* Drop the displaced vnode reference, taking Giant if the FS needs it. */
	if (tracevp != NULL) {
		int vfslocked;

		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
		vrele(tracevp);
		VFS_UNLOCK_GIANT(vfslocked);
	}
	if (tracecred != NULL)
		crfree(tracecred);

	return (1);
}
8901541Srgrimes
/*
 * Apply a ktrace operation to a process and all of its descendants via a
 * depth-first walk of the process tree.  Entered with top's proc lock and
 * proctree_lock held; ktrops() drops each proc lock, and we re-lock the
 * next victim at the bottom of the loop.  Returns the OR of the per-process
 * ktrops() results (nonzero if any process was affected).
 */
static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/* proctree_lock keeps the child/sibling links stable while we walk. */
	sx_assert(&proctree_lock, SX_LOCKED);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top)
				return (ret);
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
		/* Lock the next process before handing it to ktrops(). */
		PROC_LOCK(p);
	}
	/*NOTREACHED*/
}
9261541Srgrimes
/*
 * Write a completed ktrace request to the tracing vnode of the current
 * process.  Builds an iovec of header + fixed-size payload + optional
 * variable-length buffer and appends it with VOP_WRITE().  On a write
 * error, tracing on this vnode is disabled for every process in the
 * system and all displaced vnode references are released.
 */
static void
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error, vfslocked;

	/*
	 * We hold the vnode and credential for use in I/O in case ktrace is
	 * disabled on the process as we write out the request.
	 *
	 * XXXRW: This is not ideal: we could end up performing a write after
	 * the vnode has been closed.
	 */
	mtx_lock(&ktrace_mtx);
	vp = td->td_proc->p_tracevp;
	cred = td->td_proc->p_tracecred;

	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.  Make sure the credential and vnode are
	 * in sync: we should have both or neither.
	 */
	if (vp == NULL) {
		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
		mtx_unlock(&ktrace_mtx);
		return;
	}
	/* Take our own references before dropping ktrace_mtx. */
	VREF(vp);
	KASSERT(cred != NULL, ("ktr_writerequest: cred == NULL"));
	crhold(cred);
	mtx_unlock(&ktrace_mtx);

	kth = &req->ktr_header;
	KASSERT(((u_short)kth->ktr_type & ~KTR_DROP) <
	    sizeof(data_lengths) / sizeof(data_lengths[0]),
	    ("data_lengths array overflow"));
	/* Fixed payload size is determined by the record type. */
	datalen = data_lengths[(u_short)kth->ktr_type & ~KTR_DROP];
	buflen = kth->ktr_len;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		/* Header's length field covers the fixed payload too. */
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(req->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = req->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}

	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	crfree(cred);
	/* Common case: the write succeeded; drop our reference and return. */
	if (!error) {
		vrele(vp);
		VFS_UNLOCK_GIANT(vfslocked);
		return;
	}
	VFS_UNLOCK_GIANT(vfslocked);

	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 * NB: at this point we still hold the vnode reference that must
	 * not go away as we need the valid vnode to compare with. Thus let
	 * vrele_count start at 1 and the reference will be freed
	 * by the loop at the end after our last use of vp.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 1;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	cred = NULL;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		PROC_LOCK(p);
		if (p->p_tracevp == vp) {
			mtx_lock(&ktrace_mtx);
			p->p_tracevp = NULL;
			p->p_traceflag = 0;
			cred = p->p_tracecred;
			p->p_tracecred = NULL;
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
		/* crfree() may sleep, so do it only after dropping the proc lock. */
		if (cred != NULL) {
			crfree(cred);
			cred = NULL;
		}
	}
	sx_sunlock(&allproc_lock);

	/*
	 * We can't clear any pending requests in threads that have cached
	 * them but not yet committed them, as those are per-thread.  The
	 * thread will have to clear it itself on system call return.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	while (vrele_count-- > 0)
		vrele(vp);
	VFS_UNLOCK_GIANT(vfslocked);
}
10641541Srgrimes
10651541Srgrimes/*
10661541Srgrimes * Return true if caller has permission to set the ktracing state
10671541Srgrimes * of target.  Essentially, the target can't possess any
10681541Srgrimes * more permissions than the caller.  KTRFAC_ROOT signifies that
10698876Srgrimes * root previously set the tracing status on the target process, and
10701541Srgrimes * so, only root may further change it.
10711541Srgrimes */
107212819Sphkstatic int
107394618Sjhbktrcanset(td, targetp)
107494618Sjhb	struct thread *td;
107594618Sjhb	struct proc *targetp;
10761541Srgrimes{
10771541Srgrimes
107894618Sjhb	PROC_LOCK_ASSERT(targetp, MA_OWNED);
107979335Srwatson	if (targetp->p_traceflag & KTRFAC_ROOT &&
1080170587Srwatson	    priv_check(td, PRIV_KTRACE))
108146155Sphk		return (0);
10821541Srgrimes
108396886Sjhb	if (p_candebug(td, targetp) != 0)
108479335Srwatson		return (0);
108579335Srwatson
108679335Srwatson	return (1);
10871541Srgrimes}
10881541Srgrimes
108913203Swollman#endif /* KTRACE */
1090