/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 * $FreeBSD: head/sys/kern/kern_ktrace.c 103234 2002-09-11 20:49:55Z jhb $
 */

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/sema.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

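/*
 * A queued trace record.  Each request carries its own credential and
 * vnode references so that it can be written out by the ktrace kthread
 * after the originating thread has moved on.  Fixed-size payloads live
 * in the ktr_data union; variable-length payloads hang off the header's
 * ktr_buffer pointer.
 */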
struct ktr_request {
	struct	ktr_header ktr_header;
	struct	ucred *ktr_cred;
	struct	vnode *ktr_vp;
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	int	ktr_synchronous;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0					/* KTR_USER */
};

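/*
 * Requests waiting to be written out by the ktrace kthread sit on
 * ktr_todo; unused request structures sit on ktr_free.  Both lists
 * are protected by ktrace_mtx.
 */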
static STAILQ_HEAD(, ktr_request) ktr_todo;
static STAILQ_HEAD(, ktr_request) ktr_free;

SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static uint ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static uint ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
struct mtx ktrace_mtx;
static struct sema ktrace_sema;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static uint ktrace_resize_pool(uint newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_loop(void *dummy);
static void ktr_writerequest(struct ktr_request *req);
static int ktrcanset(struct thread *, struct proc *);
static int ktrsetchildren(struct thread *, struct proc *, int, int,
    struct vnode *);
static int ktrops(struct thread *, struct proc *, int, int, struct vnode *);

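/*
 * Set up the ktrace infrastructure: the mutex and semaphore, the todo
 * and free queues, the initial pool of request structures, and the
 * kthread that drains the todo queue.
 */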
static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sema_init(&ktrace_sema, 0, "ktrace");
	STAILQ_INIT(&ktr_todo);
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
	kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, "ktrace");
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

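/*
 * Sysctl handler for kern.ktrace.request_pool.  A read returns the
 * current pool size; a write resizes the pool and returns the previous
 * size, failing with ENOSPC if the pool could not be fully resized.
 */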
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	uint newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		mtx_lock(&ktrace_mtx);
		oldsize = ktr_requestpool;
		mtx_unlock(&ktrace_mtx);
		return (SYSCTL_OUT(req, &oldsize, sizeof(uint)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(uint));
	if (error)
		return (error);
	td = curthread;
	td->td_inktrace = 1;
	mtx_lock(&ktrace_mtx);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(wantsize);
	mtx_unlock(&ktrace_mtx);
	td->td_inktrace = 0;
	error = SYSCTL_OUT(req, &oldsize, sizeof(uint));
	if (error)
		return (error);
	if (newsize != wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");

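/*
 * Grow or shrink the free request pool to newsize if possible and
 * return the resulting pool size.  Must be called with ktrace_mtx
 * held; the lock is dropped around malloc() and free().
 */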
static uint
ktrace_resize_pool(uint newsize)
{
	struct ktr_request *req;

	mtx_assert(&ktrace_mtx, MA_OWNED);
	print_message = 1;
	if (newsize == ktr_requestpool)
		return (newsize);
	if (newsize < ktr_requestpool)
		/* Shrink pool down to newsize if possible. */
		while (ktr_requestpool > newsize) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				return (ktr_requestpool);
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			mtx_unlock(&ktrace_mtx);
			free(req, M_KTRACE);
			mtx_lock(&ktrace_mtx);
		}
	else
		/* Grow pool up to newsize. */
		while (ktr_requestpool < newsize) {
			mtx_unlock(&ktrace_mtx);
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			mtx_lock(&ktrace_mtx);
			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
			ktr_requestpool++;
		}
	return (ktr_requestpool);
}

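/*
 * Allocate a request structure for an event of the given type and
 * fill in the common header fields.  Returns NULL if this event is
 * not being traced for the current process or if the request pool
 * is exhausted; in the latter case a warning is printed once until
 * the pool is next resized.
 */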
static struct ktr_request *
ktr_getrequest(int type)
{
	struct ktr_request *req;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int pm;

	td->td_inktrace = 1;
	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		td->td_inktrace = 0;
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		KASSERT(p->p_tracep != NULL, ("ktrace: no trace vnode"));
		req->ktr_vp = p->p_tracep;
		VREF(p->p_tracep);
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
		req->ktr_cred = crhold(td->td_ucred);
		req->ktr_header.ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
		req->ktr_synchronous = 0;
	} else {
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
		td->td_inktrace = 0;
	}
	return (req);
}

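/*
 * Hand a filled-in request to the ktrace kthread.  Asynchronous
 * requests are simply queued; a synchronous request is queued too,
 * but is then written out by the submitting thread itself, as
 * described in the comment in the body below.
 */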
static void
ktr_submitrequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
	sema_post(&ktrace_sema);
	if (req->ktr_synchronous) {
		/*
		 * For a synchronous request, we wait for the ktrace thread
		 * to get to our item in the todo list and wake us up.  Then
		 * we write the request out ourselves and wake the ktrace
		 * thread back up.
		 */
		msleep(req, &ktrace_mtx, curthread->td_priority, "ktrsync", 0);
		mtx_unlock(&ktrace_mtx);
		ktr_writerequest(req);
		mtx_lock(&ktrace_mtx);
		wakeup(req);
	}
	mtx_unlock(&ktrace_mtx);
	curthread->td_inktrace = 0;
}

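/*
 * Release the credential and vnode references held by a request and
 * return it to the free pool.
 */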
static void
ktr_freerequest(struct ktr_request *req)
{

	crfree(req->ktr_cred);
	if (req->ktr_vp != NULL) {
		mtx_lock(&Giant);
		vrele(req->ktr_vp);
		mtx_unlock(&Giant);
	}
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

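/*
 * The main loop of the ktrace kthread: wait for a request to appear
 * on the todo queue, then either write it out (asynchronous requests)
 * or hand it back to the submitting thread (synchronous requests).
 */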
static void
ktr_loop(void *dummy)
{
	struct ktr_request *req;
	struct thread *td;
	struct ucred *cred;

	/* Only cache these values once. */
	td = curthread;
	cred = td->td_ucred;
	for (;;) {
		sema_wait(&ktrace_sema);
		mtx_lock(&ktrace_mtx);
		req = STAILQ_FIRST(&ktr_todo);
		KASSERT(req != NULL, ("got a NULL request"));
		STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
		if (req->ktr_synchronous) {
			wakeup(req);
			msleep(req, &ktrace_mtx, curthread->td_priority,
			    "ktrwait", 0);
			mtx_unlock(&ktrace_mtx);
		} else {
			mtx_unlock(&ktrace_mtx);
			/*
			 * It is not enough just to pass the cached cred
			 * to the VOPs in ktr_writerequest().  Some VFS
			 * operations use curthread->td_ucred, so we need
			 * to modify our thread's credentials as well.
			 * Evil.
			 */
			td->td_ucred = req->ktr_cred;
			ktr_writerequest(req);
			td->td_ucred = cred;
		}
		ktr_freerequest(req);
	}
}

/*
 * MPSAFE
 */
void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		/* Don't leak the argument buffer if tracing is disabled. */
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_header.ktr_buffer = buf;
	}
	ktr_submitrequest(req);
}

/*
 * MPSAFE
 */
void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;		/* what about val2 ? */
	ktr_submitrequest(req);
}

void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		/* Don't leak the path buffer if tracing is disabled. */
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_header.ktr_buffer = buf;
	}
	ktr_submitrequest(req);
}

/*
 * Since the uio may not stay valid, we cannot hand off this request to
 * the thread and need to process it synchronously.  However, we wish to
 * keep the relative order of records in a trace file correct, so we
 * do put this request on the queue (if it isn't empty) and then block.
 * The ktrace thread wakes us back up when it is time for this event to
 * be posted and blocks until we have completed writing out the event
 * and woken it back up.
 */
void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;

	if (error)
		return;
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL)
		return;
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_buffer = uio;
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	req->ktr_synchronous = 1;
	ktr_submitrequest(req);
}

void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_submitrequest(req);
}

void
ktrcsw(out, user)
	int out, user;
{
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_submitrequest(req);
}
#endif /* KTRACE */

/* Interface and common routines */

/*
 * ktrace system call
 */
#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int ret = 0;
	int flags, error = 0;
	struct nameidata nd;

	td->td_inktrace = 1;
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0);
		if (error) {
			td->td_inktrace = 0;
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0, td);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			td->td_inktrace = 0;
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list) {
			PROC_LOCK(p);
			if (p->p_tracep == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					p->p_tracep = NULL;
					p->p_traceflag = 0;
					mtx_unlock(&ktrace_mtx);
					PROC_UNLOCK(p);
					(void) vn_close(vp, FREAD|FWRITE,
						td->td_ucred, td);
				} else {
					PROC_UNLOCK(p);
					error = EPERM;
				}
			} else
				PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	/*
	 * do it
	 */
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		sx_slock(&proctree_lock);
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		LIST_FOREACH(p, &pg->pg_members, p_pglist)
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		sx_sunlock(&proctree_lock);
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL) {
			error = ESRCH;
			goto done;
		}
		PROC_UNLOCK(p);
		/* XXX: UNLOCK above has a race */
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	td->td_inktrace = 0;
	return (error);
#else
	return (ENOSYS);
#endif
}

/*
 * utrace system call
 */
/* ARGSUSED */
int
utrace(td, uap)
	struct thread *td;
	register struct utrace_args *uap;
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;

	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	req = ktr_getrequest(KTR_USER);
	if (req == NULL)
		return (0);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	if (!copyin(uap->addr, cp, uap->len)) {
		req->ktr_header.ktr_buffer = cp;
		req->ktr_header.ktr_len = uap->len;
		ktr_submitrequest(req);
	} else {
		/* Don't leak the copy buffer on a failed copyin(). */
		free(cp, M_KTRACE);
		ktr_freerequest(req);
		td->td_inktrace = 0;
	}
	return (0);
#else
	return (ENOSYS);
#endif
}


#ifdef KTRACE
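/*
 * Apply a set or clear operation to a single process, swapping in the
 * new trace vnode or releasing the old one as needed.  Returns 1 on
 * success and 0 if the caller may not change the process's ktrace
 * state.
 */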
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;

	PROC_LOCK(p);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracep != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracep;
			VREF(vp);
			p->p_tracep = vp;
		}
		p->p_traceflag |= facs;
		if (td->td_ucred->cr_uid == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			tracevp = p->p_tracep;
			p->p_tracep = NULL;
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL)
		vrele(tracevp);

	return (1);
}

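/*
 * Apply a ktrace operation to a process and all of its descendants.
 */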
static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	sx_slock(&proctree_lock);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top) {
				sx_sunlock(&proctree_lock);
				return (ret);
			}
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/*NOTREACHED*/
}

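/*
 * Write a request to its trace vnode.  The header, any fixed-size
 * payload, and any variable-length buffer are gathered into a single
 * scatter/gather write; a genio request instead supplies its payload
 * as a uio that is written separately.  On a write error, tracing to
 * this vnode is disabled for all processes and pending requests.
 */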
static void
ktr_writerequest(struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct uio *uio = NULL;
	struct proc *p;
	struct thread *td;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	vp = req->ktr_vp;
	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.
	 */
	if (vp == NULL)
		return;
	kth = &req->ktr_header;
	datalen = data_lengths[kth->ktr_type];
	buflen = kth->ktr_len;
	cred = req->ktr_cred;
	td = curthread;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(kth->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = kth->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	} else
		uio = kth->ktr_buffer;
	KASSERT((uio == NULL) ^ (kth->ktr_type == KTR_GENIO),
	    ("ktrace: uio and genio mismatch"));
	if (uio != NULL)
		kth->ktr_len += uio->uio_resid;
	mtx_lock(&Giant);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
	error = mac_check_vnode_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	if (error == 0 && uio != NULL) {
		(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
		error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, cred);
	}
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	mtx_unlock(&Giant);
	if (buflen != 0)
		free(kth->ktr_buffer, M_KTRACE);
	if (!error)
		return;
	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 0;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		if (p->p_tracep == vp) {
			mtx_lock(&ktrace_mtx);
			p->p_tracep = NULL;
			p->p_traceflag = 0;
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
	/*
	 * Second, clear this vnode from any pending requests.
	 */
	mtx_lock(&ktrace_mtx);
	STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
		if (req->ktr_vp == vp) {
			req->ktr_vp = NULL;
			vrele_count++;
		}
	}
	mtx_unlock(&ktrace_mtx);
	mtx_lock(&Giant);
	while (vrele_count-- > 0)
		vrele(vp);
	mtx_unlock(&Giant);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    suser_cred(td->td_ucred, PRISON_ROOT))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */