/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
 * $FreeBSD: head/sys/kern/kern_ktrace.c 103235 2002-09-11 20:56:05Z jhb $
 */

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mac.h>
#include <sys/malloc.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/ktrace.h>
#include <sys/sema.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/sysproto.h>

static MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");

#ifdef KTRACE

#ifndef KTRACE_REQUEST_POOL
#define	KTRACE_REQUEST_POOL	100
#endif

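/*
 * The request pool defaults to KTRACE_REQUEST_POOL entries.  It can be
 * overridden at boot via the loader tunable kern.ktrace.request_pool
 * and resized at runtime through the sysctl of the same name (see
 * sysctl_kern_ktrace_request_pool() below).
 */
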
struct ktr_request {
	struct	ktr_header ktr_header;
	struct	ucred *ktr_cred;
	struct	vnode *ktr_vp;
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	int	ktr_synchronous;
	STAILQ_ENTRY(ktr_request) ktr_list;
};

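/*
 * Request lifecycle: ktr_getrequest() takes an entry off the ktr_free
 * list and fills in the header, ktr_submitrequest() queues it on
 * ktr_todo for the ktrace kthread, ktr_writerequest() writes it to the
 * trace vnode, and ktr_freerequest() returns it to ktr_free.
 */
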
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0					/* KTR_USER */
};

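/*
 * data_lengths[] is indexed by KTR_* record type and gives the size of
 * the fixed-length payload stored in ktr_data above.  Variable-length
 * payloads (syscall arguments, lookup paths, genio data) travel
 * separately via ktr_header.ktr_buffer.
 */
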
static STAILQ_HEAD(, ktr_request) ktr_todo;
static STAILQ_HEAD(, ktr_request) ktr_free;

SYSCTL_NODE(_kern, OID_AUTO, ktrace, CTLFLAG_RD, 0, "KTRACE options");

static uint ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace.request_pool", &ktr_requestpool);

static uint ktr_geniosize = PAGE_SIZE;
TUNABLE_INT("kern.ktrace.genio_size", &ktr_geniosize);
SYSCTL_UINT(_kern_ktrace, OID_AUTO, genio_size, CTLFLAG_RW, &ktr_geniosize,
    0, "Maximum size of genio event payload");

static int print_message = 1;
struct mtx ktrace_mtx;
static struct sema ktrace_sema;

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static uint ktrace_resize_pool(uint newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_loop(void *dummy);
static void ktr_writerequest(struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);

static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sema_init(&ktrace_sema, 0, "ktrace");
	STAILQ_INIT(&ktr_todo);
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
	kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, "ktrace");
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);

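/*
 * All trace records are normally written by the dedicated "ktrace"
 * kthread created above, so the ktr*() entry points only queue work
 * and never block on vnode I/O themselves; only synchronous requests
 * (ktr_synchronous) make the submitting thread perform the write.
 */
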
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	uint newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		mtx_lock(&ktrace_mtx);
		oldsize = ktr_requestpool;
		mtx_unlock(&ktrace_mtx);
		return (SYSCTL_OUT(req, &oldsize, sizeof(uint)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(uint));
	if (error)
		return (error);
	td = curthread;
	td->td_inktrace = 1;
	mtx_lock(&ktrace_mtx);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(wantsize);
	mtx_unlock(&ktrace_mtx);
	td->td_inktrace = 0;
	error = SYSCTL_OUT(req, &oldsize, sizeof(uint));
	if (error)
		return (error);
	if (newsize != wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern_ktrace, OID_AUTO, request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");

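/*
 * Example usage from userland (values are illustrative):
 *
 *	# sysctl kern.ktrace.request_pool
 *	kern.ktrace.request_pool: 100
 *	# sysctl kern.ktrace.request_pool=200
 *
 * A write that cannot be fully satisfied (e.g. the pool cannot shrink
 * below the number of requests currently in flight) fails with ENOSPC.
 */
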
static uint
ktrace_resize_pool(uint newsize)
{
	struct ktr_request *req;

	mtx_assert(&ktrace_mtx, MA_OWNED);
	print_message = 1;
	if (newsize == ktr_requestpool)
		return (newsize);
	if (newsize < ktr_requestpool)
		/* Shrink pool down to newsize if possible. */
		while (ktr_requestpool > newsize) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				return (ktr_requestpool);
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			/* Drop the mutex while in the allocator; it may sleep. */
			mtx_unlock(&ktrace_mtx);
			free(req, M_KTRACE);
			mtx_lock(&ktrace_mtx);
		}
	else
		/* Grow pool up to newsize. */
		while (ktr_requestpool < newsize) {
			/* Same lock dance as above for M_WAITOK malloc(). */
			mtx_unlock(&ktrace_mtx);
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			mtx_lock(&ktrace_mtx);
			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
			ktr_requestpool++;
		}
	return (ktr_requestpool);
}

static struct ktr_request *
ktr_getrequest(int type)
{
	struct ktr_request *req;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int pm;

	td->td_inktrace = 1;
	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		td->td_inktrace = 0;
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		KASSERT(p->p_tracep != NULL, ("ktrace: no trace vnode"));
		req->ktr_vp = p->p_tracep;
		VREF(p->p_tracep);
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
		req->ktr_cred = crhold(td->td_ucred);
		req->ktr_header.ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
		req->ktr_synchronous = 0;
	} else {
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
		td->td_inktrace = 0;
	}
	return (req);
}

static void
ktr_submitrequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
	sema_post(&ktrace_sema);
	if (req->ktr_synchronous) {
		/*
		 * For a synchronous request, we wait for the ktrace thread
		 * to get to our item in the todo list and wake us up.  Then
		 * we write the request out ourselves and wake the ktrace
		 * thread back up.
		 */
		msleep(req, &ktrace_mtx, curthread->td_priority, "ktrsync", 0);
		mtx_unlock(&ktrace_mtx);
		ktr_writerequest(req);
		mtx_lock(&ktrace_mtx);
		wakeup(req);
	}
	mtx_unlock(&ktrace_mtx);
	curthread->td_inktrace = 0;
}

static void
ktr_freerequest(struct ktr_request *req)
{

	crfree(req->ktr_cred);
	if (req->ktr_vp != NULL) {
		/* vrele() may call into VFS, which still requires Giant. */
		mtx_lock(&Giant);
		vrele(req->ktr_vp);
		mtx_unlock(&Giant);
	}
	if (req->ktr_header.ktr_buffer != NULL)
		free(req->ktr_header.ktr_buffer, M_KTRACE);
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}

static void
ktr_loop(void *dummy)
{
	struct ktr_request *req;
	struct thread *td;
	struct ucred *cred;

	/* Only cache these values once. */
	td = curthread;
	cred = td->td_ucred;
	for (;;) {
		sema_wait(&ktrace_sema);
		mtx_lock(&ktrace_mtx);
		req = STAILQ_FIRST(&ktr_todo);
		/* Assert before STAILQ_REMOVE_HEAD() dereferences the head. */
		KASSERT(req != NULL, ("got a NULL request"));
		STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
		if (req->ktr_synchronous) {
			wakeup(req);
			msleep(req, &ktrace_mtx, curthread->td_priority,
			    "ktrwait", 0);
			mtx_unlock(&ktrace_mtx);
		} else {
			mtx_unlock(&ktrace_mtx);
			/*
			 * It is not enough just to pass the cached cred
			 * to the VOP's in ktr_writerequest().  Some VFS
			 * operations use curthread->td_ucred, so we need
			 * to modify our thread's credentials as well.
			 * Evil.
			 */
			td->td_ucred = req->ktr_cred;
			ktr_writerequest(req);
			td->td_ucred = cred;
		}
		ktr_freerequest(req);
	}
}

/*
 * MPSAFE
 */
void
ktrsyscall(code, narg, args)
	int code, narg;
	register_t args[];
{
	struct ktr_request *req;
	struct ktr_syscall *ktp;
	size_t buflen;
	char *buf = NULL;

	buflen = sizeof(register_t) * narg;
	if (buflen > 0) {
		buf = malloc(buflen, M_KTRACE, M_WAITOK);
		bcopy(args, buf, buflen);
	}
	req = ktr_getrequest(KTR_SYSCALL);
	if (req == NULL) {
		/* Don't leak the argument buffer if tracing is disabled. */
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	ktp = &req->ktr_data.ktr_syscall;
	ktp->ktr_code = code;
	ktp->ktr_narg = narg;
	if (buflen > 0) {
		req->ktr_header.ktr_len = buflen;
		req->ktr_header.ktr_buffer = buf;
	}
	ktr_submitrequest(req);
}

/*
 * MPSAFE
 */
void
ktrsysret(code, error, retval)
	int code, error;
	register_t retval;
{
	struct ktr_request *req;
	struct ktr_sysret *ktp;

	req = ktr_getrequest(KTR_SYSRET);
	if (req == NULL)
		return;
	ktp = &req->ktr_data.ktr_sysret;
	ktp->ktr_code = code;
	ktp->ktr_error = error;
	ktp->ktr_retval = retval;		/* what about val2 ? */
	ktr_submitrequest(req);
}

void
ktrnamei(path)
	char *path;
{
	struct ktr_request *req;
	int namelen;
	char *buf = NULL;

	namelen = strlen(path);
	if (namelen > 0) {
		buf = malloc(namelen, M_KTRACE, M_WAITOK);
		bcopy(path, buf, namelen);
	}
	req = ktr_getrequest(KTR_NAMEI);
	if (req == NULL) {
		/* Don't leak the path buffer if tracing is disabled. */
		if (buf != NULL)
			free(buf, M_KTRACE);
		return;
	}
	if (namelen > 0) {
		req->ktr_header.ktr_len = namelen;
		req->ktr_header.ktr_buffer = buf;
	}
	ktr_submitrequest(req);
}

/*
 * The uio may not remain valid once we return, so we cannot simply
 * hand the request off to the ktrace thread and let it copy the data
 * later.  Instead, copy up to ktr_geniosize bytes of the I/O into a
 * private buffer here and attach that buffer to the request, which can
 * then be queued and written asynchronously like any other event.
 */
void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;
	int datalen;
	char *buf;

	if (error)
		return;
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	datalen = imin(uio->uio_resid, ktr_geniosize);
	buf = malloc(datalen, M_KTRACE, M_WAITOK);
	if (uiomove(buf, datalen, uio)) {
		free(buf, M_KTRACE);
		return;
	}
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL) {
		free(buf, M_KTRACE);
		return;
	}
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	req->ktr_header.ktr_len = datalen;
	req->ktr_header.ktr_buffer = buf;
	ktr_submitrequest(req);
}

void
ktrpsig(sig, action, mask, code)
	int sig;
	sig_t action;
	sigset_t *mask;
	int code;
{
	struct ktr_request *req;
	struct ktr_psig	*kp;

	req = ktr_getrequest(KTR_PSIG);
	if (req == NULL)
		return;
	kp = &req->ktr_data.ktr_psig;
	kp->signo = (char)sig;
	kp->action = action;
	kp->mask = *mask;
	kp->code = code;
	ktr_submitrequest(req);
}

void
ktrcsw(out, user)
	int out, user;
{
	struct ktr_request *req;
	struct ktr_csw *kc;

	req = ktr_getrequest(KTR_CSW);
	if (req == NULL)
		return;
	kc = &req->ktr_data.ktr_csw;
	kc->out = out;
	kc->user = user;
	ktr_submitrequest(req);
}
#endif

/* Interface and common routines */

/*
 * ktrace system call
 */
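/*
 * Example userland usage (pid is hypothetical; see ktrace(2)):
 *
 *	ktrace("ktrace.out", KTROP_SET | KTRFLAG_DESCEND,
 *	    KTRFAC_SYSCALL | KTRFAC_SYSRET, pid);
 *
 * enables syscall entry/return tracing to ktrace.out for pid and its
 * descendants.
 */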
#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;
	int	ops;
	int	facs;
	int	pid;
};
#endif
/* ARGSUSED */
int
ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int ret = 0;
	int flags, error = 0;
	struct nameidata nd;

	td->td_inktrace = 1;
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0);
		if (error) {
			td->td_inktrace = 0;
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0, td);
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			td->td_inktrace = 0;
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list) {
			PROC_LOCK(p);
			if (p->p_tracep == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					p->p_tracep = NULL;
					p->p_traceflag = 0;
					mtx_unlock(&ktrace_mtx);
					PROC_UNLOCK(p);
					(void) vn_close(vp, FREAD|FWRITE,
						td->td_ucred, td);
				} else {
					PROC_UNLOCK(p);
					error = EPERM;
				}
			} else
				PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	/*
	 * do it
	 */
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		sx_slock(&proctree_lock);
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		LIST_FOREACH(p, &pg->pg_members, p_pglist)
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		sx_sunlock(&proctree_lock);
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL) {
			error = ESRCH;
			goto done;
		}
		PROC_UNLOCK(p);
		/* XXX: UNLOCK above has a race */
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	if (!ret)
		error = EPERM;
done:
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	td->td_inktrace = 0;
	return (error);
#else
	return (ENOSYS);
#endif
}

/*
 * utrace system call
 */
/* ARGSUSED */
int
utrace(td, uap)
	struct thread *td;
	register struct utrace_args *uap;
{

#ifdef KTRACE
	struct ktr_request *req;
	void *cp;

	if (uap->len > KTR_USER_MAXLEN)
		return (EINVAL);
	req = ktr_getrequest(KTR_USER);
	if (req == NULL)
		return (0);
	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
	if (!copyin(uap->addr, cp, uap->len)) {
		req->ktr_header.ktr_buffer = cp;
		req->ktr_header.ktr_len = uap->len;
		ktr_submitrequest(req);
	} else {
		/* Don't leak the buffer if the copyin failed. */
		free(cp, M_KTRACE);
		ktr_freerequest(req);
		td->td_inktrace = 0;
	}
	return (0);
#else
	return (ENOSYS);
#endif
}

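/*
 * Example userland usage (record layout is application-defined; see
 * utrace(2)):
 *
 *	struct myrec rec = { ... };
 *	utrace(&rec, sizeof(rec));
 *
 * inserts a KTR_USER record of at most KTR_USER_MAXLEN bytes into the
 * trace stream.
 */
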
#ifdef KTRACE
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;

	PROC_LOCK(p);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracep != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracep;
			VREF(vp);
			p->p_tracep = vp;
		}
		p->p_traceflag |= facs;
		if (td->td_ucred->cr_uid == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			tracevp = p->p_tracep;
			p->p_tracep = NULL;
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	if (tracevp != NULL)
		vrele(tracevp);

	return (1);
}

static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	sx_slock(&proctree_lock);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top) {
				sx_sunlock(&proctree_lock);
				return (ret);
			}
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/*NOTREACHED*/
}

static void
ktr_writerequest(struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct proc *p;
	struct thread *td;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	vp = req->ktr_vp;
	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.
	 */
	if (vp == NULL)
		return;
	kth = &req->ktr_header;
	datalen = data_lengths[kth->ktr_type];
	buflen = kth->ktr_len;
	cred = req->ktr_cred;
	td = curthread;
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(kth->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = kth->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	}
	mtx_lock(&Giant);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
	error = mac_check_vnode_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	mtx_unlock(&Giant);
	if (!error)
		return;
	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 0;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		if (p->p_tracep == vp) {
			mtx_lock(&ktrace_mtx);
			p->p_tracep = NULL;
			p->p_traceflag = 0;
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
	/*
	 * Second, clear this vnode from any pending requests.
	 */
	mtx_lock(&ktrace_mtx);
	STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
		if (req->ktr_vp == vp) {
			req->ktr_vp = NULL;
			vrele_count++;
		}
	}
	mtx_unlock(&ktrace_mtx);
	mtx_lock(&Giant);
	while (vrele_count-- > 0)
		vrele(vp);
	mtx_unlock(&Giant);
}

/*
 * Return true if caller has permission to set the ktracing state
 * of target.  Essentially, the target can't possess any
 * more permissions than the caller.  KTRFAC_ROOT signifies that
 * root previously set the tracing status on the target process, and
 * so, only root may further change it.
 */
static int
ktrcanset(td, targetp)
	struct thread *td;
	struct proc *targetp;
{

	PROC_LOCK_ASSERT(targetp, MA_OWNED);
	if (targetp->p_traceflag & KTRFAC_ROOT &&
	    suser_cred(td->td_ucred, PRISON_ROOT))
		return (0);

	if (p_candebug(td, targetp) != 0)
		return (0);

	return (1);
}

#endif /* KTRACE */