kern_ktrace.c revision 103233
1282785Sgjb/*
2282785Sgjb * Copyright (c) 1989, 1993
3282785Sgjb *	The Regents of the University of California.  All rights reserved.
4282785Sgjb *
5282785Sgjb * Redistribution and use in source and binary forms, with or without
6282785Sgjb * modification, are permitted provided that the following conditions
7282785Sgjb * are met:
8282785Sgjb * 1. Redistributions of source code must retain the above copyright
9282785Sgjb *    notice, this list of conditions and the following disclaimer.
10282785Sgjb * 2. Redistributions in binary form must reproduce the above copyright
11282785Sgjb *    notice, this list of conditions and the following disclaimer in the
12282785Sgjb *    documentation and/or other materials provided with the distribution.
13282785Sgjb * 3. All advertising materials mentioning features or use of this software
14282787Sgjb *    must display the following acknowledgement:
15282785Sgjb *	This product includes software developed by the University of
16282785Sgjb *	California, Berkeley and its contributors.
17282785Sgjb * 4. Neither the name of the University nor the names of its contributors
18282785Sgjb *    may be used to endorse or promote products derived from this software
19282785Sgjb *    without specific prior written permission.
20282785Sgjb *
21282785Sgjb * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22282785Sgjb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23282785Sgjb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24282785Sgjb * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25282785Sgjb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26282785Sgjb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27282785Sgjb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28282785Sgjb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29282785Sgjb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30282785Sgjb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31282785Sgjb * SUCH DAMAGE.
32282785Sgjb *
33282785Sgjb *	@(#)kern_ktrace.c	8.2 (Berkeley) 9/23/93
34282785Sgjb * $FreeBSD: head/sys/kern/kern_ktrace.c 103233 2002-09-11 20:46:50Z jhb $
35282785Sgjb */
36282785Sgjb
37282785Sgjb#include "opt_ktrace.h"
38282785Sgjb#include "opt_mac.h"
39282785Sgjb
40282785Sgjb#include <sys/param.h>
41282785Sgjb#include <sys/systm.h>
42282785Sgjb#include <sys/fcntl.h>
43282785Sgjb#include <sys/jail.h>
44282785Sgjb#include <sys/kernel.h>
45282785Sgjb#include <sys/kthread.h>
46282785Sgjb#include <sys/lock.h>
47282785Sgjb#include <sys/mutex.h>
48282785Sgjb#include <sys/mac.h>
49282785Sgjb#include <sys/malloc.h>
50282785Sgjb#include <sys/namei.h>
51282785Sgjb#include <sys/proc.h>
52282785Sgjb#include <sys/unistd.h>
53282785Sgjb#include <sys/vnode.h>
54282785Sgjb#include <sys/ktrace.h>
55282785Sgjb#include <sys/sema.h>
56282785Sgjb#include <sys/sx.h>
57282785Sgjb#include <sys/sysctl.h>
58282789Sgjb#include <sys/syslog.h>
59282789Sgjb#include <sys/sysproto.h>
60282789Sgjb
61282785Sgjbstatic MALLOC_DEFINE(M_KTRACE, "KTRACE", "KTRACE");
62282785Sgjb
63282785Sgjb#ifdef KTRACE
64282787Sgjb
65282787Sgjb#ifndef KTRACE_REQUEST_POOL
66282787Sgjb#define	KTRACE_REQUEST_POOL	100
67282787Sgjb#endif
68282787Sgjb
/*
 * One queued trace event.  Each record carries its own header, the
 * credentials and trace vnode captured when the event occurred, and
 * (for fixed-size record types) the payload in ktr_data.  Variable
 * length data travels via ktr_header.ktr_buffer instead.
 */
struct ktr_request {
	struct	ktr_header ktr_header;
	struct	ucred *ktr_cred;	/* held cred to write the record with */
	struct	vnode *ktr_vp;		/* referenced trace vnode */
	union {
		struct	ktr_syscall ktr_syscall;
		struct	ktr_sysret ktr_sysret;
		struct	ktr_genio ktr_genio;
		struct	ktr_psig ktr_psig;
		struct	ktr_csw ktr_csw;
	} ktr_data;
	int	ktr_synchronous;	/* submitter waits for the write */
	STAILQ_ENTRY(ktr_request) ktr_list;
};
83282785Sgjb
/*
 * Fixed payload size for each KTR_* record type, indexed by type.
 * KTR_SYSCALL counts only the fields before the variable-length
 * argument array; types with size 0 carry all data in ktr_buffer.
 */
static int data_lengths[] = {
	0,					/* none */
	offsetof(struct ktr_syscall, ktr_args),	/* KTR_SYSCALL */
	sizeof(struct ktr_sysret),		/* KTR_SYSRET */
	0,					/* KTR_NAMEI */
	sizeof(struct ktr_genio),		/* KTR_GENIO */
	sizeof(struct ktr_psig),		/* KTR_PSIG */
	sizeof(struct ktr_csw),			/* KTR_CSW */
	0					/* KTR_USER */
};
94282796Sgjb
static STAILQ_HEAD(, ktr_request) ktr_todo;	/* queue for the worker thread */
static STAILQ_HEAD(, ktr_request) ktr_free;	/* pool of free request objects */

/* Size of the preallocated request pool; tunable at boot and via sysctl. */
static uint ktr_requestpool = KTRACE_REQUEST_POOL;
TUNABLE_INT("kern.ktrace_request_pool", &ktr_requestpool);

static int print_message = 1;	/* rate-limits the "out of objects" warning */
struct mtx ktrace_mtx;		/* protects both queues and the pool size */
static struct sema ktrace_sema;	/* counts entries on ktr_todo */

static void ktrace_init(void *dummy);
static int sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS);
static uint ktrace_resize_pool(uint newsize);
static struct ktr_request *ktr_getrequest(int type);
static void ktr_submitrequest(struct ktr_request *req);
static void ktr_freerequest(struct ktr_request *req);
static void ktr_loop(void *dummy);
static void ktr_writerequest(struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
116282785Sgjb
/*
 * Boot-time initialization: set up the mutex and semaphore, initialize
 * the todo/free queues, preallocate the request pool, and start the
 * ktrace worker thread.
 */
static void
ktrace_init(void *dummy)
{
	struct ktr_request *req;
	int i;

	mtx_init(&ktrace_mtx, "ktrace", NULL, MTX_DEF | MTX_QUIET);
	sema_init(&ktrace_sema, 0, "ktrace");
	STAILQ_INIT(&ktr_todo);
	STAILQ_INIT(&ktr_free);
	for (i = 0; i < ktr_requestpool; i++) {
		req = malloc(sizeof(struct ktr_request), M_KTRACE, M_WAITOK);
		STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	}
	kthread_create(ktr_loop, NULL, NULL, RFHIGHPID, "ktrace");
}
SYSINIT(ktrace_init, SI_SUB_KTRACE, SI_ORDER_ANY, ktrace_init, NULL);
134282785Sgjb
/*
 * Sysctl handler for kern.ktrace_request_pool: report the current pool
 * size and, on write, try to grow or shrink the pool to the requested
 * size.  Returns ENOSPC if the full resize could not be completed.
 */
static int
sysctl_kern_ktrace_request_pool(SYSCTL_HANDLER_ARGS)
{
	struct thread *td;
	uint newsize, oldsize, wantsize;
	int error;

	/* Handle easy read-only case first to avoid warnings from GCC. */
	if (!req->newptr) {
		mtx_lock(&ktrace_mtx);
		oldsize = ktr_requestpool;
		mtx_unlock(&ktrace_mtx);
		return (SYSCTL_OUT(req, &oldsize, sizeof(uint)));
	}

	error = SYSCTL_IN(req, &wantsize, sizeof(uint));
	if (error)
		return (error);
	td = curthread;
	/* Suppress tracing of our own activity while we hold the mutex. */
	td->td_inktrace = 1;
	mtx_lock(&ktrace_mtx);
	oldsize = ktr_requestpool;
	newsize = ktrace_resize_pool(wantsize);
	mtx_unlock(&ktrace_mtx);
	td->td_inktrace = 0;
	error = SYSCTL_OUT(req, &oldsize, sizeof(uint));
	if (error)
		return (error);
	if (newsize != wantsize)
		return (ENOSPC);
	return (0);
}
SYSCTL_PROC(_kern, OID_AUTO, ktrace_request_pool, CTLTYPE_UINT|CTLFLAG_RW,
    &ktr_requestpool, 0, sysctl_kern_ktrace_request_pool, "IU", "");
169
/*
 * Resize the free request pool toward newsize.  Returns the resulting
 * pool size, which may fall short of newsize when shrinking if not
 * enough entries are free.  Called with ktrace_mtx held; the mutex is
 * dropped around malloc()/free() since those may sleep.
 */
static uint
ktrace_resize_pool(uint newsize)
{
	struct ktr_request *req;

	mtx_assert(&ktrace_mtx, MA_OWNED);
	/* Re-arm the "out of objects" warning after any resize attempt. */
	print_message = 1;
	if (newsize == ktr_requestpool)
		return (newsize);
	if (newsize < ktr_requestpool)
		/* Shrink pool down to newsize if possible. */
		while (ktr_requestpool > newsize) {
			req = STAILQ_FIRST(&ktr_free);
			if (req == NULL)
				return (ktr_requestpool);
			STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
			ktr_requestpool--;
			/* Drop the mutex; free() may sleep. */
			mtx_unlock(&ktrace_mtx);
			free(req, M_KTRACE);
			mtx_lock(&ktrace_mtx);
		}
	else
		/* Grow pool up to newsize. */
		while (ktr_requestpool < newsize) {
			mtx_unlock(&ktrace_mtx);
			req = malloc(sizeof(struct ktr_request), M_KTRACE,
			    M_WAITOK);
			mtx_lock(&ktrace_mtx);
			STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
			ktr_requestpool++;
		}
	return (ktr_requestpool);
}
203
/*
 * Allocate a request object for an event of the given type.  Returns
 * NULL if this facility is not being traced for the current process or
 * the free pool is exhausted.  On success the request is stamped with
 * the current time, pid and command name, holds a reference on the
 * thread's credentials and on the process's trace vnode.  Sets
 * td_inktrace so events generated while tracing are not themselves
 * traced; the flag is cleared again on the failure paths (the success
 * path leaves it set until ktr_submitrequest()).
 */
static struct ktr_request *
ktr_getrequest(int type)
{
	struct ktr_request *req;
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int pm;

	td->td_inktrace = 1;
	mtx_lock(&ktrace_mtx);
	if (!KTRCHECK(td, type)) {
		mtx_unlock(&ktrace_mtx);
		td->td_inktrace = 0;
		return (NULL);
	}
	req = STAILQ_FIRST(&ktr_free);
	if (req != NULL) {
		STAILQ_REMOVE_HEAD(&ktr_free, ktr_list);
		req->ktr_header.ktr_type = type;
		KASSERT(p->p_tracep != NULL, ("ktrace: no trace vnode"));
		req->ktr_vp = p->p_tracep;
		VREF(p->p_tracep);
		mtx_unlock(&ktrace_mtx);
		microtime(&req->ktr_header.ktr_time);
		req->ktr_header.ktr_pid = p->p_pid;
		bcopy(p->p_comm, req->ktr_header.ktr_comm, MAXCOMLEN + 1);
		req->ktr_cred = crhold(td->td_ucred);
		req->ktr_header.ktr_buffer = NULL;
		req->ktr_header.ktr_len = 0;
		req->ktr_synchronous = 0;
	} else {
		/* Warn only once (until the pool is resized). */
		pm = print_message;
		print_message = 0;
		mtx_unlock(&ktrace_mtx);
		if (pm)
			printf("Out of ktrace request objects.\n");
		td->td_inktrace = 0;
	}
	return (req);
}
244
/*
 * Queue a request for the ktrace worker thread and, for synchronous
 * requests, wait until the record has been written.  Clears the
 * caller's td_inktrace flag set by ktr_getrequest().
 */
static void
ktr_submitrequest(struct ktr_request *req)
{

	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_TAIL(&ktr_todo, req, ktr_list);
	sema_post(&ktrace_sema);
	if (req->ktr_synchronous) {
		/*
		 * For a synchronous request, we wait for the ktrace thread
		 * to get to our item in the todo list and wake us up.  Then
		 * we write the request out ourselves and wake the ktrace
		 * thread back up.
		 */
		msleep(req, &ktrace_mtx, curthread->td_priority, "ktrsync", 0);
		mtx_unlock(&ktrace_mtx);
		ktr_writerequest(req);
		mtx_lock(&ktrace_mtx);
		wakeup(req);
	}
	mtx_unlock(&ktrace_mtx);
	curthread->td_inktrace = 0;
}
268
/*
 * Drop a request's credential and vnode references and return the
 * object to the free pool.
 */
static void
ktr_freerequest(struct ktr_request *req)
{

	crfree(req->ktr_cred);
	if (req->ktr_vp != NULL) {
		/* vrele() still requires Giant here. */
		mtx_lock(&Giant);
		vrele(req->ktr_vp);
		mtx_unlock(&Giant);
	}
	mtx_lock(&ktrace_mtx);
	STAILQ_INSERT_HEAD(&ktr_free, req, ktr_list);
	mtx_unlock(&ktrace_mtx);
}
283
/*
 * Main loop of the ktrace worker thread: dequeue requests in order and
 * write them out.  Synchronous requests are handed back to the waiting
 * submitter (which performs the write itself) and we block until it is
 * done, so record order in the trace file is preserved.
 */
static void
ktr_loop(void *dummy)
{
	struct ktr_request *req;
	struct thread *td;
	struct ucred *cred;

	/* Only cache these values once. */
	td = curthread;
	cred = td->td_ucred;
	for (;;) {
		sema_wait(&ktrace_sema);
		mtx_lock(&ktrace_mtx);
		req = STAILQ_FIRST(&ktr_todo);
		STAILQ_REMOVE_HEAD(&ktr_todo, ktr_list);
		KASSERT(req != NULL, ("got a NULL request"));
		if (req->ktr_synchronous) {
			/* Wake the submitter, then wait for it to finish. */
			wakeup(req);
			msleep(req, &ktrace_mtx, curthread->td_priority,
			    "ktrwait", 0);
			mtx_unlock(&ktrace_mtx);
		} else {
			mtx_unlock(&ktrace_mtx);
			/*
			 * It is not enough just to pass the cached cred
			 * to the VOP's in ktr_writerequest().  Some VFS
			 * operations use curthread->td_ucred, so we need
			 * to modify our thread's credentials as well.
			 * Evil.
			 */
			td->td_ucred = req->ktr_cred;
			ktr_writerequest(req);
			td->td_ucred = cred;
		}
		ktr_freerequest(req);
	}
}
321
322/*
323 * MPSAFE
324 */
325void
326ktrsyscall(code, narg, args)
327	int code, narg;
328	register_t args[];
329{
330	struct ktr_request *req;
331	struct ktr_syscall *ktp;
332	size_t buflen;
333	char *buf = NULL;
334
335	buflen = sizeof(register_t) * narg;
336	if (buflen > 0) {
337		buf = malloc(buflen, M_KTRACE, M_WAITOK);
338		bcopy(args, buf, buflen);
339	}
340	req = ktr_getrequest(KTR_SYSCALL);
341	if (req == NULL)
342		return;
343	ktp = &req->ktr_data.ktr_syscall;
344	ktp->ktr_code = code;
345	ktp->ktr_narg = narg;
346	if (buflen > 0) {
347		req->ktr_header.ktr_len = buflen;
348		req->ktr_header.ktr_buffer = buf;
349	}
350	ktr_submitrequest(req);
351}
352
353/*
354 * MPSAFE
355 */
356void
357ktrsysret(code, error, retval)
358	int code, error;
359	register_t retval;
360{
361	struct ktr_request *req;
362	struct ktr_sysret *ktp;
363
364	req = ktr_getrequest(KTR_SYSRET);
365	if (req == NULL)
366		return;
367	ktp = &req->ktr_data.ktr_sysret;
368	ktp->ktr_code = code;
369	ktp->ktr_error = error;
370	ktp->ktr_retval = retval;		/* what about val2 ? */
371	ktr_submitrequest(req);
372}
373
374void
375ktrnamei(path)
376	char *path;
377{
378	struct ktr_request *req;
379	int namelen;
380	char *buf = NULL;
381
382	namelen = strlen(path);
383	if (namelen > 0) {
384		buf = malloc(namelen, M_KTRACE, M_WAITOK);
385		bcopy(path, buf, namelen);
386	}
387	req = ktr_getrequest(KTR_NAMEI);
388	if (req == NULL)
389		return;
390	if (namelen > 0) {
391		req->ktr_header.ktr_len = namelen;
392		req->ktr_header.ktr_buffer = buf;
393	}
394	ktr_submitrequest(req);
395}
396
/*
 * Since the uio may not stay valid, we can not hand off this request to
 * the thread and need to process it synchronously.  However, we wish to
 * keep the relative order of records in a trace file correct, so we
 * do put this request on the queue (if it isn't empty) and then block.
 * The ktrace thread wakes us back up when it is time for this event to
 * be posted and blocks until we have completed writing out the event
 * and woken it back up.
 */
void
ktrgenio(fd, rw, uio, error)
	int fd;
	enum uio_rw rw;
	struct uio *uio;
	int error;
{
	struct ktr_request *req;
	struct ktr_genio *ktg;

	/* Failed I/O is not recorded. */
	if (error)
		return;
	req = ktr_getrequest(KTR_GENIO);
	if (req == NULL)
		return;
	ktg = &req->ktr_data.ktr_genio;
	ktg->ktr_fd = fd;
	ktg->ktr_rw = rw;
	/* The uio rides in ktr_buffer; ktr_writerequest() special-cases it. */
	req->ktr_header.ktr_buffer = uio;
	uio->uio_offset = 0;
	uio->uio_rw = UIO_WRITE;
	req->ktr_synchronous = 1;
	ktr_submitrequest(req);
}
430
431void
432ktrpsig(sig, action, mask, code)
433	int sig;
434	sig_t action;
435	sigset_t *mask;
436	int code;
437{
438	struct ktr_request *req;
439	struct ktr_psig	*kp;
440
441	req = ktr_getrequest(KTR_PSIG);
442	if (req == NULL)
443		return;
444	kp = &req->ktr_data.ktr_psig;
445	kp->signo = (char)sig;
446	kp->action = action;
447	kp->mask = *mask;
448	kp->code = code;
449	ktr_submitrequest(req);
450}
451
452void
453ktrcsw(out, user)
454	int out, user;
455{
456	struct ktr_request *req;
457	struct ktr_csw *kc;
458
459	req = ktr_getrequest(KTR_CSW);
460	if (req == NULL)
461		return;
462	kc = &req->ktr_data.ktr_csw;
463	kc->out = out;
464	kc->user = user;
465	ktr_submitrequest(req);
466}
467#endif
468
469/* Interface and common routines */
470
471/*
472 * ktrace system call
473 */
#ifndef _SYS_SYSPROTO_H_
struct ktrace_args {
	char	*fname;	/* trace file path */
	int	ops;	/* KTROP_* operation, possibly | KTRFLAG_DESCEND */
	int	facs;	/* KTRFAC_* facilities to affect */
	int	pid;	/* target pid, or negated process group id */
};
#endif
/* ARGSUSED */
int
ktrace(td, uap)
	struct thread *td;
	register struct ktrace_args *uap;
{
#ifdef KTRACE
	register struct vnode *vp = NULL;
	register struct proc *p;
	struct pgrp *pg;
	int facs = uap->facs & ~KTRFAC_ROOT;
	int ops = KTROP(uap->ops);
	int descend = uap->ops & KTRFLAG_DESCEND;
	int ret = 0;
	int flags, error = 0;
	struct nameidata nd;

	/* Don't trace our own activity while manipulating trace state. */
	td->td_inktrace = 1;
	if (ops != KTROP_CLEAR) {
		/*
		 * an operation which requires a file argument.
		 */
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->fname, td);
		flags = FREAD | FWRITE | O_NOFOLLOW;
		error = vn_open(&nd, &flags, 0);
		if (error) {
			td->td_inktrace = 0;
			return (error);
		}
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vp = nd.ni_vp;
		VOP_UNLOCK(vp, 0, td);
		/* Refuse to trace to anything but a regular file. */
		if (vp->v_type != VREG) {
			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
			td->td_inktrace = 0;
			return (EACCES);
		}
	}
	/*
	 * Clear all uses of the tracefile.
	 */
	if (ops == KTROP_CLEARFILE) {
		sx_slock(&allproc_lock);
		LIST_FOREACH(p, &allproc, p_list) {
			PROC_LOCK(p);
			if (p->p_tracep == vp) {
				if (ktrcanset(td, p)) {
					mtx_lock(&ktrace_mtx);
					p->p_tracep = NULL;
					p->p_traceflag = 0;
					mtx_unlock(&ktrace_mtx);
					PROC_UNLOCK(p);
					/* Drop the reference p was holding. */
					(void) vn_close(vp, FREAD|FWRITE,
						td->td_ucred, td);
				} else {
					PROC_UNLOCK(p);
					error = EPERM;
				}
			} else
				PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		goto done;
	}
	/*
	 * need something to (un)trace (XXX - why is this here?)
	 */
	if (!facs) {
		error = EINVAL;
		goto done;
	}
	/*
	 * do it
	 */
	if (uap->pid < 0) {
		/*
		 * by process group
		 */
		sx_slock(&proctree_lock);
		pg = pgfind(-uap->pid);
		if (pg == NULL) {
			sx_sunlock(&proctree_lock);
			error = ESRCH;
			goto done;
		}
		/*
		 * ktrops() may call vrele(). Lock pg_members
		 * by the proctree_lock rather than pg_mtx.
		 */
		PGRP_UNLOCK(pg);
		LIST_FOREACH(p, &pg->pg_members, p_pglist)
			if (descend)
				ret |= ktrsetchildren(td, p, ops, facs, vp);
			else
				ret |= ktrops(td, p, ops, facs, vp);
		sx_sunlock(&proctree_lock);
	} else {
		/*
		 * by pid
		 */
		p = pfind(uap->pid);
		if (p == NULL) {
			error = ESRCH;
			goto done;
		}
		PROC_UNLOCK(p);
		/* XXX: UNLOCK above has a race */
		if (descend)
			ret |= ktrsetchildren(td, p, ops, facs, vp);
		else
			ret |= ktrops(td, p, ops, facs, vp);
	}
	if (!ret)
		error = EPERM;
done:
	/* Drop this syscall's own reference from vn_open(). */
	if (vp != NULL)
		(void) vn_close(vp, FWRITE, td->td_ucred, td);
	td->td_inktrace = 0;
	return (error);
#else
	return ENOSYS;
#endif
}
605
606/*
607 * utrace system call
608 */
609/* ARGSUSED */
610int
611utrace(td, uap)
612	struct thread *td;
613	register struct utrace_args *uap;
614{
615
616#ifdef KTRACE
617	struct ktr_request *req;
618	void *cp;
619
620	if (uap->len > KTR_USER_MAXLEN)
621		return (EINVAL);
622	req = ktr_getrequest(KTR_USER);
623	if (req == NULL)
624		return (0);
625	cp = malloc(uap->len, M_KTRACE, M_WAITOK);
626	if (!copyin(uap->addr, cp, uap->len)) {
627		req->ktr_header.ktr_buffer = cp;
628		req->ktr_header.ktr_len = uap->len;
629		ktr_submitrequest(req);
630	} else {
631		ktr_freerequest(req);
632		td->td_inktrace = 0;
633	}
634	return (0);
635#else
636	return (ENOSYS);
637#endif
638}
639
640#ifdef KTRACE
/*
 * Apply one set/clear operation to a single process.  Returns 1 if the
 * caller had permission to change p's tracing state, 0 otherwise.  Any
 * previously used trace vnode reference is released only after the
 * process lock has been dropped.
 */
static int
ktrops(td, p, ops, facs, vp)
	struct thread *td;
	struct proc *p;
	int ops, facs;
	struct vnode *vp;
{
	struct vnode *tracevp = NULL;

	PROC_LOCK(p);
	if (!ktrcanset(td, p)) {
		PROC_UNLOCK(p);
		return (0);
	}
	mtx_lock(&ktrace_mtx);
	if (ops == KTROP_SET) {
		if (p->p_tracep != vp) {
			/*
			 * if trace file already in use, relinquish below
			 */
			tracevp = p->p_tracep;
			VREF(vp);
			p->p_tracep = vp;
		}
		p->p_traceflag |= facs;
		if (td->td_ucred->cr_uid == 0)
			p->p_traceflag |= KTRFAC_ROOT;
	} else {
		/* KTROP_CLEAR */
		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
			/* no more tracing */
			p->p_traceflag = 0;
			tracevp = p->p_tracep;
			p->p_tracep = NULL;
		}
	}
	mtx_unlock(&ktrace_mtx);
	PROC_UNLOCK(p);
	/* Release the displaced vnode outside of the locks. */
	if (tracevp != NULL)
		vrele(tracevp);

	return (1);
}
684
/*
 * Apply the operation to top and all of its descendants via an
 * iterative preorder walk of the process tree.  Returns nonzero if
 * ktrops() succeeded for at least one process.
 */
static int
ktrsetchildren(td, top, ops, facs, vp)
	struct thread *td;
	struct proc *top;
	int ops, facs;
	struct vnode *vp;
{
	register struct proc *p;
	register int ret = 0;

	p = top;
	sx_slock(&proctree_lock);
	for (;;) {
		ret |= ktrops(td, p, ops, facs, vp);
		/*
		 * If this process has children, descend to them next,
		 * otherwise do any siblings, and if done with this level,
		 * follow back up the tree (but not past top).
		 */
		if (!LIST_EMPTY(&p->p_children))
			p = LIST_FIRST(&p->p_children);
		else for (;;) {
			if (p == top) {
				sx_sunlock(&proctree_lock);
				return (ret);
			}
			if (LIST_NEXT(p, p_sibling)) {
				p = LIST_NEXT(p, p_sibling);
				break;
			}
			p = p->p_pptr;
		}
	}
	/*NOTREACHED*/
}
720
/*
 * Write one request's record to its trace vnode: the header, the
 * fixed-size payload for the record type (if any), and then either the
 * variable-length buffer or, for KTR_GENIO, the caller-supplied uio.
 * On a write error, tracing to this vnode is disabled for all
 * processes and pending requests, and the accumulated vnode references
 * are dropped in one batch at the end.
 */
static void
ktr_writerequest(struct ktr_request *req)
{
	struct ktr_header *kth;
	struct vnode *vp;
	struct uio *uio = NULL;
	struct proc *p;
	struct thread *td;
	struct ucred *cred;
	struct uio auio;
	struct iovec aiov[3];
	struct mount *mp;
	int datalen, buflen, vrele_count;
	int error;

	vp = req->ktr_vp;
	/*
	 * If vp is NULL, the vp has been cleared out from under this
	 * request, so just drop it.
	 */
	if (vp == NULL)
		return;
	kth = &req->ktr_header;
	datalen = data_lengths[kth->ktr_type];
	buflen = kth->ktr_len;
	cred = req->ktr_cred;
	td = curthread;
	/* iovec 0 is always the record header. */
	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	aiov[0].iov_base = (caddr_t)kth;
	aiov[0].iov_len = sizeof(struct ktr_header);
	auio.uio_resid = sizeof(struct ktr_header);
	auio.uio_iovcnt = 1;
	auio.uio_td = td;
	if (datalen != 0) {
		/* Fixed-size payload for this record type. */
		aiov[1].iov_base = (caddr_t)&req->ktr_data;
		aiov[1].iov_len = datalen;
		auio.uio_resid += datalen;
		auio.uio_iovcnt++;
		kth->ktr_len += datalen;
	}
	if (buflen != 0) {
		KASSERT(kth->ktr_buffer != NULL, ("ktrace: nothing to write"));
		aiov[auio.uio_iovcnt].iov_base = kth->ktr_buffer;
		aiov[auio.uio_iovcnt].iov_len = buflen;
		auio.uio_resid += buflen;
		auio.uio_iovcnt++;
	} else
		/* KTR_GENIO stashes its uio in ktr_buffer instead. */
		uio = kth->ktr_buffer;
	KASSERT((uio == NULL) ^ (kth->ktr_type == KTR_GENIO),
	    ("ktrace: uio and genio mismatch"));
	if (uio != NULL)
		kth->ktr_len += uio->uio_resid;
	mtx_lock(&Giant);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
	error = mac_check_vnode_write(cred, NOCRED, vp);
	if (error == 0)
#endif
		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
	if (error == 0 && uio != NULL) {
		/* Second write for the genio data itself. */
		(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
		error = VOP_WRITE(vp, uio, IO_UNIT | IO_APPEND, cred);
	}
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	mtx_unlock(&Giant);
	if (buflen != 0)
		free(kth->ktr_buffer, M_KTRACE);
	if (!error)
		return;
	/*
	 * If error encountered, give up tracing on this vnode.  We defer
	 * all the vrele()'s on the vnode until after we are finished walking
	 * the various lists to avoid needlessly holding locks.
	 */
	log(LOG_NOTICE, "ktrace write failed, errno %d, tracing stopped\n",
	    error);
	vrele_count = 0;
	/*
	 * First, clear this vnode from being used by any processes in the
	 * system.
	 * XXX - If one process gets an EPERM writing to the vnode, should
	 * we really do this?  Other processes might have suitable
	 * credentials for the operation.
	 */
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		if (p->p_tracep == vp) {
			mtx_lock(&ktrace_mtx);
			p->p_tracep = NULL;
			p->p_traceflag = 0;
			mtx_unlock(&ktrace_mtx);
			vrele_count++;
		}
		PROC_UNLOCK(p);
	}
	sx_sunlock(&allproc_lock);
	/*
	 * Second, clear this vnode from any pending requests.
	 */
	mtx_lock(&ktrace_mtx);
	STAILQ_FOREACH(req, &ktr_todo, ktr_list) {
		if (req->ktr_vp == vp) {
			req->ktr_vp = NULL;
			vrele_count++;
		}
	}
	mtx_unlock(&ktrace_mtx);
	mtx_lock(&Giant);
	while (vrele_count-- > 0)
		vrele(vp);
	mtx_unlock(&Giant);
}
840
841/*
842 * Return true if caller has permission to set the ktracing state
843 * of target.  Essentially, the target can't possess any
844 * more permissions than the caller.  KTRFAC_ROOT signifies that
845 * root previously set the tracing status on the target process, and
846 * so, only root may further change it.
847 */
848static int
849ktrcanset(td, targetp)
850	struct thread *td;
851	struct proc *targetp;
852{
853
854	PROC_LOCK_ASSERT(targetp, MA_OWNED);
855	if (targetp->p_traceflag & KTRFAC_ROOT &&
856	    suser_cred(td->td_ucred, PRISON_ROOT))
857		return (0);
858
859	if (p_candebug(td, targetp) != 0)
860		return (0);
861
862	return (1);
863}
864
865#endif /* KTRACE */
866