audit_pipe.c revision 179726
1/*-
2 * Copyright (c) 2006 Robert N. M. Watson
3 * All rights reserved.
4 *
5 * This software was developed by Robert Watson for the TrustedBSD Project.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/security/audit/audit_pipe.c 179726 2008-06-11 18:55:19Z ed $");
31
32#include <sys/param.h>
33#include <sys/condvar.h>
34#include <sys/conf.h>
35#include <sys/eventhandler.h>
36#include <sys/filio.h>
37#include <sys/kernel.h>
38#include <sys/lock.h>
39#include <sys/malloc.h>
40#include <sys/mutex.h>
41#include <sys/poll.h>
42#include <sys/proc.h>
43#include <sys/queue.h>
44#include <sys/selinfo.h>
45#include <sys/sigio.h>
46#include <sys/signal.h>
47#include <sys/signalvar.h>
48#include <sys/systm.h>
49#include <sys/uio.h>
50
51#include <security/audit/audit.h>
52#include <security/audit/audit_ioctl.h>
53#include <security/audit/audit_private.h>
54
55/*
56 * Implementation of a clonable special device providing a live stream of BSM
57 * audit data.  This is a "tee" of the data going to the file.  It provides
58 * unreliable but timely access to audit events.  Consumers of this interface
59 * should be very careful to avoid introducing event cycles.  Consumers may
60 * express interest via a set of preselection ioctls.
61 */
62
63/*
64 * Memory types.
65 */
66static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
67static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
68    "Audit pipe entries and buffers");
69static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
70    "Audit pipe preselection structure");
71
/*
 * Bounds on the number of records a single pipe may queue: the limit given
 * to a freshly allocated pipe, and the minimum and maximum values reported
 * to (and accepted from) user space via the queue limit ioctls.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	128
#define	AUDIT_PIPE_QLIMIT_MIN		0
#define	AUDIT_PIPE_QLIMIT_MAX		1024
78
79/*
80 * Description of an entry in an audit_pipe.
81 */
82struct audit_pipe_entry {
83	void				*ape_record;
84	u_int				 ape_record_len;
85	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;
86};
87
88/*
89 * Audit pipes allow processes to express "interest" in the set of records
90 * that are delivered via the pipe.  They do this in a similar manner to the
91 * mechanism for audit trail configuration, by expressing two global masks,
92 * and optionally expressing per-auid masks.  The following data structure is
93 * the per-auid mask description.  The global state is stored in the audit
94 * pipe data structure.
95 *
96 * We may want to consider a more space/time-efficient data structure once
97 * usage patterns for per-auid specifications are clear.
98 */
99struct audit_pipe_preselect {
100	au_id_t					 app_auid;
101	au_mask_t				 app_mask;
102	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;
103};
104
105/*
106 * Description of an individual audit_pipe.  Consists largely of a bounded
107 * length queue.
108 */
109#define	AUDIT_PIPE_ASYNC	0x00000001
110#define	AUDIT_PIPE_NBIO		0x00000002
111struct audit_pipe {
112	int				 ap_open;	/* Device open? */
113	u_int				 ap_flags;
114
115	struct selinfo			 ap_selinfo;
116	struct sigio			*ap_sigio;
117
118	u_int				 ap_qlen;
119	u_int				 ap_qlimit;
120
121	u_int64_t			 ap_inserts;	/* Records added. */
122	u_int64_t			 ap_reads;	/* Records read. */
123	u_int64_t			 ap_drops;	/* Records dropped. */
124	u_int64_t			 ap_truncates;	/* Records too long. */
125
126	/*
127	 * Fields relating to pipe interest: global masks for unmatched
128	 * processes (attributable, non-attributable), and a list of specific
129	 * interest specifications by auid.
130	 */
131	int				 ap_preselect_mode;
132	au_mask_t			 ap_preselect_flags;
133	au_mask_t			 ap_preselect_naflags;
134	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;
135
136	/*
137	 * Current pending record list.
138	 */
139	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;
140
141	/*
142	 * Global pipe list.
143	 */
144	TAILQ_ENTRY(audit_pipe)		 ap_list;
145};
146
147/*
148 * Global list of audit pipes, mutex to protect it and the pipes.  Finer
149 * grained locking may be desirable at some point.
150 */
151static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
152static struct mtx		 audit_pipe_mtx;
153
154/*
155 * This CV is used to wakeup on an audit record write.  Eventually, it might
156 * be per-pipe to avoid unnecessary wakeups when several pipes with different
157 * preselection masks are present.
158 */
159static struct cv		 audit_pipe_cv;
160
161/*
162 * Cloning related variables and constants.
163 */
164#define	AUDIT_PIPE_NAME		"auditpipe"
165static eventhandler_tag		 audit_pipe_eh_tag;
166static struct clonedevs		*audit_pipe_clones;
167
168/*
169 * Special device methods and definition.
170 */
171static d_open_t		audit_pipe_open;
172static d_close_t	audit_pipe_close;
173static d_read_t		audit_pipe_read;
174static d_ioctl_t	audit_pipe_ioctl;
175static d_poll_t		audit_pipe_poll;
176static d_kqfilter_t	audit_pipe_kqfilter;
177
178static struct cdevsw	audit_pipe_cdevsw = {
179	.d_version =	D_VERSION,
180	.d_flags =	D_PSEUDO | D_NEEDGIANT | D_NEEDMINOR,
181	.d_open =	audit_pipe_open,
182	.d_close =	audit_pipe_close,
183	.d_read =	audit_pipe_read,
184	.d_ioctl =	audit_pipe_ioctl,
185	.d_poll =	audit_pipe_poll,
186	.d_kqfilter =	audit_pipe_kqfilter,
187	.d_name =	AUDIT_PIPE_NAME,
188};
189
190static int	audit_pipe_kqread(struct knote *note, long hint);
191static void	audit_pipe_kqdetach(struct knote *note);
192
193static struct filterops audit_pipe_read_filterops = {
194	.f_isfd =	1,
195	.f_attach =	NULL,
196	.f_detach =	audit_pipe_kqdetach,
197	.f_event =	audit_pipe_kqread,
198};
199
200/*
201 * Some global statistics on audit pipes.
202 */
203static int		audit_pipe_count;	/* Current number of pipes. */
204static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
205static u_int64_t	audit_pipe_records;	/* Records seen. */
206static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
207
208/*
209 * Free an audit pipe entry.
210 */
211static void
212audit_pipe_entry_free(struct audit_pipe_entry *ape)
213{
214
215	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
216	free(ape, M_AUDIT_PIPE_ENTRY);
217}
218
219/*
220 * Find an audit pipe preselection specification for an auid, if any.
221 */
222static struct audit_pipe_preselect *
223audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
224{
225	struct audit_pipe_preselect *app;
226
227	mtx_assert(&audit_pipe_mtx, MA_OWNED);
228
229	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
230		if (app->app_auid == auid)
231			return (app);
232	}
233	return (NULL);
234}
235
236/*
237 * Query the per-pipe mask for a specific auid.
238 */
239static int
240audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
241    au_mask_t *maskp)
242{
243	struct audit_pipe_preselect *app;
244	int error;
245
246	mtx_lock(&audit_pipe_mtx);
247	app = audit_pipe_preselect_find(ap, auid);
248	if (app != NULL) {
249		*maskp = app->app_mask;
250		error = 0;
251	} else
252		error = ENOENT;
253	mtx_unlock(&audit_pipe_mtx);
254	return (error);
255}
256
257/*
258 * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
259 * otherwise, update the current entry.
260 */
261static void
262audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
263{
264	struct audit_pipe_preselect *app, *app_new;
265
266	/*
267	 * Pessimistically assume that the auid doesn't already have a mask
268	 * set, and allocate.  We will free it if it is unneeded.
269	 */
270	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
271	mtx_lock(&audit_pipe_mtx);
272	app = audit_pipe_preselect_find(ap, auid);
273	if (app == NULL) {
274		app = app_new;
275		app_new = NULL;
276		app->app_auid = auid;
277		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
278	}
279	app->app_mask = mask;
280	mtx_unlock(&audit_pipe_mtx);
281	if (app_new != NULL)
282		free(app_new, M_AUDIT_PIPE_PRESELECT);
283}
284
285/*
286 * Delete a per-auid mask on an audit pipe.
287 */
288static int
289audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
290{
291	struct audit_pipe_preselect *app;
292	int error;
293
294	mtx_lock(&audit_pipe_mtx);
295	app = audit_pipe_preselect_find(ap, auid);
296	if (app != NULL) {
297		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
298		error = 0;
299	} else
300		error = ENOENT;
301	mtx_unlock(&audit_pipe_mtx);
302	if (app != NULL)
303		free(app, M_AUDIT_PIPE_PRESELECT);
304	return (error);
305}
306
307/*
308 * Delete all per-auid masks on an audit pipe.
309 */
310static void
311audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
312{
313	struct audit_pipe_preselect *app;
314
315	mtx_assert(&audit_pipe_mtx, MA_OWNED);
316
317	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
318		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
319		free(app, M_AUDIT_PIPE_PRESELECT);
320	}
321}
322
323static void
324audit_pipe_preselect_flush(struct audit_pipe *ap)
325{
326
327	mtx_lock(&audit_pipe_mtx);
328	audit_pipe_preselect_flush_locked(ap);
329	mtx_unlock(&audit_pipe_mtx);
330}
331
332/*-
333 * Determine whether a specific audit pipe matches a record with these
334 * properties.  Algorithm is as follows:
335 *
336 * - If the pipe is configured to track the default trail configuration, then
337 *   use the results of global preselection matching.
338 * - If not, search for a specifically configured auid entry matching the
339 *   event.  If an entry is found, use that.
340 * - Otherwise, use the default flags or naflags configured for the pipe.
341 */
342static int
343audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
344    au_event_t event, au_class_t class, int sorf, int trail_preselect)
345{
346	struct audit_pipe_preselect *app;
347
348	mtx_assert(&audit_pipe_mtx, MA_OWNED);
349
350	switch (ap->ap_preselect_mode) {
351	case AUDITPIPE_PRESELECT_MODE_TRAIL:
352		return (trail_preselect);
353
354	case AUDITPIPE_PRESELECT_MODE_LOCAL:
355		app = audit_pipe_preselect_find(ap, auid);
356		if (app == NULL) {
357			if (auid == AU_DEFAUDITID)
358				return (au_preselect(event, class,
359				    &ap->ap_preselect_naflags, sorf));
360			else
361				return (au_preselect(event, class,
362				    &ap->ap_preselect_flags, sorf));
363		} else
364			return (au_preselect(event, class, &app->app_mask,
365			    sorf));
366
367	default:
368		panic("audit_pipe_preselect_check: mode %d",
369		    ap->ap_preselect_mode);
370	}
371
372	return (0);
373}
374
375/*
376 * Determine whether there exists a pipe interested in a record with specific
377 * properties.
378 */
379int
380audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
381    int sorf, int trail_preselect)
382{
383	struct audit_pipe *ap;
384
385	mtx_lock(&audit_pipe_mtx);
386	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
387		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
388		    trail_preselect)) {
389			mtx_unlock(&audit_pipe_mtx);
390			return (1);
391		}
392	}
393	mtx_unlock(&audit_pipe_mtx);
394	return (0);
395}
396
397/*
398 * Append individual record to a queue -- allocate queue-local buffer, and
399 * add to the queue.  We try to drop from the head of the queue so that more
400 * recent events take precedence over older ones, but if allocation fails we
401 * do drop the new event.
402 */
403static void
404audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
405{
406	struct audit_pipe_entry *ape, *ape_remove;
407
408	mtx_assert(&audit_pipe_mtx, MA_OWNED);
409
410	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
411	if (ape == NULL) {
412		ap->ap_drops++;
413		audit_pipe_drops++;
414		return;
415	}
416
417	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
418	if (ape->ape_record == NULL) {
419		free(ape, M_AUDIT_PIPE_ENTRY);
420		ap->ap_drops++;
421		audit_pipe_drops++;
422		return;
423	}
424
425	bcopy(record, ape->ape_record, record_len);
426	ape->ape_record_len = record_len;
427
428	if (ap->ap_qlen >= ap->ap_qlimit) {
429		ape_remove = TAILQ_FIRST(&ap->ap_queue);
430		TAILQ_REMOVE(&ap->ap_queue, ape_remove, ape_queue);
431		audit_pipe_entry_free(ape_remove);
432		ap->ap_qlen--;
433		ap->ap_drops++;
434		audit_pipe_drops++;
435	}
436
437	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
438	ap->ap_inserts++;
439	ap->ap_qlen++;
440	selwakeuppri(&ap->ap_selinfo, PSOCK);
441	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
442	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
443		pgsigio(&ap->ap_sigio, SIGIO, 0);
444}
445
446/*
447 * audit_pipe_submit(): audit_worker submits audit records via this
448 * interface, which arranges for them to be delivered to pipe queues.
449 */
450void
451audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
452    int trail_select, void *record, u_int record_len)
453{
454	struct audit_pipe *ap;
455
456	/*
457	 * Lockless read to avoid mutex overhead if pipes are not in use.
458	 */
459	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
460		return;
461
462	mtx_lock(&audit_pipe_mtx);
463	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
464		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
465		    trail_select))
466			audit_pipe_append(ap, record, record_len);
467	}
468	audit_pipe_records++;
469	mtx_unlock(&audit_pipe_mtx);
470	cv_broadcastpri(&audit_pipe_cv, PSOCK);
471}
472
473/*
474 * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
475 * since we don't currently have selection information available, it is
476 * delivered to the pipe unconditionally.
477 *
478 * XXXRW: This is a bug.  The BSM check routine for submitting a user record
479 * should parse that information and return it.
480 */
481void
482audit_pipe_submit_user(void *record, u_int record_len)
483{
484	struct audit_pipe *ap;
485
486	/*
487	 * Lockless read to avoid mutex overhead if pipes are not in use.
488	 */
489	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
490		return;
491
492	mtx_lock(&audit_pipe_mtx);
493	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list)
494		audit_pipe_append(ap, record, record_len);
495	audit_pipe_records++;
496	mtx_unlock(&audit_pipe_mtx);
497	cv_broadcastpri(&audit_pipe_cv, PSOCK);
498}
499
500
501/*
502 * Pop the next record off of an audit pipe.
503 */
504static struct audit_pipe_entry *
505audit_pipe_pop(struct audit_pipe *ap)
506{
507	struct audit_pipe_entry *ape;
508
509	mtx_assert(&audit_pipe_mtx, MA_OWNED);
510
511	ape = TAILQ_FIRST(&ap->ap_queue);
512	KASSERT((ape == NULL && ap->ap_qlen == 0) ||
513	    (ape != NULL && ap->ap_qlen != 0), ("audit_pipe_pop: qlen"));
514	if (ape == NULL)
515		return (NULL);
516	TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
517	ap->ap_qlen--;
518	return (ape);
519}
520
521/*
522 * Allocate a new audit pipe.  Connects the pipe, on success, to the global
523 * list and updates statistics.
524 */
525static struct audit_pipe *
526audit_pipe_alloc(void)
527{
528	struct audit_pipe *ap;
529
530	mtx_assert(&audit_pipe_mtx, MA_OWNED);
531
532	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
533	if (ap == NULL)
534		return (NULL);
535	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
536	TAILQ_INIT(&ap->ap_queue);
537	knlist_init(&ap->ap_selinfo.si_note, &audit_pipe_mtx, NULL, NULL,
538	    NULL);
539
540	/*
541	 * Default flags, naflags, and auid-specific preselection settings to
542	 * 0.  Initialize the mode to the global trail so that if praudit(1)
543	 * is run on /dev/auditpipe, it sees events associated with the
544	 * default trail.  Pipe-aware application can clear the flag, set
545	 * custom masks, and flush the pipe as needed.
546	 */
547	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
548	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
549	TAILQ_INIT(&ap->ap_preselect_list);
550	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
551
552	/*
553	 * Add to global list and update global statistics.
554	 */
555	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
556	audit_pipe_count++;
557	audit_pipe_ever++;
558
559	return (ap);
560}
561
562/*
563 * Flush all records currently present in an audit pipe; assume mutex is held.
564 */
565static void
566audit_pipe_flush(struct audit_pipe *ap)
567{
568	struct audit_pipe_entry *ape;
569
570	mtx_assert(&audit_pipe_mtx, MA_OWNED);
571
572	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
573		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
574		audit_pipe_entry_free(ape);
575		ap->ap_qlen--;
576	}
577	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qlen"));
578}
579
580/*
581 * Free an audit pipe; this means freeing all preselection state and all
582 * records in the pipe.  Assumes mutex is held to prevent any new records
583 * from being inserted during the free, and that the audit pipe is still on
584 * the global list.
585 */
586static void
587audit_pipe_free(struct audit_pipe *ap)
588{
589
590	mtx_assert(&audit_pipe_mtx, MA_OWNED);
591
592	audit_pipe_preselect_flush_locked(ap);
593	audit_pipe_flush(ap);
594	knlist_destroy(&ap->ap_selinfo.si_note);
595	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
596	free(ap, M_AUDIT_PIPE);
597	audit_pipe_count--;
598}
599
600/*
601 * Audit pipe clone routine -- provide specific requested audit pipe, or a
602 * fresh one if a specific one is not requested.
603 */
604static void
605audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
606    struct cdev **dev)
607{
608	int i, u;
609
610	if (*dev != NULL)
611		return;
612
613	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
614		u = -1;
615	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
616		return;
617
618	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
619	if (i) {
620		*dev = make_dev(&audit_pipe_cdevsw, unit2minor(u), UID_ROOT,
621		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
622		if (*dev != NULL) {
623			dev_ref(*dev);
624			(*dev)->si_flags |= SI_CHEAPCLONE;
625		}
626	}
627}
628
629/*
630 * Audit pipe open method.  Explicit privilege check isn't used as this
631 * allows file permissions on the special device to be used to grant audit
632 * review access.  Those file permissions should be managed carefully.
633 */
634static int
635audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
636{
637	struct audit_pipe *ap;
638
639	mtx_lock(&audit_pipe_mtx);
640	ap = dev->si_drv1;
641	if (ap == NULL) {
642		ap = audit_pipe_alloc();
643		if (ap == NULL) {
644			mtx_unlock(&audit_pipe_mtx);
645			return (ENOMEM);
646		}
647		dev->si_drv1 = ap;
648	} else {
649		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
650		mtx_unlock(&audit_pipe_mtx);
651		return (EBUSY);
652	}
653	ap->ap_open = 1;
654	mtx_unlock(&audit_pipe_mtx);
655	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
656	return (0);
657}
658
659/*
660 * Close audit pipe, tear down all records, etc.
661 */
662static int
663audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
664{
665	struct audit_pipe *ap;
666
667	ap = dev->si_drv1;
668	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
669	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
670	funsetown(&ap->ap_sigio);
671	mtx_lock(&audit_pipe_mtx);
672	ap->ap_open = 0;
673	audit_pipe_free(ap);
674	dev->si_drv1 = NULL;
675	mtx_unlock(&audit_pipe_mtx);
676	return (0);
677}
678
679/*
680 * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
681 * commands.
682 *
683 * Would be desirable to support filtering, although perhaps something simple
684 * like an event mask, as opposed to something complicated like BPF.
685 */
686static int
687audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
688    struct thread *td)
689{
690	struct auditpipe_ioctl_preselect *aip;
691	struct audit_pipe *ap;
692	au_mask_t *maskp;
693	int error, mode;
694	au_id_t auid;
695
696	ap = dev->si_drv1;
697	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
698
699	/*
700	 * Audit pipe ioctls: first come standard device node ioctls, then
701	 * manipulation of pipe settings, and finally, statistics query
702	 * ioctls.
703	 */
704	switch (cmd) {
705	case FIONBIO:
706		mtx_lock(&audit_pipe_mtx);
707		if (*(int *)data)
708			ap->ap_flags |= AUDIT_PIPE_NBIO;
709		else
710			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
711		mtx_unlock(&audit_pipe_mtx);
712		error = 0;
713		break;
714
715	case FIONREAD:
716		mtx_lock(&audit_pipe_mtx);
717		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
718			*(int *)data =
719			    TAILQ_FIRST(&ap->ap_queue)->ape_record_len;
720		else
721			*(int *)data = 0;
722		mtx_unlock(&audit_pipe_mtx);
723		error = 0;
724		break;
725
726	case FIOASYNC:
727		mtx_lock(&audit_pipe_mtx);
728		if (*(int *)data)
729			ap->ap_flags |= AUDIT_PIPE_ASYNC;
730		else
731			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
732		mtx_unlock(&audit_pipe_mtx);
733		error = 0;
734		break;
735
736	case FIOSETOWN:
737		error = fsetown(*(int *)data, &ap->ap_sigio);
738		break;
739
740	case FIOGETOWN:
741		*(int *)data = fgetown(&ap->ap_sigio);
742		error = 0;
743		break;
744
745	case AUDITPIPE_GET_QLEN:
746		*(u_int *)data = ap->ap_qlen;
747		error = 0;
748		break;
749
750	case AUDITPIPE_GET_QLIMIT:
751		*(u_int *)data = ap->ap_qlimit;
752		error = 0;
753		break;
754
755	case AUDITPIPE_SET_QLIMIT:
756		/* Lockless integer write. */
757		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
758		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
759			ap->ap_qlimit = *(u_int *)data;
760			error = 0;
761		} else
762			error = EINVAL;
763		break;
764
765	case AUDITPIPE_GET_QLIMIT_MIN:
766		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
767		error = 0;
768		break;
769
770	case AUDITPIPE_GET_QLIMIT_MAX:
771		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
772		error = 0;
773		break;
774
775	case AUDITPIPE_GET_PRESELECT_FLAGS:
776		mtx_lock(&audit_pipe_mtx);
777		maskp = (au_mask_t *)data;
778		*maskp = ap->ap_preselect_flags;
779		mtx_unlock(&audit_pipe_mtx);
780		error = 0;
781		break;
782
783	case AUDITPIPE_SET_PRESELECT_FLAGS:
784		mtx_lock(&audit_pipe_mtx);
785		maskp = (au_mask_t *)data;
786		ap->ap_preselect_flags = *maskp;
787		mtx_unlock(&audit_pipe_mtx);
788		error = 0;
789		break;
790
791	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
792		mtx_lock(&audit_pipe_mtx);
793		maskp = (au_mask_t *)data;
794		*maskp = ap->ap_preselect_naflags;
795		mtx_unlock(&audit_pipe_mtx);
796		error = 0;
797		break;
798
799	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
800		mtx_lock(&audit_pipe_mtx);
801		maskp = (au_mask_t *)data;
802		ap->ap_preselect_naflags = *maskp;
803		mtx_unlock(&audit_pipe_mtx);
804		error = 0;
805		break;
806
807	case AUDITPIPE_GET_PRESELECT_AUID:
808		aip = (struct auditpipe_ioctl_preselect *)data;
809		error = audit_pipe_preselect_get(ap, aip->aip_auid,
810		    &aip->aip_mask);
811		break;
812
813	case AUDITPIPE_SET_PRESELECT_AUID:
814		aip = (struct auditpipe_ioctl_preselect *)data;
815		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
816		error = 0;
817		break;
818
819	case AUDITPIPE_DELETE_PRESELECT_AUID:
820		auid = *(au_id_t *)data;
821		error = audit_pipe_preselect_delete(ap, auid);
822		break;
823
824	case AUDITPIPE_FLUSH_PRESELECT_AUID:
825		audit_pipe_preselect_flush(ap);
826		error = 0;
827		break;
828
829	case AUDITPIPE_GET_PRESELECT_MODE:
830		mtx_lock(&audit_pipe_mtx);
831		*(int *)data = ap->ap_preselect_mode;
832		mtx_unlock(&audit_pipe_mtx);
833		error = 0;
834		break;
835
836	case AUDITPIPE_SET_PRESELECT_MODE:
837		mode = *(int *)data;
838		switch (mode) {
839		case AUDITPIPE_PRESELECT_MODE_TRAIL:
840		case AUDITPIPE_PRESELECT_MODE_LOCAL:
841			mtx_lock(&audit_pipe_mtx);
842			ap->ap_preselect_mode = mode;
843			mtx_unlock(&audit_pipe_mtx);
844			error = 0;
845			break;
846
847		default:
848			error = EINVAL;
849		}
850		break;
851
852	case AUDITPIPE_FLUSH:
853		mtx_lock(&audit_pipe_mtx);
854		audit_pipe_flush(ap);
855		mtx_unlock(&audit_pipe_mtx);
856		error = 0;
857		break;
858
859	case AUDITPIPE_GET_MAXAUDITDATA:
860		*(u_int *)data = MAXAUDITDATA;
861		error = 0;
862		break;
863
864	case AUDITPIPE_GET_INSERTS:
865		*(u_int *)data = ap->ap_inserts;
866		error = 0;
867		break;
868
869	case AUDITPIPE_GET_READS:
870		*(u_int *)data = ap->ap_reads;
871		error = 0;
872		break;
873
874	case AUDITPIPE_GET_DROPS:
875		*(u_int *)data = ap->ap_drops;
876		error = 0;
877		break;
878
879	case AUDITPIPE_GET_TRUNCATES:
880		*(u_int *)data = ap->ap_truncates;
881		error = 0;
882		break;
883
884	default:
885		error = ENOTTY;
886	}
887	return (error);
888}
889
890/*
891 * Audit pipe read.  Pull one record off the queue and copy to user space.
892 * On error, the record is dropped.
893 *
894 * Providing more sophisticated behavior, such as partial reads, is tricky
895 * due to the potential for parallel I/O.  If partial read support is
896 * required, it will require a per-pipe "current record being read" along
897 * with an offset into that trecord which has already been read.  Threads
898 * performing partial reads will need to allocate per-thread copies of the
899 * data so that if another thread completes the read of the record, it can be
900 * freed without adding reference count logic.  If this is added, a flag to
901 * indicate that only atomic record reads are desired would be useful, as if
902 * different threads are all waiting for records on the pipe, they will want
903 * independent record reads, which is currently the behavior.
904 */
905static int
906audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
907{
908	struct audit_pipe_entry *ape;
909	struct audit_pipe *ap;
910	int error;
911
912	ap = dev->si_drv1;
913	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
914	mtx_lock(&audit_pipe_mtx);
915	do {
916		/*
917		 * Wait for a record that fits into the read buffer, dropping
918		 * records that would be truncated if actually passed to the
919		 * process.  This helps maintain the discreet record read
920		 * interface.
921		 */
922		while ((ape = audit_pipe_pop(ap)) == NULL) {
923			if (ap->ap_flags & AUDIT_PIPE_NBIO) {
924				mtx_unlock(&audit_pipe_mtx);
925				return (EAGAIN);
926			}
927			error = cv_wait_sig(&audit_pipe_cv, &audit_pipe_mtx);
928			if (error) {
929				mtx_unlock(&audit_pipe_mtx);
930				return (error);
931			}
932		}
933		if (ape->ape_record_len <= uio->uio_resid)
934			break;
935		audit_pipe_entry_free(ape);
936		ap->ap_truncates++;
937	} while (1);
938	ap->ap_reads++;
939	mtx_unlock(&audit_pipe_mtx);
940
941	/*
942	 * Now read record to user space memory.  Even if the read is short,
943	 * we abandon the remainder of the record, supporting only discreet
944	 * record reads.
945	 */
946	error = uiomove(ape->ape_record, ape->ape_record_len, uio);
947	audit_pipe_entry_free(ape);
948	return (error);
949}
950
951/*
952 * Audit pipe poll.
953 */
954static int
955audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
956{
957	struct audit_pipe *ap;
958	int revents;
959
960	revents = 0;
961	ap = dev->si_drv1;
962	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
963	if (events & (POLLIN | POLLRDNORM)) {
964		mtx_lock(&audit_pipe_mtx);
965		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
966			revents |= events & (POLLIN | POLLRDNORM);
967		else
968			selrecord(td, &ap->ap_selinfo);
969		mtx_unlock(&audit_pipe_mtx);
970	}
971	return (revents);
972}
973
974/*
975 * Audit pipe kqfilter.
976 */
977static int
978audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
979{
980	struct audit_pipe *ap;
981
982	ap = dev->si_drv1;
983	KASSERT(ap != NULL, ("audit_pipe_kqfilter: ap == NULL"));
984
985	if (kn->kn_filter != EVFILT_READ)
986		return (EINVAL);
987
988	kn->kn_fop = &audit_pipe_read_filterops;
989	kn->kn_hook = ap;
990
991	mtx_lock(&audit_pipe_mtx);
992	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
993	mtx_unlock(&audit_pipe_mtx);
994	return (0);
995}
996
997/*
998 * Return true if there are records available for reading on the pipe.
999 */
1000static int
1001audit_pipe_kqread(struct knote *kn, long hint)
1002{
1003	struct audit_pipe_entry *ape;
1004	struct audit_pipe *ap;
1005
1006	mtx_assert(&audit_pipe_mtx, MA_OWNED);
1007
1008	ap = (struct audit_pipe *)kn->kn_hook;
1009	KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
1010
1011	if (ap->ap_qlen != 0) {
1012		ape = TAILQ_FIRST(&ap->ap_queue);
1013		KASSERT(ape != NULL, ("audit_pipe_kqread: ape == NULL"));
1014
1015		kn->kn_data = ape->ape_record_len;
1016		return (1);
1017	} else {
1018		kn->kn_data = 0;
1019		return (0);
1020	}
1021}
1022
1023/*
1024 * Detach kqueue state from audit pipe.
1025 */
1026static void
1027audit_pipe_kqdetach(struct knote *kn)
1028{
1029	struct audit_pipe *ap;
1030
1031	ap = (struct audit_pipe *)kn->kn_hook;
1032	KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));
1033
1034	mtx_lock(&audit_pipe_mtx);
1035	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1036	mtx_unlock(&audit_pipe_mtx);
1037}
1038
1039/*
1040 * Initialize the audit pipe system.
1041 */
1042static void
1043audit_pipe_init(void *unused)
1044{
1045
1046	TAILQ_INIT(&audit_pipe_list);
1047	mtx_init(&audit_pipe_mtx, "audit_pipe_mtx", NULL, MTX_DEF);
1048	cv_init(&audit_pipe_cv, "audit_pipe_cv");
1049
1050	clone_setup(&audit_pipe_clones);
1051	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
1052	    audit_pipe_clone, 0, 1000);
1053	if (audit_pipe_eh_tag == NULL)
1054		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
1055}
1056
1057SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
1058    NULL);
1059