audit_pipe.c revision 159269
1/*-
2 * Copyright (c) 2006 Robert N. M. Watson
3 * All rights reserved.
4 *
5 * This software was developed by Robert Watson for the TrustedBSD Project.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD: head/sys/security/audit/audit_pipe.c 159269 2006-06-05 14:48:17Z rwatson $
29 */
30
31#include <sys/param.h>
32#include <sys/condvar.h>
33#include <sys/conf.h>
34#include <sys/eventhandler.h>
35#include <sys/filio.h>
36#include <sys/kernel.h>
37#include <sys/lock.h>
38#include <sys/malloc.h>
39#include <sys/mutex.h>
40#include <sys/poll.h>
41#include <sys/proc.h>
42#include <sys/queue.h>
43#include <sys/selinfo.h>
44#include <sys/sigio.h>
45#include <sys/signal.h>
46#include <sys/signalvar.h>
47#include <sys/systm.h>
48#include <sys/uio.h>
49
50#include <security/audit/audit.h>
51#include <security/audit/audit_ioctl.h>
52#include <security/audit/audit_private.h>
53
54/*
55 * Implementation of a clonable special device providing a live stream of BSM
56 * audit data.  This is a "tee" of the data going to the file.  It provides
57 * unreliable but timely access to audit events.  Consumers of this interface
58 * should be very careful to avoid introducing event cycles.  Consumers may
59 * express interest via a set of preselection ioctls.
60 */
61
/*
 * Memory types.  M_AUDIT_PIPE backs the pipe structures themselves,
 * M_AUDIT_PIPE_ENTRY backs queued entries and the record buffers they own,
 * and M_AUDIT_PIPE_PRESELECT backs the per-auid preselection structures.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_preselect",
    "Audit pipe preselection structure");
70
/*
 * Audit pipe buffer parameters: the queue limit given to a newly allocated
 * pipe, and the smallest and largest limits reported to user space by the
 * AUDITPIPE_GET_QLIMIT_MIN and AUDITPIPE_GET_QLIMIT_MAX ioctls.  Limits are
 * counted in queued records (they are compared against ap_qlen), not bytes.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(0)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
77
/*
 * Description of an entry in an audit_pipe: a private copy of one audit
 * record together with its length, linked onto the owning pipe's queue.
 * Both the entry and its record buffer are allocated from
 * M_AUDIT_PIPE_ENTRY and released together by audit_pipe_entry_free().
 */
struct audit_pipe_entry {
	void				*ape_record;	/* Copy of record. */
	u_int				 ape_record_len; /* Bytes in copy. */
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;	/* Queue linkage. */
};
86
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * Entries live on a pipe's ap_preselect_list and are looked up with a linear
 * search by auid.  We may want to consider a more space/time-efficient data
 * structure once usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	au_id_t					 app_auid;	/* Key. */
	au_mask_t				 app_mask;	/* Mask for auid. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;	/* List linkage. */
};
103
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue, plus the state needed for poll/select and SIGIO
 * notification, per-pipe statistics, and preselection configuration.
 *
 * AUDIT_PIPE_ASYNC and AUDIT_PIPE_NBIO are the bits kept in ap_flags; they
 * are toggled by the FIOASYNC and FIONBIO ioctls respectively.
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	int				 ap_open;	/* Device open? */
	u_int				 ap_flags;	/* AUDIT_PIPE_* bits. */

	struct selinfo			 ap_selinfo;	/* For poll/select. */
	struct sigio			*ap_sigio;	/* SIGIO recipient. */

	u_int				 ap_qlen;	/* Records queued. */
	u_int				 ap_qlimit;	/* Queue bound. */

	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */
	u_int64_t			 ap_truncates;	/* Records too long. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.  ap_preselect_mode holds one of
	 * the AUDITPIPE_PRESELECT_MODE_* values.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
145
/*
 * Global list of audit pipes, mutex to protect it and the pipes.  The same
 * mutex also covers each pipe's record queue and preselection list.  Finer
 * grained locking may be desirable at some point.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct mtx		 audit_pipe_mtx;
152
/*
 * This CV is used to wakeup on an audit record write.  It is shared by the
 * readers of every pipe, which sleep on it with audit_pipe_mtx held.
 * Eventually, it might be per-pipe to avoid unnecessary wakeups when several
 * pipes with different preselection masks are present.
 */
static struct cv		 audit_pipe_cv;
159
/*
 * Cloning related variables and constants.  AUDIT_PIPE_NAME is both the
 * device name matched by the clone handler and the prefix of the per-unit
 * device nodes it creates; the event handler tag and clone list are set up
 * in audit_pipe_init().
 */
#define	AUDIT_PIPE_NAME		"auditpipe"
static eventhandler_tag		 audit_pipe_eh_tag;
static struct clonedevs		*audit_pipe_clones;
166
/*
 * Special device methods and definition.  The pipe is read-only from user
 * space: open, close, read, ioctl and poll are provided, and no write method
 * is installed.
 */
static d_open_t		audit_pipe_open;
static d_close_t	audit_pipe_close;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;

static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT,
	.d_open =	audit_pipe_open,
	.d_close =	audit_pipe_close,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_name =	AUDIT_PIPE_NAME,
};
186
/*
 * Some global statistics on audit pipes.  These are updated with
 * audit_pipe_mtx held: the pipe counts in audit_pipe_alloc() and
 * audit_pipe_free(), the record count in the submit routines, and the drop
 * count in audit_pipe_append().
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
194
195/*
196 * Free an audit pipe entry.
197 */
198static void
199audit_pipe_entry_free(struct audit_pipe_entry *ape)
200{
201
202	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
203	free(ape, M_AUDIT_PIPE_ENTRY);
204}
205
206/*
207 * Find an audit pipe preselection specification for an auid, if any.
208 */
209static struct audit_pipe_preselect *
210audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
211{
212	struct audit_pipe_preselect *app;
213
214	mtx_assert(&audit_pipe_mtx, MA_OWNED);
215
216	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
217		if (app->app_auid == auid)
218			return (app);
219	}
220	return (NULL);
221}
222
223/*
224 * Query the per-pipe mask for a specific auid.
225 */
226static int
227audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
228    au_mask_t *maskp)
229{
230	struct audit_pipe_preselect *app;
231	int error;
232
233	mtx_lock(&audit_pipe_mtx);
234	app = audit_pipe_preselect_find(ap, auid);
235	if (app != NULL) {
236		*maskp = app->app_mask;
237		error = 0;
238	} else
239		error = ENOENT;
240	mtx_unlock(&audit_pipe_mtx);
241	return (error);
242}
243
244/*
245 * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
246 * otherwise, update the current entry.
247 */
248static void
249audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
250{
251	struct audit_pipe_preselect *app, *app_new;
252
253	/*
254	 * Pessimistically assume that the auid doesn't already have a mask
255	 * set, and allocate.  We will free it if it is unneeded.
256	 */
257	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
258	mtx_lock(&audit_pipe_mtx);
259	app = audit_pipe_preselect_find(ap, auid);
260	if (app == NULL) {
261		app = app_new;
262		app_new = NULL;
263		app->app_auid = auid;
264		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
265	}
266	app->app_mask = mask;
267	mtx_unlock(&audit_pipe_mtx);
268	if (app_new != NULL)
269		free(app_new, M_AUDIT_PIPE_PRESELECT);
270}
271
272/*
273 * Delete a per-auid mask on an audit pipe.
274 */
275static int
276audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
277{
278	struct audit_pipe_preselect *app;
279	int error;
280
281	mtx_lock(&audit_pipe_mtx);
282	app = audit_pipe_preselect_find(ap, auid);
283	if (app != NULL) {
284		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
285		error = 0;
286	} else
287		error = ENOENT;
288	mtx_unlock(&audit_pipe_mtx);
289	if (app != NULL)
290		free(app, M_AUDIT_PIPE_PRESELECT);
291	return (error);
292}
293
294/*
295 * Delete all per-auid masks on an audit pipe.
296 */
297static void
298audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
299{
300	struct audit_pipe_preselect *app;
301
302	mtx_assert(&audit_pipe_mtx, MA_OWNED);
303
304	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
305		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
306		free(app, M_AUDIT_PIPE_PRESELECT);
307	}
308}
309
/*
 * Delete all per-auid masks on an audit pipe, acquiring audit_pipe_mtx
 * around the locked variant.  For callers that do not already hold the
 * mutex.
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	mtx_lock(&audit_pipe_mtx);
	audit_pipe_preselect_flush_locked(ap);
	mtx_unlock(&audit_pipe_mtx);
}
318
319/*
320 * Determine whether a specific audit pipe matches a record with these
321 * properties.  Algorithm is as follows:
322 *
323 * - If the pipe is configured to track the default trail configuration, then
324 *   use the results of global preselection matching.
325 * - If not, search for a specifically configured auid entry matching the
326 *   event.  If an entry is found, use that.
327 * - Otherwise, use the default flags or naflags configured for the pipe.
328 */
329static int
330audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
331    au_event_t event, au_class_t class, int sorf, int trail_preselect)
332{
333	struct audit_pipe_preselect *app;
334
335	mtx_assert(&audit_pipe_mtx, MA_OWNED);
336
337	switch (ap->ap_preselect_mode) {
338	case AUDITPIPE_PRESELECT_MODE_TRAIL:
339		return (trail_preselect);
340
341	case AUDITPIPE_PRESELECT_MODE_LOCAL:
342		app = audit_pipe_preselect_find(ap, auid);
343		if (app == NULL) {
344			if (auid == AU_DEFAUDITID)
345				return (au_preselect(event, class,
346				    &ap->ap_preselect_naflags, sorf));
347			else
348				return (au_preselect(event, class,
349				    &ap->ap_preselect_flags, sorf));
350		} else
351			return (au_preselect(event, class, &app->app_mask,
352			    sorf));
353
354	default:
355		panic("audit_pipe_preselect_check: mode %d",
356		    ap->ap_preselect_mode);
357	}
358
359	return (0);
360}
361
362/*
363 * Determine whether there exists a pipe interested in a record with specific
364 * properties.
365 */
366int
367audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
368    int sorf, int trail_preselect)
369{
370	struct audit_pipe *ap;
371
372	mtx_lock(&audit_pipe_mtx);
373	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
374		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
375		    trail_preselect)) {
376			mtx_unlock(&audit_pipe_mtx);
377			return (1);
378		}
379	}
380	mtx_unlock(&audit_pipe_mtx);
381	return (0);
382}
383
384/*
385 * Append individual record to a queue -- allocate queue-local buffer, and
386 * add to the queue.  We try to drop from the head of the queue so that more
387 * recent events take precedence over older ones, but if allocation fails we
388 * do drop the new event.
389 */
390static void
391audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
392{
393	struct audit_pipe_entry *ape, *ape_remove;
394
395	mtx_assert(&audit_pipe_mtx, MA_OWNED);
396
397	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
398	if (ape == NULL) {
399		ap->ap_drops++;
400		audit_pipe_drops++;
401		return;
402	}
403
404	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
405	if (ape->ape_record == NULL) {
406		free(ape, M_AUDIT_PIPE_ENTRY);
407		ap->ap_drops++;
408		audit_pipe_drops++;
409		return;
410	}
411
412	bcopy(record, ape->ape_record, record_len);
413	ape->ape_record_len = record_len;
414
415	if (ap->ap_qlen >= ap->ap_qlimit) {
416		ape_remove = TAILQ_FIRST(&ap->ap_queue);
417		TAILQ_REMOVE(&ap->ap_queue, ape_remove, ape_queue);
418		audit_pipe_entry_free(ape_remove);
419		ap->ap_qlen--;
420		ap->ap_drops++;
421		audit_pipe_drops++;
422	}
423
424	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
425	ap->ap_inserts++;
426	ap->ap_qlen++;
427	selwakeuppri(&ap->ap_selinfo, PSOCK);
428	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
429		pgsigio(&ap->ap_sigio, SIGIO, 0);
430}
431
432/*
433 * audit_pipe_submit(): audit_worker submits audit records via this
434 * interface, which arranges for them to be delivered to pipe queues.
435 */
436void
437audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
438    int trail_select, void *record, u_int record_len)
439{
440	struct audit_pipe *ap;
441
442	/*
443	 * Lockless read to avoid mutex overhead if pipes are not in use.
444	 */
445	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
446		return;
447
448	mtx_lock(&audit_pipe_mtx);
449	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
450		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
451		    trail_select))
452			audit_pipe_append(ap, record, record_len);
453	}
454	audit_pipe_records++;
455	mtx_unlock(&audit_pipe_mtx);
456	cv_signal(&audit_pipe_cv);
457}
458
459/*
460 * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
461 * since we don't currently have selection information available, it is
462 * delivered to the pipe unconditionally.
463 *
464 * XXXRW: This is a bug.  The BSM check routine for submitting a user record
465 * should parse that information and return it.
466 */
467void
468audit_pipe_submit_user(void *record, u_int record_len)
469{
470	struct audit_pipe *ap;
471
472	/*
473	 * Lockless read to avoid mutex overhead if pipes are not in use.
474	 */
475	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
476		return;
477
478	mtx_lock(&audit_pipe_mtx);
479	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list)
480		audit_pipe_append(ap, record, record_len);
481	audit_pipe_records++;
482	mtx_unlock(&audit_pipe_mtx);
483	cv_signal(&audit_pipe_cv);
484}
485
486
487/*
488 * Pop the next record off of an audit pipe.
489 */
490static struct audit_pipe_entry *
491audit_pipe_pop(struct audit_pipe *ap)
492{
493	struct audit_pipe_entry *ape;
494
495	mtx_assert(&audit_pipe_mtx, MA_OWNED);
496
497	ape = TAILQ_FIRST(&ap->ap_queue);
498	KASSERT((ape == NULL && ap->ap_qlen == 0) ||
499	    (ape != NULL && ap->ap_qlen != 0), ("audit_pipe_pop: qlen"));
500	if (ape == NULL)
501		return (NULL);
502	TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
503	ap->ap_qlen--;
504	return (ape);
505}
506
507/*
508 * Allocate a new audit pipe.  Connects the pipe, on success, to the global
509 * list and updates statistics.
510 */
511static struct audit_pipe *
512audit_pipe_alloc(void)
513{
514	struct audit_pipe *ap;
515
516	mtx_assert(&audit_pipe_mtx, MA_OWNED);
517
518	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
519	if (ap == NULL)
520		return (NULL);
521	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
522	TAILQ_INIT(&ap->ap_queue);
523
524	/*
525	 * Default flags, naflags, and auid-specific preselection settings to
526	 * 0.  Initialize the mode to the global trail so that if praudit(1)
527	 * is run on /dev/auditpipe, it sees events associated with the
528	 * default trail.  Pipe-aware application can clear the flag, set
529	 * custom masks, and flush the pipe as needed.
530	 */
531	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
532	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
533	TAILQ_INIT(&ap->ap_preselect_list);
534	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
535
536	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
537	audit_pipe_count++;
538	audit_pipe_ever++;
539
540	return (ap);
541}
542
543/*
544 * Flush all records currently present in an audit pipe; assume mutex is held.
545 */
546static void
547audit_pipe_flush(struct audit_pipe *ap)
548{
549	struct audit_pipe_entry *ape;
550
551	mtx_assert(&audit_pipe_mtx, MA_OWNED);
552
553	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
554		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
555		audit_pipe_entry_free(ape);
556		ap->ap_qlen--;
557	}
558	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qlen"));
559}
560
561/*
562 * Free an audit pipe; this means freeing all preselection state and all
563 * records in the pipe.  Assumes mutex is held to prevent any new records
564 * from being inserted during the free, and that the audit pipe is still on
565 * the global list.
566 */
567static void
568audit_pipe_free(struct audit_pipe *ap)
569{
570
571	mtx_assert(&audit_pipe_mtx, MA_OWNED);
572
573	audit_pipe_preselect_flush_locked(ap);
574	audit_pipe_flush(ap);
575	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
576	free(ap, M_AUDIT_PIPE);
577	audit_pipe_count--;
578}
579
580/*
581 * Audit pipe clone routine -- provide specific requested audit pipe, or a
582 * fresh one if a specific one is not requested.
583 */
584static void
585audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
586    struct cdev **dev)
587{
588	int i, u;
589
590	if (*dev != NULL)
591		return;
592
593	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
594		u = -1;
595	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
596		return;
597
598	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
599	if (i) {
600		*dev = make_dev(&audit_pipe_cdevsw, unit2minor(u), UID_ROOT,
601		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
602		if (*dev != NULL) {
603			dev_ref(*dev);
604			(*dev)->si_flags |= SI_CHEAPCLONE;
605		}
606	}
607}
608
609/*
610 * Audit pipe open method.  Explicit suser check isn't used as this allows
611 * file permissions on the special device to be used to grant audit review
612 * access.
613 */
614static int
615audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
616{
617	struct audit_pipe *ap;
618
619	mtx_lock(&audit_pipe_mtx);
620	ap = dev->si_drv1;
621	if (ap == NULL) {
622		ap = audit_pipe_alloc();
623		if (ap == NULL) {
624			mtx_unlock(&audit_pipe_mtx);
625			return (ENOMEM);
626		}
627		dev->si_drv1 = ap;
628	} else {
629		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
630		mtx_unlock(&audit_pipe_mtx);
631		return (EBUSY);
632	}
633	ap->ap_open = 1;
634	mtx_unlock(&audit_pipe_mtx);
635	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
636	return (0);
637}
638
639/*
640 * Close audit pipe, tear down all records, etc.
641 */
642static int
643audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
644{
645	struct audit_pipe *ap;
646
647	ap = dev->si_drv1;
648	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
649	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
650	funsetown(&ap->ap_sigio);
651	mtx_lock(&audit_pipe_mtx);
652	ap->ap_open = 0;
653	audit_pipe_free(ap);
654	dev->si_drv1 = NULL;
655	mtx_unlock(&audit_pipe_mtx);
656	return (0);
657}
658
659/*
660 * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
661 * commands.
662 *
663 * Would be desirable to support filtering, although perhaps something simple
664 * like an event mask, as opposed to something complicated like BPF.
665 */
666static int
667audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
668    struct thread *td)
669{
670	struct auditpipe_ioctl_preselect *aip;
671	struct audit_pipe *ap;
672	au_mask_t *maskp;
673	int error, mode;
674	au_id_t auid;
675
676	ap = dev->si_drv1;
677	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
678
679	/*
680	 * Audit pipe ioctls: first come standard device node ioctls, then
681	 * manipulation of pipe settings, and finally, statistics query
682	 * ioctls.
683	 */
684	switch (cmd) {
685	case FIONBIO:
686		mtx_lock(&audit_pipe_mtx);
687		if (*(int *)data)
688			ap->ap_flags |= AUDIT_PIPE_NBIO;
689		else
690			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
691		mtx_unlock(&audit_pipe_mtx);
692		error = 0;
693		break;
694
695	case FIONREAD:
696		mtx_lock(&audit_pipe_mtx);
697		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
698			*(int *)data =
699			    TAILQ_FIRST(&ap->ap_queue)->ape_record_len;
700		else
701			*(int *)data = 0;
702		mtx_unlock(&audit_pipe_mtx);
703		error = 0;
704		break;
705
706	case FIOASYNC:
707		mtx_lock(&audit_pipe_mtx);
708		if (*(int *)data)
709			ap->ap_flags |= AUDIT_PIPE_ASYNC;
710		else
711			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
712		mtx_unlock(&audit_pipe_mtx);
713		error = 0;
714		break;
715
716	case FIOSETOWN:
717		error = fsetown(*(int *)data, &ap->ap_sigio);
718		break;
719
720	case FIOGETOWN:
721		*(int *)data = fgetown(&ap->ap_sigio);
722		error = 0;
723		break;
724
725	case AUDITPIPE_GET_QLEN:
726		*(u_int *)data = ap->ap_qlen;
727		error = 0;
728		break;
729
730	case AUDITPIPE_GET_QLIMIT:
731		*(u_int *)data = ap->ap_qlimit;
732		error = 0;
733		break;
734
735	case AUDITPIPE_SET_QLIMIT:
736		/* Lockless integer write. */
737		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
738		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
739			ap->ap_qlimit = *(u_int *)data;
740			error = 0;
741		} else
742			error = EINVAL;
743		break;
744
745	case AUDITPIPE_GET_QLIMIT_MIN:
746		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
747		error = 0;
748		break;
749
750	case AUDITPIPE_GET_QLIMIT_MAX:
751		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
752		error = 0;
753		break;
754
755	case AUDITPIPE_GET_PRESELECT_FLAGS:
756		mtx_lock(&audit_pipe_mtx);
757		maskp = (au_mask_t *)data;
758		*maskp = ap->ap_preselect_flags;
759		mtx_unlock(&audit_pipe_mtx);
760		error = 0;
761		break;
762
763	case AUDITPIPE_SET_PRESELECT_FLAGS:
764		mtx_lock(&audit_pipe_mtx);
765		maskp = (au_mask_t *)data;
766		ap->ap_preselect_flags = *maskp;
767		mtx_unlock(&audit_pipe_mtx);
768		error = 0;
769		break;
770
771	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
772		mtx_lock(&audit_pipe_mtx);
773		maskp = (au_mask_t *)data;
774		*maskp = ap->ap_preselect_naflags;
775		mtx_unlock(&audit_pipe_mtx);
776		error = 0;
777		break;
778
779	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
780		mtx_lock(&audit_pipe_mtx);
781		maskp = (au_mask_t *)data;
782		ap->ap_preselect_naflags = *maskp;
783		mtx_unlock(&audit_pipe_mtx);
784		error = 0;
785		break;
786
787	case AUDITPIPE_GET_PRESELECT_AUID:
788		aip = (struct auditpipe_ioctl_preselect *)data;
789		error = audit_pipe_preselect_get(ap, aip->aip_auid,
790		    &aip->aip_mask);
791		break;
792
793	case AUDITPIPE_SET_PRESELECT_AUID:
794		aip = (struct auditpipe_ioctl_preselect *)data;
795		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
796		error = 0;
797		break;
798
799	case AUDITPIPE_DELETE_PRESELECT_AUID:
800		auid = *(au_id_t *)data;
801		error = audit_pipe_preselect_delete(ap, auid);
802		break;
803
804	case AUDITPIPE_FLUSH_PRESELECT_AUID:
805		audit_pipe_preselect_flush(ap);
806		error = 0;
807		break;
808
809	case AUDITPIPE_GET_PRESELECT_MODE:
810		mtx_lock(&audit_pipe_mtx);
811		*(int *)data = ap->ap_preselect_mode;
812		mtx_unlock(&audit_pipe_mtx);
813		error = 0;
814		break;
815
816	case AUDITPIPE_SET_PRESELECT_MODE:
817		mode = *(int *)data;
818		switch (mode) {
819		case AUDITPIPE_PRESELECT_MODE_TRAIL:
820		case AUDITPIPE_PRESELECT_MODE_LOCAL:
821			mtx_lock(&audit_pipe_mtx);
822			ap->ap_preselect_mode = mode;
823			mtx_unlock(&audit_pipe_mtx);
824			error = 0;
825			break;
826
827		default:
828			error = EINVAL;
829		}
830		break;
831
832	case AUDITPIPE_FLUSH:
833		mtx_lock(&audit_pipe_mtx);
834		audit_pipe_flush(ap);
835		mtx_unlock(&audit_pipe_mtx);
836		error = 0;
837		break;
838
839	case AUDITPIPE_GET_INSERTS:
840		*(u_int *)data = ap->ap_inserts;
841		error = 0;
842		break;
843
844	case AUDITPIPE_GET_READS:
845		*(u_int *)data = ap->ap_reads;
846		error = 0;
847		break;
848
849	case AUDITPIPE_GET_DROPS:
850		*(u_int *)data = ap->ap_drops;
851		error = 0;
852		break;
853
854	case AUDITPIPE_GET_TRUNCATES:
855		*(u_int *)data = ap->ap_truncates;
856		error = 0;
857		break;
858
859	default:
860		error = ENOTTY;
861	}
862	return (error);
863}
864
865/*
866 * Audit pipe read.  Pull one record off the queue and copy to user space.
867 * On error, the record is dropped.
868 *
869 * Providing more sophisticated behavior, such as partial reads, is tricky
870 * due to the potential for parallel I/O.  If partial read support is
871 * required, it will require a per-pipe "current record being read" along
872 * with an offset into that trecord which has already been read.  Threads
873 * performing partial reads will need to allocate per-thread copies of the
874 * data so that if another thread completes the read of the record, it can be
875 * freed without adding reference count logic.  If this is added, a flag to
876 * indicate that only atomic record reads are desired would be useful, as if
877 * different threads are all waiting for records on the pipe, they will want
878 * independent record reads, which is currently the behavior.
879 */
880static int
881audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
882{
883	struct audit_pipe_entry *ape;
884	struct audit_pipe *ap;
885	int error;
886
887	ap = dev->si_drv1;
888	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
889	mtx_lock(&audit_pipe_mtx);
890	do {
891		/*
892		 * Wait for a record that fits into the read buffer, dropping
893		 * records that would be truncated if actually passed to the
894		 * process.  This helps maintain the discreet record read
895		 * interface.
896		 */
897		while ((ape = audit_pipe_pop(ap)) == NULL) {
898			if (ap->ap_flags & AUDIT_PIPE_NBIO) {
899				mtx_unlock(&audit_pipe_mtx);
900				return (EAGAIN);
901			}
902			error = cv_wait_sig(&audit_pipe_cv, &audit_pipe_mtx);
903			if (error) {
904				mtx_unlock(&audit_pipe_mtx);
905				return (error);
906			}
907		}
908		if (ape->ape_record_len <= uio->uio_resid)
909			break;
910		audit_pipe_entry_free(ape);
911		ap->ap_truncates++;
912	} while (1);
913	mtx_unlock(&audit_pipe_mtx);
914
915	/*
916	 * Now read record to user space memory.  Even if the read is short,
917	 * we abandon the remainder of the record, supporting only discreet
918	 * record reads.
919	 */
920	error = uiomove(ape->ape_record, ape->ape_record_len, uio);
921	audit_pipe_entry_free(ape);
922	return (error);
923}
924
925/*
926 * Audit pipe poll.
927 */
928static int
929audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
930{
931	struct audit_pipe *ap;
932	int revents;
933
934	revents = 0;
935	ap = dev->si_drv1;
936	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
937	if (events & (POLLIN | POLLRDNORM)) {
938		mtx_lock(&audit_pipe_mtx);
939		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
940			revents |= events & (POLLIN | POLLRDNORM);
941		else
942			selrecord(td, &ap->ap_selinfo);
943		mtx_unlock(&audit_pipe_mtx);
944	}
945	return (revents);
946}
947
948/*
949 * Initialize the audit pipe system.
950 */
951static void
952audit_pipe_init(void *unused)
953{
954
955	TAILQ_INIT(&audit_pipe_list);
956	mtx_init(&audit_pipe_mtx, "audit_pipe_mtx", NULL, MTX_DEF);
957	cv_init(&audit_pipe_cv, "audit_pipe_cv");
958
959	clone_setup(&audit_pipe_clones);
960	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
961	    audit_pipe_clone, 0, 1000);
962	if (audit_pipe_eh_tag == NULL)
963		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
964}
965
966SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
967    NULL);
968