audit_pipe.c revision 184536
1/*-
2 * Copyright (c) 2006 Robert N. M. Watson
3 * Copyright (c) 2008 Apple, Inc.
4 * All rights reserved.
5 *
6 * This software was developed by Robert Watson for the TrustedBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/security/audit/audit_pipe.c 184536 2008-11-01 21:56:45Z rwatson $");
32
33#include <sys/param.h>
34#include <sys/condvar.h>
35#include <sys/conf.h>
36#include <sys/eventhandler.h>
37#include <sys/filio.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/poll.h>
43#include <sys/proc.h>
44#include <sys/queue.h>
45#include <sys/rwlock.h>
46#include <sys/selinfo.h>
47#include <sys/sigio.h>
48#include <sys/signal.h>
49#include <sys/signalvar.h>
50#include <sys/sx.h>
51#include <sys/systm.h>
52#include <sys/uio.h>
53
54#include <security/audit/audit.h>
55#include <security/audit/audit_ioctl.h>
56#include <security/audit/audit_private.h>
57
58/*
59 * Implementation of a clonable special device providing a live stream of BSM
60 * audit data.  This is a "tee" of the data going to the file.  It provides
61 * unreliable but timely access to audit events.  Consumers of this interface
62 * should be very careful to avoid introducing event cycles.  Consumers may
63 * express interest via a set of preselection ioctls.
64 */
65
66/*
67 * Memory types.
68 */
69static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
70static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
71    "Audit pipe entries and buffers");
72static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
73    "Audit pipe preselection structure");
74
75/*
76 * Audit pipe buffer parameters.
77 */
78#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
79#define	AUDIT_PIPE_QLIMIT_MIN		(0)
80#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
81
82/*
83 * Description of an entry in an audit_pipe.
84 */
85struct audit_pipe_entry {
86	void				*ape_record;	/* Pipe-private copy of the record. */
87	u_int				 ape_record_len;	/* Size of ape_record, in bytes. */
88	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;	/* Linkage on the pipe's ap_queue. */
89};
90
91/*
92 * Audit pipes allow processes to express "interest" in the set of records
93 * that are delivered via the pipe.  They do this in a similar manner to the
94 * mechanism for audit trail configuration, by expressing two global masks,
95 * and optionally expressing per-auid masks.  The following data structure is
96 * the per-auid mask description.  The global state is stored in the audit
97 * pipe data structure.
98 *
99 * We may want to consider a more space/time-efficient data structure once
100 * usage patterns for per-auid specifications are clear.
101 */
102struct audit_pipe_preselect {
103	au_id_t					 app_auid;	/* Audit ID this entry applies to. */
104	au_mask_t				 app_mask;	/* Mask used for that audit ID. */
105	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;	/* Linkage on ap_preselect_list. */
106};
107
108/*
109 * Description of an individual audit_pipe.  Consists largely of a bounded
110 * length queue.
111 */
112#define	AUDIT_PIPE_ASYNC	0x00000001
113#define	AUDIT_PIPE_NBIO		0x00000002
114struct audit_pipe {
115	int				 ap_open;	/* Device open? */
116	u_int				 ap_flags;	/* AUDIT_PIPE_ASYNC, AUDIT_PIPE_NBIO. */
117
118	struct selinfo			 ap_selinfo;	/* poll(2)/kqueue(2) wakeup state. */
119	struct sigio			*ap_sigio;	/* SIGIO recipient for async mode. */
120
121	/*
122	 * Per-pipe mutex protecting most fields in this data structure.
123	 */
124	struct mtx			 ap_mtx;
125
126	/*
127	 * Per-pipe sleep lock serializing user-generated reads and flushes.
128	 * uiomove() is called to copy out the current head record's data
129	 * while the record remains in the queue, so we prevent other threads
130	 * from removing it using this lock.
131	 */
132	struct sx			 ap_sx;
133
134	/*
135	 * Condition variable to signal when data has been delivered to a
136	 * pipe.
137	 */
138	struct cv			 ap_cv;
139
140	/*
141	 * Various queue-related variables: qlen and qlimit are a count of
142	 * records in the queue; qbyteslen is the number of bytes of data
143	 * across all records, and qoffset is the amount read so far of the
144	 * first record in the queue.  The number of bytes available for
145	 * reading in the queue is qbyteslen - qoffset.
146	 */
147	u_int				 ap_qlen;
148	u_int				 ap_qlimit;
149	u_int				 ap_qbyteslen;
150	u_int				 ap_qoffset;
151
152	u_int64_t			 ap_inserts;	/* Records added. */
153	u_int64_t			 ap_reads;	/* Records read. */
154	u_int64_t			 ap_drops;	/* Records dropped. */
155
156	/*
157	 * Fields relating to pipe interest: global masks for unmatched
158	 * processes (attributable, non-attributable), and a list of specific
159	 * interest specifications by auid.
160	 */
161	int				 ap_preselect_mode;
162	au_mask_t			 ap_preselect_flags;
163	au_mask_t			 ap_preselect_naflags;
164	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;
165
166	/*
167	 * Current pending record list.  Protected by a combination of ap_mtx
168	 * and ap_sx.  Note particularly that *both* locks are required to
169	 * remove a record from the head of the queue, as an in-progress read
	 * may sleep while copying and therefore cannot hold ap_mtx.
170	 */
171	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;
172
173	/*
174	 * Global pipe list.
175	 */
176	TAILQ_ENTRY(audit_pipe)		 ap_list;
177};
178
/*
 * Wrappers around the per-pipe mutex, ap_mtx.  AUDIT_PIPE_MTX() yields the
 * mutex pointer itself for interfaces that take the lock as an argument,
 * such as cv_wait_sig() and knlist_init().
 */
179#define	AUDIT_PIPE_LOCK(ap)		mtx_lock(&(ap)->ap_mtx)
180#define	AUDIT_PIPE_LOCK_ASSERT(ap)	mtx_assert(&(ap)->ap_mtx, MA_OWNED)
181#define	AUDIT_PIPE_LOCK_DESTROY(ap)	mtx_destroy(&(ap)->ap_mtx)
182#define	AUDIT_PIPE_LOCK_INIT(ap)	mtx_init(&(ap)->ap_mtx, \
183					    "audit_pipe_mtx", NULL, MTX_DEF)
184#define	AUDIT_PIPE_UNLOCK(ap)		mtx_unlock(&(ap)->ap_mtx)
185#define	AUDIT_PIPE_MTX(ap)		(&(ap)->ap_mtx)
186
/*
 * Wrappers around the per-pipe sx lock, ap_sx, which serializes reads and
 * flushes.  AUDIT_PIPE_SX_XLOCK_SIG() returns the result of sx_xlock_sig();
 * callers in this file treat a non-zero result as EINTR.
 */
187#define	AUDIT_PIPE_SX_LOCK_DESTROY(ap)	sx_destroy(&(ap)->ap_sx)
188#define	AUDIT_PIPE_SX_LOCK_INIT(ap)	sx_init(&(ap)->ap_sx, "audit_pipe_sx")
189#define	AUDIT_PIPE_SX_XLOCK_ASSERT(ap)	sx_assert(&(ap)->ap_sx, SA_XLOCKED)
190#define	AUDIT_PIPE_SX_XLOCK_SIG(ap)	sx_xlock_sig(&(ap)->ap_sx)
191#define	AUDIT_PIPE_SX_XUNLOCK(ap)	sx_xunlock(&(ap)->ap_sx)
192
193/*
194 * Global list of audit pipes, rwlock to protect it.  Individual record
195 * queues on pipes are protected by per-pipe locks; these locks synchronize
196 * between threads walking the list to deliver to individual pipes and add/
197 * remove of pipes, and are mostly acquired for read.
198 */
199static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
200static struct rwlock		 audit_pipe_lock;
201
/*
 * Wrappers around audit_pipe_lock, the rwlock guarding audit_pipe_list.
 * Record delivery and preselection walk the list under the read lock; pipe
 * open and close take the write lock.
 */
202#define	AUDIT_PIPE_LIST_LOCK_INIT()	rw_init(&audit_pipe_lock, \
203					    "audit_pipe_list_lock")
204#define	AUDIT_PIPE_LIST_RLOCK()		rw_rlock(&audit_pipe_lock)
205#define	AUDIT_PIPE_LIST_RUNLOCK()	rw_runlock(&audit_pipe_lock)
206#define	AUDIT_PIPE_LIST_WLOCK()		rw_wlock(&audit_pipe_lock)
207#define	AUDIT_PIPE_LIST_WLOCK_ASSERT()	rw_assert(&audit_pipe_lock, \
208					    RA_WLOCKED)
209#define	AUDIT_PIPE_LIST_WUNLOCK()	rw_wunlock(&audit_pipe_lock)
210
211/*
212 * Cloning related variables and constants.
213 */
214#define	AUDIT_PIPE_NAME		"auditpipe"
215static eventhandler_tag		 audit_pipe_eh_tag;
216static struct clonedevs		*audit_pipe_clones;
217
218/*
219 * Special device methods and definition.
220 */
221static d_open_t		audit_pipe_open;
222static d_close_t	audit_pipe_close;
223static d_read_t		audit_pipe_read;
224static d_ioctl_t	audit_pipe_ioctl;
225static d_poll_t		audit_pipe_poll;
226static d_kqfilter_t	audit_pipe_kqfilter;
227
/* Device switch binding the auditpipe device node to the methods below. */
228static struct cdevsw	audit_pipe_cdevsw = {
229	.d_version =	D_VERSION,
230	.d_flags =	D_PSEUDO | D_NEEDGIANT | D_NEEDMINOR,
231	.d_open =	audit_pipe_open,
232	.d_close =	audit_pipe_close,
233	.d_read =	audit_pipe_read,
234	.d_ioctl =	audit_pipe_ioctl,
235	.d_poll =	audit_pipe_poll,
236	.d_kqfilter =	audit_pipe_kqfilter,
237	.d_name =	AUDIT_PIPE_NAME,
238};
239
240static int	audit_pipe_kqread(struct knote *note, long hint);
241static void	audit_pipe_kqdetach(struct knote *note);
242
/*
 * Filter operations installed on EVFILT_READ knotes by
 * audit_pipe_kqfilter(), which also performs the attach itself.
 */
243static struct filterops audit_pipe_read_filterops = {
244	.f_isfd =	1,
245	.f_attach =	NULL,
246	.f_detach =	audit_pipe_kqdetach,
247	.f_event =	audit_pipe_kqread,
248};
249
250/*
251 * Some global statistics on audit pipes.
252 */
253static int		audit_pipe_count;	/* Current number of pipes. */
254static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
255static u_int64_t	audit_pipe_records;	/* Records seen. */
256static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
257
258/*
259 * Free an audit pipe entry.
260 */
261static void
262audit_pipe_entry_free(struct audit_pipe_entry *ape)
263{
264
265	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
266	free(ape, M_AUDIT_PIPE_ENTRY);
267}
268
269/*
270 * Find an audit pipe preselection specification for an auid, if any.
271 */
272static struct audit_pipe_preselect *
273audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
274{
275	struct audit_pipe_preselect *app;
276
277	AUDIT_PIPE_LOCK_ASSERT(ap);
278
279	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
280		if (app->app_auid == auid)
281			return (app);
282	}
283	return (NULL);
284}
285
286/*
287 * Query the per-pipe mask for a specific auid.
288 */
289static int
290audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
291    au_mask_t *maskp)
292{
293	struct audit_pipe_preselect *app;
294	int error;
295
296	AUDIT_PIPE_LOCK(ap);
297	app = audit_pipe_preselect_find(ap, auid);
298	if (app != NULL) {
299		*maskp = app->app_mask;
300		error = 0;
301	} else
302		error = ENOENT;
303	AUDIT_PIPE_UNLOCK(ap);
304	return (error);
305}
306
307/*
308 * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
309 * otherwise, update the current entry.
310 */
311static void
312audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
313{
314	struct audit_pipe_preselect *app, *app_new;
315
316	/*
317	 * Pessimistically assume that the auid doesn't already have a mask
318	 * set, and allocate.  We will free it if it is unneeded.
319	 */
320	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
321	AUDIT_PIPE_LOCK(ap);
322	app = audit_pipe_preselect_find(ap, auid);
323	if (app == NULL) {
324		app = app_new;
325		app_new = NULL;
326		app->app_auid = auid;
327		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
328	}
329	app->app_mask = mask;
330	AUDIT_PIPE_UNLOCK(ap);
331	if (app_new != NULL)
332		free(app_new, M_AUDIT_PIPE_PRESELECT);
333}
334
335/*
336 * Delete a per-auid mask on an audit pipe.
337 */
338static int
339audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
340{
341	struct audit_pipe_preselect *app;
342	int error;
343
344	AUDIT_PIPE_LOCK(ap);
345	app = audit_pipe_preselect_find(ap, auid);
346	if (app != NULL) {
347		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
348		error = 0;
349	} else
350		error = ENOENT;
351	AUDIT_PIPE_UNLOCK(ap);
352	if (app != NULL)
353		free(app, M_AUDIT_PIPE_PRESELECT);
354	return (error);
355}
356
357/*
358 * Delete all per-auid masks on an audit pipe.
359 */
360static void
361audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
362{
363	struct audit_pipe_preselect *app;
364
365	AUDIT_PIPE_LOCK_ASSERT(ap);
366
367	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
368		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
369		free(app, M_AUDIT_PIPE_PRESELECT);
370	}
371}
372
/*
 * Unlocked entry point for deleting all per-auid masks on an audit pipe:
 * takes the pipe mutex around audit_pipe_preselect_flush_locked().
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	AUDIT_PIPE_LOCK(ap);
	audit_pipe_preselect_flush_locked(ap);
	AUDIT_PIPE_UNLOCK(ap);
}
381
382/*-
383 * Determine whether a specific audit pipe matches a record with these
384 * properties.  Algorithm is as follows:
385 *
386 * - If the pipe is configured to track the default trail configuration, then
387 *   use the results of global preselection matching.
388 * - If not, search for a specifically configured auid entry matching the
389 *   event.  If an entry is found, use that.
390 * - Otherwise, use the default flags or naflags configured for the pipe.
391 */
392static int
393audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
394    au_event_t event, au_class_t class, int sorf, int trail_preselect)
395{
396	struct audit_pipe_preselect *app;
397
398	AUDIT_PIPE_LOCK_ASSERT(ap);
399
400	switch (ap->ap_preselect_mode) {
401	case AUDITPIPE_PRESELECT_MODE_TRAIL:
402		return (trail_preselect);
403
404	case AUDITPIPE_PRESELECT_MODE_LOCAL:
405		app = audit_pipe_preselect_find(ap, auid);
406		if (app == NULL) {
407			if (auid == AU_DEFAUDITID)
408				return (au_preselect(event, class,
409				    &ap->ap_preselect_naflags, sorf));
410			else
411				return (au_preselect(event, class,
412				    &ap->ap_preselect_flags, sorf));
413		} else
414			return (au_preselect(event, class, &app->app_mask,
415			    sorf));
416
417	default:
418		panic("audit_pipe_preselect_check: mode %d",
419		    ap->ap_preselect_mode);
420	}
421
422	return (0);
423}
424
425/*
426 * Determine whether there exists a pipe interested in a record with specific
427 * properties.
428 */
429int
430audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
431    int sorf, int trail_preselect)
432{
433	struct audit_pipe *ap;
434
435	AUDIT_PIPE_LIST_RLOCK();
436	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
437		AUDIT_PIPE_LOCK(ap);
438		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
439		    trail_preselect)) {
440			AUDIT_PIPE_UNLOCK(ap);
441			AUDIT_PIPE_LIST_RUNLOCK();
442			return (1);
443		}
444		AUDIT_PIPE_UNLOCK(ap);
445	}
446	AUDIT_PIPE_LIST_RUNLOCK();
447	return (0);
448}
449
450/*
451 * Append individual record to a queue -- allocate queue-local buffer, and
452 * add to the queue.  If the queue is full or we can't allocate memory, drop
453 * the newest record.
454 */
455static void
456audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
457{
458	struct audit_pipe_entry *ape;
459
460	AUDIT_PIPE_LOCK_ASSERT(ap);
461
462	if (ap->ap_qlen >= ap->ap_qlimit) {
463		ap->ap_drops++;
464		audit_pipe_drops++;
465		return;
466	}
467
468	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
469	if (ape == NULL) {
470		ap->ap_drops++;
471		audit_pipe_drops++;
472		return;
473	}
474
475	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
476	if (ape->ape_record == NULL) {
477		free(ape, M_AUDIT_PIPE_ENTRY);
478		ap->ap_drops++;
479		audit_pipe_drops++;
480		return;
481	}
482
483	bcopy(record, ape->ape_record, record_len);
484	ape->ape_record_len = record_len;
485
486	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
487	ap->ap_inserts++;
488	ap->ap_qlen++;
489	ap->ap_qbyteslen += ape->ape_record_len;
490	selwakeuppri(&ap->ap_selinfo, PSOCK);
491	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
492	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
493		pgsigio(&ap->ap_sigio, SIGIO, 0);
494	cv_broadcast(&ap->ap_cv);
495}
496
497/*
498 * audit_pipe_submit(): audit_worker submits audit records via this
499 * interface, which arranges for them to be delivered to pipe queues.
500 */
501void
502audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
503    int trail_select, void *record, u_int record_len)
504{
505	struct audit_pipe *ap;
506
507	/*
508	 * Lockless read to avoid lock overhead if pipes are not in use.
509	 */
510	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
511		return;
512
513	AUDIT_PIPE_LIST_RLOCK();
514	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
515		AUDIT_PIPE_LOCK(ap);
516		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
517		    trail_select))
518			audit_pipe_append(ap, record, record_len);
519		AUDIT_PIPE_UNLOCK(ap);
520	}
521	AUDIT_PIPE_LIST_RUNLOCK();
522
523	/* Unlocked increment. */
524	audit_pipe_records++;
525}
526
527/*
528 * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
529 * since we don't currently have selection information available, it is
530 * delivered to the pipe unconditionally.
531 *
532 * XXXRW: This is a bug.  The BSM check routine for submitting a user record
533 * should parse that information and return it.
534 */
535void
536audit_pipe_submit_user(void *record, u_int record_len)
537{
538	struct audit_pipe *ap;
539
540	/*
541	 * Lockless read to avoid lock overhead if pipes are not in use.
542	 */
543	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
544		return;
545
546	AUDIT_PIPE_LIST_RLOCK();
547	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
548		AUDIT_PIPE_LOCK(ap);
549		audit_pipe_append(ap, record, record_len);
550		AUDIT_PIPE_UNLOCK(ap);
551	}
552	AUDIT_PIPE_LIST_RUNLOCK();
553
554	/* Unlocked increment. */
555	audit_pipe_records++;
556}
557
558/*
559 * Allocate a new audit pipe.  Connects the pipe, on success, to the global
560 * list and updates statistics.
561 */
562static struct audit_pipe *
563audit_pipe_alloc(void)
564{
565	struct audit_pipe *ap;
566
567	AUDIT_PIPE_LIST_WLOCK_ASSERT();
568
569	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
570	if (ap == NULL)
571		return (NULL);
572	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
573	TAILQ_INIT(&ap->ap_queue);
574	knlist_init(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap), NULL, NULL,
575	    NULL);
576	AUDIT_PIPE_LOCK_INIT(ap);
577	AUDIT_PIPE_SX_LOCK_INIT(ap);
578	cv_init(&ap->ap_cv, "audit_pipe");
579
580	/*
581	 * Default flags, naflags, and auid-specific preselection settings to
582	 * 0.  Initialize the mode to the global trail so that if praudit(1)
583	 * is run on /dev/auditpipe, it sees events associated with the
584	 * default trail.  Pipe-aware application can clear the flag, set
585	 * custom masks, and flush the pipe as needed.
586	 */
587	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
588	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
589	TAILQ_INIT(&ap->ap_preselect_list);
590	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
591
592	/*
593	 * Add to global list and update global statistics.
594	 */
595	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
596	audit_pipe_count++;
597	audit_pipe_ever++;
598
599	return (ap);
600}
601
602/*
603 * Flush all records currently present in an audit pipe; assume mutex is held.
604 */
605static void
606audit_pipe_flush(struct audit_pipe *ap)
607{
608	struct audit_pipe_entry *ape;
609
610	AUDIT_PIPE_LOCK_ASSERT(ap);
611
612	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
613		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
614		ap->ap_qbyteslen -= ape->ape_record_len;
615		audit_pipe_entry_free(ape);
616		ap->ap_qlen--;
617	}
618	ap->ap_qoffset = 0;
619
620	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen"));
621	KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen"));
622}
623
624/*
625 * Free an audit pipe; this means freeing all preselection state and all
626 * records in the pipe.  Assumes global write lock and pipe mutex are held to
627 * prevent any new records from being inserted during the free, and that the
628 * audit pipe is still on the global list.
629 */
630static void
631audit_pipe_free(struct audit_pipe *ap)
632{
633
634	AUDIT_PIPE_LIST_WLOCK_ASSERT();
635	AUDIT_PIPE_LOCK_ASSERT(ap);
636
637	audit_pipe_preselect_flush_locked(ap);
638	audit_pipe_flush(ap);
639	cv_destroy(&ap->ap_cv);
640	AUDIT_PIPE_SX_LOCK_DESTROY(ap);
641	AUDIT_PIPE_LOCK_DESTROY(ap);
642	knlist_destroy(&ap->ap_selinfo.si_note);
643	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
644	free(ap, M_AUDIT_PIPE);
645	audit_pipe_count--;
646}
647
648/*
649 * Audit pipe clone routine -- provide specific requested audit pipe, or a
650 * fresh one if a specific one is not requested.
651 */
652static void
653audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
654    struct cdev **dev)
655{
656	int i, u;
657
658	if (*dev != NULL)
659		return;
660
661	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
662		u = -1;
663	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
664		return;
665
666	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
667	if (i) {
668		*dev = make_dev(&audit_pipe_cdevsw, u, UID_ROOT,
669		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
670		if (*dev != NULL) {
671			dev_ref(*dev);
672			(*dev)->si_flags |= SI_CHEAPCLONE;
673		}
674	}
675}
676
677/*
678 * Audit pipe open method.  Explicit privilege check isn't used as this
679 * allows file permissions on the special device to be used to grant audit
680 * review access.  Those file permissions should be managed carefully.
681 */
682static int
683audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
684{
685	struct audit_pipe *ap;
686
687	AUDIT_PIPE_LIST_WLOCK();
688	ap = dev->si_drv1;
689	if (ap == NULL) {
690		ap = audit_pipe_alloc();
691		if (ap == NULL) {
692			AUDIT_PIPE_LIST_WUNLOCK();
693			return (ENOMEM);
694		}
695		dev->si_drv1 = ap;
696	} else {
697		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
698		AUDIT_PIPE_LIST_WUNLOCK();
699		return (EBUSY);
700	}
701	ap->ap_open = 1;	/* No lock required yet. */
702	AUDIT_PIPE_LIST_WUNLOCK();
703	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
704	return (0);
705}
706
707/*
708 * Close audit pipe, tear down all records, etc.
709 */
710static int
711audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
712{
713	struct audit_pipe *ap;
714
715	ap = dev->si_drv1;
716	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
717	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));
718
719	funsetown(&ap->ap_sigio);
720	AUDIT_PIPE_LIST_WLOCK();
721	AUDIT_PIPE_LOCK(ap);
722	ap->ap_open = 0;
723	audit_pipe_free(ap);
724	dev->si_drv1 = NULL;
725	AUDIT_PIPE_LIST_WUNLOCK();
726	return (0);
727}
728
729/*
730 * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
731 * commands.
732 *
733 * Would be desirable to support filtering, although perhaps something simple
734 * like an event mask, as opposed to something complicated like BPF.
735 */
736static int
737audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
738    struct thread *td)
739{
740	struct auditpipe_ioctl_preselect *aip;
741	struct audit_pipe *ap;
742	au_mask_t *maskp;
743	int error, mode;
744	au_id_t auid;
745
746	ap = dev->si_drv1;
747	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
748
749	/*
750	 * Audit pipe ioctls: first come standard device node ioctls, then
751	 * manipulation of pipe settings, and finally, statistics query
752	 * ioctls.
753	 */
754	switch (cmd) {
755	case FIONBIO:
756		AUDIT_PIPE_LOCK(ap);
757		if (*(int *)data)
758			ap->ap_flags |= AUDIT_PIPE_NBIO;
759		else
760			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
761		AUDIT_PIPE_UNLOCK(ap);
762		error = 0;
763		break;
764
765	case FIONREAD:
766		AUDIT_PIPE_LOCK(ap);
767		*(int *)data = ap->ap_qbyteslen - ap->ap_qoffset;
768		AUDIT_PIPE_UNLOCK(ap);
769		error = 0;
770		break;
771
772	case FIOASYNC:
773		AUDIT_PIPE_LOCK(ap);
774		if (*(int *)data)
775			ap->ap_flags |= AUDIT_PIPE_ASYNC;
776		else
777			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
778		AUDIT_PIPE_UNLOCK(ap);
779		error = 0;
780		break;
781
782	case FIOSETOWN:
783		error = fsetown(*(int *)data, &ap->ap_sigio);
784		break;
785
786	case FIOGETOWN:
787		*(int *)data = fgetown(&ap->ap_sigio);
788		error = 0;
789		break;
790
791	case AUDITPIPE_GET_QLEN:
792		*(u_int *)data = ap->ap_qlen;
793		error = 0;
794		break;
795
796	case AUDITPIPE_GET_QLIMIT:
797		*(u_int *)data = ap->ap_qlimit;
798		error = 0;
799		break;
800
801	case AUDITPIPE_SET_QLIMIT:
802		/* Lockless integer write. */
803		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
804		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
805			ap->ap_qlimit = *(u_int *)data;
806			error = 0;
807		} else
808			error = EINVAL;
809		break;
810
811	case AUDITPIPE_GET_QLIMIT_MIN:
812		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
813		error = 0;
814		break;
815
816	case AUDITPIPE_GET_QLIMIT_MAX:
817		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
818		error = 0;
819		break;
820
821	case AUDITPIPE_GET_PRESELECT_FLAGS:
822		AUDIT_PIPE_LOCK(ap);
823		maskp = (au_mask_t *)data;
824		*maskp = ap->ap_preselect_flags;
825		AUDIT_PIPE_UNLOCK(ap);
826		error = 0;
827		break;
828
829	case AUDITPIPE_SET_PRESELECT_FLAGS:
830		AUDIT_PIPE_LOCK(ap);
831		maskp = (au_mask_t *)data;
832		ap->ap_preselect_flags = *maskp;
833		AUDIT_PIPE_UNLOCK(ap);
834		error = 0;
835		break;
836
837	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
838		AUDIT_PIPE_LOCK(ap);
839		maskp = (au_mask_t *)data;
840		*maskp = ap->ap_preselect_naflags;
841		AUDIT_PIPE_UNLOCK(ap);
842		error = 0;
843		break;
844
845	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
846		AUDIT_PIPE_LOCK(ap);
847		maskp = (au_mask_t *)data;
848		ap->ap_preselect_naflags = *maskp;
849		AUDIT_PIPE_UNLOCK(ap);
850		error = 0;
851		break;
852
853	case AUDITPIPE_GET_PRESELECT_AUID:
854		aip = (struct auditpipe_ioctl_preselect *)data;
855		error = audit_pipe_preselect_get(ap, aip->aip_auid,
856		    &aip->aip_mask);
857		break;
858
859	case AUDITPIPE_SET_PRESELECT_AUID:
860		aip = (struct auditpipe_ioctl_preselect *)data;
861		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
862		error = 0;
863		break;
864
865	case AUDITPIPE_DELETE_PRESELECT_AUID:
866		auid = *(au_id_t *)data;
867		error = audit_pipe_preselect_delete(ap, auid);
868		break;
869
870	case AUDITPIPE_FLUSH_PRESELECT_AUID:
871		audit_pipe_preselect_flush(ap);
872		error = 0;
873		break;
874
875	case AUDITPIPE_GET_PRESELECT_MODE:
876		AUDIT_PIPE_LOCK(ap);
877		*(int *)data = ap->ap_preselect_mode;
878		AUDIT_PIPE_UNLOCK(ap);
879		error = 0;
880		break;
881
882	case AUDITPIPE_SET_PRESELECT_MODE:
883		mode = *(int *)data;
884		switch (mode) {
885		case AUDITPIPE_PRESELECT_MODE_TRAIL:
886		case AUDITPIPE_PRESELECT_MODE_LOCAL:
887			AUDIT_PIPE_LOCK(ap);
888			ap->ap_preselect_mode = mode;
889			AUDIT_PIPE_UNLOCK(ap);
890			error = 0;
891			break;
892
893		default:
894			error = EINVAL;
895		}
896		break;
897
898	case AUDITPIPE_FLUSH:
899		if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
900			return (EINTR);
901		AUDIT_PIPE_LOCK(ap);
902		audit_pipe_flush(ap);
903		AUDIT_PIPE_UNLOCK(ap);
904		AUDIT_PIPE_SX_XUNLOCK(ap);
905		error = 0;
906		break;
907
908	case AUDITPIPE_GET_MAXAUDITDATA:
909		*(u_int *)data = MAXAUDITDATA;
910		error = 0;
911		break;
912
913	case AUDITPIPE_GET_INSERTS:
914		*(u_int *)data = ap->ap_inserts;
915		error = 0;
916		break;
917
918	case AUDITPIPE_GET_READS:
919		*(u_int *)data = ap->ap_reads;
920		error = 0;
921		break;
922
923	case AUDITPIPE_GET_DROPS:
924		*(u_int *)data = ap->ap_drops;
925		error = 0;
926		break;
927
928	case AUDITPIPE_GET_TRUNCATES:
929		*(u_int *)data = 0;
930		error = 0;
931		break;
932
933	default:
934		error = ENOTTY;
935	}
936	return (error);
937}
938
939/*
940 * Audit pipe read.  Read one or more partial or complete records to user
941 * memory.
942 */
943static int
944audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
945{
946	struct audit_pipe_entry *ape;
947	struct audit_pipe *ap;
948	u_int toread;
949	int error;
950
951	ap = dev->si_drv1;
952	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));
953
954	/*
955	 * We hold an sx(9) lock over read and flush because we rely on the
956	 * stability of a record in the queue during uiomove(9).
957	 */
958	if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
959		return (EINTR);
960	AUDIT_PIPE_LOCK(ap);
961	while (TAILQ_EMPTY(&ap->ap_queue)) {
962		if (ap->ap_flags & AUDIT_PIPE_NBIO) {
963			AUDIT_PIPE_UNLOCK(ap);
964			AUDIT_PIPE_SX_XUNLOCK(ap);
965			return (EAGAIN);
966		}
967		error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap));
968		if (error) {
969			AUDIT_PIPE_UNLOCK(ap);
970			AUDIT_PIPE_SX_XUNLOCK(ap);
971			return (error);
972		}
973	}
974
975	/*
976	 * Copy as many remaining bytes from the current record to userspace
977	 * as we can.  Keep processing records until we run out of records in
978	 * the queue, or until the user buffer runs out of space.
979	 *
980	 * Note: we rely on the SX lock to maintain ape's stability here.
981	 */
982	ap->ap_reads++;
983	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL &&
984	    uio->uio_resid > 0) {
985		AUDIT_PIPE_LOCK_ASSERT(ap);
986
987		KASSERT(ape->ape_record_len > ap->ap_qoffset,
988		    ("audit_pipe_read: record_len > qoffset (1)"));
989		toread = MIN(ape->ape_record_len - ap->ap_qoffset,
990		    uio->uio_resid);
991		AUDIT_PIPE_UNLOCK(ap);
992		error = uiomove((char *)ape->ape_record + ap->ap_qoffset,
993		    toread, uio);
994		if (error) {
995			AUDIT_PIPE_SX_XUNLOCK(ap);
996			return (error);
997		}
998
999		/*
1000		 * If the copy succeeded, update book-keeping, and if no
1001		 * bytes remain in the current record, free it.
1002		 */
1003		AUDIT_PIPE_LOCK(ap);
1004		KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape,
1005		    ("audit_pipe_read: queue out of sync after uiomove"));
1006		ap->ap_qoffset += toread;
1007		KASSERT(ape->ape_record_len >= ap->ap_qoffset,
1008		    ("audit_pipe_read: record_len >= qoffset (2)"));
1009		if (ap->ap_qoffset == ape->ape_record_len) {
1010			TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
1011			ap->ap_qbyteslen -= ape->ape_record_len;
1012			audit_pipe_entry_free(ape);
1013			ap->ap_qlen--;
1014			ap->ap_qoffset = 0;
1015		}
1016	}
1017	AUDIT_PIPE_UNLOCK(ap);
1018	AUDIT_PIPE_SX_XUNLOCK(ap);
1019	return (0);
1020}
1021
1022/*
1023 * Audit pipe poll.
1024 */
1025static int
1026audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
1027{
1028	struct audit_pipe *ap;
1029	int revents;
1030
1031	revents = 0;
1032	ap = dev->si_drv1;
1033	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
1034
1035	if (events & (POLLIN | POLLRDNORM)) {
1036		AUDIT_PIPE_LOCK(ap);
1037		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
1038			revents |= events & (POLLIN | POLLRDNORM);
1039		else
1040			selrecord(td, &ap->ap_selinfo);
1041		AUDIT_PIPE_UNLOCK(ap);
1042	}
1043	return (revents);
1044}
1045
1046/*
1047 * Audit pipe kqfilter.
1048 */
1049static int
1050audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
1051{
1052	struct audit_pipe *ap;
1053
1054	ap = dev->si_drv1;
1055	KASSERT(ap != NULL, ("audit_pipe_kqfilter: ap == NULL"));
1056
1057	if (kn->kn_filter != EVFILT_READ)
1058		return (EINVAL);
1059
1060	kn->kn_fop = &audit_pipe_read_filterops;
1061	kn->kn_hook = ap;
1062
1063	AUDIT_PIPE_LOCK(ap);
1064	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
1065	AUDIT_PIPE_UNLOCK(ap);
1066	return (0);
1067}
1068
1069/*
1070 * Return true if there are records available for reading on the pipe.
1071 */
1072static int
1073audit_pipe_kqread(struct knote *kn, long hint)
1074{
1075	struct audit_pipe_entry *ape;
1076	struct audit_pipe *ap;
1077
1078	ap = (struct audit_pipe *)kn->kn_hook;
1079	KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
1080
1081	AUDIT_PIPE_LOCK_ASSERT(ap);
1082
1083	if (ap->ap_qlen != 0) {
1084		ape = TAILQ_FIRST(&ap->ap_queue);
1085		KASSERT(ape != NULL, ("audit_pipe_kqread: ape == NULL"));
1086
1087		kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset;
1088		return (1);
1089	} else {
1090		kn->kn_data = 0;
1091		return (0);
1092	}
1093}
1094
1095/*
1096 * Detach kqueue state from audit pipe.
1097 */
1098static void
1099audit_pipe_kqdetach(struct knote *kn)
1100{
1101	struct audit_pipe *ap;
1102
1103	ap = (struct audit_pipe *)kn->kn_hook;
1104	KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));
1105
1106	AUDIT_PIPE_LOCK(ap);
1107	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1108	AUDIT_PIPE_UNLOCK(ap);
1109}
1110
1111/*
1112 * Initialize the audit pipe system.
1113 */
1114static void
1115audit_pipe_init(void *unused)
1116{
1117
1118	TAILQ_INIT(&audit_pipe_list);
1119	AUDIT_PIPE_LIST_LOCK_INIT();
1120
1121	clone_setup(&audit_pipe_clones);
1122	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
1123	    audit_pipe_clone, 0, 1000);
1124	if (audit_pipe_eh_tag == NULL)
1125		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
1126}
1127
1128SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
1129    NULL);
1130