audit_pipe.c revision 302408
1/*-
2 * Copyright (c) 2006 Robert N. M. Watson
3 * Copyright (c) 2008-2009 Apple, Inc.
4 * All rights reserved.
5 *
6 * This software was developed by Robert Watson for the TrustedBSD Project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: stable/11/sys/security/audit/audit_pipe.c 293826 2016-01-13 14:02:07Z kib $");
32
33#include <sys/param.h>
34#include <sys/condvar.h>
35#include <sys/conf.h>
36#include <sys/eventhandler.h>
37#include <sys/filio.h>
38#include <sys/kernel.h>
39#include <sys/lock.h>
40#include <sys/malloc.h>
41#include <sys/mutex.h>
42#include <sys/poll.h>
43#include <sys/proc.h>
44#include <sys/queue.h>
45#include <sys/rwlock.h>
46#include <sys/selinfo.h>
47#include <sys/sigio.h>
48#include <sys/signal.h>
49#include <sys/signalvar.h>
50#include <sys/sx.h>
51#include <sys/systm.h>
52#include <sys/uio.h>
53
54#include <security/audit/audit.h>
55#include <security/audit/audit_ioctl.h>
56#include <security/audit/audit_private.h>
57
58/*
59 * Implementation of a clonable special device providing a live stream of BSM
60 * audit data.  Consumers receive a "tee" of the system audit trail by
61 * default, but may also define alternative event selections using ioctls.
62 * This interface provides unreliable but timely access to audit events.
63 * Consumers should be very careful to avoid introducing event cycles.
64 */
65
/*
 * Memory types.  Each MALLOC_DEFINE() declares one malloc(9) type used by
 * this file: the pipe structure itself, queued entries together with their
 * record buffers, and per-auid preselection entries.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
    "Audit pipe preselection structure");
74
/*
 * Audit pipe buffer parameters.  Queue limits are counted in records, not
 * bytes: the default is assigned to each newly allocated pipe, and the
 * minimum and maximum are the values reported by the
 * AUDITPIPE_GET_QLIMIT_MIN and AUDITPIPE_GET_QLIMIT_MAX ioctls.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(1)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
81
/*
 * Description of an entry in an audit_pipe: a single queued record.
 */
struct audit_pipe_entry {
	/* Pipe-private copy of the record data. */
	void				*ape_record;
	/* Length of ape_record, in bytes. */
	u_int				 ape_record_len;
	/* Linkage on the owning pipe's ap_queue. */
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;
};
90
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * We may want to consider a more space/time-efficient data structure once
 * usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	/* Audit ID this entry applies to. */
	au_id_t					 app_auid;
	/* Preselection mask used for records carrying app_auid. */
	au_mask_t				 app_mask;
	/* Linkage on the owning pipe's ap_preselect_list. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;
};
107
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 *
 * ap_flags bits: AUDIT_PIPE_ASYNC requests SIGIO delivery whenever a record
 * is appended to the pipe; AUDIT_PIPE_NBIO makes a read on an empty queue
 * return EAGAIN rather than sleep.
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	u_int				 ap_flags;

	/* select/poll/kqueue state and the SIGIO recipient. */
	struct selinfo			 ap_selinfo;
	struct sigio			*ap_sigio;

	/*
	 * Per-pipe mutex protecting most fields in this data structure.
	 */
	struct mtx			 ap_mtx;

	/*
	 * Per-pipe sleep lock serializing user-generated reads and flushes.
	 * uiomove() is called to copy out the current head record's data
	 * while the record remains in the queue, so we prevent other threads
	 * from removing it using this lock.
	 */
	struct sx			 ap_sx;

	/*
	 * Condition variable to signal when data has been delivered to a
	 * pipe.
	 */
	struct cv			 ap_cv;

	/*
	 * Various queue-related variables: qlen and qlimit are a count of
	 * records in the queue; qbyteslen is the number of bytes of data
	 * across all records, and qoffset is the amount read so far of the
	 * first record in the queue.  The number of bytes available for
	 * reading in the queue is qbyteslen - qoffset.
	 */
	u_int				 ap_qlen;
	u_int				 ap_qlimit;
	u_int				 ap_qbyteslen;
	u_int				 ap_qoffset;

	/*
	 * Per-pipe operation statistics.
	 */
	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.  Protected by a combination of ap_mtx
	 * and ap_sx.  Note particularly that *both* locks are required to
	 * remove a record from the head of the queue, as an in-progress read
	 * may sleep while copying and therefore cannot hold ap_mtx.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
181
/*
 * Wrappers for the per-pipe mutex (ap_mtx).  AUDIT_PIPE_MTX() yields the
 * mutex pointer itself, for interfaces such as cv_wait_sig() and
 * knlist_init_mtx() that take the lock as an argument.
 */
#define	AUDIT_PIPE_LOCK(ap)		mtx_lock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_ASSERT(ap)	mtx_assert(&(ap)->ap_mtx, MA_OWNED)
#define	AUDIT_PIPE_LOCK_DESTROY(ap)	mtx_destroy(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_INIT(ap)	mtx_init(&(ap)->ap_mtx, \
					    "audit_pipe_mtx", NULL, MTX_DEF)
#define	AUDIT_PIPE_UNLOCK(ap)		mtx_unlock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_MTX(ap)		(&(ap)->ap_mtx)

/*
 * Wrappers for the per-pipe sx lock (ap_sx), which is only ever taken
 * exclusively.  The _SIG acquire returns non-zero if the lock could not be
 * taken; callers in this file treat that as EINTR.
 */
#define	AUDIT_PIPE_SX_LOCK_DESTROY(ap)	sx_destroy(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_LOCK_INIT(ap)	sx_init(&(ap)->ap_sx, "audit_pipe_sx")
#define	AUDIT_PIPE_SX_XLOCK_ASSERT(ap)	sx_assert(&(ap)->ap_sx, SA_XLOCKED)
#define	AUDIT_PIPE_SX_XLOCK_SIG(ap)	sx_xlock_sig(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_XUNLOCK(ap)	sx_xunlock(&(ap)->ap_sx)
195
/*
 * Global list of audit pipes, and the rwlock protecting it.  Individual
 * record queues on pipes are protected by per-pipe locks; the list lock
 * synchronizes threads walking the list to deliver to individual pipes with
 * the addition and removal of pipes, and is mostly acquired for read.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct rwlock		 audit_pipe_lock;

#define	AUDIT_PIPE_LIST_LOCK_INIT()	rw_init(&audit_pipe_lock, \
					    "audit_pipe_list_lock")
#define	AUDIT_PIPE_LIST_LOCK_DESTROY()	rw_destroy(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RLOCK()		rw_rlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RUNLOCK()	rw_runlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK()		rw_wlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK_ASSERT()	rw_assert(&audit_pipe_lock, \
					    RA_WLOCKED)
#define	AUDIT_PIPE_LIST_WUNLOCK()	rw_wunlock(&audit_pipe_lock)
214
/*
 * Audit pipe device.  The cdev is created once by audit_pipe_init() under
 * the name AUDIT_PIPE_NAME.
 */
static struct cdev	*audit_pipe_dev;

#define AUDIT_PIPE_NAME	"auditpipe"
221
/*
 * Special device methods and definition.  No d_close method is supplied;
 * per-open state is released by the cdevpriv destructor registered in
 * audit_pipe_open().
 */
static d_open_t		audit_pipe_open;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;
static d_kqfilter_t	audit_pipe_kqfilter;

static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_open =	audit_pipe_open,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_kqfilter =	audit_pipe_kqfilter,
	.d_name =	AUDIT_PIPE_NAME,
};
240
/*
 * kqueue(2) read filter for audit pipes.  Attachment is performed by
 * audit_pipe_kqfilter(), so no f_attach method is provided.
 */
static int	audit_pipe_kqread(struct knote *note, long hint);
static void	audit_pipe_kqdetach(struct knote *note);

static struct filterops audit_pipe_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	audit_pipe_kqdetach,
	.f_event =	audit_pipe_kqread,
};
250
/*
 * Some global statistics on audit pipes.  audit_pipe_count and
 * audit_pipe_ever are updated with the global list write lock held;
 * audit_pipe_records is incremented without any lock by the submit routines.
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
258
259/*
260 * Free an audit pipe entry.
261 */
262static void
263audit_pipe_entry_free(struct audit_pipe_entry *ape)
264{
265
266	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
267	free(ape, M_AUDIT_PIPE_ENTRY);
268}
269
270/*
271 * Find an audit pipe preselection specification for an auid, if any.
272 */
273static struct audit_pipe_preselect *
274audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
275{
276	struct audit_pipe_preselect *app;
277
278	AUDIT_PIPE_LOCK_ASSERT(ap);
279
280	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
281		if (app->app_auid == auid)
282			return (app);
283	}
284	return (NULL);
285}
286
287/*
288 * Query the per-pipe mask for a specific auid.
289 */
290static int
291audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
292    au_mask_t *maskp)
293{
294	struct audit_pipe_preselect *app;
295	int error;
296
297	AUDIT_PIPE_LOCK(ap);
298	app = audit_pipe_preselect_find(ap, auid);
299	if (app != NULL) {
300		*maskp = app->app_mask;
301		error = 0;
302	} else
303		error = ENOENT;
304	AUDIT_PIPE_UNLOCK(ap);
305	return (error);
306}
307
308/*
309 * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
310 * otherwise, update the current entry.
311 */
312static void
313audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
314{
315	struct audit_pipe_preselect *app, *app_new;
316
317	/*
318	 * Pessimistically assume that the auid doesn't already have a mask
319	 * set, and allocate.  We will free it if it is unneeded.
320	 */
321	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
322	AUDIT_PIPE_LOCK(ap);
323	app = audit_pipe_preselect_find(ap, auid);
324	if (app == NULL) {
325		app = app_new;
326		app_new = NULL;
327		app->app_auid = auid;
328		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
329	}
330	app->app_mask = mask;
331	AUDIT_PIPE_UNLOCK(ap);
332	if (app_new != NULL)
333		free(app_new, M_AUDIT_PIPE_PRESELECT);
334}
335
336/*
337 * Delete a per-auid mask on an audit pipe.
338 */
339static int
340audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
341{
342	struct audit_pipe_preselect *app;
343	int error;
344
345	AUDIT_PIPE_LOCK(ap);
346	app = audit_pipe_preselect_find(ap, auid);
347	if (app != NULL) {
348		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
349		error = 0;
350	} else
351		error = ENOENT;
352	AUDIT_PIPE_UNLOCK(ap);
353	if (app != NULL)
354		free(app, M_AUDIT_PIPE_PRESELECT);
355	return (error);
356}
357
358/*
359 * Delete all per-auid masks on an audit pipe.
360 */
361static void
362audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
363{
364	struct audit_pipe_preselect *app;
365
366	AUDIT_PIPE_LOCK_ASSERT(ap);
367
368	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
369		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
370		free(app, M_AUDIT_PIPE_PRESELECT);
371	}
372}
373
/*
 * Delete all per-auid masks on an audit pipe.  Convenience wrapper that
 * takes the pipe mutex around audit_pipe_preselect_flush_locked() for
 * callers that do not already hold it.
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	AUDIT_PIPE_LOCK(ap);
	audit_pipe_preselect_flush_locked(ap);
	AUDIT_PIPE_UNLOCK(ap);
}
382
383/*-
384 * Determine whether a specific audit pipe matches a record with these
385 * properties.  Algorithm is as follows:
386 *
387 * - If the pipe is configured to track the default trail configuration, then
388 *   use the results of global preselection matching.
389 * - If not, search for a specifically configured auid entry matching the
390 *   event.  If an entry is found, use that.
391 * - Otherwise, use the default flags or naflags configured for the pipe.
392 */
393static int
394audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
395    au_event_t event, au_class_t class, int sorf, int trail_preselect)
396{
397	struct audit_pipe_preselect *app;
398
399	AUDIT_PIPE_LOCK_ASSERT(ap);
400
401	switch (ap->ap_preselect_mode) {
402	case AUDITPIPE_PRESELECT_MODE_TRAIL:
403		return (trail_preselect);
404
405	case AUDITPIPE_PRESELECT_MODE_LOCAL:
406		app = audit_pipe_preselect_find(ap, auid);
407		if (app == NULL) {
408			if (auid == AU_DEFAUDITID)
409				return (au_preselect(event, class,
410				    &ap->ap_preselect_naflags, sorf));
411			else
412				return (au_preselect(event, class,
413				    &ap->ap_preselect_flags, sorf));
414		} else
415			return (au_preselect(event, class, &app->app_mask,
416			    sorf));
417
418	default:
419		panic("audit_pipe_preselect_check: mode %d",
420		    ap->ap_preselect_mode);
421	}
422
423	return (0);
424}
425
426/*
427 * Determine whether there exists a pipe interested in a record with specific
428 * properties.
429 */
430int
431audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
432    int sorf, int trail_preselect)
433{
434	struct audit_pipe *ap;
435
436	/* Lockless read to avoid acquiring the global lock if not needed. */
437	if (TAILQ_EMPTY(&audit_pipe_list))
438		return (0);
439
440	AUDIT_PIPE_LIST_RLOCK();
441	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
442		AUDIT_PIPE_LOCK(ap);
443		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
444		    trail_preselect)) {
445			AUDIT_PIPE_UNLOCK(ap);
446			AUDIT_PIPE_LIST_RUNLOCK();
447			return (1);
448		}
449		AUDIT_PIPE_UNLOCK(ap);
450	}
451	AUDIT_PIPE_LIST_RUNLOCK();
452	return (0);
453}
454
455/*
456 * Append individual record to a queue -- allocate queue-local buffer, and
457 * add to the queue.  If the queue is full or we can't allocate memory, drop
458 * the newest record.
459 */
460static void
461audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
462{
463	struct audit_pipe_entry *ape;
464
465	AUDIT_PIPE_LOCK_ASSERT(ap);
466
467	if (ap->ap_qlen >= ap->ap_qlimit) {
468		ap->ap_drops++;
469		audit_pipe_drops++;
470		return;
471	}
472
473	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
474	if (ape == NULL) {
475		ap->ap_drops++;
476		audit_pipe_drops++;
477		return;
478	}
479
480	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
481	if (ape->ape_record == NULL) {
482		free(ape, M_AUDIT_PIPE_ENTRY);
483		ap->ap_drops++;
484		audit_pipe_drops++;
485		return;
486	}
487
488	bcopy(record, ape->ape_record, record_len);
489	ape->ape_record_len = record_len;
490
491	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
492	ap->ap_inserts++;
493	ap->ap_qlen++;
494	ap->ap_qbyteslen += ape->ape_record_len;
495	selwakeuppri(&ap->ap_selinfo, PSOCK);
496	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
497	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
498		pgsigio(&ap->ap_sigio, SIGIO, 0);
499	cv_broadcast(&ap->ap_cv);
500}
501
502/*
503 * audit_pipe_submit(): audit_worker submits audit records via this
504 * interface, which arranges for them to be delivered to pipe queues.
505 */
506void
507audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
508    int trail_select, void *record, u_int record_len)
509{
510	struct audit_pipe *ap;
511
512	/*
513	 * Lockless read to avoid lock overhead if pipes are not in use.
514	 */
515	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
516		return;
517
518	AUDIT_PIPE_LIST_RLOCK();
519	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
520		AUDIT_PIPE_LOCK(ap);
521		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
522		    trail_select))
523			audit_pipe_append(ap, record, record_len);
524		AUDIT_PIPE_UNLOCK(ap);
525	}
526	AUDIT_PIPE_LIST_RUNLOCK();
527
528	/* Unlocked increment. */
529	audit_pipe_records++;
530}
531
532/*
533 * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
534 * since we don't currently have selection information available, it is
535 * delivered to the pipe unconditionally.
536 *
537 * XXXRW: This is a bug.  The BSM check routine for submitting a user record
538 * should parse that information and return it.
539 */
540void
541audit_pipe_submit_user(void *record, u_int record_len)
542{
543	struct audit_pipe *ap;
544
545	/*
546	 * Lockless read to avoid lock overhead if pipes are not in use.
547	 */
548	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
549		return;
550
551	AUDIT_PIPE_LIST_RLOCK();
552	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
553		AUDIT_PIPE_LOCK(ap);
554		audit_pipe_append(ap, record, record_len);
555		AUDIT_PIPE_UNLOCK(ap);
556	}
557	AUDIT_PIPE_LIST_RUNLOCK();
558
559	/* Unlocked increment. */
560	audit_pipe_records++;
561}
562
563/*
564 * Allocate a new audit pipe.  Connects the pipe, on success, to the global
565 * list and updates statistics.
566 */
567static struct audit_pipe *
568audit_pipe_alloc(void)
569{
570	struct audit_pipe *ap;
571
572	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
573	if (ap == NULL)
574		return (NULL);
575	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
576	TAILQ_INIT(&ap->ap_queue);
577	knlist_init_mtx(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap));
578	AUDIT_PIPE_LOCK_INIT(ap);
579	AUDIT_PIPE_SX_LOCK_INIT(ap);
580	cv_init(&ap->ap_cv, "audit_pipe");
581
582	/*
583	 * Default flags, naflags, and auid-specific preselection settings to
584	 * 0.  Initialize the mode to the global trail so that if praudit(1)
585	 * is run on /dev/auditpipe, it sees events associated with the
586	 * default trail.  Pipe-aware application can clear the flag, set
587	 * custom masks, and flush the pipe as needed.
588	 */
589	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
590	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
591	TAILQ_INIT(&ap->ap_preselect_list);
592	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;
593
594	/*
595	 * Add to global list and update global statistics.
596	 */
597	AUDIT_PIPE_LIST_WLOCK();
598	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
599	audit_pipe_count++;
600	audit_pipe_ever++;
601	AUDIT_PIPE_LIST_WUNLOCK();
602
603	return (ap);
604}
605
606/*
607 * Flush all records currently present in an audit pipe; assume mutex is held.
608 */
609static void
610audit_pipe_flush(struct audit_pipe *ap)
611{
612	struct audit_pipe_entry *ape;
613
614	AUDIT_PIPE_LOCK_ASSERT(ap);
615
616	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
617		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
618		ap->ap_qbyteslen -= ape->ape_record_len;
619		audit_pipe_entry_free(ape);
620		ap->ap_qlen--;
621	}
622	ap->ap_qoffset = 0;
623
624	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen"));
625	KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen"));
626}
627
/*
 * Free an audit pipe; this means freeing all preselection state and all
 * records in the pipe.  Assumes global write lock and pipe mutex are held to
 * prevent any new records from being inserted during the free, and that the
 * audit pipe is still on the global list.
 *
 * On return the pipe structure, including its mutex, no longer exists: the
 * caller must not unlock or otherwise touch 'ap' again.  The global write
 * lock is still held and remains the caller's to release.
 */
static void
audit_pipe_free(struct audit_pipe *ap)
{

	AUDIT_PIPE_LIST_WLOCK_ASSERT();
	AUDIT_PIPE_LOCK_ASSERT(ap);

	/* Drop everything the pipe owns. */
	audit_pipe_preselect_flush_locked(ap);
	audit_pipe_flush(ap);
	/* Tear down synchronization primitives and select/kqueue state. */
	cv_destroy(&ap->ap_cv);
	AUDIT_PIPE_SX_LOCK_DESTROY(ap);
	/* The pipe mutex is destroyed while still held by this thread. */
	AUDIT_PIPE_LOCK_DESTROY(ap);
	seldrain(&ap->ap_selinfo);
	knlist_destroy(&ap->ap_selinfo.si_note);
	/* Unlink from the global list, then release the memory. */
	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
	free(ap, M_AUDIT_PIPE);
	audit_pipe_count--;
}
652
/*
 * cdevpriv destructor for an audit pipe, registered by audit_pipe_open().
 * Also called directly from audit_pipe_open() if registration fails.
 * 'arg' is the struct audit_pipe to tear down.
 */
static void
audit_pipe_dtor(void *arg)
{
	struct audit_pipe *ap;

	ap = arg;
	/* Clear the SIGIO owner before the pipe goes away. */
	funsetown(&ap->ap_sigio);
	AUDIT_PIPE_LIST_WLOCK();
	AUDIT_PIPE_LOCK(ap);
	/*
	 * audit_pipe_free() destroys the pipe mutex and frees 'ap', so there
	 * is deliberately no matching AUDIT_PIPE_UNLOCK() here.
	 */
	audit_pipe_free(ap);
	AUDIT_PIPE_LIST_WUNLOCK();
}
665
666/*
667 * Audit pipe open method.  Explicit privilege check isn't used as this
668 * allows file permissions on the special device to be used to grant audit
669 * review access.  Those file permissions should be managed carefully.
670 */
671static int
672audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
673{
674	struct audit_pipe *ap;
675	int error;
676
677	ap = audit_pipe_alloc();
678	if (ap == NULL)
679		return (ENOMEM);
680	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
681	error = devfs_set_cdevpriv(ap, audit_pipe_dtor);
682	if (error != 0)
683		audit_pipe_dtor(ap);
684	return (error);
685}
686
687/*
688 * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
689 * commands.
690 */
691static int
692audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
693    struct thread *td)
694{
695	struct auditpipe_ioctl_preselect *aip;
696	struct audit_pipe *ap;
697	au_mask_t *maskp;
698	int error, mode;
699	au_id_t auid;
700
701	error = devfs_get_cdevpriv((void **)&ap);
702	if (error != 0)
703		return (error);
704
705	/*
706	 * Audit pipe ioctls: first come standard device node ioctls, then
707	 * manipulation of pipe settings, and finally, statistics query
708	 * ioctls.
709	 */
710	switch (cmd) {
711	case FIONBIO:
712		AUDIT_PIPE_LOCK(ap);
713		if (*(int *)data)
714			ap->ap_flags |= AUDIT_PIPE_NBIO;
715		else
716			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
717		AUDIT_PIPE_UNLOCK(ap);
718		error = 0;
719		break;
720
721	case FIONREAD:
722		AUDIT_PIPE_LOCK(ap);
723		*(int *)data = ap->ap_qbyteslen - ap->ap_qoffset;
724		AUDIT_PIPE_UNLOCK(ap);
725		error = 0;
726		break;
727
728	case FIOASYNC:
729		AUDIT_PIPE_LOCK(ap);
730		if (*(int *)data)
731			ap->ap_flags |= AUDIT_PIPE_ASYNC;
732		else
733			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
734		AUDIT_PIPE_UNLOCK(ap);
735		error = 0;
736		break;
737
738	case FIOSETOWN:
739		error = fsetown(*(int *)data, &ap->ap_sigio);
740		break;
741
742	case FIOGETOWN:
743		*(int *)data = fgetown(&ap->ap_sigio);
744		error = 0;
745		break;
746
747	case AUDITPIPE_GET_QLEN:
748		*(u_int *)data = ap->ap_qlen;
749		error = 0;
750		break;
751
752	case AUDITPIPE_GET_QLIMIT:
753		*(u_int *)data = ap->ap_qlimit;
754		error = 0;
755		break;
756
757	case AUDITPIPE_SET_QLIMIT:
758		/* Lockless integer write. */
759		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
760		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
761			ap->ap_qlimit = *(u_int *)data;
762			error = 0;
763		} else
764			error = EINVAL;
765		break;
766
767	case AUDITPIPE_GET_QLIMIT_MIN:
768		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
769		error = 0;
770		break;
771
772	case AUDITPIPE_GET_QLIMIT_MAX:
773		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
774		error = 0;
775		break;
776
777	case AUDITPIPE_GET_PRESELECT_FLAGS:
778		AUDIT_PIPE_LOCK(ap);
779		maskp = (au_mask_t *)data;
780		*maskp = ap->ap_preselect_flags;
781		AUDIT_PIPE_UNLOCK(ap);
782		error = 0;
783		break;
784
785	case AUDITPIPE_SET_PRESELECT_FLAGS:
786		AUDIT_PIPE_LOCK(ap);
787		maskp = (au_mask_t *)data;
788		ap->ap_preselect_flags = *maskp;
789		AUDIT_PIPE_UNLOCK(ap);
790		error = 0;
791		break;
792
793	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
794		AUDIT_PIPE_LOCK(ap);
795		maskp = (au_mask_t *)data;
796		*maskp = ap->ap_preselect_naflags;
797		AUDIT_PIPE_UNLOCK(ap);
798		error = 0;
799		break;
800
801	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
802		AUDIT_PIPE_LOCK(ap);
803		maskp = (au_mask_t *)data;
804		ap->ap_preselect_naflags = *maskp;
805		AUDIT_PIPE_UNLOCK(ap);
806		error = 0;
807		break;
808
809	case AUDITPIPE_GET_PRESELECT_AUID:
810		aip = (struct auditpipe_ioctl_preselect *)data;
811		error = audit_pipe_preselect_get(ap, aip->aip_auid,
812		    &aip->aip_mask);
813		break;
814
815	case AUDITPIPE_SET_PRESELECT_AUID:
816		aip = (struct auditpipe_ioctl_preselect *)data;
817		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
818		error = 0;
819		break;
820
821	case AUDITPIPE_DELETE_PRESELECT_AUID:
822		auid = *(au_id_t *)data;
823		error = audit_pipe_preselect_delete(ap, auid);
824		break;
825
826	case AUDITPIPE_FLUSH_PRESELECT_AUID:
827		audit_pipe_preselect_flush(ap);
828		error = 0;
829		break;
830
831	case AUDITPIPE_GET_PRESELECT_MODE:
832		AUDIT_PIPE_LOCK(ap);
833		*(int *)data = ap->ap_preselect_mode;
834		AUDIT_PIPE_UNLOCK(ap);
835		error = 0;
836		break;
837
838	case AUDITPIPE_SET_PRESELECT_MODE:
839		mode = *(int *)data;
840		switch (mode) {
841		case AUDITPIPE_PRESELECT_MODE_TRAIL:
842		case AUDITPIPE_PRESELECT_MODE_LOCAL:
843			AUDIT_PIPE_LOCK(ap);
844			ap->ap_preselect_mode = mode;
845			AUDIT_PIPE_UNLOCK(ap);
846			error = 0;
847			break;
848
849		default:
850			error = EINVAL;
851		}
852		break;
853
854	case AUDITPIPE_FLUSH:
855		if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
856			return (EINTR);
857		AUDIT_PIPE_LOCK(ap);
858		audit_pipe_flush(ap);
859		AUDIT_PIPE_UNLOCK(ap);
860		AUDIT_PIPE_SX_XUNLOCK(ap);
861		error = 0;
862		break;
863
864	case AUDITPIPE_GET_MAXAUDITDATA:
865		*(u_int *)data = MAXAUDITDATA;
866		error = 0;
867		break;
868
869	case AUDITPIPE_GET_INSERTS:
870		*(u_int *)data = ap->ap_inserts;
871		error = 0;
872		break;
873
874	case AUDITPIPE_GET_READS:
875		*(u_int *)data = ap->ap_reads;
876		error = 0;
877		break;
878
879	case AUDITPIPE_GET_DROPS:
880		*(u_int *)data = ap->ap_drops;
881		error = 0;
882		break;
883
884	case AUDITPIPE_GET_TRUNCATES:
885		*(u_int *)data = 0;
886		error = 0;
887		break;
888
889	default:
890		error = ENOTTY;
891	}
892	return (error);
893}
894
/*
 * Audit pipe read.  Read one or more partial or complete records to user
 * memory.
 *
 * Blocks until at least one record is queued, unless the pipe is in
 * non-blocking mode, in which case EAGAIN is returned.  Returns 0 on
 * success, EINTR if interrupted while acquiring the sx lock, or the error
 * from cv_wait_sig() or uiomove().  A record that is only partly consumed
 * stays at the head of the queue, with ap_qoffset recording progress.
 */
static int
audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
{
	struct audit_pipe_entry *ape;
	struct audit_pipe *ap;
	u_int toread;
	int error;

	error = devfs_get_cdevpriv((void **)&ap);
	if (error != 0)
		return (error);

	/*
	 * We hold an sx(9) lock over read and flush because we rely on the
	 * stability of a record in the queue during uiomove(9).
	 */
	if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
		return (EINTR);
	AUDIT_PIPE_LOCK(ap);
	/* Wait for data; cv_wait_sig() drops and retakes the pipe mutex. */
	while (TAILQ_EMPTY(&ap->ap_queue)) {
		if (ap->ap_flags & AUDIT_PIPE_NBIO) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (EAGAIN);
		}
		error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap));
		if (error) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}
	}

	/*
	 * Copy as many remaining bytes from the current record to userspace
	 * as we can.  Keep processing records until we run out of records in
	 * the queue, or until the user buffer runs out of space.
	 *
	 * Note: we rely on the SX lock to maintain ape's stability here.
	 */
	/* Incremented once per read call that found data, not per record. */
	ap->ap_reads++;
	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL &&
	    uio->uio_resid > 0) {
		AUDIT_PIPE_LOCK_ASSERT(ap);

		KASSERT(ape->ape_record_len > ap->ap_qoffset,
		    ("audit_pipe_read: record_len > qoffset (1)"));
		toread = MIN(ape->ape_record_len - ap->ap_qoffset,
		    uio->uio_resid);
		/*
		 * Drop the mutex across the copy: only the sx lock is held
		 * while uiomove() runs.
		 */
		AUDIT_PIPE_UNLOCK(ap);
		error = uiomove((char *)ape->ape_record + ap->ap_qoffset,
		    toread, uio);
		if (error) {
			/* The mutex is already released on this path. */
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}

		/*
		 * If the copy succeeded, update book-keeping, and if no
		 * bytes remain in the current record, free it.
		 */
		AUDIT_PIPE_LOCK(ap);
		KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape,
		    ("audit_pipe_read: queue out of sync after uiomove"));
		ap->ap_qoffset += toread;
		KASSERT(ape->ape_record_len >= ap->ap_qoffset,
		    ("audit_pipe_read: record_len >= qoffset (2)"));
		if (ap->ap_qoffset == ape->ape_record_len) {
			TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
			ap->ap_qbyteslen -= ape->ape_record_len;
			audit_pipe_entry_free(ape);
			ap->ap_qlen--;
			ap->ap_qoffset = 0;
		}
	}
	AUDIT_PIPE_UNLOCK(ap);
	AUDIT_PIPE_SX_XUNLOCK(ap);
	return (0);
}
978
979/*
980 * Audit pipe poll.
981 */
982static int
983audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
984{
985	struct audit_pipe *ap;
986	int error, revents;
987
988	revents = 0;
989	error = devfs_get_cdevpriv((void **)&ap);
990	if (error != 0)
991		return (error);
992	if (events & (POLLIN | POLLRDNORM)) {
993		AUDIT_PIPE_LOCK(ap);
994		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
995			revents |= events & (POLLIN | POLLRDNORM);
996		else
997			selrecord(td, &ap->ap_selinfo);
998		AUDIT_PIPE_UNLOCK(ap);
999	}
1000	return (revents);
1001}
1002
1003/*
1004 * Audit pipe kqfilter.
1005 */
1006static int
1007audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
1008{
1009	struct audit_pipe *ap;
1010	int error;
1011
1012	error = devfs_get_cdevpriv((void **)&ap);
1013	if (error != 0)
1014		return (error);
1015	if (kn->kn_filter != EVFILT_READ)
1016		return (EINVAL);
1017
1018	kn->kn_fop = &audit_pipe_read_filterops;
1019	kn->kn_hook = ap;
1020
1021	AUDIT_PIPE_LOCK(ap);
1022	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
1023	AUDIT_PIPE_UNLOCK(ap);
1024	return (0);
1025}
1026
1027/*
1028 * Return true if there are records available for reading on the pipe.
1029 */
1030static int
1031audit_pipe_kqread(struct knote *kn, long hint)
1032{
1033	struct audit_pipe *ap;
1034
1035	ap = (struct audit_pipe *)kn->kn_hook;
1036	AUDIT_PIPE_LOCK_ASSERT(ap);
1037
1038	if (ap->ap_qlen != 0) {
1039		kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset;
1040		return (1);
1041	} else {
1042		kn->kn_data = 0;
1043		return (0);
1044	}
1045}
1046
1047/*
1048 * Detach kqueue state from audit pipe.
1049 */
1050static void
1051audit_pipe_kqdetach(struct knote *kn)
1052{
1053	struct audit_pipe *ap;
1054
1055	ap = (struct audit_pipe *)kn->kn_hook;
1056	AUDIT_PIPE_LOCK(ap);
1057	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1058	AUDIT_PIPE_UNLOCK(ap);
1059}
1060
1061/*
1062 * Initialize the audit pipe system.
1063 */
1064static void
1065audit_pipe_init(void *unused)
1066{
1067
1068	TAILQ_INIT(&audit_pipe_list);
1069	AUDIT_PIPE_LIST_LOCK_INIT();
1070	audit_pipe_dev = make_dev(&audit_pipe_cdevsw, 0, UID_ROOT,
1071		GID_WHEEL, 0600, "%s", AUDIT_PIPE_NAME);
1072	if (audit_pipe_dev == NULL) {
1073		AUDIT_PIPE_LIST_LOCK_DESTROY();
1074		panic("Can't initialize audit pipe subsystem");
1075	}
1076}
1077
1078SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
1079    NULL);
1080