audit_pipe.c revision 184545
1295009Sjkim/*-
2280297Sjkim * Copyright (c) 2006 Robert N. M. Watson
3280297Sjkim * Copyright (c) 2008 Apple, Inc.
4280297Sjkim * All rights reserved.
5142425Snectar *
6142425Snectar * This software was developed by Robert Watson for the TrustedBSD Project.
7142425Snectar *
8142425Snectar * Redistribution and use in source and binary forms, with or without
9142425Snectar * modification, are permitted provided that the following conditions
10142425Snectar * are met:
11142425Snectar * 1. Redistributions of source code must retain the above copyright
12142425Snectar *    notice, this list of conditions and the following disclaimer.
13142425Snectar * 2. Redistributions in binary form must reproduce the above copyright
14280297Sjkim *    notice, this list of conditions and the following disclaimer in the
15142425Snectar *    documentation and/or other materials provided with the distribution.
16142425Snectar *
17142425Snectar * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18142425Snectar * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19142425Snectar * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20142425Snectar * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21142425Snectar * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22142425Snectar * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23142425Snectar * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24142425Snectar * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25142425Snectar * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26142425Snectar * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27142425Snectar * SUCH DAMAGE.
28142425Snectar */
29142425Snectar
30142425Snectar#include <sys/cdefs.h>
31142425Snectar__FBSDID("$FreeBSD: head/sys/security/audit/audit_pipe.c 184545 2008-11-02 00:25:48Z rwatson $");
32142425Snectar
33142425Snectar#include <sys/param.h>
34142425Snectar#include <sys/condvar.h>
35142425Snectar#include <sys/conf.h>
36142425Snectar#include <sys/eventhandler.h>
37142425Snectar#include <sys/filio.h>
38142425Snectar#include <sys/kernel.h>
39142425Snectar#include <sys/lock.h>
40142425Snectar#include <sys/malloc.h>
41142425Snectar#include <sys/mutex.h>
42142425Snectar#include <sys/poll.h>
43142425Snectar#include <sys/proc.h>
44142425Snectar#include <sys/queue.h>
45142425Snectar#include <sys/rwlock.h>
46142425Snectar#include <sys/selinfo.h>
47142425Snectar#include <sys/sigio.h>
48142425Snectar#include <sys/signal.h>
49142425Snectar#include <sys/signalvar.h>
50142425Snectar#include <sys/sx.h>
51142425Snectar#include <sys/systm.h>
52142425Snectar#include <sys/uio.h>
53142425Snectar
54142425Snectar#include <security/audit/audit.h>
55142425Snectar#include <security/audit/audit_ioctl.h>
56142425Snectar#include <security/audit/audit_private.h>
57142425Snectar
58142425Snectar/*
59142425Snectar * Implementation of a clonable special device providing a live stream of BSM
60160814Ssimon * audit data.  Consumers receive a "tee" of the system audit trail by
61160814Ssimon * default, but may also define alternative event selections using ioctls.
62160814Ssimon * This interface provides unreliable but timely access to audit events.
63160814Ssimon * Consumers should be very careful to avoid introducing event cycles.
64280297Sjkim */
65142425Snectar
/*
 * Memory types.  Each MALLOC_DEFINE() below declares a malloc(9) type used
 * to tag allocations made in this file:
 *
 * - M_AUDIT_PIPE: struct audit_pipe instances.
 * - M_AUDIT_PIPE_ENTRY: queue entries and the record buffers they own.
 * - M_AUDIT_PIPE_PRESELECT: per-auid preselection entries.
 */
static MALLOC_DEFINE(M_AUDIT_PIPE, "audit_pipe", "Audit pipes");
static MALLOC_DEFINE(M_AUDIT_PIPE_ENTRY, "audit_pipeent",
    "Audit pipe entries and buffers");
static MALLOC_DEFINE(M_AUDIT_PIPE_PRESELECT, "audit_pipe_presel",
    "Audit pipe preselection structure");
74280297Sjkim
/*
 * Audit pipe buffer parameters, expressed as a number of queued records:
 * the limit given to a newly allocated pipe, and the minimum and maximum
 * values reported to userspace by the AUDITPIPE_GET_QLIMIT_MIN and
 * AUDITPIPE_GET_QLIMIT_MAX ioctls.
 */
#define	AUDIT_PIPE_QLIMIT_DEFAULT	(128)
#define	AUDIT_PIPE_QLIMIT_MIN		(0)
#define	AUDIT_PIPE_QLIMIT_MAX		(1024)
81142425Snectar
/*
 * Description of an entry in an audit_pipe: one queued record.  The entry
 * owns a private copy of the record data, which is released together with
 * the entry by audit_pipe_entry_free().
 */
struct audit_pipe_entry {
	void				*ape_record;	/* Copy of record. */
	u_int				 ape_record_len; /* Length in bytes. */
	TAILQ_ENTRY(audit_pipe_entry)	 ape_queue;	/* Link in ap_queue. */
};
90280297Sjkim
/*
 * Audit pipes allow processes to express "interest" in the set of records
 * that are delivered via the pipe.  They do this in a similar manner to the
 * mechanism for audit trail configuration, by expressing two global masks,
 * and optionally expressing per-auid masks.  The following data structure is
 * the per-auid mask description.  The global state is stored in the audit
 * pipe data structure.
 *
 * Entries are kept on a simple linked list that is searched linearly by
 * auid.  We may want to consider a more space/time-efficient data structure
 * once usage patterns for per-auid specifications are clear.
 */
struct audit_pipe_preselect {
	au_id_t					 app_auid;	/* Key. */
	au_mask_t				 app_mask;	/* Mask for auid. */
	TAILQ_ENTRY(audit_pipe_preselect)	 app_list;	/* Link in list. */
};
107160814Ssimon
/*
 * Description of an individual audit_pipe.  Consists largely of a bounded
 * length queue.
 *
 * Flag values stored in ap_flags:
 * - AUDIT_PIPE_ASYNC: send SIGIO to the ap_sigio owner when a record is
 *   appended.
 * - AUDIT_PIPE_NBIO: reads on an empty pipe return EAGAIN rather than
 *   sleeping.
 */
#define	AUDIT_PIPE_ASYNC	0x00000001
#define	AUDIT_PIPE_NBIO		0x00000002
struct audit_pipe {
	int				 ap_open;	/* Device open? */
	u_int				 ap_flags;	/* AUDIT_PIPE_* flags. */

	struct selinfo			 ap_selinfo;	/* poll/kqueue state. */
	struct sigio			*ap_sigio;	/* SIGIO recipient. */

	/*
	 * Per-pipe mutex protecting most fields in this data structure.
	 */
	struct mtx			 ap_mtx;

	/*
	 * Per-pipe sleep lock serializing user-generated reads and flushes.
	 * uiomove() is called to copy out the current head record's data
	 * while the record remains in the queue, so we prevent other threads
	 * from removing it using this lock.
	 */
	struct sx			 ap_sx;

	/*
	 * Condition variable to signal when data has been delivered to a
	 * pipe.
	 */
	struct cv			 ap_cv;

	/*
	 * Various queue-related variables: qlen and qlimit are a count of
	 * records in the queue; qbyteslen is the number of bytes of data
	 * across all records, and qoffset is the amount read so far of the
	 * first record in the queue.  The number of bytes available for
	 * reading in the queue is qbyteslen - qoffset.
	 */
	u_int				 ap_qlen;
	u_int				 ap_qlimit;
	u_int				 ap_qbyteslen;
	u_int				 ap_qoffset;

	/*
	 * Per-pipe operation statistics.
	 */
	u_int64_t			 ap_inserts;	/* Records added. */
	u_int64_t			 ap_reads;	/* Records read. */
	u_int64_t			 ap_drops;	/* Records dropped. */

	/*
	 * Fields relating to pipe interest: global masks for unmatched
	 * processes (attributable, non-attributable), and a list of specific
	 * interest specifications by auid.
	 */
	int				 ap_preselect_mode;
	au_mask_t			 ap_preselect_flags;
	au_mask_t			 ap_preselect_naflags;
	TAILQ_HEAD(, audit_pipe_preselect)	ap_preselect_list;

	/*
	 * Current pending record list.  Protected by a combination of ap_mtx
	 * and ap_sx.  Note particularly that *both* locks are required to
	 * remove a record from the head of the queue, as an in-progress read
	 * may sleep while copying and therefore cannot hold ap_mtx.
	 */
	TAILQ_HEAD(, audit_pipe_entry)	 ap_queue;

	/*
	 * Global pipe list.
	 */
	TAILQ_ENTRY(audit_pipe)		 ap_list;
};
181
/*
 * Wrappers for the per-pipe mutex (ap_mtx).  AUDIT_PIPE_MTX() yields a
 * pointer to the mutex itself, for interfaces that take one directly, such
 * as cv_wait_sig() and knlist_init().
 */
#define	AUDIT_PIPE_LOCK(ap)		mtx_lock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_ASSERT(ap)	mtx_assert(&(ap)->ap_mtx, MA_OWNED)
#define	AUDIT_PIPE_LOCK_DESTROY(ap)	mtx_destroy(&(ap)->ap_mtx)
#define	AUDIT_PIPE_LOCK_INIT(ap)	mtx_init(&(ap)->ap_mtx, \
					    "audit_pipe_mtx", NULL, MTX_DEF)
#define	AUDIT_PIPE_UNLOCK(ap)		mtx_unlock(&(ap)->ap_mtx)
#define	AUDIT_PIPE_MTX(ap)		(&(ap)->ap_mtx)

/*
 * Wrappers for the per-pipe sx lock (ap_sx), which is only ever taken
 * exclusively.  AUDIT_PIPE_SX_XLOCK_SIG() evaluates to the result of
 * sx_xlock_sig(); callers in this file treat a non-zero result as an
 * interrupted acquisition and return EINTR without holding the lock.
 */
#define	AUDIT_PIPE_SX_LOCK_DESTROY(ap)	sx_destroy(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_LOCK_INIT(ap)	sx_init(&(ap)->ap_sx, "audit_pipe_sx")
#define	AUDIT_PIPE_SX_XLOCK_ASSERT(ap)	sx_assert(&(ap)->ap_sx, SA_XLOCKED)
#define	AUDIT_PIPE_SX_XLOCK_SIG(ap)	sx_xlock_sig(&(ap)->ap_sx)
#define	AUDIT_PIPE_SX_XUNLOCK(ap)	sx_xunlock(&(ap)->ap_sx)
195
/*
 * Global list of audit pipes, and the rwlock protecting it.  Individual
 * record queues on pipes are protected by per-pipe locks; the list lock
 * synchronizes threads walking the list to deliver records to individual
 * pipes with the addition and removal of pipes, and is mostly acquired for
 * read.  Open and close take it for write.
 */
static TAILQ_HEAD(, audit_pipe)	 audit_pipe_list;
static struct rwlock		 audit_pipe_lock;

#define	AUDIT_PIPE_LIST_LOCK_INIT()	rw_init(&audit_pipe_lock, \
					    "audit_pipe_list_lock")
#define	AUDIT_PIPE_LIST_RLOCK()		rw_rlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_RUNLOCK()	rw_runlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK()		rw_wlock(&audit_pipe_lock)
#define	AUDIT_PIPE_LIST_WLOCK_ASSERT()	rw_assert(&audit_pipe_lock, \
					    RA_WLOCKED)
#define	AUDIT_PIPE_LIST_WUNLOCK()	rw_wunlock(&audit_pipe_lock)
213
/*
 * Cloning related variables and constants.  AUDIT_PIPE_NAME is the base
 * device name matched by the clone handler and used as the cdevsw name;
 * audit_pipe_eh_tag holds the dev_clone event handler registration made in
 * audit_pipe_init(), and audit_pipe_clones is the clone state passed to
 * clone_setup() and clone_create().
 */
#define	AUDIT_PIPE_NAME		"auditpipe"
static eventhandler_tag		 audit_pipe_eh_tag;
static struct clonedevs		*audit_pipe_clones;
220
/*
 * Special device methods and definition.  The prototypes use the d_*_t
 * function typedefs so that each method's signature matches the
 * corresponding cdevsw slot.
 */
static d_open_t		audit_pipe_open;
static d_close_t	audit_pipe_close;
static d_read_t		audit_pipe_read;
static d_ioctl_t	audit_pipe_ioctl;
static d_poll_t		audit_pipe_poll;
static d_kqfilter_t	audit_pipe_kqfilter;

/*
 * Character device switch for audit pipes.  No write method is provided:
 * pipes are read-only from userspace.
 */
static struct cdevsw	audit_pipe_cdevsw = {
	.d_version =	D_VERSION,
	.d_flags =	D_PSEUDO | D_NEEDGIANT | D_NEEDMINOR,
	.d_open =	audit_pipe_open,
	.d_close =	audit_pipe_close,
	.d_read =	audit_pipe_read,
	.d_ioctl =	audit_pipe_ioctl,
	.d_poll =	audit_pipe_poll,
	.d_kqfilter =	audit_pipe_kqfilter,
	.d_name =	AUDIT_PIPE_NAME,
};
242
/*
 * kqueue(2) support: filter operations installed on a knote by
 * audit_pipe_kqfilter() for EVFILT_READ.  No attach method is supplied;
 * the knote is added to the pipe's knlist by audit_pipe_kqfilter() itself.
 */
static int	audit_pipe_kqread(struct knote *note, long hint);
static void	audit_pipe_kqdetach(struct knote *note);

static struct filterops audit_pipe_read_filterops = {
	.f_isfd =	1,
	.f_attach =	NULL,
	.f_detach =	audit_pipe_kqdetach,
	.f_event =	audit_pipe_kqread,
};
252
/*
 * Some global statistics on audit pipes.  audit_pipe_count and
 * audit_pipe_ever are updated with the global list write lock held;
 * audit_pipe_records is incremented without any lock by the submit
 * routines, and audit_pipe_drops is incremented under whichever per-pipe
 * mutex is held at the time, so both are approximate.
 */
static int		audit_pipe_count;	/* Current number of pipes. */
static u_int64_t	audit_pipe_ever;	/* Pipes ever allocated. */
static u_int64_t	audit_pipe_records;	/* Records seen. */
static u_int64_t	audit_pipe_drops;	/* Global record drop count. */
260
261/*
262 * Free an audit pipe entry.
263 */
264static void
265audit_pipe_entry_free(struct audit_pipe_entry *ape)
266{
267
268	free(ape->ape_record, M_AUDIT_PIPE_ENTRY);
269	free(ape, M_AUDIT_PIPE_ENTRY);
270}
271
272/*
273 * Find an audit pipe preselection specification for an auid, if any.
274 */
275static struct audit_pipe_preselect *
276audit_pipe_preselect_find(struct audit_pipe *ap, au_id_t auid)
277{
278	struct audit_pipe_preselect *app;
279
280	AUDIT_PIPE_LOCK_ASSERT(ap);
281
282	TAILQ_FOREACH(app, &ap->ap_preselect_list, app_list) {
283		if (app->app_auid == auid)
284			return (app);
285	}
286	return (NULL);
287}
288
289/*
290 * Query the per-pipe mask for a specific auid.
291 */
292static int
293audit_pipe_preselect_get(struct audit_pipe *ap, au_id_t auid,
294    au_mask_t *maskp)
295{
296	struct audit_pipe_preselect *app;
297	int error;
298
299	AUDIT_PIPE_LOCK(ap);
300	app = audit_pipe_preselect_find(ap, auid);
301	if (app != NULL) {
302		*maskp = app->app_mask;
303		error = 0;
304	} else
305		error = ENOENT;
306	AUDIT_PIPE_UNLOCK(ap);
307	return (error);
308}
309
310/*
311 * Set the per-pipe mask for a specific auid.  Add a new entry if needed;
312 * otherwise, update the current entry.
313 */
314static void
315audit_pipe_preselect_set(struct audit_pipe *ap, au_id_t auid, au_mask_t mask)
316{
317	struct audit_pipe_preselect *app, *app_new;
318
319	/*
320	 * Pessimistically assume that the auid doesn't already have a mask
321	 * set, and allocate.  We will free it if it is unneeded.
322	 */
323	app_new = malloc(sizeof(*app_new), M_AUDIT_PIPE_PRESELECT, M_WAITOK);
324	AUDIT_PIPE_LOCK(ap);
325	app = audit_pipe_preselect_find(ap, auid);
326	if (app == NULL) {
327		app = app_new;
328		app_new = NULL;
329		app->app_auid = auid;
330		TAILQ_INSERT_TAIL(&ap->ap_preselect_list, app, app_list);
331	}
332	app->app_mask = mask;
333	AUDIT_PIPE_UNLOCK(ap);
334	if (app_new != NULL)
335		free(app_new, M_AUDIT_PIPE_PRESELECT);
336}
337
338/*
339 * Delete a per-auid mask on an audit pipe.
340 */
341static int
342audit_pipe_preselect_delete(struct audit_pipe *ap, au_id_t auid)
343{
344	struct audit_pipe_preselect *app;
345	int error;
346
347	AUDIT_PIPE_LOCK(ap);
348	app = audit_pipe_preselect_find(ap, auid);
349	if (app != NULL) {
350		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
351		error = 0;
352	} else
353		error = ENOENT;
354	AUDIT_PIPE_UNLOCK(ap);
355	if (app != NULL)
356		free(app, M_AUDIT_PIPE_PRESELECT);
357	return (error);
358}
359
360/*
361 * Delete all per-auid masks on an audit pipe.
362 */
363static void
364audit_pipe_preselect_flush_locked(struct audit_pipe *ap)
365{
366	struct audit_pipe_preselect *app;
367
368	AUDIT_PIPE_LOCK_ASSERT(ap);
369
370	while ((app = TAILQ_FIRST(&ap->ap_preselect_list)) != NULL) {
371		TAILQ_REMOVE(&ap->ap_preselect_list, app, app_list);
372		free(app, M_AUDIT_PIPE_PRESELECT);
373	}
374}
375
/*
 * Unlocked entry point for deleting all per-auid masks on an audit pipe:
 * takes the pipe mutex around audit_pipe_preselect_flush_locked().
 */
static void
audit_pipe_preselect_flush(struct audit_pipe *ap)
{

	AUDIT_PIPE_LOCK(ap);
	{
		audit_pipe_preselect_flush_locked(ap);
	}
	AUDIT_PIPE_UNLOCK(ap);
}
384
385/*-
386 * Determine whether a specific audit pipe matches a record with these
387 * properties.  Algorithm is as follows:
388 *
389 * - If the pipe is configured to track the default trail configuration, then
390 *   use the results of global preselection matching.
391 * - If not, search for a specifically configured auid entry matching the
392 *   event.  If an entry is found, use that.
393 * - Otherwise, use the default flags or naflags configured for the pipe.
394 */
395static int
396audit_pipe_preselect_check(struct audit_pipe *ap, au_id_t auid,
397    au_event_t event, au_class_t class, int sorf, int trail_preselect)
398{
399	struct audit_pipe_preselect *app;
400
401	AUDIT_PIPE_LOCK_ASSERT(ap);
402
403	switch (ap->ap_preselect_mode) {
404	case AUDITPIPE_PRESELECT_MODE_TRAIL:
405		return (trail_preselect);
406
407	case AUDITPIPE_PRESELECT_MODE_LOCAL:
408		app = audit_pipe_preselect_find(ap, auid);
409		if (app == NULL) {
410			if (auid == AU_DEFAUDITID)
411				return (au_preselect(event, class,
412				    &ap->ap_preselect_naflags, sorf));
413			else
414				return (au_preselect(event, class,
415				    &ap->ap_preselect_flags, sorf));
416		} else
417			return (au_preselect(event, class, &app->app_mask,
418			    sorf));
419
420	default:
421		panic("audit_pipe_preselect_check: mode %d",
422		    ap->ap_preselect_mode);
423	}
424
425	return (0);
426}
427
428/*
429 * Determine whether there exists a pipe interested in a record with specific
430 * properties.
431 */
432int
433audit_pipe_preselect(au_id_t auid, au_event_t event, au_class_t class,
434    int sorf, int trail_preselect)
435{
436	struct audit_pipe *ap;
437
438	AUDIT_PIPE_LIST_RLOCK();
439	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
440		AUDIT_PIPE_LOCK(ap);
441		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
442		    trail_preselect)) {
443			AUDIT_PIPE_UNLOCK(ap);
444			AUDIT_PIPE_LIST_RUNLOCK();
445			return (1);
446		}
447		AUDIT_PIPE_UNLOCK(ap);
448	}
449	AUDIT_PIPE_LIST_RUNLOCK();
450	return (0);
451}
452
453/*
454 * Append individual record to a queue -- allocate queue-local buffer, and
455 * add to the queue.  If the queue is full or we can't allocate memory, drop
456 * the newest record.
457 */
458static void
459audit_pipe_append(struct audit_pipe *ap, void *record, u_int record_len)
460{
461	struct audit_pipe_entry *ape;
462
463	AUDIT_PIPE_LOCK_ASSERT(ap);
464
465	if (ap->ap_qlen >= ap->ap_qlimit) {
466		ap->ap_drops++;
467		audit_pipe_drops++;
468		return;
469	}
470
471	ape = malloc(sizeof(*ape), M_AUDIT_PIPE_ENTRY, M_NOWAIT | M_ZERO);
472	if (ape == NULL) {
473		ap->ap_drops++;
474		audit_pipe_drops++;
475		return;
476	}
477
478	ape->ape_record = malloc(record_len, M_AUDIT_PIPE_ENTRY, M_NOWAIT);
479	if (ape->ape_record == NULL) {
480		free(ape, M_AUDIT_PIPE_ENTRY);
481		ap->ap_drops++;
482		audit_pipe_drops++;
483		return;
484	}
485
486	bcopy(record, ape->ape_record, record_len);
487	ape->ape_record_len = record_len;
488
489	TAILQ_INSERT_TAIL(&ap->ap_queue, ape, ape_queue);
490	ap->ap_inserts++;
491	ap->ap_qlen++;
492	ap->ap_qbyteslen += ape->ape_record_len;
493	selwakeuppri(&ap->ap_selinfo, PSOCK);
494	KNOTE_LOCKED(&ap->ap_selinfo.si_note, 0);
495	if (ap->ap_flags & AUDIT_PIPE_ASYNC)
496		pgsigio(&ap->ap_sigio, SIGIO, 0);
497	cv_broadcast(&ap->ap_cv);
498}
499
500/*
501 * audit_pipe_submit(): audit_worker submits audit records via this
502 * interface, which arranges for them to be delivered to pipe queues.
503 */
504void
505audit_pipe_submit(au_id_t auid, au_event_t event, au_class_t class, int sorf,
506    int trail_select, void *record, u_int record_len)
507{
508	struct audit_pipe *ap;
509
510	/*
511	 * Lockless read to avoid lock overhead if pipes are not in use.
512	 */
513	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
514		return;
515
516	AUDIT_PIPE_LIST_RLOCK();
517	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
518		AUDIT_PIPE_LOCK(ap);
519		if (audit_pipe_preselect_check(ap, auid, event, class, sorf,
520		    trail_select))
521			audit_pipe_append(ap, record, record_len);
522		AUDIT_PIPE_UNLOCK(ap);
523	}
524	AUDIT_PIPE_LIST_RUNLOCK();
525
526	/* Unlocked increment. */
527	audit_pipe_records++;
528}
529
530/*
531 * audit_pipe_submit_user(): the same as audit_pipe_submit(), except that
532 * since we don't currently have selection information available, it is
533 * delivered to the pipe unconditionally.
534 *
535 * XXXRW: This is a bug.  The BSM check routine for submitting a user record
536 * should parse that information and return it.
537 */
538void
539audit_pipe_submit_user(void *record, u_int record_len)
540{
541	struct audit_pipe *ap;
542
543	/*
544	 * Lockless read to avoid lock overhead if pipes are not in use.
545	 */
546	if (TAILQ_FIRST(&audit_pipe_list) == NULL)
547		return;
548
549	AUDIT_PIPE_LIST_RLOCK();
550	TAILQ_FOREACH(ap, &audit_pipe_list, ap_list) {
551		AUDIT_PIPE_LOCK(ap);
552		audit_pipe_append(ap, record, record_len);
553		AUDIT_PIPE_UNLOCK(ap);
554	}
555	AUDIT_PIPE_LIST_RUNLOCK();
556
557	/* Unlocked increment. */
558	audit_pipe_records++;
559}
560
/*
 * Allocate and initialize a new audit pipe.  On success, the pipe is linked
 * onto the global list, the global statistics are updated, and a pointer to
 * the pipe is returned; on allocation failure NULL is returned and nothing
 * is changed.  The caller must hold the global list write lock, which is
 * presumably why the allocation is non-sleeping (M_NOWAIT).
 */
static struct audit_pipe *
audit_pipe_alloc(void)
{
	struct audit_pipe *ap;

	AUDIT_PIPE_LIST_WLOCK_ASSERT();

	ap = malloc(sizeof(*ap), M_AUDIT_PIPE, M_NOWAIT | M_ZERO);
	if (ap == NULL)
		return (NULL);
	ap->ap_qlimit = AUDIT_PIPE_QLIMIT_DEFAULT;
	TAILQ_INIT(&ap->ap_queue);
	/* The knote list is protected by the per-pipe mutex. */
	knlist_init(&ap->ap_selinfo.si_note, AUDIT_PIPE_MTX(ap), NULL, NULL,
	    NULL);
	AUDIT_PIPE_LOCK_INIT(ap);
	AUDIT_PIPE_SX_LOCK_INIT(ap);
	cv_init(&ap->ap_cv, "audit_pipe");

	/*
	 * Default flags, naflags, and auid-specific preselection settings to
	 * 0.  Initialize the mode to the global trail so that if praudit(1)
	 * is run on /dev/auditpipe, it sees events associated with the
	 * default trail.  Pipe-aware applications can clear the flag, set
	 * custom masks, and flush the pipe as needed.
	 */
	bzero(&ap->ap_preselect_flags, sizeof(ap->ap_preselect_flags));
	bzero(&ap->ap_preselect_naflags, sizeof(ap->ap_preselect_naflags));
	TAILQ_INIT(&ap->ap_preselect_list);
	ap->ap_preselect_mode = AUDITPIPE_PRESELECT_MODE_TRAIL;

	/*
	 * Add to global list and update global statistics.  From this point
	 * on the submit routines can find the pipe and deliver records to it.
	 */
	TAILQ_INSERT_HEAD(&audit_pipe_list, ap, ap_list);
	audit_pipe_count++;
	audit_pipe_ever++;

	return (ap);
}
604
605/*
606 * Flush all records currently present in an audit pipe; assume mutex is held.
607 */
608static void
609audit_pipe_flush(struct audit_pipe *ap)
610{
611	struct audit_pipe_entry *ape;
612
613	AUDIT_PIPE_LOCK_ASSERT(ap);
614
615	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL) {
616		TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
617		ap->ap_qbyteslen -= ape->ape_record_len;
618		audit_pipe_entry_free(ape);
619		ap->ap_qlen--;
620	}
621	ap->ap_qoffset = 0;
622
623	KASSERT(ap->ap_qlen == 0, ("audit_pipe_free: ap_qbyteslen"));
624	KASSERT(ap->ap_qbyteslen == 0, ("audit_pipe_flush: ap_qbyteslen"));
625}
626
/*
 * Free an audit pipe; this means freeing all preselection state and all
 * records in the pipe.  Assumes global write lock and pipe mutex are held to
 * prevent any new records from being inserted during the free, and that the
 * audit pipe is still on the global list.
 *
 * Note that the pipe mutex is destroyed here while still held by the
 * caller, and the pipe memory is released; the caller must not unlock the
 * mutex or touch the pipe after this function returns.
 */
static void
audit_pipe_free(struct audit_pipe *ap)
{

	AUDIT_PIPE_LIST_WLOCK_ASSERT();
	AUDIT_PIPE_LOCK_ASSERT(ap);

	/* Discard per-auid masks and any queued records. */
	audit_pipe_preselect_flush_locked(ap);
	audit_pipe_flush(ap);

	/* Tear down synchronization primitives and kqueue state. */
	cv_destroy(&ap->ap_cv);
	AUDIT_PIPE_SX_LOCK_DESTROY(ap);
	AUDIT_PIPE_LOCK_DESTROY(ap);
	knlist_destroy(&ap->ap_selinfo.si_note);

	/* Unlink from the global list, release memory, update statistics. */
	TAILQ_REMOVE(&audit_pipe_list, ap, ap_list);
	free(ap, M_AUDIT_PIPE);
	audit_pipe_count--;
}
650
651/*
652 * Audit pipe clone routine -- provide specific requested audit pipe, or a
653 * fresh one if a specific one is not requested.
654 */
655static void
656audit_pipe_clone(void *arg, struct ucred *cred, char *name, int namelen,
657    struct cdev **dev)
658{
659	int i, u;
660
661	if (*dev != NULL)
662		return;
663
664	if (strcmp(name, AUDIT_PIPE_NAME) == 0)
665		u = -1;
666	else if (dev_stdclone(name, NULL, AUDIT_PIPE_NAME, &u) != 1)
667		return;
668
669	i = clone_create(&audit_pipe_clones, &audit_pipe_cdevsw, &u, dev, 0);
670	if (i) {
671		*dev = make_dev(&audit_pipe_cdevsw, u, UID_ROOT,
672		    GID_WHEEL, 0600, "%s%d", AUDIT_PIPE_NAME, u);
673		if (*dev != NULL) {
674			dev_ref(*dev);
675			(*dev)->si_flags |= SI_CHEAPCLONE;
676		}
677	}
678}
679
680/*
681 * Audit pipe open method.  Explicit privilege check isn't used as this
682 * allows file permissions on the special device to be used to grant audit
683 * review access.  Those file permissions should be managed carefully.
684 */
685static int
686audit_pipe_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
687{
688	struct audit_pipe *ap;
689
690	AUDIT_PIPE_LIST_WLOCK();
691	ap = dev->si_drv1;
692	if (ap == NULL) {
693		ap = audit_pipe_alloc();
694		if (ap == NULL) {
695			AUDIT_PIPE_LIST_WUNLOCK();
696			return (ENOMEM);
697		}
698		dev->si_drv1 = ap;
699	} else {
700		KASSERT(ap->ap_open, ("audit_pipe_open: ap && !ap_open"));
701		AUDIT_PIPE_LIST_WUNLOCK();
702		return (EBUSY);
703	}
704	ap->ap_open = 1;	/* No lock required yet. */
705	AUDIT_PIPE_LIST_WUNLOCK();
706	fsetown(td->td_proc->p_pid, &ap->ap_sigio);
707	return (0);
708}
709
/*
 * Close audit pipe, tear down all records, etc.  The pipe attached to the
 * device is freed and detached so that a later open allocates a fresh one.
 * Always returns 0.
 */
static int
audit_pipe_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
{
	struct audit_pipe *ap;

	ap = dev->si_drv1;
	KASSERT(ap != NULL, ("audit_pipe_close: ap == NULL"));
	KASSERT(ap->ap_open, ("audit_pipe_close: !ap_open"));

	/* Drop SIGIO ownership before taking any pipe locks. */
	funsetown(&ap->ap_sigio);
	AUDIT_PIPE_LIST_WLOCK();
	AUDIT_PIPE_LOCK(ap);
	ap->ap_open = 0;
	/*
	 * audit_pipe_free() destroys the pipe mutex and frees the pipe, so
	 * there is deliberately no matching AUDIT_PIPE_UNLOCK() here.
	 */
	audit_pipe_free(ap);
	dev->si_drv1 = NULL;
	AUDIT_PIPE_LIST_WUNLOCK();
	return (0);
}
731
732/*
733 * Audit pipe ioctl() routine.  Handle file descriptor and audit pipe layer
734 * commands.
735 */
736static int
737audit_pipe_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag,
738    struct thread *td)
739{
740	struct auditpipe_ioctl_preselect *aip;
741	struct audit_pipe *ap;
742	au_mask_t *maskp;
743	int error, mode;
744	au_id_t auid;
745
746	ap = dev->si_drv1;
747	KASSERT(ap != NULL, ("audit_pipe_ioctl: ap == NULL"));
748
749	/*
750	 * Audit pipe ioctls: first come standard device node ioctls, then
751	 * manipulation of pipe settings, and finally, statistics query
752	 * ioctls.
753	 */
754	switch (cmd) {
755	case FIONBIO:
756		AUDIT_PIPE_LOCK(ap);
757		if (*(int *)data)
758			ap->ap_flags |= AUDIT_PIPE_NBIO;
759		else
760			ap->ap_flags &= ~AUDIT_PIPE_NBIO;
761		AUDIT_PIPE_UNLOCK(ap);
762		error = 0;
763		break;
764
765	case FIONREAD:
766		AUDIT_PIPE_LOCK(ap);
767		*(int *)data = ap->ap_qbyteslen - ap->ap_qoffset;
768		AUDIT_PIPE_UNLOCK(ap);
769		error = 0;
770		break;
771
772	case FIOASYNC:
773		AUDIT_PIPE_LOCK(ap);
774		if (*(int *)data)
775			ap->ap_flags |= AUDIT_PIPE_ASYNC;
776		else
777			ap->ap_flags &= ~AUDIT_PIPE_ASYNC;
778		AUDIT_PIPE_UNLOCK(ap);
779		error = 0;
780		break;
781
782	case FIOSETOWN:
783		error = fsetown(*(int *)data, &ap->ap_sigio);
784		break;
785
786	case FIOGETOWN:
787		*(int *)data = fgetown(&ap->ap_sigio);
788		error = 0;
789		break;
790
791	case AUDITPIPE_GET_QLEN:
792		*(u_int *)data = ap->ap_qlen;
793		error = 0;
794		break;
795
796	case AUDITPIPE_GET_QLIMIT:
797		*(u_int *)data = ap->ap_qlimit;
798		error = 0;
799		break;
800
801	case AUDITPIPE_SET_QLIMIT:
802		/* Lockless integer write. */
803		if (*(u_int *)data >= AUDIT_PIPE_QLIMIT_MIN ||
804		    *(u_int *)data <= AUDIT_PIPE_QLIMIT_MAX) {
805			ap->ap_qlimit = *(u_int *)data;
806			error = 0;
807		} else
808			error = EINVAL;
809		break;
810
811	case AUDITPIPE_GET_QLIMIT_MIN:
812		*(u_int *)data = AUDIT_PIPE_QLIMIT_MIN;
813		error = 0;
814		break;
815
816	case AUDITPIPE_GET_QLIMIT_MAX:
817		*(u_int *)data = AUDIT_PIPE_QLIMIT_MAX;
818		error = 0;
819		break;
820
821	case AUDITPIPE_GET_PRESELECT_FLAGS:
822		AUDIT_PIPE_LOCK(ap);
823		maskp = (au_mask_t *)data;
824		*maskp = ap->ap_preselect_flags;
825		AUDIT_PIPE_UNLOCK(ap);
826		error = 0;
827		break;
828
829	case AUDITPIPE_SET_PRESELECT_FLAGS:
830		AUDIT_PIPE_LOCK(ap);
831		maskp = (au_mask_t *)data;
832		ap->ap_preselect_flags = *maskp;
833		AUDIT_PIPE_UNLOCK(ap);
834		error = 0;
835		break;
836
837	case AUDITPIPE_GET_PRESELECT_NAFLAGS:
838		AUDIT_PIPE_LOCK(ap);
839		maskp = (au_mask_t *)data;
840		*maskp = ap->ap_preselect_naflags;
841		AUDIT_PIPE_UNLOCK(ap);
842		error = 0;
843		break;
844
845	case AUDITPIPE_SET_PRESELECT_NAFLAGS:
846		AUDIT_PIPE_LOCK(ap);
847		maskp = (au_mask_t *)data;
848		ap->ap_preselect_naflags = *maskp;
849		AUDIT_PIPE_UNLOCK(ap);
850		error = 0;
851		break;
852
853	case AUDITPIPE_GET_PRESELECT_AUID:
854		aip = (struct auditpipe_ioctl_preselect *)data;
855		error = audit_pipe_preselect_get(ap, aip->aip_auid,
856		    &aip->aip_mask);
857		break;
858
859	case AUDITPIPE_SET_PRESELECT_AUID:
860		aip = (struct auditpipe_ioctl_preselect *)data;
861		audit_pipe_preselect_set(ap, aip->aip_auid, aip->aip_mask);
862		error = 0;
863		break;
864
865	case AUDITPIPE_DELETE_PRESELECT_AUID:
866		auid = *(au_id_t *)data;
867		error = audit_pipe_preselect_delete(ap, auid);
868		break;
869
870	case AUDITPIPE_FLUSH_PRESELECT_AUID:
871		audit_pipe_preselect_flush(ap);
872		error = 0;
873		break;
874
875	case AUDITPIPE_GET_PRESELECT_MODE:
876		AUDIT_PIPE_LOCK(ap);
877		*(int *)data = ap->ap_preselect_mode;
878		AUDIT_PIPE_UNLOCK(ap);
879		error = 0;
880		break;
881
882	case AUDITPIPE_SET_PRESELECT_MODE:
883		mode = *(int *)data;
884		switch (mode) {
885		case AUDITPIPE_PRESELECT_MODE_TRAIL:
886		case AUDITPIPE_PRESELECT_MODE_LOCAL:
887			AUDIT_PIPE_LOCK(ap);
888			ap->ap_preselect_mode = mode;
889			AUDIT_PIPE_UNLOCK(ap);
890			error = 0;
891			break;
892
893		default:
894			error = EINVAL;
895		}
896		break;
897
898	case AUDITPIPE_FLUSH:
899		if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
900			return (EINTR);
901		AUDIT_PIPE_LOCK(ap);
902		audit_pipe_flush(ap);
903		AUDIT_PIPE_UNLOCK(ap);
904		AUDIT_PIPE_SX_XUNLOCK(ap);
905		error = 0;
906		break;
907
908	case AUDITPIPE_GET_MAXAUDITDATA:
909		*(u_int *)data = MAXAUDITDATA;
910		error = 0;
911		break;
912
913	case AUDITPIPE_GET_INSERTS:
914		*(u_int *)data = ap->ap_inserts;
915		error = 0;
916		break;
917
918	case AUDITPIPE_GET_READS:
919		*(u_int *)data = ap->ap_reads;
920		error = 0;
921		break;
922
923	case AUDITPIPE_GET_DROPS:
924		*(u_int *)data = ap->ap_drops;
925		error = 0;
926		break;
927
928	case AUDITPIPE_GET_TRUNCATES:
929		*(u_int *)data = 0;
930		error = 0;
931		break;
932
933	default:
934		error = ENOTTY;
935	}
936	return (error);
937}
938
/*
 * Audit pipe read.  Read one or more partial or complete records to user
 * memory.
 *
 * Blocks until at least one record is queued, unless the pipe is in
 * non-blocking mode, in which case EAGAIN is returned when the queue is
 * empty.  Returns EINTR if acquiring the sx lock is interrupted, the error
 * from cv_wait_sig() if the sleep is interrupted, the error from uiomove()
 * if a copy fails, and 0 otherwise.  A record that is only partly consumed
 * stays at the head of the queue, with ap_qoffset recording how much of it
 * has been read.
 */
static int
audit_pipe_read(struct cdev *dev, struct uio *uio, int flag)
{
	struct audit_pipe_entry *ape;
	struct audit_pipe *ap;
	u_int toread;
	int error;

	ap = dev->si_drv1;
	KASSERT(ap != NULL, ("audit_pipe_read: ap == NULL"));

	/*
	 * We hold an sx(9) lock over read and flush because we rely on the
	 * stability of a record in the queue during uiomove(9).
	 */
	if (AUDIT_PIPE_SX_XLOCK_SIG(ap) != 0)
		return (EINTR);
	AUDIT_PIPE_LOCK(ap);
	/*
	 * Wait for data.  The mutex is released by cv_wait_sig() while
	 * sleeping, but the sx lock remains held throughout.
	 */
	while (TAILQ_EMPTY(&ap->ap_queue)) {
		if (ap->ap_flags & AUDIT_PIPE_NBIO) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (EAGAIN);
		}
		error = cv_wait_sig(&ap->ap_cv, AUDIT_PIPE_MTX(ap));
		if (error) {
			AUDIT_PIPE_UNLOCK(ap);
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}
	}

	/*
	 * Copy as many remaining bytes from the current record to userspace
	 * as we can.  Keep processing records until we run out of records in
	 * the queue, or until the user buffer runs out of space.
	 *
	 * Note: we rely on the SX lock to maintain ape's stability here.
	 * ap_reads is incremented once per read call that finds data, not
	 * once per record copied.
	 */
	ap->ap_reads++;
	while ((ape = TAILQ_FIRST(&ap->ap_queue)) != NULL &&
	    uio->uio_resid > 0) {
		AUDIT_PIPE_LOCK_ASSERT(ap);

		KASSERT(ape->ape_record_len > ap->ap_qoffset,
		    ("audit_pipe_read: record_len > qoffset (1)"));
		toread = MIN(ape->ape_record_len - ap->ap_qoffset,
		    uio->uio_resid);
		/*
		 * Drop the mutex across the copy, as uiomove() may sleep.
		 * New records may be appended to the tail meanwhile, but the
		 * head record and ap_qoffset cannot change because we still
		 * hold the sx lock.
		 */
		AUDIT_PIPE_UNLOCK(ap);
		error = uiomove((char *)ape->ape_record + ap->ap_qoffset,
		    toread, uio);
		if (error) {
			/* The mutex is not held here; only release the sx. */
			AUDIT_PIPE_SX_XUNLOCK(ap);
			return (error);
		}

		/*
		 * If the copy succeeded, update book-keeping, and if no
		 * bytes remain in the current record, free it.
		 */
		AUDIT_PIPE_LOCK(ap);
		KASSERT(TAILQ_FIRST(&ap->ap_queue) == ape,
		    ("audit_pipe_read: queue out of sync after uiomove"));
		ap->ap_qoffset += toread;
		KASSERT(ape->ape_record_len >= ap->ap_qoffset,
		    ("audit_pipe_read: record_len >= qoffset (2)"));
		if (ap->ap_qoffset == ape->ape_record_len) {
			TAILQ_REMOVE(&ap->ap_queue, ape, ape_queue);
			ap->ap_qbyteslen -= ape->ape_record_len;
			audit_pipe_entry_free(ape);
			ap->ap_qlen--;
			ap->ap_qoffset = 0;
		}
	}
	AUDIT_PIPE_UNLOCK(ap);
	AUDIT_PIPE_SX_XUNLOCK(ap);
	return (0);
}
1021
1022/*
1023 * Audit pipe poll.
1024 */
1025static int
1026audit_pipe_poll(struct cdev *dev, int events, struct thread *td)
1027{
1028	struct audit_pipe *ap;
1029	int revents;
1030
1031	revents = 0;
1032	ap = dev->si_drv1;
1033	KASSERT(ap != NULL, ("audit_pipe_poll: ap == NULL"));
1034
1035	if (events & (POLLIN | POLLRDNORM)) {
1036		AUDIT_PIPE_LOCK(ap);
1037		if (TAILQ_FIRST(&ap->ap_queue) != NULL)
1038			revents |= events & (POLLIN | POLLRDNORM);
1039		else
1040			selrecord(td, &ap->ap_selinfo);
1041		AUDIT_PIPE_UNLOCK(ap);
1042	}
1043	return (revents);
1044}
1045
1046/*
1047 * Audit pipe kqfilter.
1048 */
1049static int
1050audit_pipe_kqfilter(struct cdev *dev, struct knote *kn)
1051{
1052	struct audit_pipe *ap;
1053
1054	ap = dev->si_drv1;
1055	KASSERT(ap != NULL, ("audit_pipe_kqfilter: ap == NULL"));
1056
1057	if (kn->kn_filter != EVFILT_READ)
1058		return (EINVAL);
1059
1060	kn->kn_fop = &audit_pipe_read_filterops;
1061	kn->kn_hook = ap;
1062
1063	AUDIT_PIPE_LOCK(ap);
1064	knlist_add(&ap->ap_selinfo.si_note, kn, 1);
1065	AUDIT_PIPE_UNLOCK(ap);
1066	return (0);
1067}
1068
1069/*
1070 * Return true if there are records available for reading on the pipe.
1071 */
1072static int
1073audit_pipe_kqread(struct knote *kn, long hint)
1074{
1075	struct audit_pipe_entry *ape;
1076	struct audit_pipe *ap;
1077
1078	ap = (struct audit_pipe *)kn->kn_hook;
1079	KASSERT(ap != NULL, ("audit_pipe_kqread: ap == NULL"));
1080
1081	AUDIT_PIPE_LOCK_ASSERT(ap);
1082
1083	if (ap->ap_qlen != 0) {
1084		ape = TAILQ_FIRST(&ap->ap_queue);
1085		KASSERT(ape != NULL, ("audit_pipe_kqread: ape == NULL"));
1086
1087		kn->kn_data = ap->ap_qbyteslen - ap->ap_qoffset;
1088		return (1);
1089	} else {
1090		kn->kn_data = 0;
1091		return (0);
1092	}
1093}
1094
1095/*
1096 * Detach kqueue state from audit pipe.
1097 */
1098static void
1099audit_pipe_kqdetach(struct knote *kn)
1100{
1101	struct audit_pipe *ap;
1102
1103	ap = (struct audit_pipe *)kn->kn_hook;
1104	KASSERT(ap != NULL, ("audit_pipe_kqdetach: ap == NULL"));
1105
1106	AUDIT_PIPE_LOCK(ap);
1107	knlist_remove(&ap->ap_selinfo.si_note, kn, 1);
1108	AUDIT_PIPE_UNLOCK(ap);
1109}
1110
/*
 * Initialize the audit pipe system: set up the global pipe list and its
 * lock, prepare device cloning state, and register audit_pipe_clone() as a
 * dev_clone event handler.  Panics if the handler cannot be registered.
 * The argument is unused.
 */
static void
audit_pipe_init(void *unused)
{

	TAILQ_INIT(&audit_pipe_list);
	AUDIT_PIPE_LIST_LOCK_INIT();

	clone_setup(&audit_pipe_clones);
	audit_pipe_eh_tag = EVENTHANDLER_REGISTER(dev_clone,
	    audit_pipe_clone, 0, 1000);
	if (audit_pipe_eh_tag == NULL)
		panic("audit_pipe_init: EVENTHANDLER_REGISTER");
}

/* Arrange for audit_pipe_init() to be run via the SYSINIT mechanism. */
SYSINIT(audit_pipe_init, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, audit_pipe_init,
    NULL);
1130