/* kern_alq.c — FreeBSD revision 206028 */
1/*-
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
4 * Copyright (c) 2009-2010, The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * Portions of this software were developed at the Centre for Advanced
8 * Internet Architectures, Swinburne University of Technology, Melbourne,
9 * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice unmodified, this list of conditions, and the following
16 *    disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 206028 2010-04-01 01:27:10Z lstewart $");
35
36#include "opt_mac.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/kthread.h>
42#include <sys/lock.h>
43#include <sys/mount.h>
44#include <sys/mutex.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/vnode.h>
48#include <sys/alq.h>
49#include <sys/malloc.h>
50#include <sys/unistd.h>
51#include <sys/fcntl.h>
52#include <sys/eventhandler.h>
53
54#include <security/mac/mac_framework.h>
55
/*
 * Async. Logging Queue: a fixed ring of fixed-length entries that is
 * flushed to an open vnode by the ald daemon.
 */
struct alq {
	int	aq_entmax;		/* Max entries */
	int	aq_entlen;		/* Entry length */
	char	*aq_entbuf;		/* Buffer for stored entries */
	int	aq_flags;		/* Queue flags (AQ_* below) */
	struct mtx	aq_mtx;		/* Queue lock (spin mutex) */
	struct vnode	*aq_vp;		/* Open vnode handle */
	struct ucred	*aq_cred;	/* Credentials of the opening thread */
	struct ale	*aq_first;	/* First ent */
	struct ale	*aq_entfree;	/* First free ent */
	struct ale	*aq_entvalid;	/* First ent valid for writing */
	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
};
71
/* aq_flags bits. */
#define	AQ_WANTED	0x0001		/* Wakeup sleeper when io is done */
#define	AQ_ACTIVE	0x0002		/* on the active list */
#define	AQ_FLUSHING	0x0004		/* doing IO */
#define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */

/* Per-queue lock; a spin mutex, paired with msleep_spin() below. */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)

static MALLOC_DEFINE(M_ALD, "ALD", "ALD");

/*
 * The ald_mtx protects the ald_queues list and the ald_active list.
 */
static struct mtx ald_mtx;
static LIST_HEAD(, alq) ald_queues;	/* All open queues. */
static LIST_HEAD(, alq) ald_active;	/* Queues with data awaiting a flush. */
static int ald_shutingdown = 0;		/* Nonzero once shutdown has begun. */
struct thread *ald_thread;		/* The ald daemon's thread. */
static struct proc *ald_proc;		/* The ald daemon's process. */

#define	ALD_LOCK()	mtx_lock(&ald_mtx)
#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)

/* Daemon functions */
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Internal queue functions */
static void alq_shutdown(struct alq *);
static void alq_destroy(struct alq *);
static int alq_doio(struct alq *);
108
109
110/*
111 * Add a new queue to the global list.  Fail if we're shutting down.
112 */
113static int
114ald_add(struct alq *alq)
115{
116	int error;
117
118	error = 0;
119
120	ALD_LOCK();
121	if (ald_shutingdown) {
122		error = EBUSY;
123		goto done;
124	}
125	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
126done:
127	ALD_UNLOCK();
128	return (error);
129}
130
131/*
132 * Remove a queue from the global list unless we're shutting down.  If so,
133 * the ald will take care of cleaning up it's resources.
134 */
135static int
136ald_rem(struct alq *alq)
137{
138	int error;
139
140	error = 0;
141
142	ALD_LOCK();
143	if (ald_shutingdown) {
144		error = EBUSY;
145		goto done;
146	}
147	LIST_REMOVE(alq, aq_link);
148done:
149	ALD_UNLOCK();
150	return (error);
151}
152
/*
 * Put a queue on the active list.  This will schedule it for writing.
 * Called with ald_mtx held (see alq_post()).
 */
static void
ald_activate(struct alq *alq)
{
	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
	/* Rouse the ald daemon, which sleeps on &ald_active when idle. */
	wakeup(&ald_active);
}
162
/*
 * Take a queue off the active list and clear its ACTIVE flag.  Called
 * with both ald_mtx and the queue lock held (see ald_daemon() and
 * alq_flush()).
 */
static void
ald_deactivate(struct alq *alq)
{
	LIST_REMOVE(alq, aq_act);
	alq->aq_flags &= ~AQ_ACTIVE;
}
169
/*
 * Initialise the ald global lock and queue lists.  Run via SYSINIT at
 * SI_SUB_LOCK (see bottom of file), before the daemon itself starts.
 */
static void
ald_startup(void *unused)
{
	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
	LIST_INIT(&ald_queues);
	LIST_INIT(&ald_active);
}
177
/*
 * Main loop of the asynchronous logging daemon: sleep until a queue is
 * activated, then flush it with alq_doio().  Exits once a shutdown has
 * been requested and the active list has drained.
 */
static void
ald_daemon(void)
{
	int needwakeup;
	struct alq *alq;

	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);

	/* Flush everything out before the machine syncs and halts. */
	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
	    SHUTDOWN_PRI_FIRST);

	ALD_LOCK();

	for (;;) {
		/* Sleep until there is work or shutdown is requested. */
		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
		    !ald_shutingdown)
			mtx_sleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);

		/* Don't shutdown until all active ALQs are flushed. */
		if (ald_shutingdown && alq == NULL) {
			ALD_UNLOCK();
			break;
		}

		/* Lock order: ald_mtx first, then the queue's spin lock. */
		ALQ_LOCK(alq);
		ald_deactivate(alq);
		ALD_UNLOCK();
		/* alq_doio() drops/reacquires the queue lock around the IO. */
		needwakeup = alq_doio(alq);
		ALQ_UNLOCK(alq);
		/* Wake threads blocked in alq_get()/alq_shutdown(). */
		if (needwakeup)
			wakeup(alq);
		ALD_LOCK();
	}

	kproc_exit(0);
}
214
/*
 * Shutdown handler: flush and close every open queue, then wait for the
 * ald daemon to exit.  Registered on shutdown_pre_sync and also invoked
 * from the module handler on MOD_QUIESCE.
 */
static void
ald_shutdown(void *arg, int howto)
{
	struct alq *alq;

	ALD_LOCK();

	/* Ensure no new queues can be created. */
	ald_shutingdown = 1;

	/* Shutdown all ALQs prior to terminating the ald_daemon. */
	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
		LIST_REMOVE(alq, aq_link);
		/* Drop ald_mtx: alq_shutdown() may sleep draining IO. */
		ALD_UNLOCK();
		alq_shutdown(alq);
		ALD_LOCK();
	}

	/* At this point, all ALQs are flushed and shutdown. */

	/*
	 * Wake ald_daemon so that it exits. It won't be able to do
	 * anything until we mtx_sleep because we hold the ald_mtx.
	 */
	wakeup(&ald_active);

	/* Wait for ald_daemon to exit (paired with its kproc_exit()). */
	mtx_sleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);

	ALD_UNLOCK();
}
246
/*
 * Mark a queue as shutting down, wait for any in-flight flush to
 * complete, then close the backing vnode and drop the credential ref.
 */
static void
alq_shutdown(struct alq *alq)
{
	ALQ_LOCK(alq);

	/* Stop any new writers. */
	alq->aq_flags |= AQ_SHUTDOWN;

	/* Drain IO */
	while (alq->aq_flags & AQ_ACTIVE) {
		/* alq_doio() returns nonzero (-> wakeup) when WANTED set. */
		alq->aq_flags |= AQ_WANTED;
		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
	}
	ALQ_UNLOCK(alq);

	/*
	 * NOTE(review): unlike alq_doio()/alq_open(), this vnode operation
	 * is not bracketed by VFS_LOCK_GIANT/VFS_UNLOCK_GIANT -- confirm
	 * whether the filesystem may require Giant here.
	 */
	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
	    curthread);
	crfree(alq->aq_cred);
}
266
267void
268alq_destroy(struct alq *alq)
269{
270	/* Drain all pending IO. */
271	alq_shutdown(alq);
272
273	mtx_destroy(&alq->aq_mtx);
274	free(alq->aq_first, M_ALD);
275	free(alq->aq_entbuf, M_ALD);
276	free(alq, M_ALD);
277}
278
/*
 * Flush all pending data to disk.  This operation will block.
 *
 * Called with the queue lock held; the lock is dropped for the duration
 * of the vnode write and reacquired before returning.  Returns 1 if a
 * sleeper blocked on the queue should be woken, else 0.
 */
static int
alq_doio(struct alq *alq)
{
	struct thread *td;
	struct mount *mp;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov[2];	/* At most two segments: ring wraps once. */
	struct ale *ale;
	struct ale *alstart;
	int totlen;
	int iov;
	int vfslocked;

	vp = alq->aq_vp;
	td = curthread;
	totlen = 0;
	iov = 0;

	/* Claim the chain of valid entries; they become free again below. */
	alstart = ale = alq->aq_entvalid;
	alq->aq_entvalid = NULL;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	/* Coalesce contiguous valid entries into at most two iovecs. */
	do {
		if (aiov[iov].iov_base == NULL)
			aiov[iov].iov_base = ale->ae_data;
		aiov[iov].iov_len += alq->aq_entlen;
		totlen += alq->aq_entlen;
		/* Check to see if we're wrapping the buffer */
		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
			iov++;
		ale->ae_flags &= ~AE_VALID;
		ale = ale->ae_next;
	} while (ale->ae_flags & AE_VALID);

	alq->aq_flags |= AQ_FLUSHING;
	ALQ_UNLOCK(alq);

	/* iov overshoots by one when the final entry closed a segment. */
	if (iov == 2 || aiov[iov].iov_base == NULL)
		iov--;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_iovcnt = iov + 1;
	auio.uio_resid = totlen;
	auio.uio_td = td;

	/*
	 * Do all of the junk required to write now.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * XXX: VOP_WRITE error checks are ignored.
	 */
#ifdef MAC
	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);

	ALQ_LOCK(alq);
	alq->aq_flags &= ~AQ_FLUSHING;

	/* The flushed entries are available for writers again. */
	if (alq->aq_entfree == NULL)
		alq->aq_entfree = alstart;

	/* Tell the caller to wake anyone who slept waiting for the flush. */
	if (alq->aq_flags & AQ_WANTED) {
		alq->aq_flags &= ~AQ_WANTED;
		return (1);
	}

	return(0);
}
363
/* Description used by kproc_start() to create the ald daemon process. */
static struct kproc_desc ald_kp = {
        "ALQ Daemon",
        ald_daemon,
        &ald_proc
};

/* Start the daemon; ald_startup() initialises its lock and lists earlier
 * (SI_SUB_LOCK precedes SI_SUB_KTHREAD_IDLE). */
SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
372
373
374/* User visible queue functions */
375
/*
 * Create the queue data structure, allocate the buffer, and open the file.
 *
 * alqp:  on success, receives the new queue.
 * file:  path of the log file (opened O_CREAT, no symlink following).
 * cred:  credential for the open and all later writes; a reference is
 *        held for the queue's lifetime (released in alq_shutdown()).
 * cmode: creation mode for the file.
 * size:  length in bytes of each fixed-size entry.
 * count: number of entries in the ring.
 *
 * Returns 0 on success or an errno value.
 */
int
alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
    int size, int count)
{
	struct thread *td;
	struct nameidata nd;
	struct ale *ale;
	struct ale *alp;
	struct alq *alq;
	char *bufp;
	int flags;
	int error;
	int i, vfslocked;

	*alqp = NULL;
	td = curthread;

	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
	flags = FWRITE | O_NOFOLLOW | O_CREAT;

	error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL);
	if (error)
		return (error);

	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* We just unlock so we hold a reference */
	VOP_UNLOCK(nd.ni_vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);

	/* One contiguous data buffer plus one ale header per entry. */
	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_vp = nd.ni_vp;
	alq->aq_cred = crhold(cred);
	alq->aq_entmax = count;
	alq->aq_entlen = size;
	alq->aq_entfree = alq->aq_first;

	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

	bufp = alq->aq_entbuf;
	ale = alq->aq_first;
	alp = NULL;

	/* Match up entries with buffers */
	for (i = 0; i < count; i++) {
		if (alp)
			alp->ae_next = ale;
		ale->ae_data = bufp;
		alp = ale;
		ale++;
		bufp += size;
	}

	/* Close the ring: the last entry points back at the first. */
	alp->ae_next = alq->aq_first;

	if ((error = ald_add(alq)) != 0) {
		/* Registration refused (shutdown in progress); undo all. */
		alq_destroy(alq);
		return (error);
	}

	*alqp = alq;

	return (0);
}
445
446/*
447 * Copy a new entry into the queue.  If the operation would block either
448 * wait or return an error depending on the value of waitok.
449 */
450int
451alq_write(struct alq *alq, void *data, int waitok)
452{
453	struct ale *ale;
454
455	if ((ale = alq_get(alq, waitok)) == NULL)
456		return (EWOULDBLOCK);
457
458	bcopy(data, ale->ae_data, alq->aq_entlen);
459	alq_post(alq, ale);
460
461	return (0);
462}
463
/*
 * Reserve the next free entry in the queue for the caller to fill in.
 *
 * Returns NULL when the queue is shutting down, or when no entry is free
 * and waitok lacks ALQ_WAITOK.  On success the queue spin lock is
 * returned HELD; it is released later by alq_post().  On failure the
 * lock is dropped before returning.
 */
struct ale *
alq_get(struct alq *alq, int waitok)
{
	struct ale *ale;
	struct ale *aln;

	ale = NULL;

	ALQ_LOCK(alq);

	/* Loop until we get an entry or we're shutting down */
	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
	    (ale = alq->aq_entfree) == NULL &&
	    (waitok & ALQ_WAITOK)) {
		alq->aq_flags |= AQ_WANTED;
		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
	}

	if (ale != NULL) {
		/* Advance the free pointer; NULL means the ring is full. */
		aln = ale->ae_next;
		if ((aln->ae_flags & AE_VALID) == 0)
			alq->aq_entfree = aln;
		else
			alq->aq_entfree = NULL;
	} else
		ALQ_UNLOCK(alq);


	return (ale);
}
494
/*
 * Mark an entry obtained from alq_get() as valid and, if the queue was
 * idle, put the queue on the active list so the ald daemon flushes it.
 * Consumes (releases) the queue lock that alq_get() returned held.
 */
void
alq_post(struct alq *alq, struct ale *ale)
{
	int activate;

	ale->ae_flags |= AE_VALID;

	/* Remember the first entry pending a flush. */
	if (alq->aq_entvalid == NULL)
		alq->aq_entvalid = ale;

	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
		alq->aq_flags |= AQ_ACTIVE;
		activate = 1;
	} else
		activate = 0;

	/*
	 * Drop the queue lock before taking ald_mtx, matching the
	 * ald_mtx -> aq_mtx order used by ald_daemon() and alq_flush().
	 */
	ALQ_UNLOCK(alq);
	if (activate) {
		ALD_LOCK();
		ald_activate(alq);
		ALD_UNLOCK();
	}
}
518
/*
 * Synchronously flush any pending entries in the caller's context
 * instead of waiting for the ald daemon.  No-op if the queue is not
 * currently active.
 */
void
alq_flush(struct alq *alq)
{
	int needwakeup = 0;

	/* Lock order: ald_mtx before the queue spin lock. */
	ALD_LOCK();
	ALQ_LOCK(alq);
	if (alq->aq_flags & AQ_ACTIVE) {
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
	} else
		ALD_UNLOCK();
	ALQ_UNLOCK(alq);

	/* Wake threads blocked in alq_get()/alq_shutdown(). */
	if (needwakeup)
		wakeup(alq);
}
537
/*
 * Flush remaining data, close the file and free all resources.
 */
void
alq_close(struct alq *alq)
{
	/*
	 * If the ald is already shutting down it owns the cleanup of this
	 * queue; otherwise detach it from the global list and tear it
	 * down here.
	 */
	if (ald_rem(alq) != 0)
		return;

	alq_destroy(alq);
}
548
549static int
550alq_load_handler(module_t mod, int what, void *arg)
551{
552	int ret;
553
554	ret = 0;
555
556	switch (what) {
557	case MOD_LOAD:
558	case MOD_SHUTDOWN:
559		break;
560
561	case MOD_QUIESCE:
562		ALD_LOCK();
563		/* Only allow unload if there are no open queues. */
564		if (LIST_FIRST(&ald_queues) == NULL) {
565			ald_shutingdown = 1;
566			ALD_UNLOCK();
567			ald_shutdown(NULL, 0);
568			mtx_destroy(&ald_mtx);
569		} else {
570			ALD_UNLOCK();
571			ret = EBUSY;
572		}
573		break;
574
575	case MOD_UNLOAD:
576		/* If MOD_QUIESCE failed we must fail here too. */
577		if (ald_shutingdown == 0)
578			ret = EBUSY;
579		break;
580
581	default:
582		ret = EINVAL;
583		break;
584	}
585
586	return (ret);
587}
588
/* Module glue so alq can be loaded and unloaded as a kernel module. */
static moduledata_t alq_mod =
{
	"alq",
	alq_load_handler,
	NULL
};

DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_VERSION(alq, 1);
598