kern_alq.c revision 205959
1/*-
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * Copyright (c) 2008-2009, Lawrence Stewart <lstewart@freebsd.org>
4 * Copyright (c) 2009-2010, The FreeBSD Foundation
5 * All rights reserved.
6 *
7 * Portions of this software were developed at the Centre for Advanced
8 * Internet Architectures, Swinburne University of Technology, Melbourne,
9 * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice unmodified, this list of conditions, and the following
16 *    disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 205959 2010-03-31 03:58:57Z lstewart $");
35
36#include "opt_mac.h"
37
38#include <sys/param.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/kthread.h>
42#include <sys/lock.h>
43#include <sys/mount.h>
44#include <sys/mutex.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/vnode.h>
48#include <sys/alq.h>
49#include <sys/malloc.h>
50#include <sys/unistd.h>
51#include <sys/fcntl.h>
52#include <sys/eventhandler.h>
53
54#include <security/mac/mac_framework.h>
55
56/* Async. Logging Queue */
57struct alq {
58	int	aq_entmax;		/* Max entries */
59	int	aq_entlen;		/* Entry length */
60	char	*aq_entbuf;		/* Buffer for stored entries */
61	int	aq_flags;		/* Queue flags */
62	struct mtx	aq_mtx;		/* Queue lock */
63	struct vnode	*aq_vp;		/* Open vnode handle */
64	struct ucred	*aq_cred;	/* Credentials of the opening thread */
65	struct ale	*aq_first;	/* First ent */
66	struct ale	*aq_entfree;	/* First free ent */
67	struct ale	*aq_entvalid;	/* First ent valid for writing */
68	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
69	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
70};
71
72#define	AQ_WANTED	0x0001		/* Wakeup sleeper when io is done */
73#define	AQ_ACTIVE	0x0002		/* on the active list */
74#define	AQ_FLUSHING	0x0004		/* doing IO */
75#define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */
76
77#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
78#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
79
80static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
81
82/*
83 * The ald_mtx protects the ald_queues list and the ald_active list.
84 */
85static struct mtx ald_mtx;
86static LIST_HEAD(, alq) ald_queues;
87static LIST_HEAD(, alq) ald_active;
88static int ald_shutingdown = 0;
89struct thread *ald_thread;
90static struct proc *ald_proc;
91
92#define	ALD_LOCK()	mtx_lock(&ald_mtx)
93#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
94
95/* Daemon functions */
96static int ald_add(struct alq *);
97static int ald_rem(struct alq *);
98static void ald_startup(void *);
99static void ald_daemon(void);
100static void ald_shutdown(void *, int);
101static void ald_activate(struct alq *);
102static void ald_deactivate(struct alq *);
103
104/* Internal queue functions */
105static void alq_shutdown(struct alq *);
106static int alq_doio(struct alq *);
107
108
109/*
110 * Add a new queue to the global list.  Fail if we're shutting down.
111 */
112static int
113ald_add(struct alq *alq)
114{
115	int error;
116
117	error = 0;
118
119	ALD_LOCK();
120	if (ald_shutingdown) {
121		error = EBUSY;
122		goto done;
123	}
124	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
125done:
126	ALD_UNLOCK();
127	return (error);
128}
129
130/*
131 * Remove a queue from the global list unless we're shutting down.  If so,
132 * the ald will take care of cleaning up it's resources.
133 */
134static int
135ald_rem(struct alq *alq)
136{
137	int error;
138
139	error = 0;
140
141	ALD_LOCK();
142	if (ald_shutingdown) {
143		error = EBUSY;
144		goto done;
145	}
146	LIST_REMOVE(alq, aq_link);
147done:
148	ALD_UNLOCK();
149	return (error);
150}
151
152/*
153 * Put a queue on the active list.  This will schedule it for writing.
154 */
155static void
156ald_activate(struct alq *alq)
157{
158	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
159	wakeup(&ald_active);
160}
161
162static void
163ald_deactivate(struct alq *alq)
164{
165	LIST_REMOVE(alq, aq_act);
166	alq->aq_flags &= ~AQ_ACTIVE;
167}
168
169static void
170ald_startup(void *unused)
171{
172	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
173	LIST_INIT(&ald_queues);
174	LIST_INIT(&ald_active);
175}
176
177static void
178ald_daemon(void)
179{
180	int needwakeup;
181	struct alq *alq;
182
183	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
184
185	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
186	    SHUTDOWN_PRI_FIRST);
187
188	ALD_LOCK();
189
190	for (;;) {
191		while ((alq = LIST_FIRST(&ald_active)) == NULL &&
192		    !ald_shutingdown)
193			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
194
195		/* Don't shutdown until all active ALQs are flushed. */
196		if (ald_shutingdown && alq == NULL) {
197			ALD_UNLOCK();
198			break;
199		}
200
201		ALQ_LOCK(alq);
202		ald_deactivate(alq);
203		ALD_UNLOCK();
204		needwakeup = alq_doio(alq);
205		ALQ_UNLOCK(alq);
206		if (needwakeup)
207			wakeup(alq);
208		ALD_LOCK();
209	}
210
211	kproc_exit(0);
212}
213
214static void
215ald_shutdown(void *arg, int howto)
216{
217	struct alq *alq;
218
219	ALD_LOCK();
220
221	/* Ensure no new queues can be created. */
222	ald_shutingdown = 1;
223
224	/* Shutdown all ALQs prior to terminating the ald_daemon. */
225	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
226		LIST_REMOVE(alq, aq_link);
227		ALD_UNLOCK();
228		alq_shutdown(alq);
229		ALD_LOCK();
230	}
231
232	/* At this point, all ALQs are flushed and shutdown. */
233
234	/*
235	 * Wake ald_daemon so that it exits. It won't be able to do
236	 * anything until we msleep because we hold the ald_mtx.
237	 */
238	wakeup(&ald_active);
239
240	/* Wait for ald_daemon to exit. */
241	msleep(ald_proc, &ald_mtx, PWAIT, "aldslp", 0);
242
243	ALD_UNLOCK();
244}
245
246static void
247alq_shutdown(struct alq *alq)
248{
249	ALQ_LOCK(alq);
250
251	/* Stop any new writers. */
252	alq->aq_flags |= AQ_SHUTDOWN;
253
254	/* Drain IO */
255	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
256		alq->aq_flags |= AQ_WANTED;
257		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
258	}
259	ALQ_UNLOCK(alq);
260
261	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
262	    curthread);
263	crfree(alq->aq_cred);
264}
265
266/*
267 * Flush all pending data to disk.  This operation will block.
268 */
269static int
270alq_doio(struct alq *alq)
271{
272	struct thread *td;
273	struct mount *mp;
274	struct vnode *vp;
275	struct uio auio;
276	struct iovec aiov[2];
277	struct ale *ale;
278	struct ale *alstart;
279	int totlen;
280	int iov;
281	int vfslocked;
282
283	vp = alq->aq_vp;
284	td = curthread;
285	totlen = 0;
286	iov = 0;
287
288	alstart = ale = alq->aq_entvalid;
289	alq->aq_entvalid = NULL;
290
291	bzero(&aiov, sizeof(aiov));
292	bzero(&auio, sizeof(auio));
293
294	do {
295		if (aiov[iov].iov_base == NULL)
296			aiov[iov].iov_base = ale->ae_data;
297		aiov[iov].iov_len += alq->aq_entlen;
298		totlen += alq->aq_entlen;
299		/* Check to see if we're wrapping the buffer */
300		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
301			iov++;
302		ale->ae_flags &= ~AE_VALID;
303		ale = ale->ae_next;
304	} while (ale->ae_flags & AE_VALID);
305
306	alq->aq_flags |= AQ_FLUSHING;
307	ALQ_UNLOCK(alq);
308
309	if (iov == 2 || aiov[iov].iov_base == NULL)
310		iov--;
311
312	auio.uio_iov = &aiov[0];
313	auio.uio_offset = 0;
314	auio.uio_segflg = UIO_SYSSPACE;
315	auio.uio_rw = UIO_WRITE;
316	auio.uio_iovcnt = iov + 1;
317	auio.uio_resid = totlen;
318	auio.uio_td = td;
319
320	/*
321	 * Do all of the junk required to write now.
322	 */
323	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
324	vn_start_write(vp, &mp, V_WAIT);
325	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
326	/*
327	 * XXX: VOP_WRITE error checks are ignored.
328	 */
329#ifdef MAC
330	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
331#endif
332		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
333	VOP_UNLOCK(vp, 0);
334	vn_finished_write(mp);
335	VFS_UNLOCK_GIANT(vfslocked);
336
337	ALQ_LOCK(alq);
338	alq->aq_flags &= ~AQ_FLUSHING;
339
340	if (alq->aq_entfree == NULL)
341		alq->aq_entfree = alstart;
342
343	if (alq->aq_flags & AQ_WANTED) {
344		alq->aq_flags &= ~AQ_WANTED;
345		return (1);
346	}
347
348	return(0);
349}
350
351static struct kproc_desc ald_kp = {
352        "ALQ Daemon",
353        ald_daemon,
354        &ald_proc
355};
356
357SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
358SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);
359
360
361/* User visible queue functions */
362
363/*
364 * Create the queue data structure, allocate the buffer, and open the file.
365 */
366int
367alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
368    int size, int count)
369{
370	struct thread *td;
371	struct nameidata nd;
372	struct ale *ale;
373	struct ale *alp;
374	struct alq *alq;
375	char *bufp;
376	int flags;
377	int error;
378	int i, vfslocked;
379
380	*alqp = NULL;
381	td = curthread;
382
383	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
384	flags = FWRITE | O_NOFOLLOW | O_CREAT;
385
386	error = vn_open_cred(&nd, &flags, cmode, 0, cred, NULL);
387	if (error)
388		return (error);
389
390	vfslocked = NDHASGIANT(&nd);
391	NDFREE(&nd, NDF_ONLY_PNBUF);
392	/* We just unlock so we hold a reference */
393	VOP_UNLOCK(nd.ni_vp, 0);
394	VFS_UNLOCK_GIANT(vfslocked);
395
396	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
397	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
398	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
399	alq->aq_vp = nd.ni_vp;
400	alq->aq_cred = crhold(cred);
401	alq->aq_entmax = count;
402	alq->aq_entlen = size;
403	alq->aq_entfree = alq->aq_first;
404
405	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
406
407	bufp = alq->aq_entbuf;
408	ale = alq->aq_first;
409	alp = NULL;
410
411	/* Match up entries with buffers */
412	for (i = 0; i < count; i++) {
413		if (alp)
414			alp->ae_next = ale;
415		ale->ae_data = bufp;
416		alp = ale;
417		ale++;
418		bufp += size;
419	}
420
421	alp->ae_next = alq->aq_first;
422
423	if ((error = ald_add(alq)) != 0)
424		return (error);
425	*alqp = alq;
426
427	return (0);
428}
429
430/*
431 * Copy a new entry into the queue.  If the operation would block either
432 * wait or return an error depending on the value of waitok.
433 */
434int
435alq_write(struct alq *alq, void *data, int waitok)
436{
437	struct ale *ale;
438
439	if ((ale = alq_get(alq, waitok)) == NULL)
440		return (EWOULDBLOCK);
441
442	bcopy(data, ale->ae_data, alq->aq_entlen);
443	alq_post(alq, ale);
444
445	return (0);
446}
447
448struct ale *
449alq_get(struct alq *alq, int waitok)
450{
451	struct ale *ale;
452	struct ale *aln;
453
454	ale = NULL;
455
456	ALQ_LOCK(alq);
457
458	/* Loop until we get an entry or we're shutting down */
459	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
460	    (ale = alq->aq_entfree) == NULL &&
461	    (waitok & ALQ_WAITOK)) {
462		alq->aq_flags |= AQ_WANTED;
463		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
464	}
465
466	if (ale != NULL) {
467		aln = ale->ae_next;
468		if ((aln->ae_flags & AE_VALID) == 0)
469			alq->aq_entfree = aln;
470		else
471			alq->aq_entfree = NULL;
472	} else
473		ALQ_UNLOCK(alq);
474
475
476	return (ale);
477}
478
479void
480alq_post(struct alq *alq, struct ale *ale)
481{
482	int activate;
483
484	ale->ae_flags |= AE_VALID;
485
486	if (alq->aq_entvalid == NULL)
487		alq->aq_entvalid = ale;
488
489	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
490		alq->aq_flags |= AQ_ACTIVE;
491		activate = 1;
492	} else
493		activate = 0;
494
495	ALQ_UNLOCK(alq);
496	if (activate) {
497		ALD_LOCK();
498		ald_activate(alq);
499		ALD_UNLOCK();
500	}
501}
502
503void
504alq_flush(struct alq *alq)
505{
506	int needwakeup = 0;
507
508	ALD_LOCK();
509	ALQ_LOCK(alq);
510	if (alq->aq_flags & AQ_ACTIVE) {
511		ald_deactivate(alq);
512		ALD_UNLOCK();
513		needwakeup = alq_doio(alq);
514	} else
515		ALD_UNLOCK();
516	ALQ_UNLOCK(alq);
517
518	if (needwakeup)
519		wakeup(alq);
520}
521
522/*
523 * Flush remaining data, close the file and free all resources.
524 */
525void
526alq_close(struct alq *alq)
527{
528	/*
529	 * If we're already shuting down someone else will flush and close
530	 * the vnode.
531	 */
532	if (ald_rem(alq) != 0)
533		return;
534
535	/*
536	 * Drain all pending IO.
537	 */
538	alq_shutdown(alq);
539
540	mtx_destroy(&alq->aq_mtx);
541	free(alq->aq_first, M_ALD);
542	free(alq->aq_entbuf, M_ALD);
543	free(alq, M_ALD);
544}
545
546static int
547alq_load_handler(module_t mod, int what, void *arg)
548{
549	int ret;
550
551	ret = 0;
552
553	switch (what) {
554	case MOD_LOAD:
555	case MOD_SHUTDOWN:
556		break;
557
558	case MOD_QUIESCE:
559		ALD_LOCK();
560		/* Only allow unload if there are no open queues. */
561		if (LIST_FIRST(&ald_queues) == NULL) {
562			ald_shutingdown = 1;
563			ALD_UNLOCK();
564			ald_shutdown(NULL, 0);
565			mtx_destroy(&ald_mtx);
566		} else {
567			ALD_UNLOCK();
568			ret = EBUSY;
569		}
570		break;
571
572	case MOD_UNLOAD:
573		/* If MOD_QUIESCE failed we must fail here too. */
574		if (ald_shutingdown == 0)
575			ret = EBUSY;
576		break;
577
578	default:
579		ret = EINVAL;
580		break;
581	}
582
583	return (ret);
584}
585
586static moduledata_t alq_mod =
587{
588	"alq",
589	alq_load_handler,
590	NULL
591};
592
593DECLARE_MODULE(alq, alq_mod, SI_SUB_SMP, SI_ORDER_ANY);
594MODULE_VERSION(alq, 1);
595