kern_alq.c revision 157233
/*-
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 157233 2006-03-28 21:30:22Z jhb $");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/alq.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/fcntl.h>
#include <sys/eventhandler.h>

/* Async. Logging Queue */
struct alq {
	int	aq_entmax;		/* Max entries */
	int	aq_entlen;		/* Entry length */
	char	*aq_entbuf;		/* Buffer for stored entries */
	int	aq_flags;		/* Queue flags */
	struct mtx	aq_mtx;		/* Queue lock */
	struct vnode	*aq_vp;		/* Open vnode handle */
	struct ucred	*aq_cred;	/* Credentials of the opening thread */
	struct ale	*aq_first;	/* First ent */
	struct ale	*aq_entfree;	/* First free ent */
	struct ale	*aq_entvalid;	/* First ent valid for writing */
	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
};
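
/*
 * Note (editorial): the 'ale' entries allocated in alq_open() form a
 * circular singly-linked list overlaid on aq_entbuf; each entry's ae_data
 * points at its fixed-size slot in that buffer.  A run of valid entries
 * therefore occupies at most two contiguous byte ranges (two only when the
 * run wraps past the end of the buffer), which is why alq_doio() gathers
 * the data into at most two iovecs.
 */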

#define	AQ_WANTED	0x0001		/* Wake up sleeper when I/O is done */
#define	AQ_ACTIVE	0x0002		/* On the active list */
#define	AQ_FLUSHING	0x0004		/* Doing I/O */
#define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */

#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)

static MALLOC_DEFINE(M_ALD, "ALD", "ALD");

/*
 * The ald_mtx protects the ald_queues list and the ald_active list.
 */
static struct mtx ald_mtx;
static LIST_HEAD(, alq) ald_queues;
static LIST_HEAD(, alq) ald_active;
static int ald_shuttingdown = 0;
struct thread *ald_thread;
static struct proc *ald_proc;

#define	ALD_LOCK()	mtx_lock(&ald_mtx)
#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
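
/*
 * Note (editorial): where both locks are taken, the lock order is ald_mtx
 * before the per-queue spin lock; ald_daemon() and alq_flush() take them
 * in that order, and alq_post() drops the queue lock before acquiring
 * ald_mtx.
 */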

/* Daemon functions */
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Internal queue functions */
static void alq_shutdown(struct alq *);
static int alq_doio(struct alq *);

/*
 * Add a new queue to the global list.  Fail if we're shutting down.
 */
static int
ald_add(struct alq *alq)
{
	int error;

	error = 0;

	ALD_LOCK();
	if (ald_shuttingdown) {
		error = EBUSY;
		goto done;
	}
	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
done:
	ALD_UNLOCK();
	return (error);
}

/*
 * Remove a queue from the global list unless we're shutting down.  If so,
 * the ald will take care of cleaning up its resources.
 */
static int
ald_rem(struct alq *alq)
{
	int error;

	error = 0;

	ALD_LOCK();
	if (ald_shuttingdown) {
		error = EBUSY;
		goto done;
	}
	LIST_REMOVE(alq, aq_link);
done:
	ALD_UNLOCK();
	return (error);
}

/*
 * Put a queue on the active list.  This will schedule it for writing.
 */
static void
ald_activate(struct alq *alq)
{
	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
	wakeup(&ald_active);
}

static void
ald_deactivate(struct alq *alq)
{
	LIST_REMOVE(alq, aq_act);
	alq->aq_flags &= ~AQ_ACTIVE;
}

static void
ald_startup(void *unused)
{
	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
	LIST_INIT(&ald_queues);
	LIST_INIT(&ald_active);
}

static void
ald_daemon(void)
{
	int needwakeup;
	struct alq *alq;

	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);

	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
	    SHUTDOWN_PRI_FIRST);

	ALD_LOCK();

	for (;;) {
		while ((alq = LIST_FIRST(&ald_active)) == NULL)
			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);

		ALQ_LOCK(alq);
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
		ALQ_UNLOCK(alq);
		if (needwakeup)
			wakeup(alq);
		ALD_LOCK();
	}
}

static void
ald_shutdown(void *arg, int howto)
{
	struct alq *alq;

	ALD_LOCK();
	ald_shuttingdown = 1;

	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
		LIST_REMOVE(alq, aq_link);
		ALD_UNLOCK();
		alq_shutdown(alq);
		ALD_LOCK();
	}
	ALD_UNLOCK();
}

static void
alq_shutdown(struct alq *alq)
{
	ALQ_LOCK(alq);

	/* Stop any new writers. */
	alq->aq_flags |= AQ_SHUTDOWN;

	/* Drain I/O */
	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
		alq->aq_flags |= AQ_WANTED;
		ALQ_UNLOCK(alq);
		tsleep(alq, PWAIT, "aldclose", 0);
		ALQ_LOCK(alq);
	}
	ALQ_UNLOCK(alq);

	vn_close(alq->aq_vp, FWRITE, alq->aq_cred, curthread);
	crfree(alq->aq_cred);
}

/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
	struct thread *td;
	struct mount *mp;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov[2];
	struct ale *ale;
	struct ale *alstart;
	int totlen;
	int iov;
	int vfslocked;

	vp = alq->aq_vp;
	td = curthread;
	totlen = 0;
	iov = 0;

	alstart = ale = alq->aq_entvalid;
	alq->aq_entvalid = NULL;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	do {
		if (aiov[iov].iov_base == NULL)
			aiov[iov].iov_base = ale->ae_data;
		aiov[iov].iov_len += alq->aq_entlen;
		totlen += alq->aq_entlen;
		/* Check to see if we're wrapping the buffer */
		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
			iov++;
		ale->ae_flags &= ~AE_VALID;
		ale = ale->ae_next;
	} while (ale->ae_flags & AE_VALID);

	alq->aq_flags |= AQ_FLUSHING;
	ALQ_UNLOCK(alq);

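	/*
	 * Note (editorial): 'iov' now indexes the iovec the loop was last
	 * filling.  If the loop wrapped past the second iovec (iov == 2) or
	 * advanced onto an iovec it never started, step back so that
	 * uio_iovcnt below counts only the populated iovecs.
	 */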
	if (iov == 2 || aiov[iov].iov_base == NULL)
		iov--;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_iovcnt = iov + 1;
	auio.uio_resid = totlen;
	auio.uio_td = td;

	/*
	 * Do all of the VFS bookkeeping required to perform the write now.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	VOP_LEASE(vp, td, alq->aq_cred, LEASE_WRITE);
	/*
	 * XXX: VOP_WRITE error checks are ignored.
	 */
#ifdef MAC
	if (mac_check_vnode_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);

	ALQ_LOCK(alq);
	alq->aq_flags &= ~AQ_FLUSHING;

	if (alq->aq_entfree == NULL)
		alq->aq_entfree = alstart;

	if (alq->aq_flags & AQ_WANTED) {
		alq->aq_flags &= ~AQ_WANTED;
		return (1);
	}

	return (0);
}

static struct kproc_desc ald_kp = {
	"ALQ Daemon",
	ald_daemon,
	&ald_proc
};

SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp)
SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL)

/* User visible queue functions */

/*
 * Create the queue data structure, allocate the buffer, and open the file.
 */
int
alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
    int size, int count)
{
	struct thread *td;
	struct nameidata nd;
	struct ale *ale;
	struct ale *alp;
	struct alq *alq;
	char *bufp;
	int flags;
	int error;
	int i, vfslocked;

	*alqp = NULL;
	td = curthread;

	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
	flags = FWRITE | O_NOFOLLOW | O_CREAT;

	error = vn_open_cred(&nd, &flags, cmode, cred, -1);
	if (error)
		return (error);

	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* We just unlock here; we still hold a reference on the vnode. */
	VOP_UNLOCK(nd.ni_vp, 0, td);
	VFS_UNLOCK_GIANT(vfslocked);

	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_vp = nd.ni_vp;
	alq->aq_cred = crhold(cred);
	alq->aq_entmax = count;
	alq->aq_entlen = size;
	alq->aq_entfree = alq->aq_first;

	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

	bufp = alq->aq_entbuf;
	ale = alq->aq_first;
	alp = NULL;

	/* Match up entries with buffers */
	for (i = 0; i < count; i++) {
		if (alp)
			alp->ae_next = ale;
		ale->ae_data = bufp;
		alp = ale;
		ale++;
		bufp += size;
	}

	alp->ae_next = alq->aq_first;

	if ((error = ald_add(alq)) != 0)
		return (error);
	*alqp = alq;

	return (0);
}
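
/*
 * Example (editorial sketch, not from the original source): typical use of
 * the ALQ interface.  The path, record type 'struct myrec', and queue depth
 * below are hypothetical, and error handling is abbreviated.
 *
 *	struct alq *alq;
 *	struct myrec rec;
 *	int error;
 *
 *	error = alq_open(&alq, "/var/log/my.alq", curthread->td_ucred,
 *	    0600, sizeof(rec), 64);
 *	if (error == 0) {
 *		(void)alq_write(alq, &rec, ALQ_WAITOK);
 *		alq_flush(alq);
 *		alq_close(alq);
 *	}
 */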

/*
 * Copy a new entry into the queue.  If the queue is full, either wait
 * or return an error, depending on the value of waitok.
 */
int
alq_write(struct alq *alq, void *data, int waitok)
{
	struct ale *ale;

	if ((ale = alq_get(alq, waitok)) == NULL)
		return (EWOULDBLOCK);

	bcopy(data, ale->ae_data, alq->aq_entlen);
	alq_post(alq, ale);

	return (0);
}

struct ale *
alq_get(struct alq *alq, int waitok)
{
	struct ale *ale;
	struct ale *aln;

	ale = NULL;

	ALQ_LOCK(alq);

	/* Loop until we get an entry or we're shutting down */
	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
	    (ale = alq->aq_entfree) == NULL &&
	    (waitok & ALQ_WAITOK)) {
		alq->aq_flags |= AQ_WANTED;
		ALQ_UNLOCK(alq);
		tsleep(alq, PWAIT, "alqget", 0);
		ALQ_LOCK(alq);
	}

	if (ale != NULL) {
		aln = ale->ae_next;
		if ((aln->ae_flags & AE_VALID) == 0)
			alq->aq_entfree = aln;
		else
			alq->aq_entfree = NULL;
	} else
		ALQ_UNLOCK(alq);

	return (ale);
}
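
/*
 * Example (editorial sketch, not from the original source): the
 * alq_get()/alq_post() pair lets a caller build a record in place and skip
 * the bcopy() done by alq_write().  A successful alq_get() returns with the
 * queue's spin lock still held (alq_post() releases it), so the caller must
 * not sleep in between.  'rec' is a hypothetical record whose size matches
 * the entry size passed to alq_open().
 *
 *	struct ale *ale;
 *
 *	if ((ale = alq_get(alq, ALQ_NOWAIT)) != NULL) {
 *		bcopy(&rec, ale->ae_data, sizeof(rec));
 *		alq_post(alq, ale);
 *	}
 */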

void
alq_post(struct alq *alq, struct ale *ale)
{
	int activate;

	ale->ae_flags |= AE_VALID;

	if (alq->aq_entvalid == NULL)
		alq->aq_entvalid = ale;

	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
		alq->aq_flags |= AQ_ACTIVE;
		activate = 1;
	} else
		activate = 0;

	ALQ_UNLOCK(alq);
	if (activate) {
		ALD_LOCK();
		ald_activate(alq);
		ALD_UNLOCK();
	}
}

void
alq_flush(struct alq *alq)
{
	int needwakeup = 0;

	ALD_LOCK();
	ALQ_LOCK(alq);
	if (alq->aq_flags & AQ_ACTIVE) {
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
	} else
		ALD_UNLOCK();
	ALQ_UNLOCK(alq);

	if (needwakeup)
		wakeup(alq);
}

/*
 * Flush remaining data, close the file and free all resources.
 */
void
alq_close(struct alq *alq)
{
	/*
	 * If we're already shutting down, someone else will flush and
	 * close the vnode.
	 */
	if (ald_rem(alq) != 0)
		return;

	/*
	 * Drain all pending I/O.
	 */
	alq_shutdown(alq);

	mtx_destroy(&alq->aq_mtx);
	free(alq->aq_first, M_ALD);
	free(alq->aq_entbuf, M_ALD);
	free(alq, M_ALD);
}
519