kern_alq.c revision 193511
/*-
 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 193511 2009-06-05 14:55:22Z rwatson $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/alq.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/fcntl.h>
#include <sys/eventhandler.h>

#include <security/mac/mac_framework.h>

/* Async. Logging Queue */
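/*
 * The fixed-size entries (struct ale) form a singly-linked ring over one
 * contiguous buffer: aq_entfree points at the next entry available to
 * writers, aq_entvalid at the first entry waiting to be flushed.
 */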
struct alq {
	int	aq_entmax;		/* Max entries */
	int	aq_entlen;		/* Entry length */
	char	*aq_entbuf;		/* Buffer for stored entries */
	int	aq_flags;		/* Queue flags */
	struct mtx	aq_mtx;		/* Queue lock */
	struct vnode	*aq_vp;		/* Open vnode handle */
	struct ucred	*aq_cred;	/* Credentials of the opening thread */
	struct ale	*aq_first;	/* First ent */
	struct ale	*aq_entfree;	/* First free ent */
	struct ale	*aq_entvalid;	/* First ent valid for writing */
	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
};

#define	AQ_WANTED	0x0001		/* Wake up sleeper when I/O is done */
#define	AQ_ACTIVE	0x0002		/* On the active list */
#define	AQ_FLUSHING	0x0004		/* Doing I/O */
#define	AQ_SHUTDOWN	0x0008		/* Queue no longer valid */

#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)

static MALLOC_DEFINE(M_ALD, "ALD", "ALD");

/*
 * The ald_mtx protects the ald_queues list and the ald_active list.
 */
static struct mtx ald_mtx;
static LIST_HEAD(, alq) ald_queues;
static LIST_HEAD(, alq) ald_active;
static int ald_shutingdown = 0;
struct thread *ald_thread;
static struct proc *ald_proc;

#define	ALD_LOCK()	mtx_lock(&ald_mtx)
#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)

/* Daemon functions */
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Internal queue functions */
static void alq_shutdown(struct alq *);
static int alq_doio(struct alq *);

/*
 * Add a new queue to the global list.  Fail if we're shutting down.
 */
static int
ald_add(struct alq *alq)
{
	int error;

	error = 0;

	ALD_LOCK();
	if (ald_shutingdown) {
		error = EBUSY;
		goto done;
	}
	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
done:
	ALD_UNLOCK();
	return (error);
}

/*
 * Remove a queue from the global list unless we're shutting down.  If so,
 * the ald will take care of cleaning up its resources.
 */
static int
ald_rem(struct alq *alq)
{
	int error;

	error = 0;

	ALD_LOCK();
	if (ald_shutingdown) {
		error = EBUSY;
		goto done;
	}
	LIST_REMOVE(alq, aq_link);
done:
	ALD_UNLOCK();
	return (error);
}

/*
 * Put a queue on the active list.  This will schedule it for writing.
 */
static void
ald_activate(struct alq *alq)
{
	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
	wakeup(&ald_active);
}

static void
ald_deactivate(struct alq *alq)
{
	LIST_REMOVE(alq, aq_act);
	alq->aq_flags &= ~AQ_ACTIVE;
}

static void
ald_startup(void *unused)
{
	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
	LIST_INIT(&ald_queues);
	LIST_INIT(&ald_active);
}

static void
ald_daemon(void)
{
	int needwakeup;
	struct alq *alq;

	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);

	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
	    SHUTDOWN_PRI_FIRST);

	ALD_LOCK();

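	/*
	 * Main loop: sleep until a queue is placed on the active list,
	 * claim it, and flush it.  alq_doio() returns nonzero when a
	 * writer is sleeping on this queue and needs to be woken.
	 */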
	for (;;) {
		while ((alq = LIST_FIRST(&ald_active)) == NULL)
			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);

		ALQ_LOCK(alq);
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
		ALQ_UNLOCK(alq);
		if (needwakeup)
			wakeup(alq);
		ALD_LOCK();
	}
}

static void
ald_shutdown(void *arg, int howto)
{
	struct alq *alq;

	ALD_LOCK();
	ald_shutingdown = 1;

	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
		LIST_REMOVE(alq, aq_link);
		ALD_UNLOCK();
		alq_shutdown(alq);
		ALD_LOCK();
	}
	ALD_UNLOCK();
}

static void
alq_shutdown(struct alq *alq)
{
	ALQ_LOCK(alq);

	/* Stop any new writers. */
	alq->aq_flags |= AQ_SHUTDOWN;

	/* Drain IO */
	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
		alq->aq_flags |= AQ_WANTED;
		msleep_spin(alq, &alq->aq_mtx, "aldclose", 0);
	}
	ALQ_UNLOCK(alq);

	vn_close(alq->aq_vp, FWRITE, alq->aq_cred, curthread);
	crfree(alq->aq_cred);
}

/*
 * Flush all pending data to disk.  This operation will block.
 */
static int
alq_doio(struct alq *alq)
{
	struct thread *td;
	struct mount *mp;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov[2];
	struct ale *ale;
	struct ale *alstart;
	int totlen;
	int iov;
	int vfslocked;

	vp = alq->aq_vp;
	td = curthread;
	totlen = 0;
	iov = 0;

	alstart = ale = alq->aq_entvalid;
	alq->aq_entvalid = NULL;

	bzero(&aiov, sizeof(aiov));
	bzero(&auio, sizeof(auio));

	do {
		if (aiov[iov].iov_base == NULL)
			aiov[iov].iov_base = ale->ae_data;
		aiov[iov].iov_len += alq->aq_entlen;
		totlen += alq->aq_entlen;
		/* Check to see if we're wrapping the buffer */
		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
			iov++;
		ale->ae_flags &= ~AE_VALID;
		ale = ale->ae_next;
	} while (ale->ae_flags & AE_VALID);

	alq->aq_flags |= AQ_FLUSHING;
	ALQ_UNLOCK(alq);

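	/*
	 * The loop leaves iov one past the last iovec it filled whenever
	 * the final entry ended at the buffer wrap; step back so that
	 * iov indexes the last iovec actually holding data.
	 */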
	if (iov == 2 || aiov[iov].iov_base == NULL)
		iov--;

	auio.uio_iov = &aiov[0];
	auio.uio_offset = 0;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_iovcnt = iov + 1;
	auio.uio_resid = totlen;
	auio.uio_td = td;

	/*
	 * Do all of the junk required to write now.
	 */
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	vn_start_write(vp, &mp, V_WAIT);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * XXX: VOP_WRITE error checks are ignored.
	 */
#ifdef MAC
	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
#endif
		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	VFS_UNLOCK_GIANT(vfslocked);

	ALQ_LOCK(alq);
	alq->aq_flags &= ~AQ_FLUSHING;

	if (alq->aq_entfree == NULL)
		alq->aq_entfree = alstart;

	if (alq->aq_flags & AQ_WANTED) {
		alq->aq_flags &= ~AQ_WANTED;
		return (1);
	}

	return (0);
}

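/*
 * The daemon runs in its own kernel process.  ald_startup() initializes
 * the mutex and lists early (SI_SUB_LOCK), well before the process is
 * created at SI_SUB_KTHREAD_IDLE below.
 */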
static struct kproc_desc ald_kp = {
	"ALQ Daemon",
	ald_daemon,
	&ald_proc
};

SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp);
SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL);

/* User visible queue functions */

/*
 * Create the queue data structure, allocate the buffer, and open the file.
 */
int
alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
    int size, int count)
{
	struct thread *td;
	struct nameidata nd;
	struct ale *ale;
	struct ale *alp;
	struct alq *alq;
	char *bufp;
	int flags;
	int error;
	int i, vfslocked;

	*alqp = NULL;
	td = curthread;

	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
	flags = FWRITE | O_NOFOLLOW | O_CREAT;

	error = vn_open_cred(&nd, &flags, cmode, cred, NULL);
	if (error)
		return (error);

	vfslocked = NDHASGIANT(&nd);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* Unlock the vnode; we still hold a reference to it. */
	VOP_UNLOCK(nd.ni_vp, 0);
	VFS_UNLOCK_GIANT(vfslocked);

	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
	alq->aq_vp = nd.ni_vp;
	alq->aq_cred = crhold(cred);
	alq->aq_entmax = count;
	alq->aq_entlen = size;
	alq->aq_entfree = alq->aq_first;

	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);

	bufp = alq->aq_entbuf;
	ale = alq->aq_first;
	alp = NULL;

	/* Match up entries with buffers */
	for (i = 0; i < count; i++) {
		if (alp)
			alp->ae_next = ale;
		ale->ae_data = bufp;
		alp = ale;
		ale++;
		bufp += size;
	}

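	/* Close the ring: the last entry points back at the first. */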
	alp->ae_next = alq->aq_first;

	if ((error = ald_add(alq)) != 0)
		return (error);
	*alqp = alq;

	return (0);
}
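
/*
 * Illustrative sketch (not part of this revision): how a kernel consumer
 * might drive the interface above.  The record layout, file path, and
 * entry count are hypothetical, chosen only for the example; ALQ_WAITOK
 * is the wait flag tested by alq_get() below.
 */
#if 0
struct example_record {
	uint64_t	er_seq;		/* Sequence number */
	uint32_t	er_event;	/* Event code */
};

static struct alq *example_alq;

static int
example_init(struct ucred *cred)
{
	/* A queue of 1024 fixed-size records appended to one file. */
	return (alq_open(&example_alq, "/var/log/example.alq", cred,
	    0600, sizeof(struct example_record), 1024));
}

static int
example_log(uint64_t seq, uint32_t event)
{
	struct example_record rec;

	rec.er_seq = seq;
	rec.er_event = event;
	/* Sleep for a free entry rather than returning EWOULDBLOCK. */
	return (alq_write(example_alq, &rec, ALQ_WAITOK));
}

static void
example_fini(void)
{
	alq_flush(example_alq);		/* Push out anything pending. */
	alq_close(example_alq);		/* Drain, close the vnode, free. */
}
#endif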

/*
 * Copy a new entry into the queue.  If the operation would block, either
 * wait or return an error, depending on the value of waitok.
 */
int
alq_write(struct alq *alq, void *data, int waitok)
{
	struct ale *ale;

	if ((ale = alq_get(alq, waitok)) == NULL)
		return (EWOULDBLOCK);

	bcopy(data, ale->ae_data, alq->aq_entlen);
	alq_post(alq, ale);

	return (0);
}
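
/*
 * Illustrative sketch (not part of this revision): callers can avoid the
 * bcopy() above by filling the reserved entry in place through
 * alq_get()/alq_post().  The record type is the hypothetical one from
 * the example after alq_open().
 */
#if 0
static int
example_log_zerocopy(struct alq *alq, uint64_t seq, uint32_t event)
{
	struct example_record *rec;
	struct ale *ale;

	if ((ale = alq_get(alq, ALQ_WAITOK)) == NULL)
		return (EWOULDBLOCK);	/* Queue is shutting down. */

	/* The queue lock is held between alq_get() and alq_post(). */
	rec = (struct example_record *)ale->ae_data;
	rec->er_seq = seq;
	rec->er_event = event;

	alq_post(alq, ale);		/* Marks the entry valid, unlocks. */
	return (0);
}
#endif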

/*
 * Reserve the next free entry, sleeping for one if ALQ_WAITOK is set
 * in waitok.  Returns NULL if the queue is shutting down, or if no
 * entry is free and the caller declined to wait.  On success the queue
 * lock is still held; alq_post() releases it.
 */
struct ale *
alq_get(struct alq *alq, int waitok)
{
	struct ale *ale;
	struct ale *aln;

	ale = NULL;

	ALQ_LOCK(alq);

	/* Loop until we get an entry or we're shutting down */
	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
	    (ale = alq->aq_entfree) == NULL &&
	    (waitok & ALQ_WAITOK)) {
		alq->aq_flags |= AQ_WANTED;
		msleep_spin(alq, &alq->aq_mtx, "alqget", 0);
	}

	if (ale != NULL) {
		aln = ale->ae_next;
		if ((aln->ae_flags & AE_VALID) == 0)
			alq->aq_entfree = aln;
		else
			alq->aq_entfree = NULL;
	} else
		ALQ_UNLOCK(alq);

	return (ale);
}

/*
 * Mark a reserved entry valid and, if the queue is not already on the
 * active list, schedule it for writing.  Releases the queue lock taken
 * by alq_get().
 */
void
alq_post(struct alq *alq, struct ale *ale)
{
	int activate;

	ale->ae_flags |= AE_VALID;

	if (alq->aq_entvalid == NULL)
		alq->aq_entvalid = ale;

	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
		alq->aq_flags |= AQ_ACTIVE;
		activate = 1;
	} else
		activate = 0;

	ALQ_UNLOCK(alq);
	if (activate) {
		ALD_LOCK();
		ald_activate(alq);
		ALD_UNLOCK();
	}
}

void
alq_flush(struct alq *alq)
{
	int needwakeup = 0;

	ALD_LOCK();
	ALQ_LOCK(alq);
	if (alq->aq_flags & AQ_ACTIVE) {
		ald_deactivate(alq);
		ALD_UNLOCK();
		needwakeup = alq_doio(alq);
	} else
		ALD_UNLOCK();
	ALQ_UNLOCK(alq);

	if (needwakeup)
		wakeup(alq);
}

/*
 * Flush remaining data, close the file and free all resources.
 */
void
alq_close(struct alq *alq)
{
	/*
	 * If we're already shutting down, someone else will flush and
	 * close the vnode.
	 */
	if (ald_rem(alq) != 0)
		return;

	/*
	 * Drain all pending IO.
	 */
	alq_shutdown(alq);

	mtx_destroy(&alq->aq_mtx);
	free(alq->aq_first, M_ALD);
	free(alq->aq_entbuf, M_ALD);
	free(alq, M_ALD);
}
513