kern_alq.c revision 175294
1/*-
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 175294 2008-01-13 14:44:15Z attilio $");
29
30#include "opt_mac.h"
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/kernel.h>
35#include <sys/kthread.h>
36#include <sys/lock.h>
37#include <sys/mount.h>
38#include <sys/mutex.h>
39#include <sys/namei.h>
40#include <sys/proc.h>
41#include <sys/vnode.h>
42#include <sys/alq.h>
43#include <sys/malloc.h>
44#include <sys/unistd.h>
45#include <sys/fcntl.h>
46#include <sys/eventhandler.h>
47
48#include <security/mac/mac_framework.h>
49
50/* Async. Logging Queue */
51struct alq {
52	int	aq_entmax;		/* Max entries */
53	int	aq_entlen;		/* Entry length */
54	char	*aq_entbuf;		/* Buffer for stored entries */
55	int	aq_flags;		/* Queue flags */
56	struct mtx	aq_mtx;		/* Queue lock */
57	struct vnode	*aq_vp;		/* Open vnode handle */
58	struct ucred	*aq_cred;	/* Credentials of the opening thread */
59	struct ale	*aq_first;	/* First ent */
60	struct ale	*aq_entfree;	/* First free ent */
61	struct ale	*aq_entvalid;	/* First ent valid for writing */
62	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
63	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
64};
65
/* Bits kept in alq->aq_flags. */
#define	AQ_WANTED	0x0001		/* A sleeper wants wakeup() after I/O */
#define	AQ_ACTIVE	0x0002		/* Queue is linked on ald_active */
#define	AQ_FLUSHING	0x0004		/* A write to the vnode is in progress */
#define	AQ_SHUTDOWN	0x0008		/* Queue is closing; no new entries */

/* The per-queue lock is a spin mutex. */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
73
74static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
75
76/*
77 * The ald_mtx protects the ald_queues list and the ald_active list.
78 */
79static struct mtx ald_mtx;
80static LIST_HEAD(, alq) ald_queues;
81static LIST_HEAD(, alq) ald_active;
82static int ald_shutingdown = 0;
83struct thread *ald_thread;
84static struct proc *ald_proc;
85
86#define	ALD_LOCK()	mtx_lock(&ald_mtx)
87#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
88
/* Daemon (ald_*) routines: global list management and the writer thread. */
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Per-queue (alq_*) internals. */
static int alq_doio(struct alq *);
static void alq_shutdown(struct alq *);
101
102
103/*
104 * Add a new queue to the global list.  Fail if we're shutting down.
105 */
106static int
107ald_add(struct alq *alq)
108{
109	int error;
110
111	error = 0;
112
113	ALD_LOCK();
114	if (ald_shutingdown) {
115		error = EBUSY;
116		goto done;
117	}
118	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
119done:
120	ALD_UNLOCK();
121	return (error);
122}
123
124/*
125 * Remove a queue from the global list unless we're shutting down.  If so,
126 * the ald will take care of cleaning up it's resources.
127 */
128static int
129ald_rem(struct alq *alq)
130{
131	int error;
132
133	error = 0;
134
135	ALD_LOCK();
136	if (ald_shutingdown) {
137		error = EBUSY;
138		goto done;
139	}
140	LIST_REMOVE(alq, aq_link);
141done:
142	ALD_UNLOCK();
143	return (error);
144}
145
146/*
147 * Put a queue on the active list.  This will schedule it for writing.
148 */
149static void
150ald_activate(struct alq *alq)
151{
152	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
153	wakeup(&ald_active);
154}
155
156static void
157ald_deactivate(struct alq *alq)
158{
159	LIST_REMOVE(alq, aq_act);
160	alq->aq_flags &= ~AQ_ACTIVE;
161}
162
163static void
164ald_startup(void *unused)
165{
166	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
167	LIST_INIT(&ald_queues);
168	LIST_INIT(&ald_active);
169}
170
171static void
172ald_daemon(void)
173{
174	int needwakeup;
175	struct alq *alq;
176
177	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
178
179	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
180	    SHUTDOWN_PRI_FIRST);
181
182	ALD_LOCK();
183
184	for (;;) {
185		while ((alq = LIST_FIRST(&ald_active)) == NULL)
186			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
187
188		ALQ_LOCK(alq);
189		ald_deactivate(alq);
190		ALD_UNLOCK();
191		needwakeup = alq_doio(alq);
192		ALQ_UNLOCK(alq);
193		if (needwakeup)
194			wakeup(alq);
195		ALD_LOCK();
196	}
197}
198
199static void
200ald_shutdown(void *arg, int howto)
201{
202	struct alq *alq;
203
204	ALD_LOCK();
205	ald_shutingdown = 1;
206
207	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
208		LIST_REMOVE(alq, aq_link);
209		ALD_UNLOCK();
210		alq_shutdown(alq);
211		ALD_LOCK();
212	}
213	ALD_UNLOCK();
214}
215
216static void
217alq_shutdown(struct alq *alq)
218{
219	ALQ_LOCK(alq);
220
221	/* Stop any new writers. */
222	alq->aq_flags |= AQ_SHUTDOWN;
223
224	/* Drain IO */
225	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
226		alq->aq_flags |= AQ_WANTED;
227		ALQ_UNLOCK(alq);
228		tsleep(alq, PWAIT, "aldclose", 0);
229		ALQ_LOCK(alq);
230	}
231	ALQ_UNLOCK(alq);
232
233	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
234	    curthread);
235	crfree(alq->aq_cred);
236}
237
238/*
239 * Flush all pending data to disk.  This operation will block.
240 */
241static int
242alq_doio(struct alq *alq)
243{
244	struct thread *td;
245	struct mount *mp;
246	struct vnode *vp;
247	struct uio auio;
248	struct iovec aiov[2];
249	struct ale *ale;
250	struct ale *alstart;
251	int totlen;
252	int iov;
253	int vfslocked;
254
255	vp = alq->aq_vp;
256	td = curthread;
257	totlen = 0;
258	iov = 0;
259
260	alstart = ale = alq->aq_entvalid;
261	alq->aq_entvalid = NULL;
262
263	bzero(&aiov, sizeof(aiov));
264	bzero(&auio, sizeof(auio));
265
266	do {
267		if (aiov[iov].iov_base == NULL)
268			aiov[iov].iov_base = ale->ae_data;
269		aiov[iov].iov_len += alq->aq_entlen;
270		totlen += alq->aq_entlen;
271		/* Check to see if we're wrapping the buffer */
272		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
273			iov++;
274		ale->ae_flags &= ~AE_VALID;
275		ale = ale->ae_next;
276	} while (ale->ae_flags & AE_VALID);
277
278	alq->aq_flags |= AQ_FLUSHING;
279	ALQ_UNLOCK(alq);
280
281	if (iov == 2 || aiov[iov].iov_base == NULL)
282		iov--;
283
284	auio.uio_iov = &aiov[0];
285	auio.uio_offset = 0;
286	auio.uio_segflg = UIO_SYSSPACE;
287	auio.uio_rw = UIO_WRITE;
288	auio.uio_iovcnt = iov + 1;
289	auio.uio_resid = totlen;
290	auio.uio_td = td;
291
292	/*
293	 * Do all of the junk required to write now.
294	 */
295	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
296	vn_start_write(vp, &mp, V_WAIT);
297	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
298	VOP_LEASE(vp, td, alq->aq_cred, LEASE_WRITE);
299	/*
300	 * XXX: VOP_WRITE error checks are ignored.
301	 */
302#ifdef MAC
303	if (mac_vnode_check_write(alq->aq_cred, NOCRED, vp) == 0)
304#endif
305		VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
306	VOP_UNLOCK(vp, 0);
307	vn_finished_write(mp);
308	VFS_UNLOCK_GIANT(vfslocked);
309
310	ALQ_LOCK(alq);
311	alq->aq_flags &= ~AQ_FLUSHING;
312
313	if (alq->aq_entfree == NULL)
314		alq->aq_entfree = alstart;
315
316	if (alq->aq_flags & AQ_WANTED) {
317		alq->aq_flags &= ~AQ_WANTED;
318		return (1);
319	}
320
321	return(0);
322}
323
324static struct kproc_desc ald_kp = {
325        "ALQ Daemon",
326        ald_daemon,
327        &ald_proc
328};
329
330SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp)
331SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL)
332
333
334/* User visible queue functions */
335
336/*
337 * Create the queue data structure, allocate the buffer, and open the file.
338 */
339int
340alq_open(struct alq **alqp, const char *file, struct ucred *cred, int cmode,
341    int size, int count)
342{
343	struct thread *td;
344	struct nameidata nd;
345	struct ale *ale;
346	struct ale *alp;
347	struct alq *alq;
348	char *bufp;
349	int flags;
350	int error;
351	int i, vfslocked;
352
353	*alqp = NULL;
354	td = curthread;
355
356	NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_SYSSPACE, file, td);
357	flags = FWRITE | O_NOFOLLOW | O_CREAT;
358
359	error = vn_open_cred(&nd, &flags, cmode, cred, NULL);
360	if (error)
361		return (error);
362
363	vfslocked = NDHASGIANT(&nd);
364	NDFREE(&nd, NDF_ONLY_PNBUF);
365	/* We just unlock so we hold a reference */
366	VOP_UNLOCK(nd.ni_vp, 0);
367	VFS_UNLOCK_GIANT(vfslocked);
368
369	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
370	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
371	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
372	alq->aq_vp = nd.ni_vp;
373	alq->aq_cred = crhold(cred);
374	alq->aq_entmax = count;
375	alq->aq_entlen = size;
376	alq->aq_entfree = alq->aq_first;
377
378	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
379
380	bufp = alq->aq_entbuf;
381	ale = alq->aq_first;
382	alp = NULL;
383
384	/* Match up entries with buffers */
385	for (i = 0; i < count; i++) {
386		if (alp)
387			alp->ae_next = ale;
388		ale->ae_data = bufp;
389		alp = ale;
390		ale++;
391		bufp += size;
392	}
393
394	alp->ae_next = alq->aq_first;
395
396	if ((error = ald_add(alq)) != 0)
397		return (error);
398	*alqp = alq;
399
400	return (0);
401}
402
403/*
404 * Copy a new entry into the queue.  If the operation would block either
405 * wait or return an error depending on the value of waitok.
406 */
407int
408alq_write(struct alq *alq, void *data, int waitok)
409{
410	struct ale *ale;
411
412	if ((ale = alq_get(alq, waitok)) == NULL)
413		return (EWOULDBLOCK);
414
415	bcopy(data, ale->ae_data, alq->aq_entlen);
416	alq_post(alq, ale);
417
418	return (0);
419}
420
421struct ale *
422alq_get(struct alq *alq, int waitok)
423{
424	struct ale *ale;
425	struct ale *aln;
426
427	ale = NULL;
428
429	ALQ_LOCK(alq);
430
431	/* Loop until we get an entry or we're shutting down */
432	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
433	    (ale = alq->aq_entfree) == NULL &&
434	    (waitok & ALQ_WAITOK)) {
435		alq->aq_flags |= AQ_WANTED;
436		ALQ_UNLOCK(alq);
437		tsleep(alq, PWAIT, "alqget", 0);
438		ALQ_LOCK(alq);
439	}
440
441	if (ale != NULL) {
442		aln = ale->ae_next;
443		if ((aln->ae_flags & AE_VALID) == 0)
444			alq->aq_entfree = aln;
445		else
446			alq->aq_entfree = NULL;
447	} else
448		ALQ_UNLOCK(alq);
449
450
451	return (ale);
452}
453
454void
455alq_post(struct alq *alq, struct ale *ale)
456{
457	int activate;
458
459	ale->ae_flags |= AE_VALID;
460
461	if (alq->aq_entvalid == NULL)
462		alq->aq_entvalid = ale;
463
464	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
465		alq->aq_flags |= AQ_ACTIVE;
466		activate = 1;
467	} else
468		activate = 0;
469
470	ALQ_UNLOCK(alq);
471	if (activate) {
472		ALD_LOCK();
473		ald_activate(alq);
474		ALD_UNLOCK();
475	}
476}
477
478void
479alq_flush(struct alq *alq)
480{
481	int needwakeup = 0;
482
483	ALD_LOCK();
484	ALQ_LOCK(alq);
485	if (alq->aq_flags & AQ_ACTIVE) {
486		ald_deactivate(alq);
487		ALD_UNLOCK();
488		needwakeup = alq_doio(alq);
489	} else
490		ALD_UNLOCK();
491	ALQ_UNLOCK(alq);
492
493	if (needwakeup)
494		wakeup(alq);
495}
496
497/*
498 * Flush remaining data, close the file and free all resources.
499 */
500void
501alq_close(struct alq *alq)
502{
503	/*
504	 * If we're already shuting down someone else will flush and close
505	 * the vnode.
506	 */
507	if (ald_rem(alq) != 0)
508		return;
509
510	/*
511	 * Drain all pending IO.
512	 */
513	alq_shutdown(alq);
514
515	mtx_destroy(&alq->aq_mtx);
516	free(alq->aq_first, M_ALD);
517	free(alq->aq_entbuf, M_ALD);
518	free(alq, M_ALD);
519}
520