kern_alq.c revision 118094
1/*
2 * Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice unmodified, this list of conditions, and the following
10 *    disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: head/sys/kern/kern_alq.c 118094 2003-07-27 17:04:56Z phk $");
29
30#include <sys/param.h>
31#include <sys/systm.h>
32#include <sys/kernel.h>
33#include <sys/kthread.h>
34#include <sys/lock.h>
35#include <sys/mutex.h>
36#include <sys/namei.h>
37#include <sys/proc.h>
38#include <sys/vnode.h>
39#include <sys/alq.h>
40#include <sys/malloc.h>
41#include <sys/unistd.h>
42#include <sys/fcntl.h>
43#include <sys/eventhandler.h>
44
45/* Async. Logging Queue */
46struct alq {
47	int	aq_entmax;		/* Max entries */
48	int	aq_entlen;		/* Entry length */
49	char	*aq_entbuf;		/* Buffer for stored entries */
50	int	aq_flags;		/* Queue flags */
51	struct mtx	aq_mtx;		/* Queue lock */
52	struct vnode	*aq_vp;		/* Open vnode handle */
53	struct ucred	*aq_cred;	/* Credentials of the opening thread */
54	struct ale	*aq_first;	/* First ent */
55	struct ale	*aq_entfree;	/* First free ent */
56	struct ale	*aq_entvalid;	/* First ent valid for writing */
57	LIST_ENTRY(alq)	aq_act;		/* List of active queues */
58	LIST_ENTRY(alq)	aq_link;	/* List of all queues */
59};
60
/* Bits stored in alq->aq_flags. */
#define	AQ_WANTED	0x0001		/* A sleeper wants a wakeup after IO */
#define	AQ_ACTIVE	0x0002		/* Queue is linked on ald_active */
#define	AQ_FLUSHING	0x0004		/* A write is in progress */
#define	AQ_SHUTDOWN	0x0008		/* Queue is closing; no new entries */

/* Per-queue lock; aq_mtx is initialized as a spin mutex in alq_open(). */
#define	ALQ_LOCK(alq)	mtx_lock_spin(&(alq)->aq_mtx)
#define	ALQ_UNLOCK(alq)	mtx_unlock_spin(&(alq)->aq_mtx)
68
69static MALLOC_DEFINE(M_ALD, "ALD", "ALD");
70
71/*
72 * The ald_mtx protects the ald_queues list and the ald_active list.
73 */
74static struct mtx ald_mtx;
75static LIST_HEAD(, alq) ald_queues;
76static LIST_HEAD(, alq) ald_active;
77static int ald_shutingdown = 0;
78struct thread *ald_thread;
79static struct proc *ald_proc;
80
81#define	ALD_LOCK()	mtx_lock(&ald_mtx)
82#define	ALD_UNLOCK()	mtx_unlock(&ald_mtx)
83
/* Daemon-side helpers: queue registration, scheduling and lifecycle. */
static int ald_add(struct alq *);
static int ald_rem(struct alq *);
static void ald_startup(void *);
static void ald_daemon(void);
static void ald_shutdown(void *, int);
static void ald_activate(struct alq *);
static void ald_deactivate(struct alq *);

/* Per-queue helpers: draining/closing a queue and writing it out. */
static void alq_shutdown(struct alq *);
static int alq_doio(struct alq *);
96
97
98/*
99 * Add a new queue to the global list.  Fail if we're shutting down.
100 */
101static int
102ald_add(struct alq *alq)
103{
104	int error;
105
106	error = 0;
107
108	ALD_LOCK();
109	if (ald_shutingdown) {
110		error = EBUSY;
111		goto done;
112	}
113	LIST_INSERT_HEAD(&ald_queues, alq, aq_link);
114done:
115	ALD_UNLOCK();
116	return (error);
117}
118
119/*
120 * Remove a queue from the global list unless we're shutting down.  If so,
121 * the ald will take care of cleaning up it's resources.
122 */
123static int
124ald_rem(struct alq *alq)
125{
126	int error;
127
128	error = 0;
129
130	ALD_LOCK();
131	if (ald_shutingdown) {
132		error = EBUSY;
133		goto done;
134	}
135	LIST_REMOVE(alq, aq_link);
136done:
137	ALD_UNLOCK();
138	return (error);
139}
140
141/*
142 * Put a queue on the active list.  This will schedule it for writing.
143 */
144static void
145ald_activate(struct alq *alq)
146{
147	LIST_INSERT_HEAD(&ald_active, alq, aq_act);
148	wakeup(&ald_active);
149}
150
151static void
152ald_deactivate(struct alq *alq)
153{
154	LIST_REMOVE(alq, aq_act);
155	alq->aq_flags &= ~AQ_ACTIVE;
156}
157
158static void
159ald_startup(void *unused)
160{
161	mtx_init(&ald_mtx, "ALDmtx", NULL, MTX_DEF|MTX_QUIET);
162	LIST_INIT(&ald_queues);
163	LIST_INIT(&ald_active);
164}
165
166static void
167ald_daemon(void)
168{
169	int needwakeup;
170	struct alq *alq;
171
172	mtx_lock(&Giant);
173
174	ald_thread = FIRST_THREAD_IN_PROC(ald_proc);
175
176	EVENTHANDLER_REGISTER(shutdown_pre_sync, ald_shutdown, NULL,
177	    SHUTDOWN_PRI_FIRST);
178
179	ALD_LOCK();
180
181	for (;;) {
182		while ((alq = LIST_FIRST(&ald_active)) == NULL)
183			msleep(&ald_active, &ald_mtx, PWAIT, "aldslp", 0);
184
185		ALQ_LOCK(alq);
186		ald_deactivate(alq);
187		ALD_UNLOCK();
188		needwakeup = alq_doio(alq);
189		ALQ_UNLOCK(alq);
190		if (needwakeup)
191			wakeup(alq);
192		ALD_LOCK();
193	}
194}
195
196static void
197ald_shutdown(void *arg, int howto)
198{
199	struct alq *alq;
200
201	ALD_LOCK();
202	ald_shutingdown = 1;
203
204	while ((alq = LIST_FIRST(&ald_queues)) != NULL) {
205		LIST_REMOVE(alq, aq_link);
206		ALD_UNLOCK();
207		alq_shutdown(alq);
208		ALD_LOCK();
209	}
210	ALD_UNLOCK();
211}
212
213static void
214alq_shutdown(struct alq *alq)
215{
216	ALQ_LOCK(alq);
217
218	/* Stop any new writers. */
219	alq->aq_flags |= AQ_SHUTDOWN;
220
221	/* Drain IO */
222	while (alq->aq_flags & (AQ_FLUSHING|AQ_ACTIVE)) {
223		alq->aq_flags |= AQ_WANTED;
224		ALQ_UNLOCK(alq);
225		tsleep(alq, PWAIT, "aldclose", 0);
226		ALQ_LOCK(alq);
227	}
228	ALQ_UNLOCK(alq);
229
230	vn_close(alq->aq_vp, FWRITE, alq->aq_cred,
231	    curthread);
232	crfree(alq->aq_cred);
233}
234
235/*
236 * Flush all pending data to disk.  This operation will block.
237 */
238static int
239alq_doio(struct alq *alq)
240{
241	struct thread *td;
242	struct mount *mp;
243	struct vnode *vp;
244	struct uio auio;
245	struct iovec aiov[2];
246	struct ale *ale;
247	struct ale *alstart;
248	int totlen;
249	int iov;
250
251	vp = alq->aq_vp;
252	td = curthread;
253	totlen = 0;
254	iov = 0;
255
256	alstart = ale = alq->aq_entvalid;
257	alq->aq_entvalid = NULL;
258
259	bzero(&aiov, sizeof(aiov));
260	bzero(&auio, sizeof(auio));
261
262	do {
263		if (aiov[iov].iov_base == NULL)
264			aiov[iov].iov_base = ale->ae_data;
265		aiov[iov].iov_len += alq->aq_entlen;
266		totlen += alq->aq_entlen;
267		/* Check to see if we're wrapping the buffer */
268		if (ale->ae_data + alq->aq_entlen != ale->ae_next->ae_data)
269			iov++;
270		ale->ae_flags &= ~AE_VALID;
271		ale = ale->ae_next;
272	} while (ale->ae_flags & AE_VALID);
273
274	alq->aq_flags |= AQ_FLUSHING;
275	ALQ_UNLOCK(alq);
276
277	if (iov == 2 || aiov[iov].iov_base == NULL)
278		iov--;
279
280	auio.uio_iov = &aiov[0];
281	auio.uio_offset = 0;
282	auio.uio_segflg = UIO_SYSSPACE;
283	auio.uio_rw = UIO_WRITE;
284	auio.uio_iovcnt = iov + 1;
285	auio.uio_resid = totlen;
286	auio.uio_td = td;
287
288	/*
289	 * Do all of the junk required to write now.
290	 */
291	vn_start_write(vp, &mp, V_WAIT);
292	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
293	VOP_LEASE(vp, td, alq->aq_cred, LEASE_WRITE);
294	/* XXX error ignored */
295	VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, alq->aq_cred);
296	VOP_UNLOCK(vp, 0, td);
297	vn_finished_write(mp);
298
299	ALQ_LOCK(alq);
300	alq->aq_flags &= ~AQ_FLUSHING;
301
302	if (alq->aq_entfree == NULL)
303		alq->aq_entfree = alstart;
304
305	if (alq->aq_flags & AQ_WANTED) {
306		alq->aq_flags &= ~AQ_WANTED;
307		return (1);
308	}
309
310	return(0);
311}
312
313static struct kproc_desc ald_kp = {
314        "ALQ Daemon",
315        ald_daemon,
316        &ald_proc
317};
318
319SYSINIT(aldthread, SI_SUB_KTHREAD_IDLE, SI_ORDER_ANY, kproc_start, &ald_kp)
320SYSINIT(ald, SI_SUB_LOCK, SI_ORDER_ANY, ald_startup, NULL)
321
322
323/* User visible queue functions */
324
325/*
326 * Create the queue data structure, allocate the buffer, and open the file.
327 */
328int
329alq_open(struct alq **alqp, const char *file, struct ucred *cred, int size,
330    int count)
331{
332	struct thread *td;
333	struct nameidata nd;
334	struct ale *ale;
335	struct ale *alp;
336	struct alq *alq;
337	char *bufp;
338	int flags;
339	int error;
340	int i;
341
342	*alqp = NULL;
343	td = curthread;
344
345	NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_SYSSPACE, file, td);
346	flags = FWRITE | O_NOFOLLOW | O_CREAT;
347
348	error = vn_open_cred(&nd, &flags, 0, cred, -1);
349	if (error)
350		return (error);
351
352	NDFREE(&nd, NDF_ONLY_PNBUF);
353	/* We just unlock so we hold a reference */
354	VOP_UNLOCK(nd.ni_vp, 0, td);
355
356	alq = malloc(sizeof(*alq), M_ALD, M_WAITOK|M_ZERO);
357	alq->aq_entbuf = malloc(count * size, M_ALD, M_WAITOK|M_ZERO);
358	alq->aq_first = malloc(sizeof(*ale) * count, M_ALD, M_WAITOK|M_ZERO);
359	alq->aq_vp = nd.ni_vp;
360	alq->aq_cred = crhold(cred);
361	alq->aq_entmax = count;
362	alq->aq_entlen = size;
363	alq->aq_entfree = alq->aq_first;
364
365	mtx_init(&alq->aq_mtx, "ALD Queue", NULL, MTX_SPIN|MTX_QUIET);
366
367	bufp = alq->aq_entbuf;
368	ale = alq->aq_first;
369	alp = NULL;
370
371	/* Match up entries with buffers */
372	for (i = 0; i < count; i++) {
373		if (alp)
374			alp->ae_next = ale;
375		ale->ae_data = bufp;
376		alp = ale;
377		ale++;
378		bufp += size;
379	}
380
381	alp->ae_next = alq->aq_first;
382
383	if ((error = ald_add(alq)) != 0)
384		return (error);
385	*alqp = alq;
386
387	return (0);
388}
389
390/*
391 * Copy a new entry into the queue.  If the operation would block either
392 * wait or return an error depending on the value of waitok.
393 */
394int
395alq_write(struct alq *alq, void *data, int waitok)
396{
397	struct ale *ale;
398
399	if ((ale = alq_get(alq, waitok)) == NULL)
400		return (EWOULDBLOCK);
401
402	bcopy(data, ale->ae_data, alq->aq_entlen);
403	alq_post(alq, ale);
404
405	return (0);
406}
407
408struct ale *
409alq_get(struct alq *alq, int waitok)
410{
411	struct ale *ale;
412	struct ale *aln;
413
414	ale = NULL;
415
416	ALQ_LOCK(alq);
417
418	/* Loop until we get an entry or we're shutting down */
419	while ((alq->aq_flags & AQ_SHUTDOWN) == 0 &&
420	    (ale = alq->aq_entfree) == NULL &&
421	    (waitok & ALQ_WAITOK)) {
422		alq->aq_flags |= AQ_WANTED;
423		ALQ_UNLOCK(alq);
424		tsleep(alq, PWAIT, "alqget", 0);
425		ALQ_LOCK(alq);
426	}
427
428	if (ale != NULL) {
429		aln = ale->ae_next;
430		if ((aln->ae_flags & AE_VALID) == 0)
431			alq->aq_entfree = aln;
432		else
433			alq->aq_entfree = NULL;
434	} else
435		ALQ_UNLOCK(alq);
436
437
438	return (ale);
439}
440
441void
442alq_post(struct alq *alq, struct ale *ale)
443{
444	int activate;
445
446	ale->ae_flags |= AE_VALID;
447
448	if (alq->aq_entvalid == NULL)
449		alq->aq_entvalid = ale;
450
451	if ((alq->aq_flags & AQ_ACTIVE) == 0) {
452		alq->aq_flags |= AQ_ACTIVE;
453		activate = 1;
454	} else
455		activate = 0;
456
457	ALQ_UNLOCK(alq);
458	if (activate) {
459		ALD_LOCK();
460		ald_activate(alq);
461		ALD_UNLOCK();
462	}
463}
464
465void
466alq_flush(struct alq *alq)
467{
468	int needwakeup = 0;
469
470	ALD_LOCK();
471	ALQ_LOCK(alq);
472	if (alq->aq_flags & AQ_ACTIVE) {
473		ald_deactivate(alq);
474		ALD_UNLOCK();
475		needwakeup = alq_doio(alq);
476	} else
477		ALD_UNLOCK();
478	ALQ_UNLOCK(alq);
479
480	if (needwakeup)
481		wakeup(alq);
482}
483
484/*
485 * Flush remaining data, close the file and free all resources.
486 */
487void
488alq_close(struct alq *alq)
489{
490	/*
491	 * If we're already shuting down someone else will flush and close
492	 * the vnode.
493	 */
494	if (ald_rem(alq) != 0)
495		return;
496
497	/*
498	 * Drain all pending IO.
499	 */
500	alq_shutdown(alq);
501
502	mtx_destroy(&alq->aq_mtx);
503	free(alq->aq_first, M_ALD);
504	free(alq->aq_entbuf, M_ALD);
505	free(alq, M_ALD);
506}
507