vfs_trans.c revision 1.14
1/*	$NetBSD: vfs_trans.c,v 1.14 2007/10/08 09:09:47 hannken Exp $	*/
2
3/*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the NetBSD
21 *	Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 *    contributors may be used to endorse or promote products derived
24 *    from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.14 2007/10/08 09:09:47 hannken Exp $");
41
42/*
43 * File system transaction operations.
44 */
45
46#include "opt_ddb.h"
47
48#if defined(DDB)
49#define _LWP_API_PRIVATE	/* Need _lwp_getspecific_by_lwp() */
50#endif
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/malloc.h>
55#include <sys/kmem.h>
56#include <sys/mount.h>
57#include <sys/rwlock.h>
58#include <sys/vnode.h>
59#define _FSTRANS_API_PRIVATE
60#include <sys/fstrans.h>
61#include <sys/proc.h>
62
63#include <miscfs/specfs/specdev.h>
64#include <miscfs/syncfs/syncfs.h>
65
66struct fstrans_lwp_info {
67	struct fstrans_lwp_info *fli_succ;
68	struct mount *fli_mount;
69	int fli_count;
70	enum fstrans_lock_type fli_lock_type;
71};
72struct fstrans_mount_info {
73	enum fstrans_state fmi_state;
74	krwlock_t fmi_shared_lock;
75	krwlock_t fmi_lazy_lock;
76};
77
78static specificdata_key_t lwp_data_key;
79static specificdata_key_t mount_data_key;
80static specificdata_key_t mount_cow_key;
81static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
82static kmutex_t fstrans_init_lock;
83
84POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0,
85    "fstrans", NULL, IPL_NONE);
86
87static void fstrans_lwp_dtor(void *);
88static void fstrans_mount_dtor(void *);
89static void fscow_mount_dtor(void *);
90static struct fstrans_mount_info *fstrans_mount_init(struct mount *);
91
92/*
93 * Initialize
94 */
95void
96fstrans_init(void)
97{
98	int error;
99
100	error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
101	KASSERT(error == 0);
102	error = mount_specific_key_create(&mount_data_key, fstrans_mount_dtor);
103	KASSERT(error == 0);
104	error = mount_specific_key_create(&mount_cow_key, fscow_mount_dtor);
105	KASSERT(error == 0);
106
107	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
108	mutex_init(&fstrans_init_lock, MUTEX_DEFAULT, IPL_NONE);
109}
110
111/*
112 * Deallocate lwp state
113 */
114static void
115fstrans_lwp_dtor(void *arg)
116{
117	struct fstrans_lwp_info *fli, *fli_next;
118
119	for (fli = arg; fli; fli = fli_next) {
120		KASSERT(fli->fli_mount == NULL);
121		KASSERT(fli->fli_count == 0);
122		fli_next = fli->fli_succ;
123		pool_put(&fstrans_pl, fli);
124	}
125}
126
127/*
128 * Deallocate mount state
129 */
130static void
131fstrans_mount_dtor(void *arg)
132{
133	struct fstrans_mount_info *fmi = arg;
134
135	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
136	rw_destroy(&fmi->fmi_lazy_lock);
137	rw_destroy(&fmi->fmi_shared_lock);
138	free(fmi, M_MOUNT);
139}
140
141/*
142 * Create mount info for this mount
143 */
144static struct fstrans_mount_info *
145fstrans_mount_init(struct mount *mp)
146{
147	struct fstrans_mount_info *new;
148
149	mutex_enter(&fstrans_init_lock);
150
151	if ((new = mount_getspecific(mp, mount_data_key)) != NULL) {
152		mutex_exit(&fstrans_init_lock);
153		return new;
154	}
155
156	new = malloc(sizeof(*new), M_MOUNT, M_WAITOK);
157	new->fmi_state = FSTRANS_NORMAL;
158	rw_init(&new->fmi_lazy_lock);
159	rw_init(&new->fmi_shared_lock);
160
161	mount_setspecific(mp, mount_data_key, new);
162	mutex_exit(&fstrans_init_lock);
163
164	return new;
165}
166
167/*
168 * Start a transaction.  If this thread already has a transaction on this
169 * file system increment the reference counter.
170 * A thread with an exclusive transaction lock may get a shared or lazy one.
171 * A thread with a shared or lazy transaction lock cannot upgrade to an
172 * exclusive one yet.
173 */
174int
175_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
176{
177	krwlock_t *lock_p;
178	krw_t lock_op;
179	struct fstrans_lwp_info *fli, *new_fli;
180	struct fstrans_mount_info *fmi;
181
182	ASSERT_SLEEPABLE(NULL, __func__);
183
184	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
185		return 0;
186
187	new_fli = NULL;
188	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
189		if (fli->fli_mount == NULL && new_fli == NULL)
190			new_fli = fli;
191		if (fli->fli_mount == mp) {
192			KASSERT(fli->fli_count > 0);
193			if (fli->fli_lock_type != FSTRANS_EXCL &&
194			    lock_type == FSTRANS_EXCL)
195				panic("fstrans_start: cannot upgrade lock");
196			fli->fli_count += 1;
197			return 0;
198		}
199	}
200
201	if (new_fli == NULL) {
202		new_fli = pool_get(&fstrans_pl, PR_WAITOK);
203		new_fli->fli_mount = NULL;
204		new_fli->fli_count = 0;
205		new_fli->fli_succ = lwp_getspecific(lwp_data_key);
206		lwp_setspecific(lwp_data_key, new_fli);
207	}
208
209	KASSERT(new_fli->fli_mount == NULL);
210	KASSERT(new_fli->fli_count == 0);
211
212	if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL)
213		fmi = fstrans_mount_init(mp);
214
215	if (lock_type == FSTRANS_LAZY)
216		lock_p = &fmi->fmi_lazy_lock;
217	else
218		lock_p = &fmi->fmi_shared_lock;
219	lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER);
220
221	if (wait)
222		rw_enter(lock_p, lock_op);
223	else if (rw_tryenter(lock_p, lock_op) == 0)
224		return EBUSY;
225
226	new_fli->fli_mount = mp;
227	new_fli->fli_count = 1;
228	new_fli->fli_lock_type = lock_type;
229
230	return 0;
231}
232
233/*
234 * Finish a transaction.
235 */
236void
237fstrans_done(struct mount *mp)
238{
239	struct fstrans_lwp_info *fli;
240	struct fstrans_mount_info *fmi;
241
242	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
243		return;
244
245	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
246		if (fli->fli_mount == mp) {
247			fli->fli_count -= 1;
248			if (fli->fli_count > 0)
249				return;
250			break;
251		}
252	}
253
254	KASSERT(fli != NULL);
255	KASSERT(fli->fli_mount == mp);
256	KASSERT(fli->fli_count == 0);
257	fli->fli_mount = NULL;
258	fmi = mount_getspecific(mp, mount_data_key);
259	KASSERT(fmi != NULL);
260	if (fli->fli_lock_type == FSTRANS_LAZY)
261		rw_exit(&fmi->fmi_lazy_lock);
262	else
263		rw_exit(&fmi->fmi_shared_lock);
264}
265
266/*
267 * Check if this thread has an exclusive lock.
268 */
269int
270fstrans_is_owner(struct mount *mp)
271{
272	struct fstrans_lwp_info *fli;
273
274	if (mp == NULL)
275		return 0;
276	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
277		return 0;
278
279	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ)
280		if (fli->fli_mount == mp)
281			break;
282
283	if (fli == NULL)
284		return 0;
285
286	KASSERT(fli->fli_mount == mp);
287	KASSERT(fli->fli_count > 0);
288	return (fli->fli_lock_type == FSTRANS_EXCL);
289}
290
291/*
292 * Set new file system state.
293 */
294int
295fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
296{
297	struct fstrans_mount_info *fmi;
298
299	if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL)
300		fmi = fstrans_mount_init(mp);
301
302	switch (new_state) {
303	case FSTRANS_SUSPENDING:
304		KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
305		fstrans_start(mp, FSTRANS_EXCL);
306		fmi->fmi_state = FSTRANS_SUSPENDING;
307		break;
308
309	case FSTRANS_SUSPENDED:
310		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
311			fmi->fmi_state == FSTRANS_SUSPENDING);
312		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
313			fstrans_is_owner(mp));
314		if (fmi->fmi_state == FSTRANS_NORMAL)
315			fstrans_start(mp, FSTRANS_EXCL);
316		rw_enter(&fmi->fmi_lazy_lock, RW_WRITER);
317		fmi->fmi_state = FSTRANS_SUSPENDED;
318		break;
319
320	case FSTRANS_NORMAL:
321		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
322			fstrans_is_owner(mp));
323		if (fmi->fmi_state == FSTRANS_SUSPENDED)
324			rw_exit(&fmi->fmi_lazy_lock);
325		if (fmi->fmi_state == FSTRANS_SUSPENDING ||
326		    fmi->fmi_state == FSTRANS_SUSPENDED) {
327			fmi->fmi_state = FSTRANS_NORMAL;
328			fstrans_done(mp);
329		}
330		break;
331
332	default:
333		panic("%s: illegal state %d", __func__, new_state);
334	}
335
336	return 0;
337}
338
339/*
340 * Get current file system state
341 */
342enum fstrans_state
343fstrans_getstate(struct mount *mp)
344{
345	struct fstrans_mount_info *fmi;
346
347	if ((fmi = mount_getspecific(mp, mount_data_key)) == NULL)
348		return FSTRANS_NORMAL;
349
350	return fmi->fmi_state;
351}
352
353/*
354 * Request a filesystem to suspend all operations.
355 */
356int
357vfs_suspend(struct mount *mp, int nowait)
358{
359	int error;
360
361	if (nowait) {
362		if (!mutex_tryenter(&vfs_suspend_lock))
363			return EWOULDBLOCK;
364	} else
365		mutex_enter(&vfs_suspend_lock);
366
367	mutex_enter(&syncer_mutex);
368
369	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) {
370		mutex_exit(&syncer_mutex);
371		mutex_exit(&vfs_suspend_lock);
372	}
373
374	return error;
375}
376
377/*
378 * Request a filesystem to resume all operations.
379 */
380void
381vfs_resume(struct mount *mp)
382{
383
384	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
385	mutex_exit(&syncer_mutex);
386	mutex_exit(&vfs_suspend_lock);
387}
388
389#if defined(DDB)
390void fstrans_dump(int);
391
392static void
393fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
394{
395	char prefix[9];
396	struct fstrans_lwp_info *fli;
397
398	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
399	for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key);
400	     fli;
401	     fli = fli->fli_succ) {
402		if (!verbose && fli->fli_count == 0)
403			continue;
404		printf("%-8s", prefix);
405		if (verbose)
406			printf(" @%p", fli);
407		if (fli->fli_mount != NULL)
408			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
409		else
410			printf(" NULL");
411		switch (fli->fli_lock_type) {
412		case FSTRANS_LAZY:
413			printf(" lazy");
414			break;
415		case FSTRANS_SHARED:
416			printf(" shared");
417			break;
418		case FSTRANS_EXCL:
419			printf(" excl");
420			break;
421		default:
422			printf(" %#x", fli->fli_lock_type);
423			break;
424		}
425		printf(" %d\n", fli->fli_count);
426		prefix[0] = '\0';
427	}
428}
429
430static void
431fstrans_print_mount(struct mount *mp, int verbose)
432{
433	struct fstrans_mount_info *fmi;
434
435	fmi = mount_getspecific(mp, mount_data_key);
436	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
437		return;
438
439	printf("%-16s ", mp->mnt_stat.f_mntonname);
440	if (fmi == NULL) {
441		printf("(null)\n");
442		return;
443	}
444	switch (fmi->fmi_state) {
445	case FSTRANS_NORMAL:
446		printf("state normal\n");
447		break;
448	case FSTRANS_SUSPENDING:
449		printf("state suspending\n");
450		break;
451	case FSTRANS_SUSPENDED:
452		printf("state suspended\n");
453		break;
454	default:
455		printf("state %#x\n", fmi->fmi_state);
456		break;
457	}
458	printf("%16s r=%d w=%d\n", "lock_lazy:",
459	    rw_read_held(&fmi->fmi_lazy_lock),
460	    rw_write_held(&fmi->fmi_lazy_lock));
461	printf("%16s r=%d w=%d\n", "lock_shared:",
462	    rw_read_held(&fmi->fmi_shared_lock),
463	    rw_write_held(&fmi->fmi_shared_lock));
464}
465
466void
467fstrans_dump(int full)
468{
469	const struct proclist_desc *pd;
470	struct proc *p;
471	struct lwp *l;
472	struct mount *mp;
473
474	printf("Fstrans locks by lwp:\n");
475	for (pd = proclists; pd->pd_list != NULL; pd++)
476		LIST_FOREACH(p, pd->pd_list, p_list)
477			LIST_FOREACH(l, &p->p_lwps, l_sibling)
478				fstrans_print_lwp(p, l, full == 1);
479
480	printf("Fstrans state by mount:\n");
481	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
482		fstrans_print_mount(mp, full == 1);
483}
484#endif /* defined(DDB) */
485
486
487struct fscow_handler {
488	SLIST_ENTRY(fscow_handler) ch_list;
489	int (*ch_func)(void *, struct buf *);
490	void *ch_arg;
491};
492
493struct fscow_mount_info {
494	krwlock_t cmi_lock;
495	SLIST_HEAD(, fscow_handler) cmi_handler;
496};
497
498/*
499 * Deallocate mount state
500 */
501static void
502fscow_mount_dtor(void *arg)
503{
504	struct fscow_mount_info *cmi = arg;
505
506	KASSERT(SLIST_EMPTY(&cmi->cmi_handler));
507	rw_destroy(&cmi->cmi_lock);
508	kmem_free(cmi, sizeof(*cmi));
509}
510
511/*
512 * Create mount info for this mount
513 */
514static struct fscow_mount_info *
515fscow_mount_init(struct mount *mp)
516{
517	struct fscow_mount_info *new;
518
519	mutex_enter(&fstrans_init_lock);
520
521	if ((new = mount_getspecific(mp, mount_cow_key)) != NULL) {
522		mutex_exit(&fstrans_init_lock);
523		return new;
524	}
525
526	if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) != NULL) {
527		SLIST_INIT(&new->cmi_handler);
528		rw_init(&new->cmi_lock);
529		mount_setspecific(mp, mount_cow_key, new);
530	}
531
532	mutex_exit(&fstrans_init_lock);
533
534	return new;
535}
536
537int
538fscow_establish(struct mount *mp, int (*func)(void *, struct buf *), void *arg)
539{
540	struct fscow_mount_info *cmi;
541	struct fscow_handler *new;
542
543	if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL)
544		cmi = fscow_mount_init(mp);
545	if (cmi == NULL)
546		return ENOMEM;
547
548	if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
549		return ENOMEM;
550	new->ch_func = func;
551	new->ch_arg = arg;
552	rw_enter(&cmi->cmi_lock, RW_WRITER);
553	SLIST_INSERT_HEAD(&cmi->cmi_handler, new, ch_list);
554	rw_exit(&cmi->cmi_lock);
555
556	return 0;
557}
558
559int
560fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *),
561    void *arg)
562{
563	struct fscow_mount_info *cmi;
564	struct fscow_handler *hp = NULL;
565
566	if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL)
567		return EINVAL;
568
569	rw_enter(&cmi->cmi_lock, RW_WRITER);
570	SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list)
571		if (hp->ch_func == func && hp->ch_arg == arg)
572			break;
573	if (hp != NULL) {
574		SLIST_REMOVE(&cmi->cmi_handler, hp, fscow_handler, ch_list);
575		kmem_free(hp, sizeof(*hp));
576	}
577	rw_exit(&cmi->cmi_lock);
578
579	return hp ? 0 : EINVAL;
580}
581
582int
583fscow_run(struct buf *bp)
584{
585	int error = 0;
586	struct mount *mp;
587	struct fscow_mount_info *cmi;
588	struct fscow_handler *hp;
589
590	if (bp->b_vp == NULL)
591		return 0;
592	if (bp->b_vp->v_type == VBLK)
593		mp = bp->b_vp->v_specmountpoint;
594	else
595		mp = bp->b_vp->v_mount;
596	if (mp == NULL)
597		return 0;
598
599	if ((cmi = mount_getspecific(mp, mount_cow_key)) == NULL)
600		return 0;
601
602	rw_enter(&cmi->cmi_lock, RW_READER);
603	SLIST_FOREACH(hp, &cmi->cmi_handler, ch_list)
604		if ((error = (*hp->ch_func)(hp->ch_arg, bp)) != 0)
605			break;
606	rw_exit(&cmi->cmi_lock);
607
608	return error;
609}
610