vfs_trans.c revision 1.17
1/*	$NetBSD: vfs_trans.c,v 1.17 2008/02/02 16:51:34 hannken Exp $	*/
2
3/*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the NetBSD
21 *	Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 *    contributors may be used to endorse or promote products derived
24 *    from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39#include <sys/cdefs.h>
40__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.17 2008/02/02 16:51:34 hannken Exp $");
41
42/*
43 * File system transaction operations.
44 */
45
46#include "opt_ddb.h"
47
48#if defined(DDB)
49#define _LWP_API_PRIVATE	/* Need _lwp_getspecific_by_lwp() */
50#endif
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/kmem.h>
55#include <sys/mount.h>
56#include <sys/rwlock.h>
57#include <sys/vnode.h>
58#define _FSTRANS_API_PRIVATE
59#include <sys/fstrans.h>
60#include <sys/proc.h>
61
62#include <miscfs/specfs/specdev.h>
63#include <miscfs/syncfs/syncfs.h>
64
65struct fscow_handler {
66	SLIST_ENTRY(fscow_handler) ch_list;
67	int (*ch_func)(void *, struct buf *, bool);
68	void *ch_arg;
69};
70struct fstrans_lwp_info {
71	struct fstrans_lwp_info *fli_succ;
72	struct mount *fli_mount;
73	int fli_count;
74	enum fstrans_lock_type fli_lock_type;
75};
76struct fstrans_mount_info {
77	enum fstrans_state fmi_state;
78	krwlock_t fmi_shared_lock;
79	krwlock_t fmi_lazy_lock;
80	krwlock_t fmi_cow_lock;
81	SLIST_HEAD(, fscow_handler) fmi_cow_handler;
82};
83
84static specificdata_key_t lwp_data_key;
85static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
86
87POOL_INIT(fstrans_pl, sizeof(struct fstrans_lwp_info), 0, 0, 0,
88    "fstrans", NULL, IPL_NONE);
89
90static void fstrans_lwp_dtor(void *);
91
92/*
93 * Initialize
94 */
95void
96fstrans_init(void)
97{
98	int error;
99
100	error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
101	KASSERT(error == 0);
102
103	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
104}
105
106/*
107 * Deallocate lwp state
108 */
109static void
110fstrans_lwp_dtor(void *arg)
111{
112	struct fstrans_lwp_info *fli, *fli_next;
113
114	for (fli = arg; fli; fli = fli_next) {
115		KASSERT(fli->fli_mount == NULL);
116		KASSERT(fli->fli_count == 0);
117		fli_next = fli->fli_succ;
118		pool_put(&fstrans_pl, fli);
119	}
120}
121
122/*
123 * Allocate mount state
124 */
125int
126fstrans_mount(struct mount *mp)
127{
128	struct fstrans_mount_info *new;
129
130	if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
131		return ENOMEM;
132	new->fmi_state = FSTRANS_NORMAL;
133	rw_init(&new->fmi_lazy_lock);
134	rw_init(&new->fmi_shared_lock);
135	SLIST_INIT(&new->fmi_cow_handler);
136	rw_init(&new->fmi_cow_lock);
137
138	mp->mnt_transinfo = new;
139	mp->mnt_iflag |= IMNT_HAS_TRANS;
140
141	return 0;
142}
143
144/*
145 * Deallocate mount state
146 */
147void
148fstrans_unmount(struct mount *mp)
149{
150	struct fstrans_mount_info *fmi;
151
152	if ((fmi = mp->mnt_transinfo) == NULL)
153		return;
154
155	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
156	rw_destroy(&fmi->fmi_lazy_lock);
157	rw_destroy(&fmi->fmi_shared_lock);
158	KASSERT(SLIST_EMPTY(&fmi->fmi_cow_handler));
159	rw_destroy(&fmi->fmi_cow_lock);
160	kmem_free(fmi, sizeof(*fmi));
161	mp->mnt_iflag &= ~IMNT_HAS_TRANS;
162	mp->mnt_transinfo = NULL;
163}
164
165/*
166 * Start a transaction.  If this thread already has a transaction on this
167 * file system increment the reference counter.
168 * A thread with an exclusive transaction lock may get a shared or lazy one.
169 * A thread with a shared or lazy transaction lock cannot upgrade to an
170 * exclusive one yet.
171 */
172int
173_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
174{
175	krwlock_t *lock_p;
176	krw_t lock_op;
177	struct fstrans_lwp_info *fli, *new_fli;
178	struct fstrans_mount_info *fmi;
179
180	ASSERT_SLEEPABLE(NULL, __func__);
181
182	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
183		return 0;
184
185	new_fli = NULL;
186	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
187		if (fli->fli_mount == NULL && new_fli == NULL)
188			new_fli = fli;
189		if (fli->fli_mount == mp) {
190			KASSERT(fli->fli_count > 0);
191			if (fli->fli_lock_type != FSTRANS_EXCL &&
192			    lock_type == FSTRANS_EXCL)
193				panic("fstrans_start: cannot upgrade lock");
194			fli->fli_count += 1;
195			return 0;
196		}
197	}
198
199	if (new_fli == NULL) {
200		new_fli = pool_get(&fstrans_pl, PR_WAITOK);
201		new_fli->fli_mount = NULL;
202		new_fli->fli_count = 0;
203		new_fli->fli_succ = lwp_getspecific(lwp_data_key);
204		lwp_setspecific(lwp_data_key, new_fli);
205	}
206
207	KASSERT(new_fli->fli_mount == NULL);
208	KASSERT(new_fli->fli_count == 0);
209
210	fmi = mp->mnt_transinfo;
211
212	if (lock_type == FSTRANS_LAZY)
213		lock_p = &fmi->fmi_lazy_lock;
214	else
215		lock_p = &fmi->fmi_shared_lock;
216	lock_op = (lock_type == FSTRANS_EXCL ? RW_WRITER : RW_READER);
217
218	if (wait)
219		rw_enter(lock_p, lock_op);
220	else if (rw_tryenter(lock_p, lock_op) == 0)
221		return EBUSY;
222
223	new_fli->fli_mount = mp;
224	new_fli->fli_count = 1;
225	new_fli->fli_lock_type = lock_type;
226
227	return 0;
228}
229
230/*
231 * Finish a transaction.
232 */
233void
234fstrans_done(struct mount *mp)
235{
236	struct fstrans_lwp_info *fli;
237	struct fstrans_mount_info *fmi;
238
239	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
240		return;
241
242	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
243		if (fli->fli_mount == mp) {
244			fli->fli_count -= 1;
245			if (fli->fli_count > 0)
246				return;
247			break;
248		}
249	}
250
251	KASSERT(fli != NULL);
252	KASSERT(fli->fli_mount == mp);
253	KASSERT(fli->fli_count == 0);
254	fli->fli_mount = NULL;
255	fmi = mp->mnt_transinfo;
256	KASSERT(fmi != NULL);
257	if (fli->fli_lock_type == FSTRANS_LAZY)
258		rw_exit(&fmi->fmi_lazy_lock);
259	else
260		rw_exit(&fmi->fmi_shared_lock);
261}
262
263/*
264 * Check if this thread has an exclusive lock.
265 */
266int
267fstrans_is_owner(struct mount *mp)
268{
269	struct fstrans_lwp_info *fli;
270
271	if (mp == NULL)
272		return 0;
273	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
274		return 0;
275
276	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ)
277		if (fli->fli_mount == mp)
278			break;
279
280	if (fli == NULL)
281		return 0;
282
283	KASSERT(fli->fli_mount == mp);
284	KASSERT(fli->fli_count > 0);
285	return (fli->fli_lock_type == FSTRANS_EXCL);
286}
287
288/*
289 * Set new file system state.
290 */
291int
292fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
293{
294	struct fstrans_mount_info *fmi;
295
296	fmi = mp->mnt_transinfo;
297
298	switch (new_state) {
299	case FSTRANS_SUSPENDING:
300		KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
301		fstrans_start(mp, FSTRANS_EXCL);
302		fmi->fmi_state = FSTRANS_SUSPENDING;
303		break;
304
305	case FSTRANS_SUSPENDED:
306		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
307			fmi->fmi_state == FSTRANS_SUSPENDING);
308		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
309			fstrans_is_owner(mp));
310		if (fmi->fmi_state == FSTRANS_NORMAL)
311			fstrans_start(mp, FSTRANS_EXCL);
312		rw_enter(&fmi->fmi_lazy_lock, RW_WRITER);
313		fmi->fmi_state = FSTRANS_SUSPENDED;
314		break;
315
316	case FSTRANS_NORMAL:
317		KASSERT(fmi->fmi_state == FSTRANS_NORMAL ||
318			fstrans_is_owner(mp));
319		if (fmi->fmi_state == FSTRANS_SUSPENDED)
320			rw_exit(&fmi->fmi_lazy_lock);
321		if (fmi->fmi_state == FSTRANS_SUSPENDING ||
322		    fmi->fmi_state == FSTRANS_SUSPENDED) {
323			fmi->fmi_state = FSTRANS_NORMAL;
324			fstrans_done(mp);
325		}
326		break;
327
328	default:
329		panic("%s: illegal state %d", __func__, new_state);
330	}
331
332	return 0;
333}
334
335/*
336 * Get current file system state
337 */
338enum fstrans_state
339fstrans_getstate(struct mount *mp)
340{
341	struct fstrans_mount_info *fmi;
342
343	fmi = mp->mnt_transinfo;
344
345	return fmi->fmi_state;
346}
347
348/*
349 * Request a filesystem to suspend all operations.
350 */
351int
352vfs_suspend(struct mount *mp, int nowait)
353{
354	int error;
355
356	if (nowait) {
357		if (!mutex_tryenter(&vfs_suspend_lock))
358			return EWOULDBLOCK;
359	} else
360		mutex_enter(&vfs_suspend_lock);
361
362	mutex_enter(&syncer_mutex);
363
364	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0) {
365		mutex_exit(&syncer_mutex);
366		mutex_exit(&vfs_suspend_lock);
367	}
368
369	return error;
370}
371
372/*
373 * Request a filesystem to resume all operations.
374 */
375void
376vfs_resume(struct mount *mp)
377{
378
379	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
380	mutex_exit(&syncer_mutex);
381	mutex_exit(&vfs_suspend_lock);
382}
383
384#if defined(DDB)
385void fstrans_dump(int);
386
387static void
388fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
389{
390	char prefix[9];
391	struct fstrans_lwp_info *fli;
392
393	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
394	for (fli = _lwp_getspecific_by_lwp(l, lwp_data_key);
395	     fli;
396	     fli = fli->fli_succ) {
397		if (!verbose && fli->fli_count == 0)
398			continue;
399		printf("%-8s", prefix);
400		if (verbose)
401			printf(" @%p", fli);
402		if (fli->fli_mount != NULL)
403			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
404		else
405			printf(" NULL");
406		switch (fli->fli_lock_type) {
407		case FSTRANS_LAZY:
408			printf(" lazy");
409			break;
410		case FSTRANS_SHARED:
411			printf(" shared");
412			break;
413		case FSTRANS_EXCL:
414			printf(" excl");
415			break;
416		default:
417			printf(" %#x", fli->fli_lock_type);
418			break;
419		}
420		printf(" %d\n", fli->fli_count);
421		prefix[0] = '\0';
422	}
423}
424
425static void
426fstrans_print_mount(struct mount *mp, int verbose)
427{
428	struct fstrans_mount_info *fmi;
429
430	fmi = mp->mnt_transinfo;
431	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
432		return;
433
434	printf("%-16s ", mp->mnt_stat.f_mntonname);
435	if (fmi == NULL) {
436		printf("(null)\n");
437		return;
438	}
439	switch (fmi->fmi_state) {
440	case FSTRANS_NORMAL:
441		printf("state normal\n");
442		break;
443	case FSTRANS_SUSPENDING:
444		printf("state suspending\n");
445		break;
446	case FSTRANS_SUSPENDED:
447		printf("state suspended\n");
448		break;
449	default:
450		printf("state %#x\n", fmi->fmi_state);
451		break;
452	}
453	printf("%16s r=%d w=%d\n", "lock_lazy:",
454	    rw_read_held(&fmi->fmi_lazy_lock),
455	    rw_write_held(&fmi->fmi_lazy_lock));
456	printf("%16s r=%d w=%d\n", "lock_shared:",
457	    rw_read_held(&fmi->fmi_shared_lock),
458	    rw_write_held(&fmi->fmi_shared_lock));
459}
460
461void
462fstrans_dump(int full)
463{
464	const struct proclist_desc *pd;
465	struct proc *p;
466	struct lwp *l;
467	struct mount *mp;
468
469	printf("Fstrans locks by lwp:\n");
470	for (pd = proclists; pd->pd_list != NULL; pd++)
471		LIST_FOREACH(p, pd->pd_list, p_list)
472			LIST_FOREACH(l, &p->p_lwps, l_sibling)
473				fstrans_print_lwp(p, l, full == 1);
474
475	printf("Fstrans state by mount:\n");
476	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list)
477		fstrans_print_mount(mp, full == 1);
478}
479#endif /* defined(DDB) */
480
481int
482fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
483    void *arg)
484{
485	struct fstrans_mount_info *fmi;
486	struct fscow_handler *new;
487
488	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
489		return EINVAL;
490
491	fmi = mp->mnt_transinfo;
492
493	if ((new = kmem_alloc(sizeof(*new), KM_SLEEP)) == NULL)
494		return ENOMEM;
495	new->ch_func = func;
496	new->ch_arg = arg;
497	rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
498	SLIST_INSERT_HEAD(&fmi->fmi_cow_handler, new, ch_list);
499	rw_exit(&fmi->fmi_cow_lock);
500
501	return 0;
502}
503
504int
505fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
506    void *arg)
507{
508	struct fstrans_mount_info *fmi;
509	struct fscow_handler *hp = NULL;
510
511	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
512		return EINVAL;
513
514	fmi = mp->mnt_transinfo;
515
516	rw_enter(&fmi->fmi_cow_lock, RW_WRITER);
517	SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
518		if (hp->ch_func == func && hp->ch_arg == arg)
519			break;
520	if (hp != NULL) {
521		SLIST_REMOVE(&fmi->fmi_cow_handler, hp, fscow_handler, ch_list);
522		kmem_free(hp, sizeof(*hp));
523	}
524	rw_exit(&fmi->fmi_cow_lock);
525
526	return hp ? 0 : EINVAL;
527}
528
529int
530fscow_run(struct buf *bp, bool data_valid)
531{
532	int error = 0;
533	struct mount *mp;
534	struct fstrans_mount_info *fmi;
535	struct fscow_handler *hp;
536
537	if ((bp->b_flags & B_COWDONE))
538		goto done;
539	if (bp->b_vp == NULL)
540		goto done;
541	if (bp->b_vp->v_type == VBLK)
542		mp = bp->b_vp->v_specmountpoint;
543	else
544		mp = bp->b_vp->v_mount;
545	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
546		goto done;
547
548	fmi = mp->mnt_transinfo;
549
550	rw_enter(&fmi->fmi_cow_lock, RW_READER);
551	SLIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
552		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
553			break;
554	rw_exit(&fmi->fmi_cow_lock);
555
556 done:
557 	if (error == 0)
558 		bp->b_flags |= B_COWDONE;
559
560	return error;
561}
562