vfs_trans.c revision 1.49
1/*	$NetBSD: vfs_trans.c,v 1.49 2018/09/27 01:03:40 manu Exp $	*/
2
3/*-
4 * Copyright (c) 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Juergen Hannken-Illjes.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: vfs_trans.c,v 1.49 2018/09/27 01:03:40 manu Exp $");
34
35/*
36 * File system transaction operations.
37 */
38
39#ifdef _KERNEL_OPT
40#include "opt_ddb.h"
41#endif
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/kernel.h>
46#include <sys/atomic.h>
47#include <sys/buf.h>
48#include <sys/kmem.h>
49#include <sys/mount.h>
50#include <sys/pserialize.h>
51#include <sys/vnode.h>
52#include <sys/fstrans.h>
53#include <sys/proc.h>
54
55#include <miscfs/specfs/specdev.h>
56
/*
 * Kind of transaction lock an lwp holds on a mount.
 */
enum fstrans_lock_type {
	FSTRANS_SHARED,		/* Granted only in state FSTRANS_NORMAL */
	FSTRANS_EXCL		/* Internal: exclusive, granted in any state */
};
61
62struct fscow_handler {
63	LIST_ENTRY(fscow_handler) ch_list;
64	int (*ch_func)(void *, struct buf *, bool);
65	void *ch_arg;
66};
67struct fstrans_lwp_info {
68	struct fstrans_lwp_info *fli_succ;
69	struct lwp *fli_self;
70	struct mount *fli_mount;
71	int fli_trans_cnt;
72	int fli_cow_cnt;
73	enum fstrans_lock_type fli_lock_type;
74	LIST_ENTRY(fstrans_lwp_info) fli_list;
75};
76struct fstrans_mount_info {
77	enum fstrans_state fmi_state;
78	unsigned int fmi_ref_cnt;
79	bool fmi_cow_change;
80	LIST_HEAD(, fscow_handler) fmi_cow_handler;
81};
82
83static specificdata_key_t lwp_data_key;	/* Our specific data key. */
84static kmutex_t vfs_suspend_lock;	/* Serialize suspensions. */
85static kmutex_t fstrans_lock;		/* Fstrans big lock. */
86static kmutex_t fstrans_mount_lock;	/* Fstrans mount big lock. */
87static kcondvar_t fstrans_state_cv;	/* Fstrans or cow state changed. */
88static kcondvar_t fstrans_count_cv;	/* Fstrans or cow count changed. */
89static pserialize_t fstrans_psz;	/* Pserialize state. */
90static LIST_HEAD(fstrans_lwp_head, fstrans_lwp_info) fstrans_fli_head;
91					/* List of all fstrans_lwp_info. */
92
93static inline struct mount *fstrans_normalize_mount(struct mount *);
94static void fstrans_lwp_dtor(void *);
95static void fstrans_mount_dtor(struct mount *);
96static void fstrans_clear_lwp_info(void);
97static inline struct fstrans_lwp_info *
98    fstrans_get_lwp_info(struct mount *, bool);
99static struct fstrans_lwp_info *fstrans_alloc_lwp_info(struct mount *);
100static inline int _fstrans_start(struct mount *, enum fstrans_lock_type, int);
101static bool grant_lock(const enum fstrans_state, const enum fstrans_lock_type);
102static bool state_change_done(const struct mount *);
103static bool cow_state_change_done(const struct mount *);
104static void cow_change_enter(const struct mount *);
105static void cow_change_done(const struct mount *);
106
107/*
108 * Initialize.
109 */
110void
111fstrans_init(void)
112{
113	int error __diagused;
114
115	error = lwp_specific_key_create(&lwp_data_key, fstrans_lwp_dtor);
116	KASSERT(error == 0);
117
118	mutex_init(&vfs_suspend_lock, MUTEX_DEFAULT, IPL_NONE);
119	mutex_init(&fstrans_lock, MUTEX_DEFAULT, IPL_NONE);
120	mutex_init(&fstrans_mount_lock, MUTEX_DEFAULT, IPL_NONE);
121	cv_init(&fstrans_state_cv, "fstchg");
122	cv_init(&fstrans_count_cv, "fstcnt");
123	fstrans_psz = pserialize_create();
124	LIST_INIT(&fstrans_fli_head);
125}
126
127/*
128 * Normalize mount.
129 * Return mount if file system supports fstrans, NULL otherwise.
130 */
131static inline struct mount *
132fstrans_normalize_mount(struct mount *mp)
133{
134
135	while (mp && mp->mnt_lower)
136		mp = mp->mnt_lower;
137	if (mp == NULL)
138		return NULL;
139	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
140		return NULL;
141	return mp;
142}
143
144/*
145 * Deallocate lwp state.
146 */
147static void
148fstrans_lwp_dtor(void *arg)
149{
150	struct fstrans_lwp_info *fli, *fli_next;
151
152	for (fli = arg; fli; fli = fli_next) {
153		KASSERT(fli->fli_trans_cnt == 0);
154		KASSERT(fli->fli_cow_cnt == 0);
155		if (fli->fli_mount != NULL)
156			fstrans_mount_dtor(fli->fli_mount);
157		fli_next = fli->fli_succ;
158		fli->fli_mount = NULL;
159		membar_sync();
160		fli->fli_self = NULL;
161	}
162}
163
164/*
165 * Dereference mount state.
166 */
167static void
168fstrans_mount_dtor(struct mount *mp)
169{
170	struct fstrans_mount_info *fmi;
171
172	mutex_enter(&fstrans_mount_lock);
173
174	fmi = mp->mnt_transinfo;
175	KASSERT(fmi != NULL);
176	fmi->fmi_ref_cnt -= 1;
177	if (fmi->fmi_ref_cnt > 0) {
178		mutex_exit(&fstrans_mount_lock);
179		return;
180	}
181
182	KASSERT(fmi->fmi_state == FSTRANS_NORMAL);
183	KASSERT(LIST_FIRST(&fmi->fmi_cow_handler) == NULL);
184
185	mp->mnt_iflag &= ~IMNT_HAS_TRANS;
186	mp->mnt_transinfo = NULL;
187
188	mutex_exit(&fstrans_mount_lock);
189
190	kmem_free(fmi, sizeof(*fmi));
191	vfs_rele(mp);
192}
193
194/*
195 * Allocate mount state.
196 */
197int
198fstrans_mount(struct mount *mp)
199{
200	struct fstrans_mount_info *newfmi;
201
202	newfmi = kmem_alloc(sizeof(*newfmi), KM_SLEEP);
203	newfmi->fmi_state = FSTRANS_NORMAL;
204	newfmi->fmi_ref_cnt = 1;
205	LIST_INIT(&newfmi->fmi_cow_handler);
206	newfmi->fmi_cow_change = false;
207
208	mutex_enter(&fstrans_mount_lock);
209	mp->mnt_transinfo = newfmi;
210	mp->mnt_iflag |= IMNT_HAS_TRANS;
211	mutex_exit(&fstrans_mount_lock);
212
213	vfs_ref(mp);
214
215	return 0;
216}
217
218/*
219 * Deallocate mount state.
220 */
221void
222fstrans_unmount(struct mount *mp)
223{
224
225	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
226		return;
227
228	KASSERT(mp->mnt_transinfo != NULL);
229
230	fstrans_mount_dtor(mp);
231}
232
233/*
234 * Clear mount entries whose mount is gone.
235 */
236static void
237fstrans_clear_lwp_info(void)
238{
239	struct fstrans_lwp_info *fli;
240
241	/*
242	 * Scan our list clearing entries whose mount is gone.
243	 */
244	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
245		if (fli->fli_mount != NULL &&
246		    (fli->fli_mount->mnt_iflag & IMNT_GONE) != 0 &&
247		    fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
248			fstrans_mount_dtor(fli->fli_mount);
249			fli->fli_mount = NULL;
250		}
251	}
252}
253
254/*
255 * Allocate and return per lwp info for this mount.
256 */
257static struct fstrans_lwp_info *
258fstrans_alloc_lwp_info(struct mount *mp)
259{
260	struct fstrans_lwp_info *fli;
261	struct fstrans_mount_info *fmi;
262
263	/*
264	 * Try to reuse a cleared entry or allocate a new one.
265	 */
266	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
267		KASSERT(fli->fli_mount != mp);
268		if (fli->fli_mount == NULL) {
269			KASSERT(fli->fli_trans_cnt == 0);
270			KASSERT(fli->fli_cow_cnt == 0);
271			break;
272		}
273	}
274	if (fli == NULL) {
275		mutex_enter(&fstrans_lock);
276		LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
277			if (fli->fli_self == NULL) {
278				KASSERT(fli->fli_mount == NULL);
279				KASSERT(fli->fli_trans_cnt == 0);
280				KASSERT(fli->fli_cow_cnt == 0);
281				fli->fli_self = curlwp;
282				fli->fli_succ = lwp_getspecific(lwp_data_key);
283				lwp_setspecific(lwp_data_key, fli);
284				break;
285			}
286		}
287		mutex_exit(&fstrans_lock);
288	}
289	if (fli == NULL) {
290		fli = kmem_alloc(sizeof(*fli), KM_SLEEP);
291		mutex_enter(&fstrans_lock);
292		memset(fli, 0, sizeof(*fli));
293		fli->fli_self = curlwp;
294		LIST_INSERT_HEAD(&fstrans_fli_head, fli, fli_list);
295		mutex_exit(&fstrans_lock);
296		fli->fli_succ = lwp_getspecific(lwp_data_key);
297		lwp_setspecific(lwp_data_key, fli);
298	}
299
300	/*
301	 * Attach the entry to the mount if its mnt_transinfo is valid.
302	 */
303	mutex_enter(&fstrans_mount_lock);
304	fmi = mp->mnt_transinfo;
305	if (__predict_true(fmi != NULL)) {
306		fli->fli_mount = mp;
307		fmi->fmi_ref_cnt += 1;
308	} else {
309		fli = NULL;
310	}
311	mutex_exit(&fstrans_mount_lock);
312
313	return fli;
314}
315
316/*
317 * Retrieve the per lwp info for this mount allocating if necessary.
318 */
319static inline struct fstrans_lwp_info *
320fstrans_get_lwp_info(struct mount *mp, bool do_alloc)
321{
322	struct fstrans_lwp_info *fli;
323
324	/*
325	 * Scan our list for a match.
326	 */
327	for (fli = lwp_getspecific(lwp_data_key); fli; fli = fli->fli_succ) {
328		if (fli->fli_mount == mp)
329			return fli;
330	}
331
332	return (do_alloc ? fstrans_alloc_lwp_info(mp) : NULL);
333}
334
335/*
336 * Check if this lock type is granted at this state.
337 */
338static bool
339grant_lock(const enum fstrans_state state, const enum fstrans_lock_type type)
340{
341
342	if (__predict_true(state == FSTRANS_NORMAL))
343		return true;
344	if (type == FSTRANS_EXCL)
345		return true;
346
347	return false;
348}
349
350/*
351 * Start a transaction.  If this thread already has a transaction on this
352 * file system increment the reference counter.
353 */
354static inline int
355_fstrans_start(struct mount *mp, enum fstrans_lock_type lock_type, int wait)
356{
357	int s;
358	struct mount *lmp;
359	struct fstrans_lwp_info *fli;
360	struct fstrans_mount_info *fmi;
361
362	if ((lmp = fstrans_normalize_mount(mp)) == NULL)
363		return 0;
364
365	ASSERT_SLEEPABLE();
366
367	/*
368	 * Allocate per lwp info for layered file systems to
369	 * get a reference to the mount.  No need to increment
370	 * the reference counter here.
371	 */
372	for (lmp = mp; lmp->mnt_lower; lmp = lmp->mnt_lower) {
373		fli = fstrans_get_lwp_info(lmp, true);
374	}
375
376	if ((fli = fstrans_get_lwp_info(lmp, true)) == NULL)
377		return 0;
378
379	if (fli->fli_trans_cnt > 0) {
380		KASSERT(lock_type != FSTRANS_EXCL);
381		fli->fli_trans_cnt += 1;
382
383		return 0;
384	}
385
386	s = pserialize_read_enter();
387	fmi = lmp->mnt_transinfo;
388	if (__predict_true(grant_lock(fmi->fmi_state, lock_type))) {
389		fli->fli_trans_cnt = 1;
390		fli->fli_lock_type = lock_type;
391		pserialize_read_exit(s);
392
393		return 0;
394	}
395	pserialize_read_exit(s);
396
397	if (! wait)
398		return EBUSY;
399
400	mutex_enter(&fstrans_lock);
401	while (! grant_lock(fmi->fmi_state, lock_type))
402		cv_wait(&fstrans_state_cv, &fstrans_lock);
403	fli->fli_trans_cnt = 1;
404	fli->fli_lock_type = lock_type;
405	mutex_exit(&fstrans_lock);
406
407	return 0;
408}
409
410void
411fstrans_start(struct mount *mp)
412{
413	int error __diagused;
414
415	error = _fstrans_start(mp, FSTRANS_SHARED, 1);
416	KASSERT(error == 0);
417}
418
419int
420fstrans_start_nowait(struct mount *mp)
421{
422
423	return _fstrans_start(mp, FSTRANS_SHARED, 0);
424}
425
426/*
427 * Finish a transaction.
428 */
429void
430fstrans_done(struct mount *mp)
431{
432	int s;
433	struct fstrans_lwp_info *fli;
434	struct fstrans_mount_info *fmi;
435
436	if ((mp = fstrans_normalize_mount(mp)) == NULL)
437		return;
438	if ((fli = fstrans_get_lwp_info(mp, false)) == NULL)
439		return;
440	KASSERT(fli->fli_trans_cnt > 0);
441
442	if (fli->fli_trans_cnt > 1) {
443		fli->fli_trans_cnt -= 1;
444
445		return;
446	}
447
448	fstrans_clear_lwp_info();
449
450	s = pserialize_read_enter();
451	fmi = mp->mnt_transinfo;
452	if (__predict_true(fmi->fmi_state == FSTRANS_NORMAL)) {
453		fli->fli_trans_cnt = 0;
454		pserialize_read_exit(s);
455
456		return;
457	}
458	pserialize_read_exit(s);
459
460	mutex_enter(&fstrans_lock);
461	fli->fli_trans_cnt = 0;
462	cv_signal(&fstrans_count_cv);
463	mutex_exit(&fstrans_lock);
464}
465
466/*
467 * Check if this thread has an exclusive lock.
468 */
469int
470fstrans_is_owner(struct mount *mp)
471{
472	struct fstrans_lwp_info *fli;
473
474	if ((mp = fstrans_normalize_mount(mp)) == NULL)
475		return 0;
476	if ((fli = fstrans_get_lwp_info(mp, false)) == NULL)
477		return 0;
478
479	if (fli->fli_trans_cnt == 0)
480		return 0;
481
482	KASSERT(fli->fli_mount == mp);
483	KASSERT(fli->fli_trans_cnt > 0);
484
485	return (fli->fli_lock_type == FSTRANS_EXCL);
486}
487
488/*
489 * True, if no thread is in a transaction not granted at the current state.
490 */
491static bool
492state_change_done(const struct mount *mp)
493{
494	struct fstrans_lwp_info *fli;
495	struct fstrans_mount_info *fmi;
496
497	KASSERT(mutex_owned(&fstrans_lock));
498
499	fmi = mp->mnt_transinfo;
500	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
501		if (fli->fli_mount != mp)
502			continue;
503		if (fli->fli_trans_cnt == 0)
504			continue;
505		if (grant_lock(fmi->fmi_state, fli->fli_lock_type))
506			continue;
507
508		return false;
509	}
510
511	return true;
512}
513
514/*
515 * Set new file system state.
516 */
517int
518fstrans_setstate(struct mount *mp, enum fstrans_state new_state)
519{
520	int error;
521	enum fstrans_state old_state;
522	struct fstrans_mount_info *fmi;
523
524	fmi = mp->mnt_transinfo;
525	old_state = fmi->fmi_state;
526	if (old_state == new_state)
527		return 0;
528
529	mutex_enter(&fstrans_lock);
530	fmi->fmi_state = new_state;
531	pserialize_perform(fstrans_psz);
532
533	/*
534	 * All threads see the new state now.
535	 * Wait for transactions invalid at this state to leave.
536	 * We cannot wait forever because many processes would
537	 * get stuck waiting for fstcnt in fstrans_start(). This
538	 * is acute when suspending the root filesystem.
539	 */
540	error = 0;
541	while (! state_change_done(mp)) {
542		error = cv_timedwait_sig(&fstrans_count_cv,
543					  &fstrans_lock, hz / 4);
544		if (error) {
545			new_state = fmi->fmi_state = FSTRANS_NORMAL;
546			break;
547		}
548	}
549	cv_broadcast(&fstrans_state_cv);
550	mutex_exit(&fstrans_lock);
551
552	if (old_state != new_state) {
553		if (old_state == FSTRANS_NORMAL)
554			_fstrans_start(mp, FSTRANS_EXCL, 1);
555		if (new_state == FSTRANS_NORMAL)
556			fstrans_done(mp);
557	}
558
559	return error;
560}
561
562/*
563 * Get current file system state.
564 */
565enum fstrans_state
566fstrans_getstate(struct mount *mp)
567{
568	struct fstrans_mount_info *fmi;
569
570	fmi = mp->mnt_transinfo;
571	KASSERT(fmi != NULL);
572
573	return fmi->fmi_state;
574}
575
576/*
577 * Request a filesystem to suspend all operations.
578 */
579int
580vfs_suspend(struct mount *mp, int nowait)
581{
582	int error;
583
584	if ((mp = fstrans_normalize_mount(mp)) == NULL)
585		return EOPNOTSUPP;
586	if (nowait) {
587		if (!mutex_tryenter(&vfs_suspend_lock))
588			return EWOULDBLOCK;
589	} else
590		mutex_enter(&vfs_suspend_lock);
591
592	if ((error = VFS_SUSPENDCTL(mp, SUSPEND_SUSPEND)) != 0)
593		mutex_exit(&vfs_suspend_lock);
594
595	return error;
596}
597
598/*
599 * Request a filesystem to resume all operations.
600 */
601void
602vfs_resume(struct mount *mp)
603{
604
605	mp = fstrans_normalize_mount(mp);
606	KASSERT(mp != NULL);
607
608	VFS_SUSPENDCTL(mp, SUSPEND_RESUME);
609	mutex_exit(&vfs_suspend_lock);
610}
611
612
613/*
614 * True, if no thread is running a cow handler.
615 */
616static bool
617cow_state_change_done(const struct mount *mp)
618{
619	struct fstrans_lwp_info *fli;
620	struct fstrans_mount_info *fmi __diagused;
621
622	fmi = mp->mnt_transinfo;
623
624	KASSERT(mutex_owned(&fstrans_lock));
625	KASSERT(fmi->fmi_cow_change);
626
627	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
628		if (fli->fli_mount != mp)
629			continue;
630		if (fli->fli_cow_cnt == 0)
631			continue;
632
633		return false;
634	}
635
636	return true;
637}
638
639/*
640 * Prepare for changing this mounts cow list.
641 * Returns with fstrans_lock locked.
642 */
643static void
644cow_change_enter(const struct mount *mp)
645{
646	struct fstrans_mount_info *fmi;
647
648	fmi = mp->mnt_transinfo;
649
650	mutex_enter(&fstrans_lock);
651
652	/*
653	 * Wait for other threads changing the list.
654	 */
655	while (fmi->fmi_cow_change)
656		cv_wait(&fstrans_state_cv, &fstrans_lock);
657
658	/*
659	 * Wait until all threads are aware of a state change.
660	 */
661	fmi->fmi_cow_change = true;
662	pserialize_perform(fstrans_psz);
663
664	while (! cow_state_change_done(mp))
665		cv_wait(&fstrans_count_cv, &fstrans_lock);
666}
667
668/*
669 * Done changing this mounts cow list.
670 */
671static void
672cow_change_done(const struct mount *mp)
673{
674	struct fstrans_mount_info *fmi;
675
676	KASSERT(mutex_owned(&fstrans_lock));
677
678	fmi = mp->mnt_transinfo;
679
680	fmi->fmi_cow_change = false;
681	pserialize_perform(fstrans_psz);
682
683	cv_broadcast(&fstrans_state_cv);
684
685	mutex_exit(&fstrans_lock);
686}
687
688/*
689 * Add a handler to this mount.
690 */
691int
692fscow_establish(struct mount *mp, int (*func)(void *, struct buf *, bool),
693    void *arg)
694{
695	struct fstrans_mount_info *fmi;
696	struct fscow_handler *newch;
697
698	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
699		return EINVAL;
700
701	fmi = mp->mnt_transinfo;
702	KASSERT(fmi != NULL);
703
704	newch = kmem_alloc(sizeof(*newch), KM_SLEEP);
705	newch->ch_func = func;
706	newch->ch_arg = arg;
707
708	cow_change_enter(mp);
709	LIST_INSERT_HEAD(&fmi->fmi_cow_handler, newch, ch_list);
710	cow_change_done(mp);
711
712	return 0;
713}
714
715/*
716 * Remove a handler from this mount.
717 */
718int
719fscow_disestablish(struct mount *mp, int (*func)(void *, struct buf *, bool),
720    void *arg)
721{
722	struct fstrans_mount_info *fmi;
723	struct fscow_handler *hp = NULL;
724
725	if ((mp->mnt_iflag & IMNT_HAS_TRANS) == 0)
726		return EINVAL;
727
728	fmi = mp->mnt_transinfo;
729	KASSERT(fmi != NULL);
730
731	cow_change_enter(mp);
732	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
733		if (hp->ch_func == func && hp->ch_arg == arg)
734			break;
735	if (hp != NULL) {
736		LIST_REMOVE(hp, ch_list);
737		kmem_free(hp, sizeof(*hp));
738	}
739	cow_change_done(mp);
740
741	return hp ? 0 : EINVAL;
742}
743
744/*
745 * Check for need to copy block that is about to be written.
746 */
747int
748fscow_run(struct buf *bp, bool data_valid)
749{
750	int error, s;
751	struct mount *mp;
752	struct fstrans_lwp_info *fli;
753	struct fstrans_mount_info *fmi;
754	struct fscow_handler *hp;
755
756	/*
757	 * First check if we need run the copy-on-write handler.
758	 */
759	if ((bp->b_flags & B_COWDONE))
760		return 0;
761	if (bp->b_vp == NULL) {
762		bp->b_flags |= B_COWDONE;
763		return 0;
764	}
765	if (bp->b_vp->v_type == VBLK)
766		mp = spec_node_getmountedfs(bp->b_vp);
767	else
768		mp = bp->b_vp->v_mount;
769	if (mp == NULL || (mp->mnt_iflag & IMNT_HAS_TRANS) == 0) {
770		bp->b_flags |= B_COWDONE;
771		return 0;
772	}
773
774	fli = fstrans_get_lwp_info(mp, true);
775	fmi = mp->mnt_transinfo;
776
777	/*
778	 * On non-recursed run check if other threads
779	 * want to change the list.
780	 */
781	if (fli->fli_cow_cnt == 0) {
782		s = pserialize_read_enter();
783		if (__predict_false(fmi->fmi_cow_change)) {
784			pserialize_read_exit(s);
785			mutex_enter(&fstrans_lock);
786			while (fmi->fmi_cow_change)
787				cv_wait(&fstrans_state_cv, &fstrans_lock);
788			fli->fli_cow_cnt = 1;
789			mutex_exit(&fstrans_lock);
790		} else {
791			fli->fli_cow_cnt = 1;
792			pserialize_read_exit(s);
793		}
794	} else
795		fli->fli_cow_cnt += 1;
796
797	/*
798	 * Run all copy-on-write handlers, stop on error.
799	 */
800	error = 0;
801	LIST_FOREACH(hp, &fmi->fmi_cow_handler, ch_list)
802		if ((error = (*hp->ch_func)(hp->ch_arg, bp, data_valid)) != 0)
803			break;
804 	if (error == 0)
805 		bp->b_flags |= B_COWDONE;
806
807	/*
808	 * Check if other threads want to change the list.
809	 */
810	if (fli->fli_cow_cnt > 1) {
811		fli->fli_cow_cnt -= 1;
812	} else {
813		s = pserialize_read_enter();
814		if (__predict_false(fmi->fmi_cow_change)) {
815			pserialize_read_exit(s);
816			mutex_enter(&fstrans_lock);
817			fli->fli_cow_cnt = 0;
818			cv_signal(&fstrans_count_cv);
819			mutex_exit(&fstrans_lock);
820		} else {
821			fli->fli_cow_cnt = 0;
822			pserialize_read_exit(s);
823		}
824	}
825
826	return error;
827}
828
829#if defined(DDB)
830void fstrans_dump(int);
831
832static void
833fstrans_print_lwp(struct proc *p, struct lwp *l, int verbose)
834{
835	char prefix[9];
836	struct fstrans_lwp_info *fli;
837
838	snprintf(prefix, sizeof(prefix), "%d.%d", p->p_pid, l->l_lid);
839	LIST_FOREACH(fli, &fstrans_fli_head, fli_list) {
840		if (fli->fli_self != l)
841			continue;
842		if (fli->fli_trans_cnt == 0 && fli->fli_cow_cnt == 0) {
843			if (! verbose)
844				continue;
845		}
846		printf("%-8s", prefix);
847		if (verbose)
848			printf(" @%p", fli);
849		if (fli->fli_mount != NULL)
850			printf(" (%s)", fli->fli_mount->mnt_stat.f_mntonname);
851		else
852			printf(" NULL");
853		if (fli->fli_trans_cnt == 0) {
854			printf(" -");
855		} else {
856			switch (fli->fli_lock_type) {
857			case FSTRANS_SHARED:
858				printf(" shared");
859				break;
860			case FSTRANS_EXCL:
861				printf(" excl");
862				break;
863			default:
864				printf(" %#x", fli->fli_lock_type);
865				break;
866			}
867		}
868		printf(" %d cow %d\n", fli->fli_trans_cnt, fli->fli_cow_cnt);
869		prefix[0] = '\0';
870	}
871}
872
873static void
874fstrans_print_mount(struct mount *mp, int verbose)
875{
876	struct fstrans_mount_info *fmi;
877
878	fmi = mp->mnt_transinfo;
879	if (!verbose && (fmi == NULL || fmi->fmi_state == FSTRANS_NORMAL))
880		return;
881
882	printf("%-16s ", mp->mnt_stat.f_mntonname);
883	if (fmi == NULL) {
884		printf("(null)\n");
885		return;
886	}
887	switch (fmi->fmi_state) {
888	case FSTRANS_NORMAL:
889		printf("state normal\n");
890		break;
891	case FSTRANS_SUSPENDED:
892		printf("state suspended\n");
893		break;
894	default:
895		printf("state %#x\n", fmi->fmi_state);
896		break;
897	}
898}
899
900void
901fstrans_dump(int full)
902{
903	const struct proclist_desc *pd;
904	struct proc *p;
905	struct lwp *l;
906	struct mount *mp;
907
908	printf("Fstrans locks by lwp:\n");
909	for (pd = proclists; pd->pd_list != NULL; pd++)
910		PROCLIST_FOREACH(p, pd->pd_list)
911			LIST_FOREACH(l, &p->p_lwps, l_sibling)
912				fstrans_print_lwp(p, l, full == 1);
913
914	printf("Fstrans state by mount:\n");
915	for (mp = _mountlist_next(NULL); mp; mp = _mountlist_next(mp))
916		fstrans_print_mount(mp, full == 1);
917}
918#endif /* defined(DDB) */
919