/*	$NetBSD: lfs_subr.c,v 1.51 2005/04/01 21:59:46 perseant Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.51 2005/04/01 21:59:46 perseant Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <ufs/ufs/inode.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

#include <uvm/uvm.h>

/*
 * Return a buffer with the contents of block "offset" from the beginning
 * of directory "vp".  If "res" is non-NULL, fill it in with a pointer to
 * the remaining space in the directory.
 */
int
lfs_blkatoff(void *v)
{
	struct vop_blkatoff_args /* {
		struct vnode *a_vp;
		off_t a_offset;
		char **a_res;
		struct buf **a_bpp;
		} */ *ap = v;
	struct lfs *fs;
	struct inode *ip;
	struct buf *bp;
	daddr_t lbn;
	int bsize, error;

	ip = VTOI(ap->a_vp);
	fs = ip->i_lfs;
	lbn = lblkno(fs, ap->a_offset);
	bsize = blksize(fs, ip, lbn);

	*ap->a_bpp = NULL;
	if ((error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) != 0) {
		brelse(bp);
		return (error);
	}
	if (ap->a_res)
		*ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset);
	*ap->a_bpp = bp;
	return (0);
}

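/*
 * Tables describing the last-resort reserve buffers allocated at mount
 * time: a human-readable name for each buffer type (DEBUG only) and the
 * number of buffers reserved per type.  lfs_malloc() falls back on these
 * buffers when a non-waiting malloc fails.
 */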
#ifdef DEBUG
char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"ifile block",
	"cluster",
	"clean",
};
#endif

int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
};

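/*
 * Allocate the per-filesystem reserve buffers, sized per type, and
 * initialize the cluster, segment, and block-pointer pools.
 */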
void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	fs->lfs_resblk = (res_t *)malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
					  M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);

	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = fs->lfs_sumsize;
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = fs->lfs_bsize;
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;

	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
					     M_SEGMENT, M_WAITOK);
	}

	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
		"lfsclpl", &pool_allocator_nointr);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
		"lfssegpool", &pool_allocator_nointr);
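	/*
	 * Size the block pointer array for the largest segment write we
	 * could do: one pointer per block entry that fits in a summary
	 * block, capped at the number of fragments in a segment, plus
	 * a little slop.
	 */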
	maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, segsize(fs) / fs->lfs_fsize + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
		"lfsbpppl", &pool_allocator_nointr);
}

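/*
 * Tear down the pools and free the reserve buffers, waiting for any
 * buffer that is still in use to be returned first.
 */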
void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	simple_lock(&fs->lfs_interlock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		while (fs->lfs_resblk[i].inuse)
			ltsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
				&fs->lfs_interlock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	simple_unlock(&fs->lfs_interlock);
}

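/*
 * Hash a buffer address into the reserve-block hash table, so that
 * lfs_free() can tell whether a pointer is one of our reserve buffers.
 */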
static unsigned int
lfs_mhash(void *vp)
{
	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
}

/*
 * Return memory of the given size for the given purpose.  If malloc
 * fails, fall back on one of a number of spare last-resort buffers
 * set aside at mount time.
 */
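/*
 * Illustrative only (not code from this file): callers pair lfs_malloc()
 * with lfs_free(), passing the same LFS_NB_* type constant to both, e.g.
 *
 *	void *sum = lfs_malloc(fs, fs->lfs_sumsize, LFS_NB_SUMMARY);
 *	...use the summary block...
 *	lfs_free(fs, sum, LFS_NB_SUMMARY);
 */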
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, s, start;
	unsigned int h;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If no reserve memory is set aside for this type, just wait */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */

	simple_lock(&fs->lfs_interlock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				KASSERT(re->size >= size);
				h = lfs_mhash(r);
				s = splbio();
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				splx(s);
				simple_unlock(&fs->lfs_interlock);
				return r;
			}
		}
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		      lfs_res_names[type], lfs_res_qty[type]));
		ltsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			&fs->lfs_interlock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		      lfs_res_names[type]));
	}
	/* NOTREACHED */
	simple_unlock(&fs->lfs_interlock);
	return r;
}

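/*
 * Release memory obtained from lfs_malloc().  If the pointer is one of
 * our reserve buffers, mark it available again and wake any waiters;
 * otherwise it came from malloc, so free it normally.
 */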
void
lfs_free(struct lfs *fs, void *p, int type)
{
	int s;
	unsigned int h;
	res_t *re;
#ifdef DEBUG
	int i;
#endif

	ASSERT_MAYBE_SEGLOCK(fs);
	h = lfs_mhash(p);
	simple_lock(&fs->lfs_interlock);
	s = splbio();
	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
		if (re->p == p) {
			KASSERT(re->inuse == 1);
			LIST_REMOVE(re, res);
			re->inuse = 0;
			wakeup(&fs->lfs_resblk);
			splx(s);
			simple_unlock(&fs->lfs_interlock);
			return;
		}
	}
#ifdef DEBUG
	for (i = 0; i < LFS_N_TOTAL; i++) {
		if (fs->lfs_resblk[i].p == p)
			panic("lfs_free: inconsistent reserved block");
	}
#endif
	splx(s);
	simple_unlock(&fs->lfs_interlock);

	/*
	 * If we didn't find it, free it.
	 */
	free(p, M_SEGMENT);
}

/*
 * lfs_seglock --
 *	Single thread the segment writer.  The lock may be taken
 *	recursively by the process that already holds it; the
 *	pagedaemon gets EWOULDBLOCK instead of being put to sleep.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;

	simple_lock(&fs->lfs_interlock);
	if (fs->lfs_seglock) {
		if (fs->lfs_lockpid == curproc->p_pid) {
			simple_unlock(&fs->lfs_interlock);
			++fs->lfs_seglock;
			fs->lfs_sp->seg_flags |= flags;
			return 0;
		} else if (flags & SEGM_PAGEDAEMON) {
			simple_unlock(&fs->lfs_interlock);
			return EWOULDBLOCK;
		} else {
			while (fs->lfs_seglock) {
				(void)ltsleep(&fs->lfs_seglock, PRIBIO + 1,
					"lfs seglock", 0, &fs->lfs_interlock);
			}
		}
	}

	fs->lfs_seglock = 1;
	fs->lfs_lockpid = curproc->p_pid;
	simple_unlock(&fs->lfs_interlock);
	fs->lfs_cleanind = 0;

#ifdef DEBUG
	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);
#endif
	/* Drain fragment size changes out */
	lockmgr(&fs->lfs_fraglock, LK_EXCLUSIVE, 0);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	(void) lfs_initseg(fs);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	simple_lock(&fs->lfs_interlock);
	++fs->lfs_iocount;
	simple_unlock(&fs->lfs_interlock);
	return 0;
}

static void lfs_unmark_dirop(struct lfs *);

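/*
 * Walk the list of vnodes with pending directory operations and, for
 * each one whose dirop has completed, clear VDIROP, take it off the
 * chain, and release the reference the dirop held.  The LFS_UNDIROP
 * flag ensures only one process does this at a time.
 */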
static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	int doit;

	ASSERT_NO_SEGLOCK(fs);
	simple_lock(&fs->lfs_interlock);
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	if (!doit) {
		simple_unlock(&fs->lfs_interlock);
		return;
	}

	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		simple_unlock(&fs->lfs_interlock);
		vp = ITOV(ip);

		simple_lock(&vp->v_interlock);
		if (VOP_ISLOCKED(vp) &&
			   vp->v_lock.lk_lockholder != curproc->p_pid) {
			simple_lock(&fs->lfs_interlock);
			simple_unlock(&vp->v_interlock);
			continue;
		}
		if ((VTOI(vp)->i_flag & IN_ADIROP) == 0) {
			simple_lock(&fs->lfs_interlock);
			simple_lock(&lfs_subsys_lock);
			--lfs_dirvcount;
			simple_unlock(&lfs_subsys_lock);
			vp->v_flag &= ~VDIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			simple_unlock(&fs->lfs_interlock);
			wakeup(&lfs_dirvcount);
			simple_unlock(&vp->v_interlock);
			simple_lock(&fs->lfs_interlock);
			fs->lfs_unlockvp = vp;
			simple_unlock(&fs->lfs_interlock);
			vrele(vp);
			simple_lock(&fs->lfs_interlock);
			fs->lfs_unlockvp = NULL;
			simple_unlock(&fs->lfs_interlock);
		} else
			simple_unlock(&vp->v_interlock);
		simple_lock(&fs->lfs_interlock);
	}

	fs->lfs_flags &= ~LFS_UNDIROP;
	simple_unlock(&fs->lfs_interlock);
	wakeup(&fs->lfs_flags);
}

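/*
 * After a checkpoint, mark clean those segments that both superblock
 * views agree are dirty but empty, i.e. contain no live data.
 */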
static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, error, s, waited;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that we've swapped lfs_activesb, but while we still
	 * hold the segment lock, run through the segment list marking
	 * the empty ones clean.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < fs->lfs_nseg; i++) {
		if ((fs->lfs_suflags[0][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
		    (fs->lfs_suflags[1][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {

			/* Make sure the sb is written before we clean */
			simple_lock(&fs->lfs_interlock);
			s = splbio();
			while (waited == 0 && fs->lfs_sbactive)
				ltsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
					0, &fs->lfs_interlock);
			splx(s);
			simple_unlock(&fs->lfs_interlock);
			waited = 1;

			if ((error = lfs_do_segclean(fs, i)) != 0) {
				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
			}
		}
		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
			fs->lfs_suflags[fs->lfs_activesb][i];
	}
}

/*
 * lfs_segunlock --
 *	Release the segment lock.  On the final unlock, free the
 *	segment structure, wait for I/O if this was a synchronous
 *	write, and write the superblocks if this was a checkpoint.
 */
void
lfs_segunlock(struct lfs *fs)
{
	struct segment *sp;
	unsigned long sync, ckp;
	struct buf *bp;
	int do_unmark_dirop = 0;

	sp = fs->lfs_sp;

	simple_lock(&fs->lfs_interlock);
	LOCK_ASSERT(LFS_SEGLOCK_HELD(fs));
	if (fs->lfs_seglock == 1) {
		if ((sp->seg_flags & SEGM_PROT) == 0)
			do_unmark_dirop = 1;
		simple_unlock(&fs->lfs_interlock);
		sync = sp->seg_flags & SEGM_SYNC;
		ckp = sp->seg_flags & SEGM_CKP;
		if (sp->bpp != sp->cbpp) {
			/* Free allocated segment summary */
			fs->lfs_offset -= btofsb(fs, fs->lfs_sumsize);
			bp = *sp->bpp;
			lfs_freebuf(fs, bp);
		} else
			DLOG((DLOG_SEG, "lfs_segunlock: unlock to 0 with no summary"));

		pool_put(&fs->lfs_bpppool, sp->bpp);
		sp->bpp = NULL;

		/*
		 * If we're not sync, we're done with sp, get rid of it.
		 * Otherwise, we keep a local copy around but free
		 * fs->lfs_sp so another process can use it (we have to
		 * wait but they don't have to wait for us).
		 */
		if (!sync)
			pool_put(&fs->lfs_segpool, sp);
		fs->lfs_sp = NULL;

		/*
		 * If the I/O count is non-zero, sleep until it reaches zero.
		 * At the moment, the user's process hangs around so we can
		 * sleep.
		 */
		simple_lock(&fs->lfs_interlock);
		if (--fs->lfs_iocount == 0)
			LFS_DEBUG_COUNTLOCKED("lfs_segunlock");
		if (fs->lfs_iocount <= 1)
			wakeup(&fs->lfs_iocount);
		simple_unlock(&fs->lfs_interlock);
		/*
		 * If we're not checkpointing, we don't have to block
		 * other processes to wait for a synchronous write
		 * to complete.
		 */
		if (!ckp) {
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			simple_lock(&fs->lfs_interlock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			simple_unlock(&fs->lfs_interlock);
			wakeup(&fs->lfs_seglock);
		}
		/*
		 * We let checkpoints happen asynchronously.  That means
		 * that during recovery, we have to roll forward between
		 * the two segments described by the first and second
		 * superblocks to make sure that the checkpoint described
		 * by a superblock completed.
		 */
		simple_lock(&fs->lfs_interlock);
		while (ckp && sync && fs->lfs_iocount)
			(void)ltsleep(&fs->lfs_iocount, PRIBIO + 1,
				      "lfs_iocount", 0, &fs->lfs_interlock);
		while (sync && sp->seg_iocount) {
			(void)ltsleep(&sp->seg_iocount, PRIBIO + 1,
				     "seg_iocount", 0, &fs->lfs_interlock);
			DLOG((DLOG_SEG, "sleeping on iocount %p == %d\n", sp, sp->seg_iocount));
		}
		simple_unlock(&fs->lfs_interlock);
		if (sync)
			pool_put(&fs->lfs_segpool, sp);

		if (ckp) {
			fs->lfs_nactive = 0;
			/* If we *know* everything's on disk, write both sbs */
			/* XXX should wait for this one	 */
			if (sync)
				lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]);
			lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]);
			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
				lfs_auto_segclean(fs);
				/* If sync, we can clean the remainder too */
				if (sync)
					lfs_auto_segclean(fs);
			}
			fs->lfs_activesb = 1 - fs->lfs_activesb;
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			simple_lock(&fs->lfs_interlock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			simple_unlock(&fs->lfs_interlock);
			wakeup(&fs->lfs_seglock);
		}
		/* Reenable fragment size changes */
		lockmgr(&fs->lfs_fraglock, LK_RELEASE, 0);
		if (do_unmark_dirop)
			lfs_unmark_dirop(fs);
	} else if (fs->lfs_seglock == 0) {
		simple_unlock(&fs->lfs_interlock);
		panic("Seglock not held");
	} else {
		--fs->lfs_seglock;
		simple_unlock(&fs->lfs_interlock);
	}
}

/*
 * Drain directory operations and start the writer.
 */
int
lfs_writer_enter(struct lfs *fs, const char *wmesg)
{
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	simple_lock(&fs->lfs_interlock);

	/* disallow dirops during flush */
	fs->lfs_writer++;

	while (fs->lfs_dirops > 0) {
		++fs->lfs_diropwait;
		error = ltsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
				&fs->lfs_interlock);
		--fs->lfs_diropwait;
	}

	if (error)
		fs->lfs_writer--;

	simple_unlock(&fs->lfs_interlock);

	return error;
}

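/*
 * Release the writer.  When the last writer leaves, wake up any
 * processes waiting to start directory operations.
 */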
void
lfs_writer_leave(struct lfs *fs)
{
	boolean_t dowakeup;

	ASSERT_MAYBE_SEGLOCK(fs);
	simple_lock(&fs->lfs_interlock);
	dowakeup = !(--fs->lfs_writer);
	simple_unlock(&fs->lfs_interlock);
	if (dowakeup)
		wakeup(&fs->lfs_dirops);
}