lfs_subr.c revision 1.82
/*	$NetBSD: lfs_subr.c,v 1.82 2015/07/24 06:56:42 dholland Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Konrad E. Schroder <perseant@hhhh.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)lfs_subr.c	8.4 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: lfs_subr.c,v 1.82 2015/07/24 06:56:42 dholland Exp $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/kauth.h>

#include <ufs/lfs/ulfs_inode.h>
#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_kernel.h>
#include <ufs/lfs/lfs_extern.h>

#include <uvm/uvm.h>

#ifdef DEBUG
const char *lfs_res_names[LFS_NB_COUNT] = {
	"summary",
	"superblock",
	"file block",
	"cluster",
	"clean",
	"blkiov",
};
#endif

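/*
 * How many reserve blocks of each type to set aside, indexed by the
 * LFS_NB_* reserve types used by lfs_malloc() (same order as
 * lfs_res_names above).
 */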
int lfs_res_qty[LFS_NB_COUNT] = {
	LFS_N_SUMMARIES,
	LFS_N_SBLOCKS,
	LFS_N_IBLOCKS,
	LFS_N_CLUSTERS,
	LFS_N_CLEAN,
	LFS_N_BLKIOV,
};

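/*
 * Allocate this filesystem's array of reserve blocks, which lfs_malloc()
 * falls back on when ordinary kernel memory is unavailable, and create
 * the pools for cluster, segment and block-pointer-array structures.
 */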
void
lfs_setup_resblks(struct lfs *fs)
{
	int i, j;
	int maxbpp;

	ASSERT_NO_SEGLOCK(fs);
	fs->lfs_resblk = malloc(LFS_N_TOTAL * sizeof(res_t), M_SEGMENT,
				M_WAITOK);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].inuse = 0;
		fs->lfs_resblk[i].p = NULL;
	}
	for (i = 0; i < LFS_RESHASH_WIDTH; i++)
		LIST_INIT(fs->lfs_reshash + i);

	/*
	 * These types of allocations can be larger than a page,
	 * so we can't use the pool subsystem for them.
	 */
	for (i = 0, j = 0; j < LFS_N_SUMMARIES; j++, i++)
		fs->lfs_resblk[i].size = fs->lfs_sumsize;
	for (j = 0; j < LFS_N_SBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = LFS_SBPAD;
	for (j = 0; j < LFS_N_IBLOCKS; j++, i++)
		fs->lfs_resblk[i].size = lfs_sb_getbsize(fs);
	for (j = 0; j < LFS_N_CLUSTERS; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_CLEAN; j++, i++)
		fs->lfs_resblk[i].size = MAXPHYS;
	for (j = 0; j < LFS_N_BLKIOV; j++, i++)
		fs->lfs_resblk[i].size = LFS_MARKV_MAXBLKCNT * sizeof(BLOCK_INFO);

	for (i = 0; i < LFS_N_TOTAL; i++) {
		fs->lfs_resblk[i].p = malloc(fs->lfs_resblk[i].size,
					     M_SEGMENT, M_WAITOK);
	}

	/*
	 * Initialize pools for small types (XXX is BPP small?)
	 */
	pool_init(&fs->lfs_clpool, sizeof(struct lfs_cluster), 0, 0, 0,
		"lfsclpl", &pool_allocator_nointr, IPL_NONE);
	pool_init(&fs->lfs_segpool, sizeof(struct segment), 0, 0, 0,
		"lfssegpool", &pool_allocator_nointr, IPL_NONE);
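	/*
	 * Size the block pointer (bpp) arrays for a whole partial segment:
	 * at most one pointer per block that the summary block can
	 * describe, and no more than the number of fragment-sized blocks
	 * in a segment, plus a little slack.
	 */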
	maxbpp = ((fs->lfs_sumsize - SEGSUM_SIZE(fs)) / sizeof(int32_t) + 2);
	maxbpp = MIN(maxbpp, lfs_segsize(fs) / lfs_sb_getfsize(fs) + 2);
	pool_init(&fs->lfs_bpppool, maxbpp * sizeof(struct buf *), 0, 0, 0,
		"lfsbpppl", &pool_allocator_nointr, IPL_NONE);
}

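/*
 * Undo lfs_setup_resblks(): destroy the pools, wait for any reserve
 * blocks still in use, and free them along with the reserve array.
 */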
void
lfs_free_resblks(struct lfs *fs)
{
	int i;

	pool_destroy(&fs->lfs_bpppool);
	pool_destroy(&fs->lfs_segpool);
	pool_destroy(&fs->lfs_clpool);

	mutex_enter(&lfs_lock);
	for (i = 0; i < LFS_N_TOTAL; i++) {
		while (fs->lfs_resblk[i].inuse)
			mtsleep(&fs->lfs_resblk, PRIBIO + 1, "lfs_free", 0,
				&lfs_lock);
		if (fs->lfs_resblk[i].p != NULL)
			free(fs->lfs_resblk[i].p, M_SEGMENT);
	}
	free(fs->lfs_resblk, M_SEGMENT);
	mutex_exit(&lfs_lock);
}

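/*
 * Hash a reserve block address into the lfs_reshash table; the low two
 * bits are dropped since allocations are at least word-aligned.
 */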
static unsigned int
lfs_mhash(void *vp)
{
	return (unsigned int)(((unsigned long)vp) >> 2) % LFS_RESHASH_WIDTH;
}

/*
 * Return memory of the given size for the given purpose, or use one of a
 * number of spare last-resort buffers, if malloc returns NULL.
 */
void *
lfs_malloc(struct lfs *fs, size_t size, int type)
{
	struct lfs_res_blk *re;
	void *r;
	int i, s, start;
	unsigned int h;

	ASSERT_MAYBE_SEGLOCK(fs);
	r = NULL;

	/* If no mem allocated for this type, it just waits */
	if (lfs_res_qty[type] == 0) {
		r = malloc(size, M_SEGMENT, M_WAITOK);
		return r;
	}

	/* Otherwise try a quick malloc, and if it works, great */
	if ((r = malloc(size, M_SEGMENT, M_NOWAIT)) != NULL) {
		return r;
	}

	/*
	 * If malloc returned NULL, we are forced to use one of our
	 * reserve blocks.  We have on hand at least one summary block,
	 * at least one cluster block, at least one superblock,
	 * and several indirect blocks.
	 */

	mutex_enter(&lfs_lock);
	/* skip over blocks of other types */
	for (i = 0, start = 0; i < type; i++)
		start += lfs_res_qty[i];
	while (r == NULL) {
		for (i = 0; i < lfs_res_qty[type]; i++) {
			if (fs->lfs_resblk[start + i].inuse == 0) {
				re = fs->lfs_resblk + start + i;
				re->inuse = 1;
				r = re->p;
				KASSERT(re->size >= size);
				h = lfs_mhash(r);
				s = splbio();
				LIST_INSERT_HEAD(&fs->lfs_reshash[h], re, res);
				splx(s);
				mutex_exit(&lfs_lock);
				return r;
			}
		}
		DLOG((DLOG_MALLOC, "sleeping on %s (%d)\n",
		      lfs_res_names[type], lfs_res_qty[type]));
		mtsleep(&fs->lfs_resblk, PVM, "lfs_malloc", 0,
			&lfs_lock);
		DLOG((DLOG_MALLOC, "done sleeping on %s\n",
		      lfs_res_names[type]));
	}
	/* NOTREACHED */
	mutex_exit(&lfs_lock);
	return r;
}

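/*
 * Release memory obtained from lfs_malloc().  If the pointer names one of
 * our reserve blocks (it is found in the reserve hash), mark it available
 * again and wake any waiters; otherwise hand it back to the kernel
 * allocator.
 */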
void
lfs_free(struct lfs *fs, void *p, int type)
{
	int s;
	unsigned int h;
	res_t *re;
#ifdef DEBUG
	int i;
#endif

	ASSERT_MAYBE_SEGLOCK(fs);
	h = lfs_mhash(p);
	mutex_enter(&lfs_lock);
	s = splbio();
	LIST_FOREACH(re, &fs->lfs_reshash[h], res) {
		if (re->p == p) {
			KASSERT(re->inuse == 1);
			LIST_REMOVE(re, res);
			re->inuse = 0;
			wakeup(&fs->lfs_resblk);
			splx(s);
			mutex_exit(&lfs_lock);
			return;
		}
	}
#ifdef DEBUG
	for (i = 0; i < LFS_N_TOTAL; i++) {
		if (fs->lfs_resblk[i].p == p)
			panic("lfs_free: inconsistent reserved block");
	}
#endif
	splx(s);
	mutex_exit(&lfs_lock);

	/*
	 * If we didn't find it, free it.
	 */
	free(p, M_SEGMENT);
}

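/*
 * Illustrative pairing only (a sketch, not code reached from this file):
 * a caller needing a summary-sized buffer requests and releases it with
 * the same reserve type so the accounting above stays balanced.  The
 * LFS_NB_SUMMARY constant is assumed here from lfs.h:
 *
 *	void *sumbuf;
 *
 *	sumbuf = lfs_malloc(fs, fs->lfs_sumsize, LFS_NB_SUMMARY);
 *	...use at most fs->lfs_sumsize bytes...
 *	lfs_free(fs, sumbuf, LFS_NB_SUMMARY);
 */
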
/*
 * lfs_seglock --
 *	Single thread the segment writer.
 */
int
lfs_seglock(struct lfs *fs, unsigned long flags)
{
	struct segment *sp;

	mutex_enter(&lfs_lock);
	if (fs->lfs_seglock) {
		if (fs->lfs_lockpid == curproc->p_pid &&
		    fs->lfs_locklwp == curlwp->l_lid) {
			++fs->lfs_seglock;
			fs->lfs_sp->seg_flags |= flags;
			mutex_exit(&lfs_lock);
			return 0;
		} else if (flags & SEGM_PAGEDAEMON) {
			mutex_exit(&lfs_lock);
			return EWOULDBLOCK;
		} else {
			while (fs->lfs_seglock) {
				(void)mtsleep(&fs->lfs_seglock, PRIBIO + 1,
					"lfs_seglock", 0, &lfs_lock);
			}
		}
	}

	fs->lfs_seglock = 1;
	fs->lfs_lockpid = curproc->p_pid;
	fs->lfs_locklwp = curlwp->l_lid;
	mutex_exit(&lfs_lock);
	fs->lfs_cleanind = 0;

#ifdef DEBUG
	LFS_ENTER_LOG("seglock", __FILE__, __LINE__, 0, flags, curproc->p_pid);
#endif
	/* Drain fragment size changes out */
	rw_enter(&fs->lfs_fraglock, RW_WRITER);

	sp = fs->lfs_sp = pool_get(&fs->lfs_segpool, PR_WAITOK);
	sp->bpp = pool_get(&fs->lfs_bpppool, PR_WAITOK);
	sp->seg_flags = flags;
	sp->vp = NULL;
	sp->seg_iocount = 0;
	(void) lfs_initseg(fs);

	/*
	 * Keep a cumulative count of the outstanding I/O operations.  If the
	 * disk drive catches up with us it could go to zero before we finish,
	 * so we artificially increment it by one until we've scheduled all of
	 * the writes we intend to do.
	 */
	mutex_enter(&lfs_lock);
	++fs->lfs_iocount;
	fs->lfs_startseg = lfs_sb_getcurseg(fs);
	mutex_exit(&lfs_lock);
	return 0;
}
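
/*
 * Sketch of typical use (illustrative only; real callers pick their own
 * flags and may see EWOULDBLOCK when calling as the pagedaemon):
 *
 *	if (lfs_seglock(fs, SEGM_PROT) == 0) {
 *		...gather and write one or more partial segments...
 *		lfs_segunlock(fs);
 *	}
 *
 * The lock is recursive for the holder, so nested lfs_seglock() calls must
 * be balanced by an equal number of lfs_segunlock() calls.
 */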

static void lfs_unmark_dirop(struct lfs *);

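/*
 * For each inode on the dirop chain whose directory operation has
 * completed (IN_CDIROP set, IN_ADIROP clear), drop it from the chain,
 * clear VU_DIROP, and release the vnode reference held on its behalf.
 */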
static void
lfs_unmark_dirop(struct lfs *fs)
{
	struct inode *ip, *nip;
	struct vnode *vp;
	int doit;

	ASSERT_NO_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	doit = !(fs->lfs_flags & LFS_UNDIROP);
	if (doit)
		fs->lfs_flags |= LFS_UNDIROP;
	if (!doit) {
		mutex_exit(&lfs_lock);
		return;
	}

	for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) {
		nip = TAILQ_NEXT(ip, i_lfs_dchain);
		vp = ITOV(ip);
		if ((ip->i_flag & (IN_ADIROP | IN_CDIROP)) == IN_CDIROP) {
			--lfs_dirvcount;
			--fs->lfs_dirvcount;
			vp->v_uflag &= ~VU_DIROP;
			TAILQ_REMOVE(&fs->lfs_dchainhd, ip, i_lfs_dchain);
			wakeup(&lfs_dirvcount);
			fs->lfs_unlockvp = vp;
			mutex_exit(&lfs_lock);
			vrele(vp);
			mutex_enter(&lfs_lock);
			fs->lfs_unlockvp = NULL;
			ip->i_flag &= ~IN_CDIROP;
		}
	}

	fs->lfs_flags &= ~LFS_UNDIROP;
	wakeup(&fs->lfs_flags);
	mutex_exit(&lfs_lock);
}

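/*
 * Called with the segment lock held after a checkpoint: mark clean every
 * segment that both sets of segment-usage flags record as dirty but
 * empty, then bring the inactive flag set up to date with the active one.
 */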
static void
lfs_auto_segclean(struct lfs *fs)
{
	int i, error, s, waited;

	ASSERT_SEGLOCK(fs);
	/*
	 * Now that we've swapped lfs_activesb, but while we still
	 * hold the segment lock, run through the segment list marking
	 * the empty ones clean.
	 * XXX - do we really need to do them all at once?
	 */
	waited = 0;
	for (i = 0; i < fs->lfs_nseg; i++) {
		if ((fs->lfs_suflags[0][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY) &&
		    (fs->lfs_suflags[1][i] &
		     (SEGUSE_ACTIVE | SEGUSE_DIRTY | SEGUSE_EMPTY)) ==
		    (SEGUSE_DIRTY | SEGUSE_EMPTY)) {

			/* Make sure the sb is written before we clean */
			mutex_enter(&lfs_lock);
			s = splbio();
			while (waited == 0 && fs->lfs_sbactive)
				mtsleep(&fs->lfs_sbactive, PRIBIO+1, "lfs asb",
					0, &lfs_lock);
			splx(s);
			mutex_exit(&lfs_lock);
			waited = 1;

			if ((error = lfs_do_segclean(fs, i)) != 0) {
				DLOG((DLOG_CLEAN, "lfs_auto_segclean: lfs_do_segclean returned %d for seg %d\n", error, i));
			}
		}
		fs->lfs_suflags[1 - fs->lfs_activesb][i] =
			fs->lfs_suflags[fs->lfs_activesb][i];
	}
}

/*
 * lfs_segunlock --
 *	Release the segment lock.  On the final release, free the segment
 *	state, write the superblocks if this was a checkpoint, and wake up
 *	anyone waiting to take the lock.
 */
void
lfs_segunlock(struct lfs *fs)
{
	struct segment *sp;
	unsigned long sync, ckp;
	struct buf *bp;
	int do_unmark_dirop = 0;

	sp = fs->lfs_sp;

	mutex_enter(&lfs_lock);
	KASSERT(LFS_SEGLOCK_HELD(fs));
	if (fs->lfs_seglock == 1) {
		if ((sp->seg_flags & (SEGM_PROT | SEGM_CLEAN)) == 0)
			do_unmark_dirop = 1;
		mutex_exit(&lfs_lock);
		sync = sp->seg_flags & SEGM_SYNC;
		ckp = sp->seg_flags & SEGM_CKP;

		/* We should have a segment summary, and nothing else */
		KASSERT(sp->cbpp == sp->bpp + 1);

		/* Free allocated segment summary */
		lfs_sb_suboffset(fs, lfs_btofsb(fs, lfs_sb_getsumsize(fs)));
		bp = *sp->bpp;
		lfs_freebuf(fs, bp);

		pool_put(&fs->lfs_bpppool, sp->bpp);
		sp->bpp = NULL;

		/*
		 * If we're not sync, we're done with sp, get rid of it.
		 * Otherwise, we keep a local copy around but free
		 * fs->lfs_sp so another process can use it (we have to
		 * wait but they don't have to wait for us).
		 */
		if (!sync)
			pool_put(&fs->lfs_segpool, sp);
		fs->lfs_sp = NULL;

		/*
		 * If the I/O count is non-zero, sleep until it reaches zero.
		 * At the moment, the user's process hangs around so we can
		 * sleep.
		 */
		mutex_enter(&lfs_lock);
		if (--fs->lfs_iocount == 0) {
			LFS_DEBUG_COUNTLOCKED("lfs_segunlock");
		}
		if (fs->lfs_iocount <= 1)
			wakeup(&fs->lfs_iocount);
		mutex_exit(&lfs_lock);
		/*
		 * If we're not checkpointing, we don't have to block
		 * other processes to wait for a synchronous write
		 * to complete.
		 */
		if (!ckp) {
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_std", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/*
		 * We let checkpoints happen asynchronously.  That means
		 * that during recovery, we have to roll forward between
		 * the two segments described by the first and second
		 * superblocks to make sure that the checkpoint described
		 * by a superblock completed.
		 */
		mutex_enter(&lfs_lock);
		while (ckp && sync && fs->lfs_iocount) {
			(void)mtsleep(&fs->lfs_iocount, PRIBIO + 1,
				      "lfs_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", fs, fs->lfs_iocount));
		}
		while (sync && sp->seg_iocount) {
			(void)mtsleep(&sp->seg_iocount, PRIBIO + 1,
				     "seg_iocount", 0, &lfs_lock);
			DLOG((DLOG_SEG, "sleeping on iocount %x == %d\n", sp, sp->seg_iocount));
		}
		mutex_exit(&lfs_lock);
		if (sync)
			pool_put(&fs->lfs_segpool, sp);

		if (ckp) {
			fs->lfs_nactive = 0;
			/* If we *know* everything's on disk, write both sbs */
			/* XXX should wait for this one	 */
			if (sync)
				lfs_writesuper(fs, fs->lfs_sboffs[fs->lfs_activesb]);
			lfs_writesuper(fs, fs->lfs_sboffs[1 - fs->lfs_activesb]);
			if (!(fs->lfs_ivnode->v_mount->mnt_iflag & IMNT_UNMOUNT)) {
				lfs_auto_segclean(fs);
				/* If sync, we can clean the remainder too */
				if (sync)
					lfs_auto_segclean(fs);
			}
			fs->lfs_activesb = 1 - fs->lfs_activesb;
#ifdef DEBUG
			LFS_ENTER_LOG("segunlock_ckp", __FILE__, __LINE__, 0, 0, curproc->p_pid);
#endif
			mutex_enter(&lfs_lock);
			--fs->lfs_seglock;
			fs->lfs_lockpid = 0;
			fs->lfs_locklwp = 0;
			mutex_exit(&lfs_lock);
			wakeup(&fs->lfs_seglock);
		}
		/* Reenable fragment size changes */
		rw_exit(&fs->lfs_fraglock);
		if (do_unmark_dirop)
			lfs_unmark_dirop(fs);
	} else if (fs->lfs_seglock == 0) {
		mutex_exit(&lfs_lock);
		panic ("Seglock not held");
	} else {
		--fs->lfs_seglock;
		mutex_exit(&lfs_lock);
	}
}

/*
 * Drain dirops and start writer.
 *
 * No simple_locks are held when we enter and none are held when we return.
 */
int
lfs_writer_enter(struct lfs *fs, const char *wmesg)
{
	int error = 0;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);

	/* disallow dirops during flush */
	fs->lfs_writer++;

	while (fs->lfs_dirops > 0) {
		++fs->lfs_diropwait;
		error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0,
				&lfs_lock);
		--fs->lfs_diropwait;
	}

	if (error)
		fs->lfs_writer--;

	mutex_exit(&lfs_lock);

	return error;
}

void
lfs_writer_leave(struct lfs *fs)
{
	bool dowakeup;

	ASSERT_MAYBE_SEGLOCK(fs);
	mutex_enter(&lfs_lock);
	dowakeup = !(--fs->lfs_writer);
	mutex_exit(&lfs_lock);
	if (dowakeup)
		wakeup(&fs->lfs_dirops);
}
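
/*
 * Illustrative pairing only (a sketch; the wait message is whatever the
 * caller chooses):
 *
 *	if (lfs_writer_enter(fs, "lfswriter") == 0) {
 *		...flush dirops / write segments...
 *		lfs_writer_leave(fs);
 *	}
 */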

/*
 * Unlock, wait for the cleaner, then relock to where we were before.
 * To be used only at a fairly high level, to address a paucity of free
 * segments propagated back from lfs_gop_write().
 */
void
lfs_segunlock_relock(struct lfs *fs)
{
	int n = fs->lfs_seglock;
	u_int16_t seg_flags;
	CLEANERINFO *cip;
	struct buf *bp;

	if (n == 0)
		return;

	/* Write anything we've already gathered to disk */
	lfs_writeseg(fs, fs->lfs_sp);

	/* Tell cleaner */
	LFS_CLEANERINFO(cip, fs, bp);
	cip->flags |= LFS_CLEANER_MUST_CLEAN;
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	/* Save segment flags for later */
	seg_flags = fs->lfs_sp->seg_flags;

	fs->lfs_sp->seg_flags |= SEGM_PROT; /* Don't unmark dirop nodes */
	while(fs->lfs_seglock)
		lfs_segunlock(fs);

	/* Wait for the cleaner */
	lfs_wakeup_cleaner(fs);
	mutex_enter(&lfs_lock);
	while (LFS_STARVED_FOR_SEGS(fs))
		mtsleep(&fs->lfs_availsleep, PRIBIO, "relock", 0,
			&lfs_lock);
	mutex_exit(&lfs_lock);

	/* Put the segment lock back the way it was. */
	while(n--)
		lfs_seglock(fs, seg_flags);

	/* Cleaner can relax now */
	LFS_CLEANERINFO(cip, fs, bp);
	cip->flags &= ~LFS_CLEANER_MUST_CLEAN;
	LFS_SYNC_CLEANERINFO(cip, fs, bp, 1);

	return;
}

/*
 * Wake up the cleaner, provided that nowrap is not set.
 */
void
lfs_wakeup_cleaner(struct lfs *fs)
{
	if (fs->lfs_nowrap > 0)
		return;

	wakeup(&fs->lfs_nextsegsleep);
	wakeup(&lfs_allclean_wakeup);
}
664