1/* $NetBSD: lfs_pages.c,v 1.27 2023/04/11 14:50:47 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2019 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Konrad E. Schroder <perseant@hhhh.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31/* 32 * Copyright (c) 1986, 1989, 1991, 1993, 1995 33 * The Regents of the University of California. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. Neither the name of the University nor the names of its contributors 44 * may be used to endorse or promote products derived from this software 45 * without specific prior written permission. 46 * 47 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 48 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 49 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 50 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 51 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 52 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 53 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 54 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 55 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 56 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 57 * SUCH DAMAGE. 58 * 59 * @(#)lfs_vnops.c 8.13 (Berkeley) 6/10/95 60 */ 61 62#include <sys/cdefs.h> 63__KERNEL_RCSID(0, "$NetBSD: lfs_pages.c,v 1.27 2023/04/11 14:50:47 riastradh Exp $"); 64 65#ifdef _KERNEL_OPT 66#include "opt_compat_netbsd.h" 67#include "opt_uvm_page_trkown.h" 68#endif 69 70#include <sys/param.h> 71#include <sys/systm.h> 72#include <sys/namei.h> 73#include <sys/resourcevar.h> 74#include <sys/kernel.h> 75#include <sys/file.h> 76#include <sys/stat.h> 77#include <sys/buf.h> 78#include <sys/proc.h> 79#include <sys/mount.h> 80#include <sys/vnode.h> 81#include <sys/pool.h> 82#include <sys/signalvar.h> 83#include <sys/kauth.h> 84#include <sys/syslog.h> 85#include <sys/fstrans.h> 86 87#include <miscfs/fifofs/fifo.h> 88#include <miscfs/genfs/genfs.h> 89#include <miscfs/specfs/specdev.h> 90 91#include <ufs/lfs/ulfs_inode.h> 92#include <ufs/lfs/ulfsmount.h> 93#include <ufs/lfs/ulfs_bswap.h> 94#include <ufs/lfs/ulfs_extern.h> 95 96#include <uvm/uvm.h> 97#include <uvm/uvm_page.h> 98#include <uvm/uvm_pager.h> 99#include <uvm/uvm_pmap.h> 100#include <uvm/uvm_stat.h> 101 102#include <ufs/lfs/lfs.h> 103#include <ufs/lfs/lfs_accessors.h> 104#include <ufs/lfs/lfs_kernel.h> 105#include <ufs/lfs/lfs_extern.h> 106 107extern kcondvar_t lfs_writerd_cv; 108 109static int check_dirty(struct lfs *, struct vnode *, off_t, off_t, off_t, int, int, struct vm_page **); 110 111int 112lfs_getpages(void *v) 113{ 114 struct vop_getpages_args /* { 115 struct vnode *a_vp; 116 voff_t a_offset; 117 struct vm_page **a_m; 118 int *a_count; 119 int a_centeridx; 120 vm_prot_t a_access_type; 121 int a_advice; 122 int a_flags; 123 } */ *ap = v; 124 125 if (VTOI(ap->a_vp)->i_number == LFS_IFILE_INUM && 126 (ap->a_access_type & VM_PROT_WRITE) != 0) { 127 return EPERM; 128 } 129 if ((ap->a_access_type & VM_PROT_WRITE) != 0) { 130 mutex_enter(&lfs_lock); 131 LFS_SET_UINO(VTOI(ap->a_vp), IN_MODIFIED); 132 mutex_exit(&lfs_lock); 133 } 134 135 /* 136 * we're relying on the fact that genfs_getpages() always read in 137 * entire filesystem blocks. 138 */ 139 return genfs_getpages(v); 140} 141 142/* 143 * Wait for a page to become unbusy, possibly printing diagnostic messages 144 * as well. 145 * 146 * Called with vp->v_uobj.vmobjlock held; return with it held. 147 */ 148static void 149wait_for_page(struct vnode *vp, struct vm_page *pg, const char *label) 150{ 151 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 152 if ((pg->flags & PG_BUSY) == 0) 153 return; /* Nothing to wait for! */ 154 155#if defined(DEBUG) && defined(UVM_PAGE_TRKOWN) 156 static struct vm_page *lastpg; 157 158 if (label != NULL && pg != lastpg) { 159 if (pg->owner_tag) { 160 printf("lfs_putpages[%d.%d]: %s: page %p owner %d.%d [%s]\n", 161 curproc->p_pid, curlwp->l_lid, label, 162 pg, pg->owner, pg->lowner, pg->owner_tag); 163 } else { 164 printf("lfs_putpages[%d.%d]: %s: page %p unowned?!\n", 165 curproc->p_pid, curlwp->l_lid, label, pg); 166 } 167 } 168 lastpg = pg; 169#endif 170 171 uvm_pagewait(pg, vp->v_uobj.vmobjlock, "lfsput"); 172 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 173} 174 175/* 176 * This routine is called by lfs_putpages() when it can't complete the 177 * write because a page is busy. This means that either (1) someone, 178 * possibly the pagedaemon, is looking at this page, and will give it up 179 * presently; or (2) we ourselves are holding the page busy in the 180 * process of being written (either gathered or actually on its way to 181 * disk). We don't need to give up the segment lock, but we might need 182 * to call lfs_writeseg() to expedite the page's journey to disk. 183 * 184 * Called with vp->v_uobj.vmobjlock held; return with it held. 185 */ 186/* #define BUSYWAIT */ 187static void 188write_and_wait(struct lfs *fs, struct vnode *vp, struct vm_page *pg, 189 int seglocked, const char *label) 190{ 191 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 192#ifndef BUSYWAIT 193 struct inode *ip = VTOI(vp); 194 struct segment *sp = fs->lfs_sp; 195 int count = 0; 196 197 if (pg == NULL) 198 return; 199 200 while (pg->flags & PG_BUSY && 201 pg->uobject == &vp->v_uobj) { 202 rw_exit(vp->v_uobj.vmobjlock); 203 if (sp->cbpp - sp->bpp > 1) { 204 /* Write gathered pages */ 205 lfs_updatemeta(sp); 206 lfs_release_finfo(fs); 207 (void) lfs_writeseg(fs, sp); 208 209 /* 210 * Reinitialize FIP 211 */ 212 KASSERT(sp->vp == vp); 213 lfs_acquire_finfo(fs, ip->i_number, 214 ip->i_gen); 215 } 216 ++count; 217 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 218 wait_for_page(vp, pg, label); 219 } 220 if (label != NULL && count > 1) { 221 DLOG((DLOG_PAGE, "lfs_putpages[%d]: %s: %sn = %d\n", 222 curproc->p_pid, label, (count > 0 ? "looping, " : ""), 223 count)); 224 } 225#else 226 preempt(1); 227#endif 228 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 229} 230 231/* 232 * Make sure that for all pages in every block in the given range, 233 * either all are dirty or all are clean. If any of the pages 234 * we've seen so far are dirty, put the vnode on the paging chain, 235 * and mark it IN_PAGING. 236 * 237 * If checkfirst != 0, don't check all the pages but return at the 238 * first dirty page. 239 */ 240static int 241check_dirty(struct lfs *fs, struct vnode *vp, 242 off_t startoffset, off_t endoffset, off_t blkeof, 243 int flags, int checkfirst, struct vm_page **pgp) 244{ 245 struct vm_page *pgs[MAXBSIZE / MIN_PAGE_SIZE], *pg; 246 off_t soff = 0; /* XXX: gcc */ 247 voff_t off; 248 int i; 249 int nonexistent; 250 int any_dirty; /* number of dirty pages */ 251 int dirty; /* number of dirty pages in a block */ 252 int tdirty; 253 int pages_per_block = lfs_sb_getbsize(fs) >> PAGE_SHIFT; 254 int pagedaemon = (curlwp == uvm.pagedaemon_lwp); 255 256 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 257 ASSERT_MAYBE_SEGLOCK(fs); 258 top: 259 any_dirty = 0; 260 261 soff = startoffset; 262 KASSERT((soff & (lfs_sb_getbsize(fs) - 1)) == 0); 263 while (soff < MIN(blkeof, endoffset)) { 264 265 /* 266 * Mark all pages in extended range busy; find out if any 267 * of them are dirty. 268 */ 269 nonexistent = dirty = 0; 270 for (i = 0; i == 0 || i < pages_per_block; i++) { 271 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 272 off = soff + (i << PAGE_SHIFT); 273 pgs[i] = pg = uvm_pagelookup(&vp->v_uobj, off); 274 if (pg == NULL) { 275 ++nonexistent; 276 continue; 277 } 278 KASSERT(pg != NULL); 279 280 /* 281 * If we're holding the segment lock, we can deadlock 282 * against a process that has our page and is waiting 283 * for the cleaner, while the cleaner waits for the 284 * segment lock. Just bail in that case. 285 */ 286 if ((pg->flags & PG_BUSY) && 287 (pagedaemon || LFS_SEGLOCK_HELD(fs))) { 288 if (i > 0) 289 uvm_page_unbusy(pgs, i); 290 DLOG((DLOG_PAGE, "lfs_putpages: avoiding 3-way or pagedaemon deadlock\n")); 291 if (pgp) 292 *pgp = pg; 293 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 294 return -1; 295 } 296 297 while (pg->flags & PG_BUSY) { 298 wait_for_page(vp, pg, NULL); 299 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 300 if (i > 0) 301 uvm_page_unbusy(pgs, i); 302 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 303 goto top; 304 } 305 pg->flags |= PG_BUSY; 306 UVM_PAGE_OWN(pg, "lfs_putpages"); 307 308 pmap_page_protect(pg, VM_PROT_NONE); 309 tdirty = 310 uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN && 311 (uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_DIRTY || 312 pmap_clear_modify(pg)); 313 dirty += tdirty; 314 } 315 if ((pages_per_block > 0 && nonexistent >= pages_per_block) || 316 (pages_per_block == 0 && nonexistent > 0)) { 317 soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs)); 318 continue; 319 } 320 321 any_dirty += dirty; 322 KASSERT(nonexistent == 0); 323 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 324 325 /* 326 * If any are dirty make all dirty; unbusy them, 327 * but if we were asked to clean, wire them so that 328 * the pagedaemon doesn't bother us about them while 329 * they're on their way to disk. 330 */ 331 for (i = 0; i == 0 || i < pages_per_block; i++) { 332 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 333 pg = pgs[i]; 334 KASSERT(!(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_DIRTY 335 && (pg->flags & PG_DELWRI))); 336 KASSERT(pg->flags & PG_BUSY); 337 if (dirty) { 338 uvm_pagemarkdirty(pg, UVM_PAGE_STATUS_DIRTY); 339 if (flags & PGO_FREE) { 340 /* 341 * Wire the page so that 342 * pdaemon doesn't see it again. 343 */ 344 uvm_pagelock(pg); 345 uvm_pagewire(pg); 346 uvm_pageunlock(pg); 347 348 /* Suspended write flag */ 349 pg->flags |= PG_DELWRI; 350 } 351 } 352 pg->flags &= ~PG_BUSY; 353 uvm_pagelock(pg); 354 uvm_pagewakeup(pg); 355 uvm_pageunlock(pg); 356 UVM_PAGE_OWN(pg, NULL); 357 } 358 359 if (checkfirst && any_dirty) 360 break; 361 362 soff += MAX(PAGE_SIZE, lfs_sb_getbsize(fs)); 363 } 364 365 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 366 return any_dirty; 367} 368 369/* 370 * lfs_putpages functions like genfs_putpages except that 371 * 372 * (1) It needs to bounds-check the incoming requests to ensure that 373 * they are block-aligned; if they are not, expand the range and 374 * do the right thing in case, e.g., the requested range is clean 375 * but the expanded range is dirty. 376 * 377 * (2) It needs to explicitly send blocks to be written when it is done. 378 * If VOP_PUTPAGES is called without the seglock held, we simply take 379 * the seglock and let lfs_segunlock wait for us. 380 * XXX There might be a bad situation if we have to flush a vnode while 381 * XXX lfs_markv is in operation. As of this writing we panic in this 382 * XXX case. 383 * 384 * Assumptions: 385 * 386 * (1) The caller does not hold any pages in this vnode busy. If it does, 387 * there is a danger that when we expand the page range and busy the 388 * pages we will deadlock. 389 * 390 * (2) We are called with vp->v_uobj.vmobjlock held; we must return with it 391 * released. 392 * 393 * (3) We don't absolutely have to free pages right away, provided that 394 * the request does not have PGO_SYNCIO. When the pagedaemon gives 395 * us a request with PGO_FREE, we take the pages out of the paging 396 * queue and wake up the writer, which will handle freeing them for us. 397 * 398 * We ensure that for any filesystem block, all pages for that 399 * block are either resident or not, even if those pages are higher 400 * than EOF; that means that we will be getting requests to free 401 * "unused" pages above EOF all the time, and should ignore them. 402 * 403 * (4) If we are called with PGO_LOCKED, the finfo array we are to write 404 * into has been set up for us by lfs_writefile. If not, we will 405 * have to handle allocating and/or freeing an finfo entry. 406 * 407 * XXX note that we're (ab)using PGO_LOCKED as "seglock held". 408 */ 409 410/* How many times to loop before we should start to worry */ 411#define TOOMANY 4 412 413int 414lfs_putpages(void *v) 415{ 416 int error; 417 struct vop_putpages_args /* { 418 struct vnode *a_vp; 419 voff_t a_offlo; 420 voff_t a_offhi; 421 int a_flags; 422 } */ *ap = v; 423 struct vnode *vp; 424 struct inode *ip; 425 struct lfs *fs; 426 struct segment *sp; 427 off_t origoffset, startoffset, endoffset, origendoffset, blkeof; 428 off_t off, max_endoffset; 429 bool seglocked, sync, pagedaemon, reclaim; 430 struct vm_page *pg, *busypg; 431 UVMHIST_FUNC("lfs_putpages"); UVMHIST_CALLED(ubchist); 432 struct mount *trans_mp; 433 int oreclaim = 0; 434 int donewriting = 0; 435#ifdef DEBUG 436 int debug_n_again, debug_n_dirtyclean; 437#endif 438 439 vp = ap->a_vp; 440 ip = VTOI(vp); 441 fs = ip->i_lfs; 442 sync = (ap->a_flags & PGO_SYNCIO) != 0; 443 reclaim = (ap->a_flags & PGO_RECLAIM) != 0; 444 pagedaemon = (curlwp == uvm.pagedaemon_lwp); 445 trans_mp = NULL; 446 447 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 448 449 /* Putpages does nothing for metadata. */ 450 if (vp == fs->lfs_ivnode || vp->v_type != VREG) { 451 rw_exit(vp->v_uobj.vmobjlock); 452 return 0; 453 } 454 455retry: 456 /* 457 * If there are no pages, don't do anything. 458 */ 459 if (vp->v_uobj.uo_npages == 0) { 460 mutex_enter(vp->v_interlock); 461 if ((vp->v_iflag & VI_ONWORKLST) && 462 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 463 vn_syncer_remove_from_worklist(vp); 464 } 465 mutex_exit(vp->v_interlock); 466 if (trans_mp) 467 fstrans_done(trans_mp); 468 rw_exit(vp->v_uobj.vmobjlock); 469 470 /* Remove us from paging queue, if we were on it */ 471 mutex_enter(&lfs_lock); 472 if (ip->i_state & IN_PAGING) { 473 ip->i_state &= ~IN_PAGING; 474 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 475 } 476 mutex_exit(&lfs_lock); 477 478 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 479 return 0; 480 } 481 482 blkeof = lfs_blkroundup(fs, ip->i_size); 483 484 /* 485 * Ignore requests to free pages past EOF but in the same block 486 * as EOF, unless the vnode is being reclaimed or the request 487 * is synchronous. (If the request is sync, it comes from 488 * lfs_truncate.) 489 * 490 * To avoid being flooded with this request, make these pages 491 * look "active". 492 */ 493 if (!sync && !reclaim && 494 ap->a_offlo >= ip->i_size && ap->a_offlo < blkeof) { 495 origoffset = ap->a_offlo; 496 for (off = origoffset; off < blkeof; off += lfs_sb_getbsize(fs)) { 497 pg = uvm_pagelookup(&vp->v_uobj, off); 498 KASSERT(pg != NULL); 499 while (pg->flags & PG_BUSY) { 500 uvm_pagewait(pg, vp->v_uobj.vmobjlock, "lfsput2"); 501 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 502 /* XXX Page can't change identity here? */ 503 KDASSERT(pg == 504 uvm_pagelookup(&vp->v_uobj, off)); 505 } 506 uvm_pagelock(pg); 507 uvm_pageactivate(pg); 508 uvm_pageunlock(pg); 509 } 510 ap->a_offlo = blkeof; 511 if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { 512 rw_exit(vp->v_uobj.vmobjlock); 513 return 0; 514 } 515 } 516 517 /* 518 * Extend page range to start and end at block boundaries. 519 * (For the purposes of VOP_PUTPAGES, fragments don't exist.) 520 */ 521 origoffset = ap->a_offlo; 522 origendoffset = ap->a_offhi; 523 startoffset = origoffset & ~(lfs_sb_getbmask(fs)); 524 max_endoffset = (trunc_page(LLONG_MAX) >> lfs_sb_getbshift(fs)) 525 << lfs_sb_getbshift(fs); 526 527 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 528 endoffset = max_endoffset; 529 origendoffset = endoffset; 530 } else { 531 origendoffset = round_page(ap->a_offhi); 532 endoffset = round_page(lfs_blkroundup(fs, origendoffset)); 533 } 534 535 KASSERT(startoffset > 0 || endoffset >= startoffset); 536 if (startoffset == endoffset) { 537 /* Nothing to do, why were we called? */ 538 rw_exit(vp->v_uobj.vmobjlock); 539 DLOG((DLOG_PAGE, "lfs_putpages: startoffset = endoffset = %" 540 PRId64 "\n", startoffset)); 541 return 0; 542 } 543 544 ap->a_offlo = startoffset; 545 ap->a_offhi = endoffset; 546 547 /* 548 * If not cleaning, just send the pages through genfs_putpages 549 * to be returned to the pool. 550 */ 551 if (!(ap->a_flags & PGO_CLEANIT)) { 552 DLOG((DLOG_PAGE, "lfs_putpages: no cleanit vn %p ino %d (flags %x)\n", 553 vp, (int)ip->i_number, ap->a_flags)); 554 int r = genfs_putpages(v); 555 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 556 return r; 557 } 558 559 if (trans_mp /* && (ap->a_flags & PGO_CLEANIT) != 0 */) { 560 if (pagedaemon) { 561 /* Pagedaemon must not sleep here. */ 562 trans_mp = vp->v_mount; 563 error = fstrans_start_nowait(trans_mp); 564 if (error) { 565 rw_exit(vp->v_uobj.vmobjlock); 566 return error; 567 } 568 } else { 569 /* 570 * Cannot use vdeadcheck() here as this operation 571 * usually gets used from VOP_RECLAIM(). Test for 572 * change of v_mount instead and retry on change. 573 */ 574 rw_exit(vp->v_uobj.vmobjlock); 575 trans_mp = vp->v_mount; 576 fstrans_start(trans_mp); 577 if (vp->v_mount != trans_mp) { 578 fstrans_done(trans_mp); 579 trans_mp = NULL; 580 } 581 } 582 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 583 goto retry; 584 } 585 586 /* Set PGO_BUSYFAIL to avoid deadlocks */ 587 ap->a_flags |= PGO_BUSYFAIL; 588 589 /* 590 * Likewise, if we are asked to clean but the pages are not 591 * dirty, we can just free them using genfs_putpages. 592 */ 593#ifdef DEBUG 594 debug_n_dirtyclean = 0; 595#endif 596 do { 597 int r; 598 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 599 600 /* Count the number of dirty pages */ 601 r = check_dirty(fs, vp, startoffset, endoffset, blkeof, 602 ap->a_flags, 1, NULL); 603 if (r < 0) { 604 /* Pages are busy with another process */ 605 rw_exit(vp->v_uobj.vmobjlock); 606 error = EDEADLK; 607 goto out; 608 } 609 if (r > 0) /* Some pages are dirty */ 610 break; 611 612 /* 613 * Sometimes pages are dirtied between the time that 614 * we check and the time we try to clean them. 615 * Instruct lfs_gop_write to return EDEADLK in this case 616 * so we can write them properly. 617 */ 618 ip->i_lfs_iflags |= LFSI_NO_GOP_WRITE; 619 r = genfs_do_putpages(vp, startoffset, endoffset, 620 ap->a_flags & ~PGO_SYNCIO, &busypg); 621 ip->i_lfs_iflags &= ~LFSI_NO_GOP_WRITE; 622 if (r != EDEADLK) { 623 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 624 error = r; 625 goto out; 626 } 627 628 /* One of the pages was busy. Start over. */ 629 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 630 wait_for_page(vp, busypg, "dirtyclean"); 631#ifdef DEBUG 632 ++debug_n_dirtyclean; 633#endif 634 } while(1); 635 636#ifdef DEBUG 637 if (debug_n_dirtyclean > TOOMANY) 638 DLOG((DLOG_PAGE, "lfs_putpages: dirtyclean: looping, n = %d\n", 639 debug_n_dirtyclean)); 640#endif 641 642 /* 643 * Dirty and asked to clean. 644 * 645 * Pagedaemon can't actually write LFS pages; wake up 646 * the writer to take care of that. The writer will 647 * notice the pager inode queue and act on that. 648 * 649 * XXX We must drop the vp->interlock before taking the lfs_lock or we 650 * get a nasty deadlock with lfs_flush_pchain(). 651 */ 652 if (pagedaemon) { 653 rw_exit(vp->v_uobj.vmobjlock); 654 mutex_enter(&lfs_lock); 655 if (!(ip->i_state & IN_PAGING)) { 656 ip->i_state |= IN_PAGING; 657 TAILQ_INSERT_TAIL(&fs->lfs_pchainhd, ip, i_lfs_pchain); 658 } 659 cv_broadcast(&lfs_writerd_cv); 660 mutex_exit(&lfs_lock); 661 preempt(); 662 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 663 error = EWOULDBLOCK; 664 goto out; 665 } 666 667 /* 668 * If this is a file created in a recent dirop, we can't flush its 669 * inode until the dirop is complete. Drain dirops, then flush the 670 * filesystem (taking care of any other pending dirops while we're 671 * at it). 672 */ 673 if ((ap->a_flags & (PGO_CLEANIT|PGO_LOCKED)) == PGO_CLEANIT && 674 (vp->v_uflag & VU_DIROP)) { 675 DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); 676 677 /* 678 * NB: lfs_flush_fs can recursively call lfs_putpages, 679 * but it won't reach this branch because it passes 680 * PGO_LOCKED. 681 */ 682 683 rw_exit(vp->v_uobj.vmobjlock); 684 mutex_enter(&lfs_lock); 685 lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); 686 mutex_exit(&lfs_lock); 687 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 688 689 /* 690 * The flush will have cleaned out this vnode as well, 691 * no need to do more to it. 692 * XXX then why are we falling through and continuing? 693 */ 694 695 /* 696 * XXX State may have changed while we dropped the 697 * lock; start over just in case. The above comment 698 * suggests this should maybe instead be goto out. 699 */ 700 goto retry; 701 } 702 703 /* 704 * This is it. We are going to write some pages. From here on 705 * down it's all just mechanics. 706 * 707 * Don't let genfs_putpages wait; lfs_segunlock will wait for us. 708 */ 709 ap->a_flags &= ~PGO_SYNCIO; 710 711 /* 712 * If we've already got the seglock, flush the node and return. 713 * The FIP has already been set up for us by lfs_writefile, 714 * and FIP cleanup and lfs_updatemeta will also be done there, 715 * unless genfs_putpages returns EDEADLK; then we must flush 716 * what we have, and correct FIP and segment header accounting. 717 */ 718 get_seglock: 719 /* 720 * If we are not called with the segment locked, lock it. 721 * Account for a new FIP in the segment header, and set sp->vp. 722 * (This should duplicate the setup at the top of lfs_writefile().) 723 */ 724 seglocked = (ap->a_flags & PGO_LOCKED) != 0; 725 if (!seglocked) { 726 rw_exit(vp->v_uobj.vmobjlock); 727 error = lfs_seglock(fs, SEGM_PROT | (sync ? SEGM_SYNC : 0)); 728 if (error != 0) { 729 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 730 goto out; 731 } 732 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 733 lfs_acquire_finfo(fs, ip->i_number, ip->i_gen); 734 } 735 sp = fs->lfs_sp; 736 KASSERT(sp->vp == NULL); 737 sp->vp = vp; 738 739 /* Note segments written by reclaim; only for debugging */ 740 mutex_enter(vp->v_interlock); 741 if (vdead_check(vp, VDEAD_NOWAIT) != 0) { 742 sp->seg_flags |= SEGM_RECLAIM; 743 fs->lfs_reclino = ip->i_number; 744 } 745 mutex_exit(vp->v_interlock); 746 747 /* 748 * Ensure that the partial segment is marked SS_DIROP if this 749 * vnode is a DIROP. 750 */ 751 if (!seglocked && vp->v_uflag & VU_DIROP) { 752 SEGSUM *ssp = sp->segsum; 753 754 lfs_ss_setflags(fs, ssp, 755 lfs_ss_getflags(fs, ssp) | (SS_DIROP|SS_CONT)); 756 } 757 758 /* 759 * Loop over genfs_putpages until all pages are gathered. 760 * genfs_putpages() drops the interlock, so reacquire it if necessary. 761 * Whenever we lose the interlock we have to rerun check_dirty, as 762 * well, since more pages might have been dirtied in our absence. 763 */ 764#ifdef DEBUG 765 debug_n_again = 0; 766#endif 767 do { 768 busypg = NULL; 769 KASSERT(rw_write_held(vp->v_uobj.vmobjlock)); 770 if (check_dirty(fs, vp, startoffset, endoffset, blkeof, 771 ap->a_flags, 0, &busypg) < 0) { 772 write_and_wait(fs, vp, busypg, seglocked, NULL); 773 if (!seglocked) { 774 rw_exit(vp->v_uobj.vmobjlock); 775 lfs_release_finfo(fs); 776 lfs_segunlock(fs); 777 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 778 } 779 sp->vp = NULL; 780 goto get_seglock; 781 } 782 783 busypg = NULL; 784 oreclaim = (ap->a_flags & PGO_RECLAIM); 785 ap->a_flags &= ~PGO_RECLAIM; 786 error = genfs_do_putpages(vp, startoffset, endoffset, 787 ap->a_flags, &busypg); 788 ap->a_flags |= oreclaim; 789 790 if (error == EDEADLK || error == EAGAIN) { 791 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" 792 " %d ino %d off %jx (seg %d)\n", error, 793 ip->i_number, (uintmax_t)lfs_sb_getoffset(fs), 794 lfs_dtosn(fs, lfs_sb_getoffset(fs)))); 795 796 if (oreclaim) { 797 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 798 write_and_wait(fs, vp, busypg, seglocked, "again"); 799 rw_exit(vp->v_uobj.vmobjlock); 800 } else { 801 if ((sp->seg_flags & SEGM_SINGLE) && 802 lfs_sb_getcurseg(fs) != fs->lfs_startseg) 803 donewriting = 1; 804 } 805 } else if (error) { 806 DLOG((DLOG_PAGE, "lfs_putpages: genfs_putpages returned" 807 " %d ino %d off %jx (seg %d)\n", error, 808 (int)ip->i_number, (uintmax_t)lfs_sb_getoffset(fs), 809 lfs_dtosn(fs, lfs_sb_getoffset(fs)))); 810 } 811 /* genfs_do_putpages loses the interlock */ 812#ifdef DEBUG 813 ++debug_n_again; 814#endif 815 if (oreclaim && error == EAGAIN) { 816 DLOG((DLOG_PAGE, "vp %p ino %d vi_flags %x a_flags %x avoiding vclean panic\n", 817 vp, (int)ip->i_number, vp->v_iflag, ap->a_flags)); 818 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 819 } 820 if (error == EDEADLK) 821 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 822 } while (error == EDEADLK || (oreclaim && error == EAGAIN)); 823#ifdef DEBUG 824 if (debug_n_again > TOOMANY) 825 DLOG((DLOG_PAGE, "lfs_putpages: again: looping, n = %d\n", debug_n_again)); 826#endif 827 828 KASSERT(sp != NULL && sp->vp == vp); 829 if (!seglocked && !donewriting) { 830 sp->vp = NULL; 831 832 /* Write indirect blocks as well */ 833 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_indir); 834 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_dindir); 835 lfs_gather(fs, fs->lfs_sp, vp, lfs_match_tindir); 836 837 KASSERT(sp->vp == NULL); 838 sp->vp = vp; 839 } 840 841 /* 842 * Blocks are now gathered into a segment waiting to be written. 843 * All that's left to do is update metadata, and write them. 844 */ 845 lfs_updatemeta(sp); 846 KASSERT(sp->vp == vp); 847 sp->vp = NULL; 848 849 /* 850 * If we were called from lfs_writefile, we don't need to clean up 851 * the FIP or unlock the segment lock. We're done. 852 */ 853 if (seglocked) { 854 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 855 goto out; 856 } 857 858 /* Clean up FIP and send it to disk. */ 859 lfs_release_finfo(fs); 860 lfs_writeseg(fs, fs->lfs_sp); 861 862 /* 863 * Remove us from paging queue if we wrote all our pages. 864 */ 865 if (origendoffset == 0 || ap->a_flags & PGO_ALLPAGES) { 866 mutex_enter(&lfs_lock); 867 if (ip->i_state & IN_PAGING) { 868 ip->i_state &= ~IN_PAGING; 869 TAILQ_REMOVE(&fs->lfs_pchainhd, ip, i_lfs_pchain); 870 } 871 mutex_exit(&lfs_lock); 872 } 873 874 /* 875 * XXX - with the malloc/copy writeseg, the pages are freed by now 876 * even if we don't wait (e.g. if we hold a nested lock). This 877 * will not be true if we stop using malloc/copy. 878 */ 879 KASSERT(fs->lfs_sp->seg_flags & SEGM_PROT); 880 lfs_segunlock(fs); 881 882 /* 883 * Wait for v_numoutput to drop to zero. The seglock should 884 * take care of this, but there is a slight possibility that 885 * aiodoned might not have got around to our buffers yet. 886 */ 887 if (sync) { 888 mutex_enter(vp->v_interlock); 889 while (vp->v_numoutput > 0) { 890 DLOG((DLOG_PAGE, "lfs_putpages: ino %d sleeping on" 891 " num %d\n", ip->i_number, vp->v_numoutput)); 892 cv_wait(&vp->v_cv, vp->v_interlock); 893 } 894 mutex_exit(vp->v_interlock); 895 } 896 897out:; 898 if (trans_mp) 899 fstrans_done(trans_mp); 900 KASSERT(!rw_write_held(vp->v_uobj.vmobjlock)); 901 return error; 902} 903 904