/*
 * Copyright (c) 2007-2009 Google Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * * Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following disclaimer
 *   in the documentation and/or other materials provided with the
 *   distribution.
 * * Neither the name of Google Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from
 *   this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Copyright (C) 2005 Csaba Henk.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.3/sys/fs/fuse/fuse_io.c 248084 2013-03-09 02:32:23Z attilio $");

#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/filedesc.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
89241519Sattilio#include <vm/vm_page.h> 90241519Sattilio#include <vm/vm_object.h> 91241519Sattilio 92241519Sattilio#include "fuse.h" 93241519Sattilio#include "fuse_file.h" 94241519Sattilio#include "fuse_node.h" 95241519Sattilio#include "fuse_internal.h" 96241519Sattilio#include "fuse_ipc.h" 97241519Sattilio#include "fuse_io.h" 98241519Sattilio 99241519Sattilio#define FUSE_DEBUG_MODULE IO 100241519Sattilio#include "fuse_debug.h" 101241519Sattilio 102241519Sattilio 103241519Sattiliostatic int 104241519Sattiliofuse_read_directbackend(struct vnode *vp, struct uio *uio, 105241519Sattilio struct ucred *cred, struct fuse_filehandle *fufh); 106241519Sattiliostatic int 107241519Sattiliofuse_read_biobackend(struct vnode *vp, struct uio *uio, 108241519Sattilio struct ucred *cred, struct fuse_filehandle *fufh); 109241519Sattiliostatic int 110241519Sattiliofuse_write_directbackend(struct vnode *vp, struct uio *uio, 111241519Sattilio struct ucred *cred, struct fuse_filehandle *fufh); 112241519Sattiliostatic int 113241519Sattiliofuse_write_biobackend(struct vnode *vp, struct uio *uio, 114245164Sbapt struct ucred *cred, struct fuse_filehandle *fufh, int ioflag); 115241519Sattilio 116241519Sattilioint 117241519Sattiliofuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, 118241519Sattilio struct ucred *cred) 119241519Sattilio{ 120241519Sattilio struct fuse_filehandle *fufh; 121241519Sattilio int err, directio; 122241519Sattilio 123242616Sattilio MPASS(vp->v_type == VREG || vp->v_type == VDIR); 124241519Sattilio 125241519Sattilio err = fuse_filehandle_getrw(vp, 126241519Sattilio (uio->uio_rw == UIO_READ) ? 
FUFH_RDONLY : FUFH_WRONLY, &fufh); 127241519Sattilio if (err) { 128241519Sattilio printf("FUSE: io dispatch: filehandles are closed\n"); 129241519Sattilio return err; 130241519Sattilio } 131241519Sattilio /* 132241519Sattilio * Ideally, when the daemon asks for direct io at open time, the 133241519Sattilio * standard file flag should be set according to this, so that would 134241519Sattilio * just change the default mode, which later on could be changed via 135241519Sattilio * fcntl(2). 136241519Sattilio * But this doesn't work, the O_DIRECT flag gets cleared at some point 137241519Sattilio * (don't know where). So to make any use of the Fuse direct_io option, 138241519Sattilio * we hardwire it into the file's private data (similarly to Linux, 139241519Sattilio * btw.). 140241519Sattilio */ 141241519Sattilio directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp)); 142241519Sattilio 143241519Sattilio switch (uio->uio_rw) { 144241519Sattilio case UIO_READ: 145241519Sattilio if (directio) { 146241521Sattilio FS_DEBUG("direct read of vnode %ju via file handle %ju\n", 147241519Sattilio (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); 148241519Sattilio err = fuse_read_directbackend(vp, uio, cred, fufh); 149241519Sattilio } else { 150241521Sattilio FS_DEBUG("buffered read of vnode %ju\n", 151241519Sattilio (uintmax_t)VTOILLU(vp)); 152241519Sattilio err = fuse_read_biobackend(vp, uio, cred, fufh); 153241519Sattilio } 154241519Sattilio break; 155241519Sattilio case UIO_WRITE: 156241519Sattilio if (directio) { 157241521Sattilio FS_DEBUG("direct write of vnode %ju via file handle %ju\n", 158241519Sattilio (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id); 159241519Sattilio err = fuse_write_directbackend(vp, uio, cred, fufh); 160241519Sattilio } else { 161241521Sattilio FS_DEBUG("buffered write of vnode %ju\n", 162241519Sattilio (uintmax_t)VTOILLU(vp)); 163245164Sbapt err = fuse_write_biobackend(vp, uio, cred, fufh, ioflag); 164241519Sattilio } 165241519Sattilio 
break; 166241519Sattilio default: 167241519Sattilio panic("uninterpreted mode passed to fuse_io_dispatch"); 168241519Sattilio } 169241519Sattilio 170241519Sattilio return (err); 171241519Sattilio} 172241519Sattilio 173241519Sattiliostatic int 174241519Sattiliofuse_read_biobackend(struct vnode *vp, struct uio *uio, 175241519Sattilio struct ucred *cred, struct fuse_filehandle *fufh) 176241519Sattilio{ 177241519Sattilio struct buf *bp; 178241519Sattilio daddr_t lbn; 179241519Sattilio int bcount; 180241519Sattilio int err = 0, n = 0, on = 0; 181241519Sattilio off_t filesize; 182241519Sattilio 183241519Sattilio const int biosize = fuse_iosize(vp); 184241519Sattilio 185241521Sattilio FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", 186241519Sattilio uio->uio_resid, uio->uio_offset, VTOFUD(vp)->filesize); 187241519Sattilio 188241519Sattilio if (uio->uio_resid == 0) 189241519Sattilio return (0); 190241519Sattilio if (uio->uio_offset < 0) 191241519Sattilio return (EINVAL); 192241519Sattilio 193241519Sattilio bcount = MIN(MAXBSIZE, biosize); 194241519Sattilio filesize = VTOFUD(vp)->filesize; 195241519Sattilio 196241519Sattilio do { 197241519Sattilio if (fuse_isdeadfs(vp)) { 198241519Sattilio err = ENXIO; 199241519Sattilio break; 200241519Sattilio } 201241519Sattilio lbn = uio->uio_offset / biosize; 202241519Sattilio on = uio->uio_offset & (biosize - 1); 203241519Sattilio 204241521Sattilio FS_DEBUG2G("biosize %d, lbn %d, on %d\n", biosize, (int)lbn, on); 205241519Sattilio 206241519Sattilio /* 207241519Sattilio * Obtain the buffer cache block. Figure out the buffer size 208241519Sattilio * when we are at EOF. If we are modifying the size of the 209241519Sattilio * buffer based on an EOF condition we need to hold 210241519Sattilio * nfs_rslock() through obtaining the buffer to prevent 211241519Sattilio * a potential writer-appender from messing with n_size. 212241519Sattilio * Otherwise we may accidently truncate the buffer and 213241519Sattilio * lose dirty data. 
214241519Sattilio * 215241519Sattilio * Note that bcount is *not* DEV_BSIZE aligned. 216241519Sattilio */ 217241519Sattilio if ((off_t)lbn * biosize >= filesize) { 218241519Sattilio bcount = 0; 219241519Sattilio } else if ((off_t)(lbn + 1) * biosize > filesize) { 220241519Sattilio bcount = filesize - (off_t)lbn *biosize; 221241519Sattilio } 222241519Sattilio bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); 223241519Sattilio 224241519Sattilio if (!bp) 225241519Sattilio return (EINTR); 226241519Sattilio 227241519Sattilio /* 228241519Sattilio * If B_CACHE is not set, we must issue the read. If this 229241519Sattilio * fails, we return an error. 230241519Sattilio */ 231241519Sattilio 232241519Sattilio if ((bp->b_flags & B_CACHE) == 0) { 233241519Sattilio bp->b_iocmd = BIO_READ; 234241519Sattilio vfs_busy_pages(bp, 0); 235241519Sattilio err = fuse_io_strategy(vp, bp); 236241519Sattilio if (err) { 237241519Sattilio brelse(bp); 238241519Sattilio return (err); 239241519Sattilio } 240241519Sattilio } 241241519Sattilio /* 242241519Sattilio * on is the offset into the current bp. Figure out how many 243241519Sattilio * bytes we can copy out of the bp. Note that bcount is 244241519Sattilio * NOT DEV_BSIZE aligned. 245241519Sattilio * 246241519Sattilio * Then figure out how many bytes we can copy into the uio. 
247241519Sattilio */ 248241519Sattilio 249241519Sattilio n = 0; 250241519Sattilio if (on < bcount) 251241519Sattilio n = MIN((unsigned)(bcount - on), uio->uio_resid); 252241519Sattilio if (n > 0) { 253241521Sattilio FS_DEBUG2G("feeding buffeater with %d bytes of buffer %p," 254241519Sattilio " saying %d was asked for\n", 255241519Sattilio n, bp->b_data + on, n + (int)bp->b_resid); 256241519Sattilio err = uiomove(bp->b_data + on, n, uio); 257241519Sattilio } 258241519Sattilio brelse(bp); 259241521Sattilio FS_DEBUG2G("end of turn, err %d, uio->uio_resid %zd, n %d\n", 260241519Sattilio err, uio->uio_resid, n); 261241519Sattilio } while (err == 0 && uio->uio_resid > 0 && n > 0); 262241519Sattilio 263241519Sattilio return (err); 264241519Sattilio} 265241519Sattilio 266241519Sattiliostatic int 267241519Sattiliofuse_read_directbackend(struct vnode *vp, struct uio *uio, 268241519Sattilio struct ucred *cred, struct fuse_filehandle *fufh) 269241519Sattilio{ 270241519Sattilio struct fuse_dispatcher fdi; 271241519Sattilio struct fuse_read_in *fri; 272241519Sattilio int err = 0; 273241519Sattilio 274241519Sattilio if (uio->uio_resid == 0) 275241519Sattilio return (0); 276241519Sattilio 277241519Sattilio fdisp_init(&fdi, 0); 278241519Sattilio 279241519Sattilio /* 280241519Sattilio * XXX In "normal" case we use an intermediate kernel buffer for 281241519Sattilio * transmitting data from daemon's context to ours. Eventually, we should 282241519Sattilio * get rid of this. Anyway, if the target uio lives in sysspace (we are 283241519Sattilio * called from pageops), and the input data doesn't need kernel-side 284241519Sattilio * processing (we are not called from readdir) we can already invoke 285241519Sattilio * an optimized, "peer-to-peer" I/O routine. 
286241519Sattilio */ 287241519Sattilio while (uio->uio_resid > 0) { 288241519Sattilio fdi.iosize = sizeof(*fri); 289241519Sattilio fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); 290241519Sattilio fri = fdi.indata; 291241519Sattilio fri->fh = fufh->fh_id; 292241519Sattilio fri->offset = uio->uio_offset; 293241519Sattilio fri->size = MIN(uio->uio_resid, 294241519Sattilio fuse_get_mpdata(vp->v_mount)->max_read); 295241519Sattilio 296241521Sattilio FS_DEBUG2G("fri->fh %ju, fri->offset %ju, fri->size %ju\n", 297241519Sattilio (uintmax_t)fri->fh, (uintmax_t)fri->offset, 298241519Sattilio (uintmax_t)fri->size); 299241519Sattilio 300241519Sattilio if ((err = fdisp_wait_answ(&fdi))) 301241519Sattilio goto out; 302241519Sattilio 303241521Sattilio FS_DEBUG2G("complete: got iosize=%d, requested fri.size=%zd; " 304241519Sattilio "resid=%zd offset=%ju\n", 305241519Sattilio fri->size, fdi.iosize, uio->uio_resid, 306241519Sattilio (uintmax_t)uio->uio_offset); 307241519Sattilio 308241519Sattilio if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) 309241519Sattilio break; 310241519Sattilio if (fdi.iosize < fri->size) 311241519Sattilio break; 312241519Sattilio } 313241519Sattilio 314241519Sattilioout: 315241519Sattilio fdisp_destroy(&fdi); 316241519Sattilio return (err); 317241519Sattilio} 318241519Sattilio 319241519Sattiliostatic int 320241519Sattiliofuse_write_directbackend(struct vnode *vp, struct uio *uio, 321241519Sattilio struct ucred *cred, struct fuse_filehandle *fufh) 322241519Sattilio{ 323241519Sattilio struct fuse_vnode_data *fvdat = VTOFUD(vp); 324241519Sattilio struct fuse_write_in *fwi; 325241519Sattilio struct fuse_dispatcher fdi; 326241519Sattilio size_t chunksize; 327241519Sattilio int diff; 328241519Sattilio int err = 0; 329241519Sattilio 330241519Sattilio if (!uio->uio_resid) 331241519Sattilio return (0); 332241519Sattilio 333241519Sattilio fdisp_init(&fdi, 0); 334241519Sattilio 335241519Sattilio while (uio->uio_resid > 0) { 336241519Sattilio 
chunksize = MIN(uio->uio_resid, 337241519Sattilio fuse_get_mpdata(vp->v_mount)->max_write); 338241519Sattilio 339241519Sattilio fdi.iosize = sizeof(*fwi) + chunksize; 340241519Sattilio fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); 341241519Sattilio 342241519Sattilio fwi = fdi.indata; 343241519Sattilio fwi->fh = fufh->fh_id; 344241519Sattilio fwi->offset = uio->uio_offset; 345241519Sattilio fwi->size = chunksize; 346241519Sattilio 347241519Sattilio if ((err = uiomove((char *)fdi.indata + sizeof(*fwi), 348241519Sattilio chunksize, uio))) 349241519Sattilio break; 350241519Sattilio 351241519Sattilio if ((err = fdisp_wait_answ(&fdi))) 352241519Sattilio break; 353241519Sattilio 354241519Sattilio diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size; 355241519Sattilio if (diff < 0) { 356241519Sattilio err = EINVAL; 357241519Sattilio break; 358241519Sattilio } 359241519Sattilio uio->uio_resid += diff; 360241519Sattilio uio->uio_offset -= diff; 361241519Sattilio if (uio->uio_offset > fvdat->filesize) 362241519Sattilio fuse_vnode_setsize(vp, cred, uio->uio_offset); 363241519Sattilio } 364241519Sattilio 365241519Sattilio fdisp_destroy(&fdi); 366241519Sattilio 367241519Sattilio return (err); 368241519Sattilio} 369241519Sattilio 370241519Sattiliostatic int 371241519Sattiliofuse_write_biobackend(struct vnode *vp, struct uio *uio, 372245164Sbapt struct ucred *cred, struct fuse_filehandle *fufh, int ioflag) 373241519Sattilio{ 374241519Sattilio struct fuse_vnode_data *fvdat = VTOFUD(vp); 375241519Sattilio struct buf *bp; 376241519Sattilio daddr_t lbn; 377241519Sattilio int bcount; 378241519Sattilio int n, on, err = 0; 379241519Sattilio 380241519Sattilio const int biosize = fuse_iosize(vp); 381241519Sattilio 382241519Sattilio KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); 383241521Sattilio FS_DEBUG("resid=%zx offset=%jx fsize=%jx\n", 384241519Sattilio uio->uio_resid, uio->uio_offset, fvdat->filesize); 385241519Sattilio if (vp->v_type != VREG) 
386241519Sattilio return (EIO); 387241519Sattilio if (uio->uio_offset < 0) 388241519Sattilio return (EINVAL); 389241519Sattilio if (uio->uio_resid == 0) 390241519Sattilio return (0); 391245164Sbapt if (ioflag & IO_APPEND) 392245164Sbapt uio_setoffset(uio, fvdat->filesize); 393241519Sattilio 394241519Sattilio /* 395241519Sattilio * Find all of this file's B_NEEDCOMMIT buffers. If our writes 396241519Sattilio * would exceed the local maximum per-file write commit size when 397241519Sattilio * combined with those, we must decide whether to flush, 398241519Sattilio * go synchronous, or return err. We don't bother checking 399241519Sattilio * IO_UNIT -- we just make all writes atomic anyway, as there's 400241519Sattilio * no point optimizing for something that really won't ever happen. 401241519Sattilio */ 402241519Sattilio do { 403241519Sattilio if (fuse_isdeadfs(vp)) { 404241519Sattilio err = ENXIO; 405241519Sattilio break; 406241519Sattilio } 407241519Sattilio lbn = uio->uio_offset / biosize; 408241519Sattilio on = uio->uio_offset & (biosize - 1); 409241519Sattilio n = MIN((unsigned)(biosize - on), uio->uio_resid); 410241519Sattilio 411241521Sattilio FS_DEBUG2G("lbn %ju, on %d, n %d, uio offset %ju, uio resid %zd\n", 412241519Sattilio (uintmax_t)lbn, on, n, 413241519Sattilio (uintmax_t)uio->uio_offset, uio->uio_resid); 414241519Sattilio 415241519Sattilioagain: 416241519Sattilio /* 417241519Sattilio * Handle direct append and file extension cases, calculate 418241519Sattilio * unaligned buffer size. 419241519Sattilio */ 420241519Sattilio if (uio->uio_offset == fvdat->filesize && n) { 421241519Sattilio /* 422241519Sattilio * Get the buffer (in its pre-append state to maintain 423241519Sattilio * B_CACHE if it was previously set). Resize the 424241519Sattilio * nfsnode after we have locked the buffer to prevent 425241519Sattilio * readers from reading garbage. 
426241519Sattilio */ 427241519Sattilio bcount = on; 428241521Sattilio FS_DEBUG("getting block from OS, bcount %d\n", bcount); 429241519Sattilio bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); 430241519Sattilio 431241519Sattilio if (bp != NULL) { 432241519Sattilio long save; 433241519Sattilio 434241519Sattilio err = fuse_vnode_setsize(vp, cred, 435241519Sattilio uio->uio_offset + n); 436241519Sattilio if (err) { 437241519Sattilio brelse(bp); 438241519Sattilio break; 439241519Sattilio } 440241519Sattilio save = bp->b_flags & B_CACHE; 441241519Sattilio bcount += n; 442241519Sattilio allocbuf(bp, bcount); 443241519Sattilio bp->b_flags |= save; 444241519Sattilio } 445241519Sattilio } else { 446241519Sattilio /* 447241519Sattilio * Obtain the locked cache block first, and then 448241519Sattilio * adjust the file's size as appropriate. 449241519Sattilio */ 450241519Sattilio bcount = on + n; 451241519Sattilio if ((off_t)lbn * biosize + bcount < fvdat->filesize) { 452241519Sattilio if ((off_t)(lbn + 1) * biosize < fvdat->filesize) 453241519Sattilio bcount = biosize; 454241519Sattilio else 455241519Sattilio bcount = fvdat->filesize - 456241519Sattilio (off_t)lbn *biosize; 457241519Sattilio } 458241521Sattilio FS_DEBUG("getting block from OS, bcount %d\n", bcount); 459241519Sattilio bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); 460241519Sattilio if (bp && uio->uio_offset + n > fvdat->filesize) { 461241519Sattilio err = fuse_vnode_setsize(vp, cred, 462241519Sattilio uio->uio_offset + n); 463241519Sattilio if (err) { 464241519Sattilio brelse(bp); 465241519Sattilio break; 466241519Sattilio } 467241519Sattilio } 468241519Sattilio } 469241519Sattilio 470241519Sattilio if (!bp) { 471241519Sattilio err = EINTR; 472241519Sattilio break; 473241519Sattilio } 474241519Sattilio /* 475241519Sattilio * Issue a READ if B_CACHE is not set. In special-append 476241519Sattilio * mode, B_CACHE is based on the buffer prior to the write 477241519Sattilio * op and is typically set, avoiding the read. 
If a read 478241519Sattilio * is required in special append mode, the server will 479241519Sattilio * probably send us a short-read since we extended the file 480241519Sattilio * on our end, resulting in b_resid == 0 and, thusly, 481241519Sattilio * B_CACHE getting set. 482241519Sattilio * 483241519Sattilio * We can also avoid issuing the read if the write covers 484241519Sattilio * the entire buffer. We have to make sure the buffer state 485241519Sattilio * is reasonable in this case since we will not be initiating 486241519Sattilio * I/O. See the comments in kern/vfs_bio.c's getblk() for 487241519Sattilio * more information. 488241519Sattilio * 489241519Sattilio * B_CACHE may also be set due to the buffer being cached 490241519Sattilio * normally. 491241519Sattilio */ 492241519Sattilio 493241519Sattilio if (on == 0 && n == bcount) { 494241519Sattilio bp->b_flags |= B_CACHE; 495241519Sattilio bp->b_flags &= ~B_INVAL; 496241519Sattilio bp->b_ioflags &= ~BIO_ERROR; 497241519Sattilio } 498241519Sattilio if ((bp->b_flags & B_CACHE) == 0) { 499241519Sattilio bp->b_iocmd = BIO_READ; 500241519Sattilio vfs_busy_pages(bp, 0); 501241519Sattilio fuse_io_strategy(vp, bp); 502241519Sattilio if ((err = bp->b_error)) { 503241519Sattilio brelse(bp); 504241519Sattilio break; 505241519Sattilio } 506241519Sattilio } 507241519Sattilio if (bp->b_wcred == NOCRED) 508241519Sattilio bp->b_wcred = crhold(cred); 509241519Sattilio 510241519Sattilio /* 511241519Sattilio * If dirtyend exceeds file size, chop it down. This should 512241519Sattilio * not normally occur but there is an append race where it 513241519Sattilio * might occur XXX, so we log it. 514241519Sattilio * 515241519Sattilio * If the chopping creates a reverse-indexed or degenerate 516241519Sattilio * situation with dirtyoff/end, we 0 both of them. 
517241519Sattilio */ 518241519Sattilio 519241519Sattilio if (bp->b_dirtyend > bcount) { 520241521Sattilio FS_DEBUG("FUSE append race @%lx:%d\n", 521241519Sattilio (long)bp->b_blkno * biosize, 522241519Sattilio bp->b_dirtyend - bcount); 523241519Sattilio bp->b_dirtyend = bcount; 524241519Sattilio } 525241519Sattilio if (bp->b_dirtyoff >= bp->b_dirtyend) 526241519Sattilio bp->b_dirtyoff = bp->b_dirtyend = 0; 527241519Sattilio 528241519Sattilio /* 529241519Sattilio * If the new write will leave a contiguous dirty 530241519Sattilio * area, just update the b_dirtyoff and b_dirtyend, 531241519Sattilio * otherwise force a write rpc of the old dirty area. 532241519Sattilio * 533241519Sattilio * While it is possible to merge discontiguous writes due to 534241519Sattilio * our having a B_CACHE buffer ( and thus valid read data 535241519Sattilio * for the hole), we don't because it could lead to 536241519Sattilio * significant cache coherency problems with multiple clients, 537241519Sattilio * especially if locking is implemented later on. 538241519Sattilio * 539241519Sattilio * as an optimization we could theoretically maintain 540241519Sattilio * a linked list of discontinuous areas, but we would still 541241519Sattilio * have to commit them separately so there isn't much 542241519Sattilio * advantage to it except perhaps a bit of asynchronization. 543241519Sattilio */ 544241519Sattilio 545241519Sattilio if (bp->b_dirtyend > 0 && 546241519Sattilio (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { 547241519Sattilio /* 548241519Sattilio * Yes, we mean it. Write out everything to "storage" 549241519Sattilio * immediatly, without hesitation. (Apart from other 550241519Sattilio * reasons: the only way to know if a write is valid 551241519Sattilio * if its actually written out.) 
552241519Sattilio */ 553241519Sattilio bwrite(bp); 554241519Sattilio if (bp->b_error == EINTR) { 555241519Sattilio err = EINTR; 556241519Sattilio break; 557241519Sattilio } 558241519Sattilio goto again; 559241519Sattilio } 560241519Sattilio err = uiomove((char *)bp->b_data + on, n, uio); 561241519Sattilio 562241519Sattilio /* 563241519Sattilio * Since this block is being modified, it must be written 564241519Sattilio * again and not just committed. Since write clustering does 565241519Sattilio * not work for the stage 1 data write, only the stage 2 566241519Sattilio * commit rpc, we have to clear B_CLUSTEROK as well. 567241519Sattilio */ 568241519Sattilio bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); 569241519Sattilio 570241519Sattilio if (err) { 571241519Sattilio bp->b_ioflags |= BIO_ERROR; 572241519Sattilio bp->b_error = err; 573241519Sattilio brelse(bp); 574241519Sattilio break; 575241519Sattilio } 576241519Sattilio /* 577241519Sattilio * Only update dirtyoff/dirtyend if not a degenerate 578241519Sattilio * condition. 
579241519Sattilio */ 580241519Sattilio if (n) { 581241519Sattilio if (bp->b_dirtyend > 0) { 582241519Sattilio bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); 583241519Sattilio bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); 584241519Sattilio } else { 585241519Sattilio bp->b_dirtyoff = on; 586241519Sattilio bp->b_dirtyend = on + n; 587241519Sattilio } 588241519Sattilio vfs_bio_set_valid(bp, on, n); 589241519Sattilio } 590241519Sattilio err = bwrite(bp); 591241519Sattilio if (err) 592241519Sattilio break; 593241519Sattilio } while (uio->uio_resid > 0 && n > 0); 594241519Sattilio 595241519Sattilio if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0) 596241519Sattilio fuse_vnode_savesize(vp, cred); 597241519Sattilio 598241519Sattilio return (err); 599241519Sattilio} 600241519Sattilio 601241519Sattilioint 602241519Sattiliofuse_io_strategy(struct vnode *vp, struct buf *bp) 603241519Sattilio{ 604241519Sattilio struct fuse_filehandle *fufh; 605241519Sattilio struct fuse_vnode_data *fvdat = VTOFUD(vp); 606241519Sattilio struct ucred *cred; 607241519Sattilio struct uio *uiop; 608241519Sattilio struct uio uio; 609241519Sattilio struct iovec io; 610241519Sattilio int error = 0; 611241519Sattilio 612241519Sattilio const int biosize = fuse_iosize(vp); 613241519Sattilio 614242616Sattilio MPASS(vp->v_type == VREG || vp->v_type == VDIR); 615241519Sattilio MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); 616241521Sattilio FS_DEBUG("inode=%ju offset=%jd resid=%ld\n", 617241519Sattilio (uintmax_t)VTOI(vp), (intmax_t)(((off_t)bp->b_blkno) * biosize), 618241519Sattilio bp->b_bcount); 619241519Sattilio 620241519Sattilio error = fuse_filehandle_getrw(vp, 621241519Sattilio (bp->b_iocmd == BIO_READ) ? 
FUFH_RDONLY : FUFH_WRONLY, &fufh); 622241519Sattilio if (error) { 623241519Sattilio printf("FUSE: strategy: filehandles are closed\n"); 624241519Sattilio bp->b_ioflags |= BIO_ERROR; 625241519Sattilio bp->b_error = error; 626241519Sattilio return (error); 627241519Sattilio } 628241519Sattilio cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred; 629241519Sattilio 630241519Sattilio uiop = &uio; 631241519Sattilio uiop->uio_iov = &io; 632241519Sattilio uiop->uio_iovcnt = 1; 633241519Sattilio uiop->uio_segflg = UIO_SYSSPACE; 634241519Sattilio uiop->uio_td = curthread; 635241519Sattilio 636241519Sattilio /* 637241519Sattilio * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We 638241519Sattilio * do this here so we do not have to do it in all the code that 639241519Sattilio * calls us. 640241519Sattilio */ 641241519Sattilio bp->b_flags &= ~B_INVAL; 642241519Sattilio bp->b_ioflags &= ~BIO_ERROR; 643241519Sattilio 644241519Sattilio KASSERT(!(bp->b_flags & B_DONE), 645241519Sattilio ("fuse_io_strategy: bp %p already marked done", bp)); 646241519Sattilio if (bp->b_iocmd == BIO_READ) { 647241519Sattilio io.iov_len = uiop->uio_resid = bp->b_bcount; 648241519Sattilio io.iov_base = bp->b_data; 649241519Sattilio uiop->uio_rw = UIO_READ; 650241519Sattilio 651241519Sattilio uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; 652241519Sattilio error = fuse_read_directbackend(vp, uiop, cred, fufh); 653241519Sattilio 654241519Sattilio if ((!error && uiop->uio_resid) || 655241519Sattilio (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && 656241519Sattilio uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && 657241519Sattilio uiop->uio_offset >= fvdat->cached_attrs.va_size)) { 658241519Sattilio /* 659241519Sattilio * If we had a short read with no error, we must have 660241519Sattilio * hit a file hole. We should zero-fill the remainder. 661241519Sattilio * This can also occur if the server hits the file EOF. 
662241519Sattilio * 663241519Sattilio * Holes used to be able to occur due to pending 664241519Sattilio * writes, but that is not possible any longer. 665241519Sattilio */ 666241519Sattilio int nread = bp->b_bcount - uiop->uio_resid; 667241519Sattilio int left = uiop->uio_resid; 668241519Sattilio 669241519Sattilio if (error != 0) { 670241519Sattilio printf("FUSE: Fix broken io: offset %ju, " 671241519Sattilio " resid %zd, file size %ju/%ju\n", 672241519Sattilio (uintmax_t)uiop->uio_offset, 673241519Sattilio uiop->uio_resid, fvdat->filesize, 674241519Sattilio fvdat->cached_attrs.va_size); 675241519Sattilio error = 0; 676241519Sattilio } 677241519Sattilio if (left > 0) 678241519Sattilio bzero((char *)bp->b_data + nread, left); 679241519Sattilio uiop->uio_resid = 0; 680241519Sattilio } 681241519Sattilio if (error) { 682241519Sattilio bp->b_ioflags |= BIO_ERROR; 683241519Sattilio bp->b_error = error; 684241519Sattilio } 685241519Sattilio } else { 686241519Sattilio /* 687241519Sattilio * If we only need to commit, try to commit 688241519Sattilio */ 689241519Sattilio if (bp->b_flags & B_NEEDCOMMIT) { 690241521Sattilio FS_DEBUG("write: B_NEEDCOMMIT flags set\n"); 691241519Sattilio } 692241519Sattilio /* 693241519Sattilio * Setup for actual write 694241519Sattilio */ 695241519Sattilio if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > 696241519Sattilio fvdat->filesize) 697241519Sattilio bp->b_dirtyend = fvdat->filesize - 698241519Sattilio (off_t)bp->b_blkno * biosize; 699241519Sattilio 700241519Sattilio if (bp->b_dirtyend > bp->b_dirtyoff) { 701241519Sattilio io.iov_len = uiop->uio_resid = bp->b_dirtyend 702241519Sattilio - bp->b_dirtyoff; 703241519Sattilio uiop->uio_offset = (off_t)bp->b_blkno * biosize 704241519Sattilio + bp->b_dirtyoff; 705241519Sattilio io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; 706241519Sattilio uiop->uio_rw = UIO_WRITE; 707241519Sattilio 708241519Sattilio error = fuse_write_directbackend(vp, uiop, cred, fufh); 709241519Sattilio 
710241519Sattilio if (error == EINTR || error == ETIMEDOUT 711241519Sattilio || (!error && (bp->b_flags & B_NEEDCOMMIT))) { 712241519Sattilio 713241519Sattilio bp->b_flags &= ~(B_INVAL | B_NOCACHE); 714241519Sattilio if ((bp->b_flags & B_PAGING) == 0) { 715241519Sattilio bdirty(bp); 716241519Sattilio bp->b_flags &= ~B_DONE; 717241519Sattilio } 718241519Sattilio if ((error == EINTR || error == ETIMEDOUT) && 719241519Sattilio (bp->b_flags & B_ASYNC) == 0) 720241519Sattilio bp->b_flags |= B_EINTR; 721241519Sattilio } else { 722241519Sattilio if (error) { 723241519Sattilio bp->b_ioflags |= BIO_ERROR; 724241519Sattilio bp->b_flags |= B_INVAL; 725241519Sattilio bp->b_error = error; 726241519Sattilio } 727241519Sattilio bp->b_dirtyoff = bp->b_dirtyend = 0; 728241519Sattilio } 729241519Sattilio } else { 730241519Sattilio bp->b_resid = 0; 731241519Sattilio bufdone(bp); 732241519Sattilio return (0); 733241519Sattilio } 734241519Sattilio } 735241519Sattilio bp->b_resid = uiop->uio_resid; 736241519Sattilio bufdone(bp); 737241519Sattilio return (error); 738241519Sattilio} 739241519Sattilio 740241519Sattilioint 741241519Sattiliofuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) 742241519Sattilio{ 743241519Sattilio struct vop_fsync_args a = { 744241519Sattilio .a_vp = vp, 745241519Sattilio .a_waitfor = waitfor, 746241519Sattilio .a_td = td, 747241519Sattilio }; 748241519Sattilio 749241519Sattilio return (vop_stdfsync(&a)); 750241519Sattilio} 751241519Sattilio 752241519Sattilio/* 753241519Sattilio * Flush and invalidate all dirty buffers. If another process is already 754241519Sattilio * doing the flush, just wait for completion. 
755241519Sattilio */ 756241519Sattilioint 757241519Sattiliofuse_io_invalbuf(struct vnode *vp, struct thread *td) 758241519Sattilio{ 759241519Sattilio struct fuse_vnode_data *fvdat = VTOFUD(vp); 760241519Sattilio int error = 0; 761241519Sattilio 762241519Sattilio if (vp->v_iflag & VI_DOOMED) 763241519Sattilio return 0; 764241519Sattilio 765241519Sattilio ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); 766241519Sattilio 767241519Sattilio while (fvdat->flag & FN_FLUSHINPROG) { 768241519Sattilio struct proc *p = td->td_proc; 769241519Sattilio 770241519Sattilio if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) 771241519Sattilio return EIO; 772241519Sattilio fvdat->flag |= FN_FLUSHWANT; 773241519Sattilio tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); 774241519Sattilio error = 0; 775241519Sattilio if (p != NULL) { 776241519Sattilio PROC_LOCK(p); 777241519Sattilio if (SIGNOTEMPTY(p->p_siglist) || 778241519Sattilio SIGNOTEMPTY(td->td_siglist)) 779241519Sattilio error = EINTR; 780241519Sattilio PROC_UNLOCK(p); 781241519Sattilio } 782241519Sattilio if (error == EINTR) 783241519Sattilio return EINTR; 784241519Sattilio } 785241519Sattilio fvdat->flag |= FN_FLUSHINPROG; 786241519Sattilio 787241519Sattilio if (vp->v_bufobj.bo_object != NULL) { 788248084Sattilio VM_OBJECT_WLOCK(vp->v_bufobj.bo_object); 789241519Sattilio vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); 790248084Sattilio VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object); 791241519Sattilio } 792241519Sattilio error = vinvalbuf(vp, V_SAVE, PCATCH, 0); 793241519Sattilio while (error) { 794241519Sattilio if (error == ERESTART || error == EINTR) { 795241519Sattilio fvdat->flag &= ~FN_FLUSHINPROG; 796241519Sattilio if (fvdat->flag & FN_FLUSHWANT) { 797241519Sattilio fvdat->flag &= ~FN_FLUSHWANT; 798241519Sattilio wakeup(&fvdat->flag); 799241519Sattilio } 800241519Sattilio return EINTR; 801241519Sattilio } 802241519Sattilio error = vinvalbuf(vp, V_SAVE, PCATCH, 0); 803241519Sattilio } 804241519Sattilio fvdat->flag 
&= ~FN_FLUSHINPROG; 805241519Sattilio if (fvdat->flag & FN_FLUSHWANT) { 806241519Sattilio fvdat->flag &= ~FN_FLUSHWANT; 807241519Sattilio wakeup(&fvdat->flag); 808241519Sattilio } 809241519Sattilio return (error); 810241519Sattilio} 811