1139825Simp/*- 2118131Srwatson * Copyright (c) 2002, 2003 Networks Associates Technology, Inc. 3101720Sphk * All rights reserved. 4101720Sphk * 5101720Sphk * This software was developed for the FreeBSD Project by Marshall 6101720Sphk * Kirk McKusick and Network Associates Laboratories, the Security 7101720Sphk * Research Division of Network Associates, Inc. under DARPA/SPAWAR 8101720Sphk * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS 9101720Sphk * research program 10101720Sphk * 11136721Srwatson * Redistribution and use in source and binary forms, with or without 12136721Srwatson * modification, are permitted provided that the following conditions 13136721Srwatson * are met: 14136721Srwatson * 1. Redistributions of source code must retain the above copyright 15136721Srwatson * notice, this list of conditions and the following disclaimer. 16136721Srwatson * 2. Redistributions in binary form must reproduce the above copyright 17136721Srwatson * notice, this list of conditions and the following disclaimer in the 18136721Srwatson * documentation and/or other materials provided with the distribution. 19136721Srwatson * 20136721Srwatson * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21136721Srwatson * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22136721Srwatson * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23136721Srwatson * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24136721Srwatson * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25136721Srwatson * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26136721Srwatson * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27136721Srwatson * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28136721Srwatson * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29136721Srwatson * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30136721Srwatson * SUCH DAMAGE. 31136721Srwatson * 321541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993 331541Srgrimes * The Regents of the University of California. All rights reserved. 341541Srgrimes * 351541Srgrimes * Redistribution and use in source and binary forms, with or without 361541Srgrimes * modification, are permitted provided that the following conditions 371541Srgrimes * are met: 381541Srgrimes * 1. Redistributions of source code must retain the above copyright 391541Srgrimes * notice, this list of conditions and the following disclaimer. 401541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 411541Srgrimes * notice, this list of conditions and the following disclaimer in the 421541Srgrimes * documentation and/or other materials provided with the distribution. 431541Srgrimes * 4. Neither the name of the University nor the names of its contributors 441541Srgrimes * may be used to endorse or promote products derived from this software 451541Srgrimes * without specific prior written permission. 461541Srgrimes * 471541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 481541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 491541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 501541Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 511541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 521541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 531541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 541541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 551541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 561541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 571541Srgrimes * SUCH DAMAGE. 581541Srgrimes * 59128006Sbde * from: @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 60128006Sbde * from: $FreeBSD: .../ufs/ufs_readwrite.c,v 1.96 2002/08/12 09:22:11 phk ... 6122521Sdyson * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 621541Srgrimes */ 631541Srgrimes 64116192Sobrien#include <sys/cdefs.h> 65116192Sobrien__FBSDID("$FreeBSD$"); 66116192Sobrien 671541Srgrimes#include <sys/param.h> 68102957Sbde#include <sys/bio.h> 691541Srgrimes#include <sys/systm.h> 70102957Sbde#include <sys/buf.h> 71102957Sbde#include <sys/conf.h> 72102991Sphk#include <sys/extattr.h> 73102957Sbde#include <sys/kernel.h> 74114216Skan#include <sys/limits.h> 75102957Sbde#include <sys/malloc.h> 76102957Sbde#include <sys/mount.h> 77164033Srwatson#include <sys/priv.h> 781541Srgrimes#include <sys/stat.h> 79102957Sbde#include <sys/vmmeter.h> 801541Srgrimes#include <sys/vnode.h> 811541Srgrimes 821541Srgrimes#include <vm/vm.h> 83240238Skib#include <vm/vm_param.h> 84102957Sbde#include <vm/vm_extern.h> 85102957Sbde#include <vm/vm_object.h> 867695Sdg#include <vm/vm_page.h> 87101720Sphk#include <vm/vm_pager.h> 88101720Sphk#include <vm/vnode_pager.h> 891541Srgrimes 9059241Srwatson#include <ufs/ufs/extattr.h> 911541Srgrimes#include <ufs/ufs/quota.h> 921541Srgrimes#include <ufs/ufs/inode.h> 93102957Sbde#include <ufs/ufs/ufs_extern.h> 9422521Sdyson#include <ufs/ufs/ufsmount.h> 951541Srgrimes 
#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>
#include "opt_directio.h"
#include "opt_ffs.h"

#ifdef DIRECTIO
extern int	ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
#endif
/*
 * FFS-private vnode operations.  These wrap or replace the generic UFS
 * operations in order to add soft-updates handling, snapshot-aware
 * locking, clustering I/O, and (for UFS2) extended attribute support.
 */
static vop_fsync_t	ffs_fsync;
static vop_lock1_t	ffs_lock;
static vop_getpages_t	ffs_getpages;
static vop_read_t	ffs_read;
static vop_write_t	ffs_write;
static int	ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
static int	ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
		    struct ucred *cred);
static vop_strategy_t	ffsext_strategy;
static vop_closeextattr_t	ffs_closeextattr;
static vop_deleteextattr_t	ffs_deleteextattr;
static vop_getextattr_t	ffs_getextattr;
static vop_listextattr_t	ffs_listextattr;
static vop_openextattr_t	ffs_openextattr;
static vop_setextattr_t	ffs_setextattr;
static vop_vptofh_t	ffs_vptofh;


/*
 * Global vfs data structures for ufs.
 *
 * The "1" vectors are used for UFS1 file systems, which have no extended
 * attribute area; unimplemented operations fall through to ufs_vnodeops /
 * ufs_fifoops via .vop_default.
 */
struct vop_vector ffs_vnodeops1 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock1 =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_vptofh =		ffs_vptofh,
};

struct vop_vector ffs_fifoops1 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_reallocblks =	ffs_reallocblks, /* XXX: really ??? */
	.vop_vptofh =		ffs_vptofh,
};

/*
 * Global vfs data structures for ufs.
 *
 * The "2" vectors are used for UFS2 file systems and additionally route
 * the extended attribute operations to the ffs_*extattr handlers.
 */
struct vop_vector ffs_vnodeops2 = {
	.vop_default =		&ufs_vnodeops,
	.vop_fsync =		ffs_fsync,
	.vop_getpages =		ffs_getpages,
	.vop_lock1 =		ffs_lock,
	.vop_read =		ffs_read,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_write =		ffs_write,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};

struct vop_vector ffs_fifoops2 = {
	.vop_default =		&ufs_fifoops,
	.vop_fsync =		ffs_fsync,
	.vop_lock1 =		ffs_lock,
	.vop_reallocblks =	ffs_reallocblks,
	.vop_strategy =		ffsext_strategy,
	.vop_closeextattr =	ffs_closeextattr,
	.vop_deleteextattr =	ffs_deleteextattr,
	.vop_getextattr =	ffs_getextattr,
	.vop_listextattr =	ffs_listextattr,
	.vop_openextattr =	ffs_openextattr,
	.vop_setextattr =	ffs_setextattr,
	.vop_vptofh =		ffs_vptofh,
};

/*
 * Synch an open file.
 */
/* ARGSUSED */
static int
ffs_fsync(struct vop_fsync_args *ap)
{
	struct vnode *vp;
	struct bufobj *bo;
	int error;

	vp = ap->a_vp;
	bo = &vp->v_bufobj;
retry:
	/* Flush dirty buffers and, unless NO_INO_UPDT, the inode itself. */
	error = ffs_syncvnode(vp, ap->a_waitfor, 0);
	if (error)
		return (error);
	if (ap->a_waitfor == MNT_WAIT && DOINGSOFTDEP(vp)) {
		error = softdep_fsync(vp);
		if (error)
			return (error);

		/*
		 * The softdep_fsync() function may drop vp lock,
		 * allowing for dirty buffers to reappear on the
		 * bo_dirty list. Recheck and resync as needed.
		 */
		BO_LOCK(bo);
		if (vp->v_type == VREG && (bo->bo_numoutput > 0 ||
		    bo->bo_dirty.bv_cnt > 0)) {
			BO_UNLOCK(bo);
			goto retry;
		}
		BO_UNLOCK(bo);
	}
	return (0);
}

/*
 * Flush all dirty buffers associated with a vnode, then (unless the
 * NO_INO_UPDT flag is given) write its inode.  "waitfor" is MNT_WAIT for
 * a synchronous flush or MNT_NOWAIT for a best-effort asynchronous one.
 * Alternating sync/async passes are made so that soft-updates dependency
 * work (one write per indirect level, the leaf, and the inode) can
 * complete; see the comment ahead of the retry logic below.
 */
int
ffs_syncvnode(struct vnode *vp, int waitfor, int flags)
{
	struct inode *ip;
	struct bufobj *bo;
	struct buf *bp;
	struct buf *nbp;
	ufs_lbn_t lbn;
	int error, wait, passes;

	ip = VTOI(vp);
	ip->i_flag &= ~IN_NEEDSYNC;
	bo = &vp->v_bufobj;

	/*
	 * When doing MNT_WAIT we must first flush all dependencies
	 * on the inode.
	 */
	if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT &&
	    (error = softdep_sync_metadata(vp)) != 0)
		return (error);

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	error = 0;
	passes = 0;
	wait = 0;	/* Always do an async pass first. */
	/* Last valid logical block: anything beyond it is truncated data. */
	lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	BO_LOCK(bo);
loop:
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
		bp->b_vflags &= ~BV_SCANNED;
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Reasons to skip this buffer: it has already been considered
		 * on this pass, the buffer has dependencies that will cause
		 * it to be redirtied and it has not already been deferred,
		 * or it is already being written.
		 */
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		/* Flush indirects in order. */
		if (waitfor == MNT_WAIT && bp->b_lblkno <= -NDADDR &&
		    lbn_level(bp->b_lblkno) >= passes)
			continue;
		if (bp->b_lblkno > lbn)
			panic("ffs_syncvnode: syncing truncated data.");
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * Check for dependencies and potentially complete them.
		 */
		if (!LIST_EMPTY(&bp->b_dep) &&
		    (error = softdep_sync_buf(vp, bp,
		    wait ? MNT_WAIT : MNT_NOWAIT)) != 0) {
			/* I/O error. */
			if (error != EBUSY) {
				BUF_UNLOCK(bp);
				return (error);
			}
			/* If we deferred once, don't defer again. */
			if ((bp->b_flags & B_DEFERRED) == 0) {
				bp->b_flags |= B_DEFERRED;
				BUF_UNLOCK(bp);
				goto next;
			}
		}
		if (wait) {
			bremfree(bp);
			if ((error = bwrite(bp)) != 0)
				return (error);
		} else if ((bp->b_flags & B_CLUSTEROK)) {
			(void) vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			(void) bawrite(bp);
		}
next:
		/*
		 * Since we may have slept during the I/O, we need
		 * to start from a known point.
		 */
		BO_LOCK(bo);
		nbp = TAILQ_FIRST(&bo->bo_dirty.bv_hd);
	}
	if (waitfor != MNT_WAIT) {
		/* Async request: one pass is all that was asked for. */
		BO_UNLOCK(bo);
		if ((flags & NO_INO_UPDT) != 0)
			return (0);
		else
			return (ffs_update(vp, 0));
	}
	/* Drain IO to see if we're done. */
	bufobj_wwait(bo, 0, 0);
	/*
	 * Block devices associated with filesystems may have new I/O
	 * requests posted for them even if the vnode is locked, so no
	 * amount of trying will get them clean. We make several passes
	 * as a best effort.
	 *
	 * Regular files may need multiple passes to flush all dependency
	 * work as it is possible that we must write once per indirect
	 * level, once for the leaf, and once for the inode and each of
	 * these will be done with one sync and one async pass.
	 */
	if (bo->bo_dirty.bv_cnt > 0) {
		/* Write the inode after sync passes to flush deps. */
		if (wait && DOINGSOFTDEP(vp) && (flags & NO_INO_UPDT) == 0) {
			BO_UNLOCK(bo);
			ffs_update(vp, 1);
			BO_LOCK(bo);
		}
		/* switch between sync/async. */
		wait = !wait;
		if (wait == 1 || ++passes < NIADDR + 2)
			goto loop;
#ifdef INVARIANTS
		if (!vn_isdisk(vp, NULL))
			vprint("ffs_fsync: dirty", vp);
#endif
	}
	BO_UNLOCK(bo);
	error = 0;
	if ((flags & NO_INO_UPDT) == 0)
		error = ffs_update(vp, 1);
	if (DOINGSUJ(vp))
		softdep_journal_fsync(VTOI(vp));
	return (error);
}

/*
 * Snapshot-aware vnode lock.  A vnode may mutate between a snapshot file
 * vnode and a regular file vnode while a locker sleeps, in which case
 * v_vnlock changes under us; retry until the lock we hold is the vnode's
 * current lock.  Compiled out when NO_FFS_SNAPSHOT is defined.
 */
static int
ffs_lock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct thread *a_td;
		char *file;
		int line;
	} */ *ap;
{
#ifndef NO_FFS_SNAPSHOT
	struct vnode *vp;
	int flags;
	struct lock *lkp;
	int result;

	switch (ap->a_flags & LK_TYPE_MASK) {
	case LK_SHARED:
	case LK_UPGRADE:
	case LK_EXCLUSIVE:
		vp = ap->a_vp;
		flags = ap->a_flags;
		for (;;) {
#ifdef DEBUG_VFS_LOCKS
			KASSERT(vp->v_holdcnt != 0,
			    ("ffs_lock %p: zero hold count", vp));
#endif
			lkp = vp->v_vnlock;
			result = _lockmgr_args(lkp, flags, VI_MTX(vp),
			    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
			    ap->a_file, ap->a_line);
			if (lkp == vp->v_vnlock || result != 0)
				break;
			/*
			 * Apparent success, except that the vnode
			 * mutated between snapshot file vnode and
			 * regular file vnode while this process
			 * slept. The lock currently held is not the
			 * right lock. Release it, and try to get the
			 * new lock.
			 */
			(void) _lockmgr_args(lkp, LK_RELEASE, NULL,
			    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
			    ap->a_file, ap->a_line);
			if ((flags & (LK_INTERLOCK | LK_NOWAIT)) ==
			    (LK_INTERLOCK | LK_NOWAIT))
				return (EBUSY);
			/* An upgrade lost its shared lock above; go exclusive. */
			if ((flags & LK_TYPE_MASK) == LK_UPGRADE)
				flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE;
			/* The interlock was released by lockmgr on the first try. */
			flags &= ~LK_INTERLOCK;
		}
		break;
	default:
		result = VOP_LOCK1_APV(&ufs_vnodeops, ap);
	}
	return (result);
#else
	return (VOP_LOCK1_APV(&ufs_vnodeops, ap));
#endif
}

/*
 * Vnode op for reading.
 */
static int
ffs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int error;
	int seqcount;
	int ioflag;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	/* Extended attribute area reads are routed to ffs_extread(). */
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extread(vp, uio, ioflag));
#else
		panic("ffs_read+IO_EXT");
#endif
#ifdef DIRECTIO
	if ((ioflag & IO_DIRECT) != 0) {
		int workdone;

		/* Try a raw (uncached) read; fall through if it declined. */
		error = ffs_rawread(vp, uio, &workdone);
		if (error != 0 || workdone != 0)
			return error;
	}
#endif

	/* Sequential-access heuristic encoded in the upper ioflag bits. */
	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ)
		panic("ffs_read: mode");

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
			panic("ffs_read: short symlink");
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("ffs_read: type %d", vp->v_type);
#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_read: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_read: uio->uio_offset < 0"));
	fs = ip->i_fs;
	if (uio->uio_offset < ip->i_size &&
	    uio->uio_offset >= fs->fs_maxfilesize)
		return (EOVERFLOW);

	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer. The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = blksize(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		if (lblktosize(fs, nextlbn) >= ip->i_size) {
			/*
			 * Don't do readahead if this is the end of the file.
			 */
			error = bread_gb(vp, lbn, size, NOCRED,
			    GB_UNMAPPED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Otherwise if we are allowed to cluster,
			 * grab as much as we can.
			 *
			 * XXX This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read_gb(vp, ip->i_size, lbn,
			    size, NOCRED, blkoffset + uio->uio_resid,
			    seqcount, GB_UNMAPPED, &bp);
		} else if (seqcount > 1) {
			/*
			 * If we are NOT allowed to cluster, then
			 * if we appear to be acting sequentially,
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			u_int nextsize = blksize(fs, ip, nextlbn);
			error = breadn_flags(vp, lbn, size, &nextlbn,
			    &nextsize, 1, NOCRED, GB_UNMAPPED, &bp);
		} else {
			/*
			 * Failing all of the above, just read what the
			 * user asked for. Interestingly, the same as
			 * the first option above.
			 */
			error = bread_gb(vp, lbn, size, NOCRED,
			    GB_UNMAPPED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer. This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		/* Copy out via the mapping, or page-by-page if unmapped. */
		if ((bp->b_flags & B_UNMAPPED) == 0) {
			error = vn_io_fault_uiomove((char *)bp->b_data +
			    blkoffset, (int)xfersize, uio);
		} else {
			error = vn_io_fault_pgmove(bp->b_pages, blkoffset,
			    (int)xfersize, uio);
		}
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing. For non-direct VMIO reads, the VM
			 * has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it. We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		    (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}

	/* Mark the atime update unless the mount disabled access times. */
	if ((error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & MNT_NOATIME) == 0 &&
	    (ip->i_flag & IN_ACCESS) == 0) {
		VI_LOCK(vp);
		ip->i_flag |= IN_ACCESS;
		VI_UNLOCK(vp);
	}
	return (error);
}

/*
 * Vnode op for writing.
 */
static int
ffs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;
	ssize_t resid;
	int seqcount;
	int blkoffset, error, flags, ioflag, size, xfersize;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	/* Extended attribute area writes are routed to ffs_extwrite(). */
	if (ap->a_ioflag & IO_EXT)
#ifdef notyet
		return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
#else
		panic("ffs_write+IO_EXT");
#endif

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
	ip = VTOI(vp);

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE)
		panic("ffs_write: mode");
#endif

	switch
(vp->v_type) { 673101720Sphk case VREG: 674101720Sphk if (ioflag & IO_APPEND) 675101720Sphk uio->uio_offset = ip->i_size; 676125710Sbde if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) 677101720Sphk return (EPERM); 678101720Sphk /* FALLTHROUGH */ 679101720Sphk case VLNK: 680101720Sphk break; 681101720Sphk case VDIR: 682124856Sache panic("ffs_write: dir write"); 683101720Sphk break; 684101720Sphk default: 685124856Sache panic("ffs_write: type %p %d (%d,%d)", vp, (int)vp->v_type, 686101720Sphk (int)uio->uio_offset, 687101720Sphk (int)uio->uio_resid 688101720Sphk ); 689101720Sphk } 690101720Sphk 691125079Sache KASSERT(uio->uio_resid >= 0, ("ffs_write: uio->uio_resid < 0")); 692125079Sache KASSERT(uio->uio_offset >= 0, ("ffs_write: uio->uio_offset < 0")); 693101720Sphk fs = ip->i_fs; 694125710Sbde if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) 695101720Sphk return (EFBIG); 696101720Sphk /* 697101720Sphk * Maybe this should be above the vnode op call, but so long as 698101720Sphk * file servers have no limits, I don't think it matters. 
699101720Sphk */ 700207662Strasz if (vn_rlimit_fsize(vp, uio, uio->uio_td)) 701207662Strasz return (EFBIG); 702101720Sphk 703101720Sphk resid = uio->uio_resid; 704101720Sphk osize = ip->i_size; 705105422Sdillon if (seqcount > BA_SEQMAX) 706105422Sdillon flags = BA_SEQMAX << BA_SEQSHIFT; 707105422Sdillon else 708105422Sdillon flags = seqcount << BA_SEQSHIFT; 709101720Sphk if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) 710105422Sdillon flags |= IO_SYNC; 711251897Sscottl flags |= BA_UNMAPPED; 712101720Sphk 713101720Sphk for (error = 0; uio->uio_resid > 0;) { 714101720Sphk lbn = lblkno(fs, uio->uio_offset); 715101720Sphk blkoffset = blkoff(fs, uio->uio_offset); 716101720Sphk xfersize = fs->fs_bsize - blkoffset; 717101720Sphk if (uio->uio_resid < xfersize) 718101720Sphk xfersize = uio->uio_resid; 719101720Sphk if (uio->uio_offset + xfersize > ip->i_size) 720101720Sphk vnode_pager_setsize(vp, uio->uio_offset + xfersize); 721101720Sphk 722262780Spfg /* 723101720Sphk * We must perform a read-before-write if the transfer size 724101720Sphk * does not cover the entire buffer. 725262780Spfg */ 726101720Sphk if (fs->fs_bsize > xfersize) 727101720Sphk flags |= BA_CLRBUF; 728101720Sphk else 729101720Sphk flags &= ~BA_CLRBUF; 730101720Sphk/* XXX is uio->uio_offset the right thing here? 
*/ 731101720Sphk error = UFS_BALLOC(vp, uio->uio_offset, xfersize, 732101720Sphk ap->a_cred, flags, &bp); 733187468Skib if (error != 0) { 734187468Skib vnode_pager_setsize(vp, ip->i_size); 735101720Sphk break; 736187468Skib } 737101720Sphk if (ioflag & IO_DIRECT) 738101720Sphk bp->b_flags |= B_DIRECT; 739129545Skensmith if ((ioflag & (IO_SYNC|IO_INVAL)) == (IO_SYNC|IO_INVAL)) 740129545Skensmith bp->b_flags |= B_NOCACHE; 741101720Sphk 742101720Sphk if (uio->uio_offset + xfersize > ip->i_size) { 743101720Sphk ip->i_size = uio->uio_offset + xfersize; 744132775Skan DIP_SET(ip, i_size, ip->i_size); 745101720Sphk } 746101720Sphk 747101720Sphk size = blksize(fs, ip, lbn) - bp->b_resid; 748101720Sphk if (size < xfersize) 749101720Sphk xfersize = size; 750101720Sphk 751251897Sscottl if ((bp->b_flags & B_UNMAPPED) == 0) { 752251897Sscottl error = vn_io_fault_uiomove((char *)bp->b_data + 753251897Sscottl blkoffset, (int)xfersize, uio); 754251897Sscottl } else { 755251897Sscottl error = vn_io_fault_pgmove(bp->b_pages, blkoffset, 756251897Sscottl (int)xfersize, uio); 757251897Sscottl } 758231953Skib /* 759231953Skib * If the buffer is not already filled and we encounter an 760231953Skib * error while trying to fill it, we have to clear out any 761231953Skib * garbage data from the pages instantiated for the buffer. 762231953Skib * If we do not, a failed uiomove() during a write can leave 763231953Skib * the prior contents of the pages exposed to a userland mmap. 764231953Skib * 765231953Skib * Note that we need only clear buffers with a transfer size 766231953Skib * equal to the block size because buffers with a shorter 767231953Skib * transfer size were cleared above by the call to UFS_BALLOC() 768231953Skib * with the BA_CLRBUF flag set. 
	 *
	 * If the source region for uiomove identically mmaps the
	 * buffer, uiomove() performed the NOP copy, and the buffer
	 * content remains valid because the page fault handler
	 * validated the pages.
	 */
	if (error != 0 && (bp->b_flags & B_CACHE) == 0 &&
	    fs->fs_bsize == xfersize)
		vfs_bio_clrbuf(bp);
	if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
	   (LIST_EMPTY(&bp->b_dep))) {
		bp->b_flags |= B_RELBUF;
	}

	/*
	 * If IO_SYNC each buffer is written synchronously.  Otherwise
	 * if we have a severe page deficiency write the buffer
	 * asynchronously.  Otherwise try to cluster, and if that
	 * doesn't do it then either do an async write (if O_DIRECT),
	 * or a delayed write (if not).
	 */
	if (ioflag & IO_SYNC) {
		(void)bwrite(bp);
	} else if (vm_page_count_severe() ||
		    buf_dirty_count_severe() ||
		    (ioflag & IO_ASYNC)) {
		bp->b_flags |= B_CLUSTEROK;
		bawrite(bp);
	} else if (xfersize + blkoffset == fs->fs_bsize) {
		/*
		 * A full filesystem block was written: attempt clustered
		 * output unless the mount disables it (MNT_NOCLUSTERW).
		 */
		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
			bp->b_flags |= B_CLUSTEROK;
			cluster_write_gb(vp, bp, ip->i_size, seqcount,
			    GB_UNMAPPED);
		} else {
			bawrite(bp);
		}
	} else if (ioflag & IO_DIRECT) {
		bp->b_flags |= B_CLUSTEROK;
		bawrite(bp);
	} else {
		bp->b_flags |= B_CLUSTEROK;
		bdwrite(bp);
	}
	if (error || xfersize == 0)
		break;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 *
	 * NOTE(review): priv_check_cred(PRIV_VFS_RETAINSUGID) returning
	 * non-zero (privilege denied) is what triggers the stripping; a
	 * NULL a_cred (kernel-originated write) skips it entirely.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
	    ap->a_cred) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			DIP_SET(ip, i_mode, ip->i_mode);
		}
	}
	if (error) {
		/*
		 * IO_UNIT: undo the partial write — truncate back to the
		 * original size and rewind the uio so the caller sees an
		 * all-or-nothing result.
		 */
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_NORMAL | (ioflag & IO_SYNC),
			    ap->a_cred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}

/*
 * get page routine
 *
 * Fast path for VOP_GETPAGES: if the requested page is already (partially)
 * valid, finish it locally instead of going to the generic pager.
 */
static int
ffs_getpages(ap)
	struct vop_getpages_args *ap;
{
	int i;
	vm_page_t mreq;	/* the page the fault handler actually needs */
	int pcount;

	pcount = round_page(ap->a_count) / PAGE_SIZE;
	mreq = ap->a_m[ap->a_reqpage];

	/*
	 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
	 * then the entire page is valid.  Since the page may be mapped,
	 * user programs might reference data beyond the actual end of file
	 * occurring within the page.  We have to zero that data.
	 */
	 */
	VM_OBJECT_LOCK(mreq->object);
	if (mreq->valid) {
		/* Zero the invalid sub-ranges so no stale data is exposed. */
		if (mreq->valid != VM_PAGE_BITS_ALL)
			vm_page_zero_invalid(mreq, TRUE);
		/*
		 * Only the requested page is returned; the speculative
		 * neighbours handed in by the caller are released.
		 */
		for (i = 0; i < pcount; i++) {
			if (i != ap->a_reqpage) {
				vm_page_lock(ap->a_m[i]);
				vm_page_free(ap->a_m[i]);
				vm_page_unlock(ap->a_m[i]);
			}
		}
		VM_OBJECT_UNLOCK(mreq->object);
		return VM_PAGER_OK;
	}
	VM_OBJECT_UNLOCK(mreq->object);

	/* Requested page not valid: fall back to the generic vnode pager. */
	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count,
	    ap->a_reqpage);
}


/*
 * Extended attribute area reading.
 *
 * Reads from the UFS2 inode's external attribute area (di_extsize bytes,
 * UFS2-only — see the INVARIANTS check below).  Same overall structure as
 * the regular read path, but addressed via negative logical block numbers.
 */
static int
ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn, nextlbn;
	off_t bytesinfile;	/* bytes of ext area past uio_offset */
	long size, xfersize, blkoffset;
	ssize_t orig_resid;
	int error;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extread: mode");

#endif
	orig_resid = uio->uio_resid;
	KASSERT(orig_resid >= 0, ("ffs_extread: uio->uio_resid < 0"));
	if (orig_resid == 0)
		return (0);
	KASSERT(uio->uio_offset >= 0, ("ffs_extread: uio->uio_offset < 0"));

	/* bp is reset to NULL each iteration; see the cleanup after the loop. */
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;

		/*
		 * size of buffer.  The buffer representing the
		 * end of the file is rounded up to the size of
		 * the block type ( fragment or full block,
		 * depending ).
		 */
		size = sblksize(fs, dp->di_extsize, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);

		/*
		 * The amount we want to transfer in this iteration is
		 * one FS block less the amount of the data before
		 * our startpoint (duh!)
		 */
		xfersize = fs->fs_bsize - blkoffset;

		/*
		 * But if we actually want less than the block,
		 * or the file doesn't have a whole block more of data,
		 * then use the lesser number.
		 */
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;
		if (bytesinfile < xfersize)
			xfersize = bytesinfile;

		/*
		 * The ext attr area lives at negative logical block
		 * numbers: block lbn of the area is read as (-1 - lbn).
		 */
		if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
			/*
			 * Don't do readahead if this is the end of the info.
			 */
			error = bread(vp, -1 - lbn, size, NOCRED, &bp);
		} else {
			/*
			 * If we have a second block, then
			 * fire off a request for a readahead
			 * as well as a read. Note that the 4th and 5th
			 * arguments point to arrays of the size specified in
			 * the 6th argument.
			 */
			u_int nextsize = sblksize(fs, dp->di_extsize, nextlbn);

			nextlbn = -1 - nextlbn;
			error = breadn(vp, -1 - lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * If IO_DIRECT then set B_DIRECT for the buffer.  This
		 * will cause us to attempt to release the buffer later on
		 * and will cause the buffer cache to attempt to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}

		error = uiomove((char *)bp->b_data + blkoffset,
		    (int)xfersize, uio);
		if (error)
			break;

		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_EMPTY(&bp->b_dep))) {
			/*
			 * If there are no dependencies, and it's VMIO,
			 * then we don't need the buf, mark it available
			 * for freeing.  For non-direct VMIO reads, the VM
			 * has the data.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise let whoever
			 * made the request take care of
			 * freeing it.  We just queue
			 * it onto another list.
			 */
			bqrelse(bp);
		}
	}

	/*
	 * This can only happen in the case of an error
	 * because the loop above resets bp to NULL on each iteration
	 * and on normal completion has not set a new value into it.
	 * so it must have come from a 'break' statement
	 */
	if (bp != NULL) {
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	}
	return (error);
}

/*
 * Extended attribute area writing.
 */
static int
ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct buf *bp;
	ufs_lbn_t lbn;
	off_t osize;	/* ext area size on entry; restore target for IO_UNIT */
	ssize_t resid;
	int blkoffset, error, flags, size, xfersize;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;

#ifdef INVARIANTS
	if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_extwrite: mode");
#endif

	if (ioflag & IO_APPEND)
		uio->uio_offset = dp->di_extsize;
	KASSERT(uio->uio_offset >= 0, ("ffs_extwrite: uio->uio_offset < 0"));
	KASSERT(uio->uio_resid >= 0, ("ffs_extwrite: uio->uio_resid < 0"));
	/* The ext area is limited to the NXADDR direct blocks. */
	if ((uoff_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
		return (EFBIG);

	resid = uio->uio_resid;
	osize = dp->di_extsize;
	flags = IO_EXT;
	if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
		flags |= IO_SYNC;

	for (error = 0; uio->uio_resid > 0;) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = fs->fs_bsize - blkoffset;
		if (uio->uio_resid < xfersize)
			xfersize = uio->uio_resid;

		/*
		 * We must perform a read-before-write if the transfer size
		 * does not cover the entire buffer.
		 */
		if (fs->fs_bsize > xfersize)
			flags |= BA_CLRBUF;
		else
			flags &= ~BA_CLRBUF;
		error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
		    ucred, flags, &bp);
		if (error != 0)
			break;
		/*
		 * If the buffer is not valid we have to clear out any
		 * garbage data from the pages instantiated for the buffer.
		 * If we do not, a failed uiomove() during a write can leave
		 * the prior contents of the pages exposed to a userland
		 * mmap().  XXX deal with uiomove() errors a better way.
		 */
		if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
			vfs_bio_clrbuf(bp);
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		if (uio->uio_offset + xfersize > dp->di_extsize)
			dp->di_extsize = uio->uio_offset + xfersize;

		size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
		if (size < xfersize)
			xfersize = size;

		/*
		 * NOTE(review): a uiomove() failure is not checked until
		 * after the buffer below has already been scheduled for
		 * write — the (possibly cleared) buffer still goes to disk;
		 * this matches the XXX above about uiomove error handling.
		 */
		error =
		    uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
		if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
		   (LIST_EMPTY(&bp->b_dep))) {
			bp->b_flags |= B_RELBUF;
		}

		/*
		 * If IO_SYNC each buffer is written synchronously.  Otherwise
		 * if we have a severe page deficiency write the buffer
		 * asynchronously.  Otherwise try to cluster, and if that
		 * doesn't do it then either do an async write (if O_DIRECT),
		 * or a delayed write (if not).
		 */
		if (ioflag & IO_SYNC) {
			(void)bwrite(bp);
		} else if (vm_page_count_severe() ||
			    buf_dirty_count_severe() ||
			    xfersize + blkoffset == fs->fs_bsize ||
			    (ioflag & (IO_ASYNC | IO_DIRECT)))
			bawrite(bp);
		else
			bdwrite(bp);
		if (error || xfersize == 0)
			break;
		ip->i_flag |= IN_CHANGE;
	}
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID, 0)) {
			ip->i_mode &= ~(ISUID | ISGID);
			dp->di_mode = ip->i_mode;
		}
	}
	if (error) {
		/* IO_UNIT: roll the ext area back to its original size. */
		if (ioflag & IO_UNIT) {
			(void)ffs_truncate(vp, osize,
			    IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
		error = ffs_update(vp, 1);
	return (error);
}


/*
 * Vnode operating to retrieve a named extended attribute.
 *
 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
 * the length of the EA, and possibly the pointer to the entry and to the data.
 */
static int
ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
{
	u_char *p, *pe, *pn, *p0;
	int eapad1, eapad2, ealength, ealen, nlen;
	uint32_t ul;

	/*
	 * On-disk EA record layout, as parsed below:
	 *   uint32_t total record length (ul)
	 *   u_char   namespace
	 *   u_char   pad2 length (padding after the data)
	 *   u_char   name length
	 *   name bytes, then pad1 to an 8-byte boundary, then data, then pad2.
	 * Returns the data length (>= 0) on a match, -1 if not found.
	 * Optionally returns *eap = start of record, *eac = start of data.
	 *
	 * NOTE(review): ul is not validated; a corrupt record with ul == 0
	 * would make pn == p and keep the scan from advancing — presumably
	 * callers only hand in areas written by ffs_setextattr; TODO confirm.
	 */
	pe = ptr + length;
	nlen = strlen(name);

	for (p = ptr; p < pe; p = pn) {
		p0 = p;
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		/* make sure this entry is complete */
		if (pn > pe)
			break;
		p += sizeof(uint32_t);
		if (*p != nspace)
			continue;
		p++;
		eapad2 = *p++;
		if (*p != nlen)
			continue;
		p++;
		if (bcmp(p, name, nlen))
			continue;
		ealength = sizeof(uint32_t) + 3 + nlen;
		eapad1 = 8 - (ealength % 8);
		if (eapad1 == 8)
			eapad1 = 0;
		ealength += eapad1;
		ealen = ul - ealength - eapad2;
		p += nlen + eapad1;
		if (eap != NULL)
			*eap = p0;
		if (eac != NULL)
			*eac = p;
		return (ealen);
	}
	return(-1);
}

/*
 * Read the whole EA area into a malloc(M_TEMP)ed buffer of
 * easize + extra bytes; on success ownership passes to *p.
 */
static int
ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	struct fs *fs;
	struct uio luio;
	struct iovec liovec;
	u_int easize;
	int error;
	u_char *eae;

	ip = VTOI(vp);
	fs = ip->i_fs;
	dp = ip->i_din2;
	easize = dp->di_extsize;
	if ((uoff_t)easize + extra > NXADDR * fs->fs_bsize)
		return (EFBIG);

	eae = malloc(easize + extra, M_TEMP, M_WAITOK);

	liovec.iov_base = eae;
	liovec.iov_len = easize;
	luio.uio_iov = &liovec;
	luio.uio_iovcnt = 1;
	luio.uio_offset = 0;
	luio.uio_resid = easize;
	luio.uio_segflg = UIO_SYSSPACE;
	luio.uio_rw = UIO_READ;
	luio.uio_td = td;

	error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
	if (error) {
		free(eae, M_TEMP);
		return(error);
	}
	*p = eae;
	return (0);
}

/*
 * Acquire the per-inode EA sleep lock.  The IN_EA_LOCKED /
 * IN_EA_LOCKWAIT flags are protected by the vnode interlock;
 * waiters sleep on &ip->i_ea_refs.
 */
static void
ffs_lock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	while (ip->i_flag & IN_EA_LOCKED) {
		ip->i_flag |= IN_EA_LOCKWAIT;
		msleep(&ip->i_ea_refs, &vp->v_interlock, PINOD + 2, "ufs_ea",
		    0);
	}
	ip->i_flag |= IN_EA_LOCKED;
	VI_UNLOCK(vp);
}

/*
 * Release the per-inode EA lock and wake any waiters.
 */
static void
ffs_unlock_ea(struct vnode *vp)
{
	struct inode *ip;

	ip = VTOI(vp);
	VI_LOCK(vp);
	if (ip->i_flag & IN_EA_LOCKWAIT)
		wakeup(&ip->i_ea_refs);
	ip->i_flag &= ~(IN_EA_LOCKED | IN_EA_LOCKWAIT);
	VI_UNLOCK(vp);
}

/*
 * Open an EA transaction: load the EA area into memory (i_ea_area)
 * or, if already loaded, just take another reference (i_ea_refs).
 */
static int
ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct ufs2_dinode *dp;
	int error;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area != NULL) {
		/* Area already cached by an earlier open: share it. */
		ip->i_ea_refs++;
		ffs_unlock_ea(vp);
		return (0);
	}
	dp = ip->i_din2;
	error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
	if (error) {
		ffs_unlock_ea(vp);
		return (error);
	}
	ip->i_ea_len = dp->di_extsize;
	ip->i_ea_error = 0;
	ip->i_ea_refs++;
	ffs_unlock_ea(vp);
	return (0);
}

/*
 * Vnode extattr transaction commit/abort
 *
 * Drops one reference on the in-memory EA area; on commit, writes the
 * whole area back first.  The area is freed when the last reference goes.
 */
static int
ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
{
	struct inode *ip;
	struct uio luio;
	struct iovec liovec;
	int error;
	struct ufs2_dinode *dp;

	ip = VTOI(vp);

	ffs_lock_ea(vp);
	if (ip->i_ea_area == NULL) {
		ffs_unlock_ea(vp);
		return (EINVAL);
	}
	dp = ip->i_din2;
	error = ip->i_ea_error;
	if (commit && error == 0) {
		ASSERT_VOP_ELOCKED(vp, "ffs_close_ea commit");
		if (cred == NOCRED)
			cred = vp->v_mount->mnt_cred;
		liovec.iov_base = ip->i_ea_area;
		liovec.iov_len = ip->i_ea_len;
		luio.uio_iov = &liovec;
		luio.uio_iovcnt = 1;
		luio.uio_offset = 0;
		luio.uio_resid = ip->i_ea_len;
		luio.uio_segflg = UIO_SYSSPACE;
		luio.uio_rw = UIO_WRITE;
		luio.uio_td = td;
		/* XXX: I'm not happy about truncating to zero size */
		/*
		 * NOTE(review): any error from ffs_truncate() here is
		 * immediately overwritten by the ffs_extwrite() result on
		 * the next line — looks suspicious; confirm against
		 * upstream before changing.
		 */
		if (ip->i_ea_len < dp->di_extsize)
			error = ffs_truncate(vp, 0, IO_EXT, cred, td);
		error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
	}
	if (--ip->i_ea_refs == 0) {
		free(ip->i_ea_area, M_TEMP);
		ip->i_ea_area = NULL;
		ip->i_ea_len = 0;
		ip->i_ea_error = 0;
	}
	ffs_unlock_ea(vp);
	return (error);
}

/*
 * Vnode extattr strategy routine for fifos.
 *
 * We need to check for a read or write of the external attributes.
 * Otherwise we just fall through and do the usual thing.
 */
static int
ffsext_strategy(struct vop_strategy_args *ap)
/*
struct vop_strategy_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	struct buf *a_bp;
};
*/
{
	struct vnode *vp;
	daddr_t lbn;

	vp = ap->a_vp;
	lbn = ap->a_bp->b_lblkno;
	/* Negative lbns within -NXADDR address the UFS2 EA area. */
	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
	    lbn < 0 && lbn >= -NXADDR)
		return (VOP_STRATEGY_APV(&ufs_vnodeops, ap));
	if (vp->v_type == VFIFO)
		return (VOP_STRATEGY_APV(&ufs_fifoops, ap));
	panic("spec nodes went here");
}

/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_openextattr(struct vop_openextattr_args *ap)
/*
struct vop_openextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	/* EAs are not supported on device special files. */
	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
}


/*
 * Vnode extattr transaction commit/abort
 */
static int
ffs_closeextattr(struct vop_closeextattr_args *ap)
/*
struct vop_closeextattr_args {
	struct vnodeop_desc *a_desc;
	struct vnode *a_vp;
	int a_commit;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	/* Committing changes requires a writable filesystem. */
	if (ap->a_commit && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY))
		return (EROFS);

	return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
}

/*
 * Vnode operation to remove a named attribute.
 */
static int
ffs_deleteextattr(struct vop_deleteextattr_args *ap)
/*
vop_deleteextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	int ealen, olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {

		/*
		 * ffs_lock_ea is not needed there, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	/* Deletion is modelled as replacement with a zero-length record. */
	ealength = eapad1 = ealen = eapad2 = 0;

	/* Work on a private copy of the EA area; swap it in on success. */
	eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    &p, NULL);
	if (olen == -1) {
		/* delete but nonexistent */
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		return(ENOATTR);
	}
	bcopy(p, &ul, sizeof ul);
	i = p - eae + ul;
	if (ul != ealength) {
		/* Shift the tail down over the removed record. */
		bcopy(p + ul, p + ealength, easize - i);
		easize += (ealength - ul);
	}
	if (easize > NXADDR * fs->fs_bsize) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to retrieve a named extended attribute.
 */
static int
ffs_getextattr(struct vop_getextattr_args *ap)
/*
vop_getextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p;
	unsigned easize;
	int error, ealen;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
	    NULL, &p);
	if (ealen >= 0) {
		error = 0;
		/* a_size query takes precedence over copying the data out. */
		if (ap->a_size != NULL)
			*ap->a_size = ealen;
		else if (ap->a_uio != NULL)
			error = uiomove(p, ealen, ap->a_uio);
	} else
		error = ENOATTR;

	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to retrieve extended attributes on a vnode.
 */
static int
ffs_listextattr(struct vop_listextattr_args *ap)
/*
vop_listextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	INOUT struct uio *a_uio;
	OUT size_t *a_size;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	u_char *eae, *p, *pe, *pn;
	unsigned easize;
	uint32_t ul;
	int error, ealen;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VREAD);
	if (error)
		return (error);

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);
	eae = ip->i_ea_area;
	easize = ip->i_ea_len;

	error = 0;
	if (ap->a_size != NULL)
		*ap->a_size = 0;
	pe = eae + easize;
	/*
	 * Walk the records; for each matching namespace emit the
	 * length-prefixed name (ealen byte + name bytes = ealen + 1).
	 */
	for(p = eae; error == 0 && p < pe; p = pn) {
		bcopy(p, &ul, sizeof(ul));
		pn = p + ul;
		if (pn > pe)
			break;
		p += sizeof(ul);
		if (*p++ != ap->a_attrnamespace)
			continue;
		p++;	/* pad2 */
		ealen = *p;
		if (ap->a_size != NULL) {
			*ap->a_size += ealen + 1;
		} else if (ap->a_uio != NULL) {
			error = uiomove(p, ealen + 1, ap->a_uio);
		}
	}
	ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode operation to set a named attribute.
 */
static int
ffs_setextattr(struct vop_setextattr_args *ap)
/*
vop_setextattr {
	IN struct vnode *a_vp;
	IN int a_attrnamespace;
	IN const char *a_name;
	INOUT struct uio *a_uio;
	IN struct ucred *a_cred;
	IN struct thread *a_td;
};
*/
{
	struct inode *ip;
	struct fs *fs;
	uint32_t ealength, ul;
	ssize_t ealen;
	int olen, eapad1, eapad2, error, i, easize;
	u_char *eae, *p;

	ip = VTOI(ap->a_vp);
	fs = ip->i_fs;

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	if (strlen(ap->a_name) == 0)
		return (EINVAL);

	/* XXX Now unsupported API to delete EAs using NULL uio.
 */
	if (ap->a_uio == NULL)
		return (EOPNOTSUPP);

	if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)
		return (EROFS);

	ealen = ap->a_uio->uio_resid;
	if (ealen < 0 || ealen > lblktosize(fs, NXADDR))
		return (EINVAL);

	error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
	    ap->a_cred, ap->a_td, VWRITE);
	if (error) {

		/*
		 * ffs_lock_ea is not needed there, because the vnode
		 * must be exclusively locked.
		 */
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return (error);
	}

	error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
	if (error)
		return (error);

	/*
	 * Compute the new record size: header (length word, namespace,
	 * pad2 byte, name-length byte, name) padded to 8 bytes, plus the
	 * data padded to 8 bytes — mirrors the layout ffs_findextattr()
	 * parses.
	 */
	ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
	eapad1 = 8 - (ealength % 8);
	if (eapad1 == 8)
		eapad1 = 0;
	eapad2 = 8 - (ealen % 8);
	if (eapad2 == 8)
		eapad2 = 0;
	ealength += eapad1 + ealen + eapad2;

	/* Build the new area in a private copy; swap it in on success. */
	eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
	bcopy(ip->i_ea_area, eae, ip->i_ea_len);
	easize = ip->i_ea_len;

	olen = ffs_findextattr(eae, easize,
	    ap->a_attrnamespace, ap->a_name, &p, NULL);
        if (olen == -1) {
		/* new, append at end */
		p = eae + easize;
		easize += ealength;
	} else {
		bcopy(p, &ul, sizeof ul);
		i = p - eae + ul;
		if (ul != ealength) {
			/* Resize in place: shift the tail to fit. */
			bcopy(p + ul, p + ealength, easize - i);
			easize += (ealength - ul);
		}
	}
	if (easize > lblktosize(fs, NXADDR)) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = ENOSPC;
		return(ENOSPC);
	}
	/* Serialize the record exactly as ffs_findextattr() expects it. */
	bcopy(&ealength, p, sizeof(ealength));
	p += sizeof(ealength);
	*p++ = ap->a_attrnamespace;
	*p++ = eapad2;
	*p++ = strlen(ap->a_name);
	strcpy(p, ap->a_name);
	p += strlen(ap->a_name);
	bzero(p, eapad1);
	p += eapad1;
	error = uiomove(p, ealen, ap->a_uio);
	if (error) {
		free(eae, M_TEMP);
		ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
		if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
			ip->i_ea_error = error;
		return(error);
	}
	p += ealen;
	bzero(p, eapad2);

	p = ip->i_ea_area;
	ip->i_ea_area = eae;
	ip->i_ea_len = easize;
	free(p, M_TEMP);
	error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
	return(error);
}

/*
 * Vnode pointer to File handle
 */
static int
ffs_vptofh(struct vop_vptofh_args *ap)
/*
vop_vptofh {
	IN struct vnode *a_vp;
	IN struct fid *a_fhp;
};
*/
{
	struct inode *ip;
	struct ufid *ufhp;

	ip = VTOI(ap->a_vp);
	ufhp = (struct ufid *)ap->a_fhp;
	ufhp->ufid_len =
sizeof(struct ufid); 1790166774Spjd ufhp->ufid_ino = ip->i_number; 1791166774Spjd ufhp->ufid_gen = ip->i_gen; 1792166774Spjd return (0); 1793166774Spjd} 1794