ffs_vnops.c: revision 118131 vs. revision 118607
1/*
2 * Copyright (c) 2002, 2003 Networks Associates Technology, Inc.
3 * All rights reserved.
4 *
5 * This software was developed for the FreeBSD Project by Marshall
6 * Kirk McKusick and Network Associates Laboratories, the Security
7 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
8 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
9 * research program
10 *
11 * Copyright (c) 1982, 1986, 1989, 1993
12 * The Regents of the University of California. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 * notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 * notice, this list of conditions and the following disclaimer in the
21 * documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 * must display the following acknowledgement:
24 * This product includes software developed by the University of
25 * California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 * may be used to endorse or promote products derived from this software
28 * without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 *
42 * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95
43 */
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vnops.c 118131 2003-07-28 18:53:29Z rwatson $");
46__FBSDID("$FreeBSD: head/sys/ufs/ffs/ffs_vnops.c 118607 2003-08-07 15:04:27Z jhb $");
47
48#include <sys/param.h>
49#include <sys/bio.h>
50#include <sys/systm.h>
51#include <sys/buf.h>
52#include <sys/conf.h>
53#include <sys/extattr.h>
54#include <sys/kernel.h>
55#include <sys/limits.h>
56#include <sys/malloc.h>
57#include <sys/mount.h>
58#include <sys/proc.h>
59#include <sys/resourcevar.h>
60#include <sys/signalvar.h>
61#include <sys/stat.h>
62#include <sys/vmmeter.h>
63#include <sys/vnode.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_object.h>
68#include <vm/vm_page.h>
69#include <vm/vm_pager.h>
70#include <vm/vnode_pager.h>
71
72#include <ufs/ufs/extattr.h>
73#include <ufs/ufs/quota.h>
74#include <ufs/ufs/inode.h>
75#include <ufs/ufs/ufs_extern.h>
76#include <ufs/ufs/ufsmount.h>
77
78#include <ufs/ffs/fs.h>
79#include <ufs/ffs/ffs_extern.h>
80#include "opt_directio.h"
81
82#ifdef DIRECTIO
83extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
84#endif
85static int ffs_fsync(struct vop_fsync_args *);
86static int ffs_getpages(struct vop_getpages_args *);
87static int ffs_read(struct vop_read_args *);
88static int ffs_write(struct vop_write_args *);
89static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
90static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
91 struct ucred *cred);
92static int ffsext_strategy(struct vop_strategy_args *);
93static int ffs_closeextattr(struct vop_closeextattr_args *);
94static int ffs_deleteextattr(struct vop_deleteextattr_args *);
95static int ffs_getextattr(struct vop_getextattr_args *);
96static int ffs_listextattr(struct vop_listextattr_args *);
97static int ffs_openextattr(struct vop_openextattr_args *);
98static int ffs_setextattr(struct vop_setextattr_args *);
99
100
101/* Global vfs data structures for ufs. */
102vop_t **ffs_vnodeop_p;
103static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
104 { &vop_default_desc, (vop_t *) ufs_vnoperate },
105 { &vop_fsync_desc, (vop_t *) ffs_fsync },
106 { &vop_getpages_desc, (vop_t *) ffs_getpages },
107 { &vop_read_desc, (vop_t *) ffs_read },
108 { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
109 { &vop_write_desc, (vop_t *) ffs_write },
110 { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
111 { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
112 { &vop_getextattr_desc, (vop_t *) ffs_getextattr },
113 { &vop_listextattr_desc, (vop_t *) ffs_listextattr },
114 { &vop_openextattr_desc, (vop_t *) ffs_openextattr },
115 { &vop_setextattr_desc, (vop_t *) ffs_setextattr },
116 { NULL, NULL }
117};
118static struct vnodeopv_desc ffs_vnodeop_opv_desc =
119 { &ffs_vnodeop_p, ffs_vnodeop_entries };
120
121vop_t **ffs_specop_p;
122static struct vnodeopv_entry_desc ffs_specop_entries[] = {
123 { &vop_default_desc, (vop_t *) ufs_vnoperatespec },
124 { &vop_fsync_desc, (vop_t *) ffs_fsync },
125 { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
126 { &vop_strategy_desc, (vop_t *) ffsext_strategy },
127 { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
128 { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
129 { &vop_getextattr_desc, (vop_t *) ffs_getextattr },
130 { &vop_listextattr_desc, (vop_t *) ffs_listextattr },
131 { &vop_openextattr_desc, (vop_t *) ffs_openextattr },
132 { &vop_setextattr_desc, (vop_t *) ffs_setextattr },
133 { NULL, NULL }
134};
135static struct vnodeopv_desc ffs_specop_opv_desc =
136 { &ffs_specop_p, ffs_specop_entries };
137
138vop_t **ffs_fifoop_p;
139static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
140 { &vop_default_desc, (vop_t *) ufs_vnoperatefifo },
141 { &vop_fsync_desc, (vop_t *) ffs_fsync },
142 { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
143 { &vop_strategy_desc, (vop_t *) ffsext_strategy },
144 { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
145 { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
146 { &vop_getextattr_desc, (vop_t *) ffs_getextattr },
147 { &vop_listextattr_desc, (vop_t *) ffs_listextattr },
148 { &vop_openextattr_desc, (vop_t *) ffs_openextattr },
149 { &vop_setextattr_desc, (vop_t *) ffs_setextattr },
150 { NULL, NULL }
151};
152static struct vnodeopv_desc ffs_fifoop_opv_desc =
153 { &ffs_fifoop_p, ffs_fifoop_entries };
154
155VNODEOP_SET(ffs_vnodeop_opv_desc);
156VNODEOP_SET(ffs_specop_opv_desc);
157VNODEOP_SET(ffs_fifoop_opv_desc);
158
159/*
160 * Synch an open file.
161 */
162/* ARGSUSED */
163static int
164ffs_fsync(ap)
165 struct vop_fsync_args /* {
166 struct vnode *a_vp;
167 struct ucred *a_cred;
168 int a_waitfor;
169 struct thread *a_td;
170 } */ *ap;
171{
172 struct vnode *vp = ap->a_vp;
173 struct inode *ip = VTOI(vp);
174 struct buf *bp;
175 struct buf *nbp;
176 int s, error, wait, passes, skipmeta;
177 ufs_lbn_t lbn;
178
179 wait = (ap->a_waitfor == MNT_WAIT);
180 if (vn_isdisk(vp, NULL)) {
181 lbn = INT_MAX;
182 if (vp->v_rdev->si_mountpoint != NULL &&
183 (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
184 softdep_fsync_mountdev(vp);
185 } else {
186 lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
187 }
188
189 /*
190 * Flush all dirty buffers associated with a vnode.
191 */
192 passes = NIADDR + 1;
193 skipmeta = 0;
194 if (wait)
195 skipmeta = 1;
196 s = splbio();
197 VI_LOCK(vp);
198loop:
199 TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
200 bp->b_vflags &= ~BV_SCANNED;
201 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
202 nbp = TAILQ_NEXT(bp, b_vnbufs);
203 /*
204 * Reasons to skip this buffer: it has already been considered
205 * on this pass, this pass is the first time through on a
206 * synchronous flush request and the buffer being considered
207 * is metadata, the buffer has dependencies that will cause
208 * it to be redirtied and it has not already been deferred,
209 * or it is already being written.
210 */
211 if ((bp->b_vflags & BV_SCANNED) != 0)
212 continue;
213 bp->b_vflags |= BV_SCANNED;
214 if ((skipmeta == 1 && bp->b_lblkno < 0))
215 continue;
216 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
217 continue;
218 if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
219 (bp->b_flags & B_DEFERRED) == 0 &&
220 buf_countdeps(bp, 0)) {
221 bp->b_flags |= B_DEFERRED;
222 BUF_UNLOCK(bp);
223 continue;
224 }
225 VI_UNLOCK(vp);
226 if ((bp->b_flags & B_DELWRI) == 0)
227 panic("ffs_fsync: not dirty");
228 if (vp != bp->b_vp)
229 panic("ffs_fsync: vp != vp->b_vp");
230 /*
231 * If this is a synchronous flush request, or it is not a
 232	 * file or device, start the write on this buffer immediately.
233 */
234 if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
235
236 /*
237 * On our final pass through, do all I/O synchronously
238 * so that we can find out if our flush is failing
239 * because of write errors.
240 */
241 if (passes > 0 || !wait) {
242 if ((bp->b_flags & B_CLUSTEROK) && !wait) {
243 (void) vfs_bio_awrite(bp);
244 } else {
245 bremfree(bp);
246 splx(s);
247 (void) bawrite(bp);
248 s = splbio();
249 }
250 } else {
251 bremfree(bp);
252 splx(s);
253 if ((error = bwrite(bp)) != 0)
254 return (error);
255 s = splbio();
256 }
257 } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
258 /*
259 * If the buffer is for data that has been truncated
260 * off the file, then throw it away.
261 */
262 bremfree(bp);
263 bp->b_flags |= B_INVAL | B_NOCACHE;
264 splx(s);
265 brelse(bp);
266 s = splbio();
267 } else
268 vfs_bio_awrite(bp);
269
270 /*
271 * Since we may have slept during the I/O, we need
272 * to start from a known point.
273 */
274 VI_LOCK(vp);
275 nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
276 }
277 /*
278 * If we were asked to do this synchronously, then go back for
279 * another pass, this time doing the metadata.
280 */
281 if (skipmeta) {
282 skipmeta = 0;
283 goto loop;
284 }
285
286 if (wait) {
287 while (vp->v_numoutput) {
288 vp->v_iflag |= VI_BWAIT;
289 msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
290 PRIBIO + 4, "ffsfsn", 0);
291 }
292 VI_UNLOCK(vp);
293
294 /*
 295		 * Ensure that any filesystem metadata associated
296 * with the vnode has been written.
297 */
298 splx(s);
299 if ((error = softdep_sync_metadata(ap)) != 0)
300 return (error);
301 s = splbio();
302
303 VI_LOCK(vp);
304 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
305 /*
306 * Block devices associated with filesystems may
307 * have new I/O requests posted for them even if
308 * the vnode is locked, so no amount of trying will
309 * get them clean. Thus we give block devices a
310 * good effort, then just give up. For all other file
311 * types, go around and try again until it is clean.
312 */
313 if (passes > 0) {
314 passes -= 1;
315 goto loop;
316 }
317#ifdef DIAGNOSTIC
318 if (!vn_isdisk(vp, NULL))
319 vprint("ffs_fsync: dirty", vp);
320#endif
321 }
322 }
323 VI_UNLOCK(vp);
324 splx(s);
325 return (UFS_UPDATE(vp, wait));
326}
327
328
329/*
330 * Vnode op for reading.
331 */
332/* ARGSUSED */
333static int
334ffs_read(ap)
335 struct vop_read_args /* {
336 struct vnode *a_vp;
337 struct uio *a_uio;
338 int a_ioflag;
339 struct ucred *a_cred;
340 } */ *ap;
341{
342 struct vnode *vp;
343 struct inode *ip;
344 struct uio *uio;
345 struct fs *fs;
346 struct buf *bp;
347 ufs_lbn_t lbn, nextlbn;
348 off_t bytesinfile;
349 long size, xfersize, blkoffset;
350 int error, orig_resid;
351 int seqcount;
352 int ioflag;
353 vm_object_t object;
354
355 vp = ap->a_vp;
356 uio = ap->a_uio;
357 ioflag = ap->a_ioflag;
358 if (ap->a_ioflag & IO_EXT)
359#ifdef notyet
360 return (ffs_extread(vp, uio, ioflag));
361#else
362 panic("ffs_read+IO_EXT");
363#endif
364#ifdef DIRECTIO
365 if ((ioflag & IO_DIRECT) != 0) {
366 int workdone;
367
368 error = ffs_rawread(vp, uio, &workdone);
369 if (error != 0 || workdone != 0)
370 return error;
371 }
372#endif
373
374 GIANT_REQUIRED;
375
376 seqcount = ap->a_ioflag >> 16;
377 ip = VTOI(vp);
378
379#ifdef DIAGNOSTIC
380 if (uio->uio_rw != UIO_READ)
381 panic("ffs_read: mode");
382
383 if (vp->v_type == VLNK) {
384 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
385 panic("ffs_read: short symlink");
386 } else if (vp->v_type != VREG && vp->v_type != VDIR)
387 panic("ffs_read: type %d", vp->v_type);
388#endif
389 fs = ip->i_fs;
390 if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
391 return (EFBIG);
392
393 orig_resid = uio->uio_resid;
394 if (orig_resid <= 0)
395 return (0);
396
397 object = vp->v_object;
398
399 bytesinfile = ip->i_size - uio->uio_offset;
400 if (bytesinfile <= 0) {
401 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
402 ip->i_flag |= IN_ACCESS;
403 return 0;
404 }
405
406 if (object) {
407 vm_object_reference(object);
408 }
409
410 /*
411 * Ok so we couldn't do it all in one vm trick...
412 * so cycle around trying smaller bites..
413 */
414 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
415 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
416 break;
417
418 lbn = lblkno(fs, uio->uio_offset);
419 nextlbn = lbn + 1;
420
421 /*
422 * size of buffer. The buffer representing the
423 * end of the file is rounded up to the size of
424 * the block type ( fragment or full block,
425 * depending ).
426 */
427 size = blksize(fs, ip, lbn);
428 blkoffset = blkoff(fs, uio->uio_offset);
429
430 /*
431 * The amount we want to transfer in this iteration is
432 * one FS block less the amount of the data before
433 * our startpoint (duh!)
434 */
435 xfersize = fs->fs_bsize - blkoffset;
436
437 /*
438 * But if we actually want less than the block,
439 * or the file doesn't have a whole block more of data,
440 * then use the lesser number.
441 */
442 if (uio->uio_resid < xfersize)
443 xfersize = uio->uio_resid;
444 if (bytesinfile < xfersize)
445 xfersize = bytesinfile;
446
447 if (lblktosize(fs, nextlbn) >= ip->i_size) {
448 /*
449 * Don't do readahead if this is the end of the file.
450 */
451 error = bread(vp, lbn, size, NOCRED, &bp);
452 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
453 /*
454 * Otherwise if we are allowed to cluster,
455 * grab as much as we can.
456 *
457 * XXX This may not be a win if we are not
458 * doing sequential access.
459 */
460 error = cluster_read(vp, ip->i_size, lbn,
461 size, NOCRED, uio->uio_resid, seqcount, &bp);
462 } else if (seqcount > 1) {
463 /*
464 * If we are NOT allowed to cluster, then
465 * if we appear to be acting sequentially,
466 * fire off a request for a readahead
467 * as well as a read. Note that the 4th and 5th
468 * arguments point to arrays of the size specified in
469 * the 6th argument.
470 */
471 int nextsize = blksize(fs, ip, nextlbn);
472 error = breadn(vp, lbn,
473 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
474 } else {
475 /*
476 * Failing all of the above, just read what the
477 * user asked for. Interestingly, the same as
478 * the first option above.
479 */
480 error = bread(vp, lbn, size, NOCRED, &bp);
481 }
482 if (error) {
483 brelse(bp);
484 bp = NULL;
485 break;
486 }
487
488 /*
489 * If IO_DIRECT then set B_DIRECT for the buffer. This
490 * will cause us to attempt to release the buffer later on
491 * and will cause the buffer cache to attempt to free the
492 * underlying pages.
493 */
494 if (ioflag & IO_DIRECT)
495 bp->b_flags |= B_DIRECT;
496
497 /*
498 * We should only get non-zero b_resid when an I/O error
499 * has occurred, which should cause us to break above.
500 * However, if the short read did not cause an error,
501 * then we want to ensure that we do not uiomove bad
502 * or uninitialized data.
503 */
504 size -= bp->b_resid;
505 if (size < xfersize) {
506 if (size == 0)
507 break;
508 xfersize = size;
509 }
510
511 {
512 /*
513 * otherwise use the general form
514 */
515 error =
516 uiomove((char *)bp->b_data + blkoffset,
517 (int)xfersize, uio);
518 }
519
520 if (error)
521 break;
522
523 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
524 (LIST_FIRST(&bp->b_dep) == NULL)) {
525 /*
526 * If there are no dependencies, and it's VMIO,
527 * then we don't need the buf, mark it available
528 * for freeing. The VM has the data.
529 */
530 bp->b_flags |= B_RELBUF;
531 brelse(bp);
532 } else {
533 /*
534 * Otherwise let whoever
535 * made the request take care of
536 * freeing it. We just queue
537 * it onto another list.
538 */
539 bqrelse(bp);
540 }
541 }
542
543 /*
544 * This can only happen in the case of an error
545 * because the loop above resets bp to NULL on each iteration
546 * and on normal completion has not set a new value into it.
547 * so it must have come from a 'break' statement
548 */
549 if (bp != NULL) {
550 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
551 (LIST_FIRST(&bp->b_dep) == NULL)) {
552 bp->b_flags |= B_RELBUF;
553 brelse(bp);
554 } else {
555 bqrelse(bp);
556 }
557 }
558
559 if (object) {
560 VM_OBJECT_LOCK(object);
561 vm_object_vndeallocate(object);
562 }
563 if ((error == 0 || uio->uio_resid != orig_resid) &&
564 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
565 ip->i_flag |= IN_ACCESS;
566 return (error);
567}
568
569/*
570 * Vnode op for writing.
571 */
572static int
573ffs_write(ap)
574 struct vop_write_args /* {
575 struct vnode *a_vp;
576 struct uio *a_uio;
577 int a_ioflag;
578 struct ucred *a_cred;
579 } */ *ap;
580{
581 struct vnode *vp;
582 struct uio *uio;
583 struct inode *ip;
584 struct fs *fs;
585 struct buf *bp;
586 struct thread *td;
587 ufs_lbn_t lbn;
588 off_t osize;
589 int seqcount;
590 int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
591 vm_object_t object;
592
593 vp = ap->a_vp;
594 uio = ap->a_uio;
595 ioflag = ap->a_ioflag;
596 if (ap->a_ioflag & IO_EXT)
597#ifdef notyet
598 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
599#else
 600		panic("ffs_write+IO_EXT");
601#endif
602
603 GIANT_REQUIRED;
604
605 extended = 0;
606 seqcount = ap->a_ioflag >> 16;
607 ip = VTOI(vp);
608
609 object = vp->v_object;
610 if (object) {
611 vm_object_reference(object);
612 }
613
614#ifdef DIAGNOSTIC
615 if (uio->uio_rw != UIO_WRITE)
616 panic("ffswrite: mode");
617#endif
618
619 switch (vp->v_type) {
620 case VREG:
621 if (ioflag & IO_APPEND)
622 uio->uio_offset = ip->i_size;
623 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
624 if (object) {
625 VM_OBJECT_LOCK(object);
626 vm_object_vndeallocate(object);
627 }
628 return (EPERM);
629 }
630 /* FALLTHROUGH */
631 case VLNK:
632 break;
633 case VDIR:
634 panic("ffswrite: dir write");
635 break;
636 default:
637 panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type,
638 (int)uio->uio_offset,
639 (int)uio->uio_resid
640 );
641 }
642
643 fs = ip->i_fs;
644 if (uio->uio_offset < 0 ||
645 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
646 if (object) {
647 VM_OBJECT_LOCK(object);
648 vm_object_vndeallocate(object);
649 }
650 return (EFBIG);
651 }
652 /*
653 * Maybe this should be above the vnode op call, but so long as
654 * file servers have no limits, I don't think it matters.
655 */
656 td = uio->uio_td;
657 if (vp->v_type == VREG && td &&
658 uio->uio_offset + uio->uio_resid >
659 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
660 PROC_LOCK(td->td_proc);
661 psignal(td->td_proc, SIGXFSZ);
662 PROC_UNLOCK(td->td_proc);
663 if (object) {
664 VM_OBJECT_LOCK(object);
665 vm_object_vndeallocate(object);
666 }
667 return (EFBIG);
668 }
669
670 resid = uio->uio_resid;
671 osize = ip->i_size;
672 if (seqcount > BA_SEQMAX)
673 flags = BA_SEQMAX << BA_SEQSHIFT;
674 else
675 flags = seqcount << BA_SEQSHIFT;
676 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
677 flags |= IO_SYNC;
678
679 for (error = 0; uio->uio_resid > 0;) {
680 lbn = lblkno(fs, uio->uio_offset);
681 blkoffset = blkoff(fs, uio->uio_offset);
682 xfersize = fs->fs_bsize - blkoffset;
683 if (uio->uio_resid < xfersize)
684 xfersize = uio->uio_resid;
685
686 if (uio->uio_offset + xfersize > ip->i_size)
687 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
688
689 /*
690 * We must perform a read-before-write if the transfer size
691 * does not cover the entire buffer.
692 */
693 if (fs->fs_bsize > xfersize)
694 flags |= BA_CLRBUF;
695 else
696 flags &= ~BA_CLRBUF;
697/* XXX is uio->uio_offset the right thing here? */
698 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
699 ap->a_cred, flags, &bp);
700 if (error != 0)
701 break;
702 /*
703 * If the buffer is not valid we have to clear out any
704 * garbage data from the pages instantiated for the buffer.
705 * If we do not, a failed uiomove() during a write can leave
706 * the prior contents of the pages exposed to a userland
707 * mmap(). XXX deal with uiomove() errors a better way.
708 */
709 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
710 vfs_bio_clrbuf(bp);
711 if (ioflag & IO_DIRECT)
712 bp->b_flags |= B_DIRECT;
713
714 if (uio->uio_offset + xfersize > ip->i_size) {
715 ip->i_size = uio->uio_offset + xfersize;
716 DIP(ip, i_size) = ip->i_size;
717 extended = 1;
718 }
719
720 size = blksize(fs, ip, lbn) - bp->b_resid;
721 if (size < xfersize)
722 xfersize = size;
723
724 error =
725 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
726 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
727 (LIST_FIRST(&bp->b_dep) == NULL)) {
728 bp->b_flags |= B_RELBUF;
729 }
730
731 /*
732 * If IO_SYNC each buffer is written synchronously. Otherwise
733 * if we have a severe page deficiency write the buffer
734 * asynchronously. Otherwise try to cluster, and if that
735 * doesn't do it then either do an async write (if O_DIRECT),
736 * or a delayed write (if not).
737 */
738 if (ioflag & IO_SYNC) {
739 (void)bwrite(bp);
740 } else if (vm_page_count_severe() ||
741 buf_dirty_count_severe() ||
742 (ioflag & IO_ASYNC)) {
743 bp->b_flags |= B_CLUSTEROK;
744 bawrite(bp);
745 } else if (xfersize + blkoffset == fs->fs_bsize) {
746 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
747 bp->b_flags |= B_CLUSTEROK;
748 cluster_write(bp, ip->i_size, seqcount);
749 } else {
750 bawrite(bp);
751 }
752 } else if (ioflag & IO_DIRECT) {
753 bp->b_flags |= B_CLUSTEROK;
754 bawrite(bp);
755 } else {
756 bp->b_flags |= B_CLUSTEROK;
757 bdwrite(bp);
758 }
759 if (error || xfersize == 0)
760 break;
761 ip->i_flag |= IN_CHANGE | IN_UPDATE;
762 }
763 /*
764 * If we successfully wrote any data, and we are not the superuser
765 * we clear the setuid and setgid bits as a precaution against
766 * tampering.
767 */
768 if (resid > uio->uio_resid && ap->a_cred &&
769 suser_cred(ap->a_cred, PRISON_ROOT)) {
770 ip->i_mode &= ~(ISUID | ISGID);
771 DIP(ip, i_mode) = ip->i_mode;
772 }
773 if (resid > uio->uio_resid)
774 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
775 if (error) {
776 if (ioflag & IO_UNIT) {
777 (void)UFS_TRUNCATE(vp, osize,
778 IO_NORMAL | (ioflag & IO_SYNC),
779 ap->a_cred, uio->uio_td);
780 uio->uio_offset -= resid - uio->uio_resid;
781 uio->uio_resid = resid;
782 }
783 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
784 error = UFS_UPDATE(vp, 1);
785
786 if (object) {
787 VM_OBJECT_LOCK(object);
788 vm_object_vndeallocate(object);
789 }
790
791 return (error);
792}
793
794/*
795 * get page routine
796 */
797static int
798ffs_getpages(ap)
799 struct vop_getpages_args *ap;
800{
801 off_t foff, physoffset;
802 int i, size, bsize;
803 struct vnode *dp, *vp;
804 vm_object_t obj;
805 vm_pindex_t pindex;
806 vm_page_t mreq;
807 int bbackwards, bforwards;
808 int pbackwards, pforwards;
809 int firstpage;
810 ufs2_daddr_t reqblkno, reqlblkno;
811 int poff;
812 int pcount;
813 int rtval;
814 int pagesperblock;
815
816 GIANT_REQUIRED;
817
818 pcount = round_page(ap->a_count) / PAGE_SIZE;
819 mreq = ap->a_m[ap->a_reqpage];
820
821 /*
822 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
823 * then the entire page is valid. Since the page may be mapped,
824 * user programs might reference data beyond the actual end of file
 825	 * occurring within the page. We have to zero that data.
826 */
827 if (mreq->valid) {
828 if (mreq->valid != VM_PAGE_BITS_ALL)
829 vm_page_zero_invalid(mreq, TRUE);
830 VM_OBJECT_LOCK(mreq->object);
831 vm_page_lock_queues();
832 for (i = 0; i < pcount; i++) {
833 if (i != ap->a_reqpage) {
834 vm_page_free(ap->a_m[i]);
835 }
836 }
837 vm_page_unlock_queues();
838 VM_OBJECT_UNLOCK(mreq->object);
839 return VM_PAGER_OK;
840 }
841
842 vp = ap->a_vp;
843 obj = vp->v_object;
844 bsize = vp->v_mount->mnt_stat.f_iosize;
845 pindex = mreq->pindex;
846 foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
847
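	/*
	 * If the filesystem block size is smaller than a page, punt to
	 * the generic vnode pager, which knows how to assemble a page
	 * from multiple blocks.
	 */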
848 if (bsize < PAGE_SIZE)
849 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
850 ap->a_count,
851 ap->a_reqpage);
852
853 /*
854 * foff is the file offset of the required page
855 * reqlblkno is the logical block that contains the page
856 * poff is the index of the page into the logical block
857 */
858 reqlblkno = foff / bsize;
859 poff = (foff % bsize) / PAGE_SIZE;
860
861 dp = VTOI(vp)->i_devvp;
862 if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
863 || (reqblkno == -1)) {
864 VM_OBJECT_LOCK(obj);
865 vm_page_lock_queues();
866 for(i = 0; i < pcount; i++) {
867 if (i != ap->a_reqpage)
868 vm_page_free(ap->a_m[i]);
869 }
870 vm_page_unlock_queues();
871 VM_OBJECT_UNLOCK(obj);
872 if (reqblkno == -1) {
873 if ((mreq->flags & PG_ZERO) == 0)
874 pmap_zero_page(mreq);
875 vm_page_undirty(mreq);
876 mreq->valid = VM_PAGE_BITS_ALL;
877 return VM_PAGER_OK;
878 } else {
879 return VM_PAGER_ERROR;
880 }
881 }
882
883 physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
884 pagesperblock = bsize / PAGE_SIZE;
885 /*
886 * find the first page that is contiguous...
887 * note that pbackwards is the number of pages that are contiguous
888 * backwards.
889 */
890 firstpage = 0;
891 if (ap->a_count) {
892 pbackwards = poff + bbackwards * pagesperblock;
893 if (ap->a_reqpage > pbackwards) {
894 firstpage = ap->a_reqpage - pbackwards;
895 VM_OBJECT_LOCK(obj);
896 vm_page_lock_queues();
897 for(i=0;i<firstpage;i++)
898 vm_page_free(ap->a_m[i]);
899 vm_page_unlock_queues();
900 VM_OBJECT_UNLOCK(obj);
901 }
902
903 /*
904 * pforwards is the number of pages that are contiguous
905 * after the current page.
906 */
907 pforwards = (pagesperblock - (poff + 1)) +
908 bforwards * pagesperblock;
909 if (pforwards < (pcount - (ap->a_reqpage + 1))) {
910 VM_OBJECT_LOCK(obj);
911 vm_page_lock_queues();
912 for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
913 vm_page_free(ap->a_m[i]);
914 vm_page_unlock_queues();
915 VM_OBJECT_UNLOCK(obj);
916 pcount = ap->a_reqpage + pforwards + 1;
917 }
918
919 /*
920 * number of pages for I/O corrected for the non-contig pages at
921 * the beginning of the array.
922 */
923 pcount -= firstpage;
924 }
925
926 /*
927 * calculate the size of the transfer
928 */
929
930 size = pcount * PAGE_SIZE;
931
932 if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
933 obj->un_pager.vnp.vnp_size)
934 size = obj->un_pager.vnp.vnp_size -
935 IDX_TO_OFF(ap->a_m[firstpage]->pindex);
936
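	/*
	 * Let the underlying device vnode perform the actual I/O.  The
	 * offset passed down is the difference between the physical
	 * (device) address and the file offset of the requested page.
	 */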
937 physoffset -= foff;
938 rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
939 (ap->a_reqpage - firstpage), physoffset);
940
941 return (rtval);
942}
943
944/*
945 * Extended attribute area reading.
946 */
947static int
948ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
949{
950 struct inode *ip;
951 struct ufs2_dinode *dp;
952 struct fs *fs;
953 struct buf *bp;
954 ufs_lbn_t lbn, nextlbn;
955 off_t bytesinfile;
956 long size, xfersize, blkoffset;
957 int error, orig_resid;
958
959 GIANT_REQUIRED;
960
961 ip = VTOI(vp);
962 fs = ip->i_fs;
963 dp = ip->i_din2;
964
965#ifdef DIAGNOSTIC
966 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
967 panic("ffs_extread: mode");
968
969#endif
970 orig_resid = uio->uio_resid;
971 if (orig_resid <= 0)
972 return (0);
973
974 bytesinfile = dp->di_extsize - uio->uio_offset;
975 if (bytesinfile <= 0) {
976 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
977 ip->i_flag |= IN_ACCESS;
978 return 0;
979 }
980
981 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
982 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
983 break;
984
985 lbn = lblkno(fs, uio->uio_offset);
986 nextlbn = lbn + 1;
987
988 /*
989 * size of buffer. The buffer representing the
990 * end of the file is rounded up to the size of
991 * the block type ( fragment or full block,
992 * depending ).
993 */
994 size = sblksize(fs, dp->di_extsize, lbn);
995 blkoffset = blkoff(fs, uio->uio_offset);
996
997 /*
998 * The amount we want to transfer in this iteration is
999 * one FS block less the amount of the data before
1000 * our startpoint (duh!)
1001 */
1002 xfersize = fs->fs_bsize - blkoffset;
1003
1004 /*
1005 * But if we actually want less than the block,
1006 * or the file doesn't have a whole block more of data,
1007 * then use the lesser number.
1008 */
1009 if (uio->uio_resid < xfersize)
1010 xfersize = uio->uio_resid;
1011 if (bytesinfile < xfersize)
1012 xfersize = bytesinfile;
1013
1014 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
1015 /*
1016 * Don't do readahead if this is the end of the info.
1017 */
1018 error = bread(vp, -1 - lbn, size, NOCRED, &bp);
1019 } else {
1020 /*
1021 * If we have a second block, then
1022 * fire off a request for a readahead
1023 * as well as a read. Note that the 4th and 5th
1024 * arguments point to arrays of the size specified in
1025 * the 6th argument.
1026 */
1027 int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
1028
1029 nextlbn = -1 - nextlbn;
1030 error = breadn(vp, -1 - lbn,
1031 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
1032 }
1033 if (error) {
1034 brelse(bp);
1035 bp = NULL;
1036 break;
1037 }
1038
1039 /*
1040 * If IO_DIRECT then set B_DIRECT for the buffer. This
1041 * will cause us to attempt to release the buffer later on
1042 * and will cause the buffer cache to attempt to free the
1043 * underlying pages.
1044 */
1045 if (ioflag & IO_DIRECT)
1046 bp->b_flags |= B_DIRECT;
1047
1048 /*
1049 * We should only get non-zero b_resid when an I/O error
1050 * has occurred, which should cause us to break above.
1051 * However, if the short read did not cause an error,
1052 * then we want to ensure that we do not uiomove bad
1053 * or uninitialized data.
1054 */
1055 size -= bp->b_resid;
1056 if (size < xfersize) {
1057 if (size == 0)
1058 break;
1059 xfersize = size;
1060 }
1061
1062 error = uiomove((char *)bp->b_data + blkoffset,
1063 (int)xfersize, uio);
1064 if (error)
1065 break;
1066
1067 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1068 (LIST_FIRST(&bp->b_dep) == NULL)) {
1069 /*
1070 * If there are no dependencies, and it's VMIO,
1071 * then we don't need the buf, mark it available
1072 * for freeing. The VM has the data.
1073 */
1074 bp->b_flags |= B_RELBUF;
1075 brelse(bp);
1076 } else {
1077 /*
1078 * Otherwise let whoever
1079 * made the request take care of
1080 * freeing it. We just queue
1081 * it onto another list.
1082 */
1083 bqrelse(bp);
1084 }
1085 }
1086
1087 /*
1088 * This can only happen in the case of an error
1089 * because the loop above resets bp to NULL on each iteration
1090 * and on normal completion has not set a new value into it.
1091 * so it must have come from a 'break' statement
1092 */
1093 if (bp != NULL) {
1094 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1095 (LIST_FIRST(&bp->b_dep) == NULL)) {
1096 bp->b_flags |= B_RELBUF;
1097 brelse(bp);
1098 } else {
1099 bqrelse(bp);
1100 }
1101 }
1102
1103 if ((error == 0 || uio->uio_resid != orig_resid) &&
1104 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1105 ip->i_flag |= IN_ACCESS;
1106 return (error);
1107}
1108
1109/*
1110 * Extended attribute area writing.
1111 */
1112static int
1113ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1114{
1115 struct inode *ip;
1116 struct ufs2_dinode *dp;
1117 struct fs *fs;
1118 struct buf *bp;
1119 ufs_lbn_t lbn;
1120 off_t osize;
1121 int blkoffset, error, flags, resid, size, xfersize;
1122
1123 GIANT_REQUIRED;
1124
1125 ip = VTOI(vp);
1126 fs = ip->i_fs;
1127 dp = ip->i_din2;
1128
1129#ifdef DIAGNOSTIC
1130 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1131 panic("ext_write: mode");
1132#endif
1133
1134 if (ioflag & IO_APPEND)
1135 uio->uio_offset = dp->di_extsize;
1136
1137 if (uio->uio_offset < 0 ||
1138 (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
1139 return (EFBIG);
1140
1141 resid = uio->uio_resid;
1142 osize = dp->di_extsize;
1143 flags = IO_EXT;
1144 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1145 flags |= IO_SYNC;
1146
1147 for (error = 0; uio->uio_resid > 0;) {
1148 lbn = lblkno(fs, uio->uio_offset);
1149 blkoffset = blkoff(fs, uio->uio_offset);
1150 xfersize = fs->fs_bsize - blkoffset;
1151 if (uio->uio_resid < xfersize)
1152 xfersize = uio->uio_resid;
1153
1154 /*
1155 * We must perform a read-before-write if the transfer size
1156 * does not cover the entire buffer.
1157 */
1158 if (fs->fs_bsize > xfersize)
1159 flags |= BA_CLRBUF;
1160 else
1161 flags &= ~BA_CLRBUF;
1162 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1163 ucred, flags, &bp);
1164 if (error != 0)
1165 break;
1166 /*
1167 * If the buffer is not valid we have to clear out any
1168 * garbage data from the pages instantiated for the buffer.
1169 * If we do not, a failed uiomove() during a write can leave
1170 * the prior contents of the pages exposed to a userland
1171 * mmap(). XXX deal with uiomove() errors a better way.
1172 */
1173 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1174 vfs_bio_clrbuf(bp);
1175 if (ioflag & IO_DIRECT)
1176 bp->b_flags |= B_DIRECT;
1177
1178 if (uio->uio_offset + xfersize > dp->di_extsize)
1179 dp->di_extsize = uio->uio_offset + xfersize;
1180
1181 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1182 if (size < xfersize)
1183 xfersize = size;
1184
1185 error =
1186 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1187 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1188 (LIST_FIRST(&bp->b_dep) == NULL)) {
1189 bp->b_flags |= B_RELBUF;
1190 }
1191
1192 /*
1193 * If IO_SYNC each buffer is written synchronously. Otherwise
1194 * if we have a severe page deficiency write the buffer
1195 * asynchronously. Otherwise try to cluster, and if that
1196 * doesn't do it then either do an async write (if O_DIRECT),
1197 * or a delayed write (if not).
1198 */
1199 if (ioflag & IO_SYNC) {
1200 (void)bwrite(bp);
1201 } else if (vm_page_count_severe() ||
1202 buf_dirty_count_severe() ||
1203 xfersize + blkoffset == fs->fs_bsize ||
1204 (ioflag & (IO_ASYNC | IO_DIRECT)))
1205 bawrite(bp);
1206 else
1207 bdwrite(bp);
1208 if (error || xfersize == 0)
1209 break;
1210 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1211 }
1212 /*
1213 * If we successfully wrote any data, and we are not the superuser
1214 * we clear the setuid and setgid bits as a precaution against
1215 * tampering.
1216 */
1217 if (resid > uio->uio_resid && ucred &&
1218 suser_cred(ucred, PRISON_ROOT)) {
1219 ip->i_mode &= ~(ISUID | ISGID);
1220 dp->di_mode = ip->i_mode;
1221 }
1222 if (error) {
1223 if (ioflag & IO_UNIT) {
1224 (void)UFS_TRUNCATE(vp, osize,
1225 IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
1226 uio->uio_offset -= resid - uio->uio_resid;
1227 uio->uio_resid = resid;
1228 }
1229 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1230 error = UFS_UPDATE(vp, 1);
1231 return (error);
1232}
1233
1234
1235/*
1236 * Vnode operating to retrieve a named extended attribute.
1237 *
1238 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1239 * the length of the EA, and possibly the pointer to the entry and to the data.
1240 */
1241static int
47
48#include <sys/param.h>
49#include <sys/bio.h>
50#include <sys/systm.h>
51#include <sys/buf.h>
52#include <sys/conf.h>
53#include <sys/extattr.h>
54#include <sys/kernel.h>
55#include <sys/limits.h>
56#include <sys/malloc.h>
57#include <sys/mount.h>
58#include <sys/proc.h>
59#include <sys/resourcevar.h>
60#include <sys/signalvar.h>
61#include <sys/stat.h>
62#include <sys/vmmeter.h>
63#include <sys/vnode.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67#include <vm/vm_object.h>
68#include <vm/vm_page.h>
69#include <vm/vm_pager.h>
70#include <vm/vnode_pager.h>
71
72#include <ufs/ufs/extattr.h>
73#include <ufs/ufs/quota.h>
74#include <ufs/ufs/inode.h>
75#include <ufs/ufs/ufs_extern.h>
76#include <ufs/ufs/ufsmount.h>
77
78#include <ufs/ffs/fs.h>
79#include <ufs/ffs/ffs_extern.h>
80#include "opt_directio.h"
81
82#ifdef DIRECTIO
83extern int ffs_rawread(struct vnode *vp, struct uio *uio, int *workdone);
84#endif
85static int ffs_fsync(struct vop_fsync_args *);
86static int ffs_getpages(struct vop_getpages_args *);
87static int ffs_read(struct vop_read_args *);
88static int ffs_write(struct vop_write_args *);
89static int ffs_extread(struct vnode *vp, struct uio *uio, int ioflag);
90static int ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag,
91 struct ucred *cred);
92static int ffsext_strategy(struct vop_strategy_args *);
93static int ffs_closeextattr(struct vop_closeextattr_args *);
94static int ffs_deleteextattr(struct vop_deleteextattr_args *);
95static int ffs_getextattr(struct vop_getextattr_args *);
96static int ffs_listextattr(struct vop_listextattr_args *);
97static int ffs_openextattr(struct vop_openextattr_args *);
98static int ffs_setextattr(struct vop_setextattr_args *);
99
100
101/* Global vfs data structures for ufs. */
102vop_t **ffs_vnodeop_p;
103static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
104 { &vop_default_desc, (vop_t *) ufs_vnoperate },
105 { &vop_fsync_desc, (vop_t *) ffs_fsync },
106 { &vop_getpages_desc, (vop_t *) ffs_getpages },
107 { &vop_read_desc, (vop_t *) ffs_read },
108 { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
109 { &vop_write_desc, (vop_t *) ffs_write },
110 { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
111 { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
112 { &vop_getextattr_desc, (vop_t *) ffs_getextattr },
113 { &vop_listextattr_desc, (vop_t *) ffs_listextattr },
114 { &vop_openextattr_desc, (vop_t *) ffs_openextattr },
115 { &vop_setextattr_desc, (vop_t *) ffs_setextattr },
116 { NULL, NULL }
117};
118static struct vnodeopv_desc ffs_vnodeop_opv_desc =
119 { &ffs_vnodeop_p, ffs_vnodeop_entries };
120
121vop_t **ffs_specop_p;
122static struct vnodeopv_entry_desc ffs_specop_entries[] = {
123 { &vop_default_desc, (vop_t *) ufs_vnoperatespec },
124 { &vop_fsync_desc, (vop_t *) ffs_fsync },
125 { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
126 { &vop_strategy_desc, (vop_t *) ffsext_strategy },
127 { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
128 { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
129 { &vop_getextattr_desc, (vop_t *) ffs_getextattr },
130 { &vop_listextattr_desc, (vop_t *) ffs_listextattr },
131 { &vop_openextattr_desc, (vop_t *) ffs_openextattr },
132 { &vop_setextattr_desc, (vop_t *) ffs_setextattr },
133 { NULL, NULL }
134};
135static struct vnodeopv_desc ffs_specop_opv_desc =
136 { &ffs_specop_p, ffs_specop_entries };
137
138vop_t **ffs_fifoop_p;
139static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
140 { &vop_default_desc, (vop_t *) ufs_vnoperatefifo },
141 { &vop_fsync_desc, (vop_t *) ffs_fsync },
142 { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks },
143 { &vop_strategy_desc, (vop_t *) ffsext_strategy },
144 { &vop_closeextattr_desc, (vop_t *) ffs_closeextattr },
145 { &vop_deleteextattr_desc, (vop_t *) ffs_deleteextattr },
146 { &vop_getextattr_desc, (vop_t *) ffs_getextattr },
147 { &vop_listextattr_desc, (vop_t *) ffs_listextattr },
148 { &vop_openextattr_desc, (vop_t *) ffs_openextattr },
149 { &vop_setextattr_desc, (vop_t *) ffs_setextattr },
150 { NULL, NULL }
151};
152static struct vnodeopv_desc ffs_fifoop_opv_desc =
153 { &ffs_fifoop_p, ffs_fifoop_entries };
154
155VNODEOP_SET(ffs_vnodeop_opv_desc);
156VNODEOP_SET(ffs_specop_opv_desc);
157VNODEOP_SET(ffs_fifoop_opv_desc);
158
159/*
160 * Synch an open file.
161 */
162/* ARGSUSED */
163static int
164ffs_fsync(ap)
165 struct vop_fsync_args /* {
166 struct vnode *a_vp;
167 struct ucred *a_cred;
168 int a_waitfor;
169 struct thread *a_td;
170 } */ *ap;
171{
172 struct vnode *vp = ap->a_vp;
173 struct inode *ip = VTOI(vp);
174 struct buf *bp;
175 struct buf *nbp;
176 int s, error, wait, passes, skipmeta;
177 ufs_lbn_t lbn;
178
179 wait = (ap->a_waitfor == MNT_WAIT);
180 if (vn_isdisk(vp, NULL)) {
181 lbn = INT_MAX;
182 if (vp->v_rdev->si_mountpoint != NULL &&
183 (vp->v_rdev->si_mountpoint->mnt_flag & MNT_SOFTDEP))
184 softdep_fsync_mountdev(vp);
185 } else {
186 lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
187 }
188
189 /*
190 * Flush all dirty buffers associated with a vnode.
191 */
192 passes = NIADDR + 1;
193 skipmeta = 0;
194 if (wait)
195 skipmeta = 1;
196 s = splbio();
197 VI_LOCK(vp);
198loop:
199 TAILQ_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs)
200 bp->b_vflags &= ~BV_SCANNED;
201 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
202 nbp = TAILQ_NEXT(bp, b_vnbufs);
203 /*
204 * Reasons to skip this buffer: it has already been considered
205 * on this pass, this pass is the first time through on a
206 * synchronous flush request and the buffer being considered
207 * is metadata, the buffer has dependencies that will cause
208 * it to be redirtied and it has not already been deferred,
209 * or it is already being written.
210 */
211 if ((bp->b_vflags & BV_SCANNED) != 0)
212 continue;
213 bp->b_vflags |= BV_SCANNED;
214 if ((skipmeta == 1 && bp->b_lblkno < 0))
215 continue;
216 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
217 continue;
218 if (!wait && LIST_FIRST(&bp->b_dep) != NULL &&
219 (bp->b_flags & B_DEFERRED) == 0 &&
220 buf_countdeps(bp, 0)) {
221 bp->b_flags |= B_DEFERRED;
222 BUF_UNLOCK(bp);
223 continue;
224 }
225 VI_UNLOCK(vp);
226 if ((bp->b_flags & B_DELWRI) == 0)
227 panic("ffs_fsync: not dirty");
228 if (vp != bp->b_vp)
229 panic("ffs_fsync: vp != vp->b_vp");
230 /*
231 * If this is a synchronous flush request, or it is not a
232 * file or device, start the write on this buffer immediatly.
233 */
234 if (wait || (vp->v_type != VREG && vp->v_type != VBLK)) {
235
236 /*
237 * On our final pass through, do all I/O synchronously
238 * so that we can find out if our flush is failing
239 * because of write errors.
240 */
241 if (passes > 0 || !wait) {
242 if ((bp->b_flags & B_CLUSTEROK) && !wait) {
243 (void) vfs_bio_awrite(bp);
244 } else {
245 bremfree(bp);
246 splx(s);
247 (void) bawrite(bp);
248 s = splbio();
249 }
250 } else {
251 bremfree(bp);
252 splx(s);
253 if ((error = bwrite(bp)) != 0)
254 return (error);
255 s = splbio();
256 }
257 } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
258 /*
259 * If the buffer is for data that has been truncated
260 * off the file, then throw it away.
261 */
262 bremfree(bp);
263 bp->b_flags |= B_INVAL | B_NOCACHE;
264 splx(s);
265 brelse(bp);
266 s = splbio();
267 } else
268 vfs_bio_awrite(bp);
269
270 /*
271 * Since we may have slept during the I/O, we need
272 * to start from a known point.
273 */
274 VI_LOCK(vp);
275 nbp = TAILQ_FIRST(&vp->v_dirtyblkhd);
276 }
277 /*
278 * If we were asked to do this synchronously, then go back for
279 * another pass, this time doing the metadata.
280 */
281 if (skipmeta) {
282 skipmeta = 0;
283 goto loop;
284 }
285
286 if (wait) {
287 while (vp->v_numoutput) {
288 vp->v_iflag |= VI_BWAIT;
289 msleep((caddr_t)&vp->v_numoutput, VI_MTX(vp),
290 PRIBIO + 4, "ffsfsn", 0);
291 }
292 VI_UNLOCK(vp);
293
294 /*
295 * Ensure that any filesystem metatdata associated
296 * with the vnode has been written.
297 */
298 splx(s);
299 if ((error = softdep_sync_metadata(ap)) != 0)
300 return (error);
301 s = splbio();
302
303 VI_LOCK(vp);
304 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
305 /*
306 * Block devices associated with filesystems may
307 * have new I/O requests posted for them even if
308 * the vnode is locked, so no amount of trying will
309 * get them clean. Thus we give block devices a
310 * good effort, then just give up. For all other file
311 * types, go around and try again until it is clean.
312 */
313 if (passes > 0) {
314 passes -= 1;
315 goto loop;
316 }
317#ifdef DIAGNOSTIC
318 if (!vn_isdisk(vp, NULL))
319 vprint("ffs_fsync: dirty", vp);
320#endif
321 }
322 }
323 VI_UNLOCK(vp);
324 splx(s);
325 return (UFS_UPDATE(vp, wait));
326}
327
328
329/*
330 * Vnode op for reading.
331 */
332/* ARGSUSED */
333static int
334ffs_read(ap)
335 struct vop_read_args /* {
336 struct vnode *a_vp;
337 struct uio *a_uio;
338 int a_ioflag;
339 struct ucred *a_cred;
340 } */ *ap;
341{
342 struct vnode *vp;
343 struct inode *ip;
344 struct uio *uio;
345 struct fs *fs;
346 struct buf *bp;
347 ufs_lbn_t lbn, nextlbn;
348 off_t bytesinfile;
349 long size, xfersize, blkoffset;
350 int error, orig_resid;
351 int seqcount;
352 int ioflag;
353 vm_object_t object;
354
355 vp = ap->a_vp;
356 uio = ap->a_uio;
357 ioflag = ap->a_ioflag;
358 if (ap->a_ioflag & IO_EXT)
359#ifdef notyet
360 return (ffs_extread(vp, uio, ioflag));
361#else
362 panic("ffs_read+IO_EXT");
363#endif
364#ifdef DIRECTIO
365 if ((ioflag & IO_DIRECT) != 0) {
366 int workdone;
367
368 error = ffs_rawread(vp, uio, &workdone);
369 if (error != 0 || workdone != 0)
370 return error;
371 }
372#endif
373
374 GIANT_REQUIRED;
375
376 seqcount = ap->a_ioflag >> 16;
377 ip = VTOI(vp);
378
379#ifdef DIAGNOSTIC
380 if (uio->uio_rw != UIO_READ)
381 panic("ffs_read: mode");
382
383 if (vp->v_type == VLNK) {
384 if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen)
385 panic("ffs_read: short symlink");
386 } else if (vp->v_type != VREG && vp->v_type != VDIR)
387 panic("ffs_read: type %d", vp->v_type);
388#endif
389 fs = ip->i_fs;
390 if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
391 return (EFBIG);
392
393 orig_resid = uio->uio_resid;
394 if (orig_resid <= 0)
395 return (0);
396
397 object = vp->v_object;
398
399 bytesinfile = ip->i_size - uio->uio_offset;
400 if (bytesinfile <= 0) {
401 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
402 ip->i_flag |= IN_ACCESS;
403 return 0;
404 }
405
406 if (object) {
407 vm_object_reference(object);
408 }
409
410 /*
411 * Ok so we couldn't do it all in one vm trick...
412 * so cycle around trying smaller bites..
413 */
414 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
415 if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0)
416 break;
417
418 lbn = lblkno(fs, uio->uio_offset);
419 nextlbn = lbn + 1;
420
421 /*
422 * size of buffer. The buffer representing the
423 * end of the file is rounded up to the size of
424 * the block type ( fragment or full block,
425 * depending ).
426 */
427 size = blksize(fs, ip, lbn);
428 blkoffset = blkoff(fs, uio->uio_offset);
429
430 /*
431 * The amount we want to transfer in this iteration is
432 * one FS block less the amount of the data before
433 * our startpoint (duh!)
434 */
435 xfersize = fs->fs_bsize - blkoffset;
436
437 /*
438 * But if we actually want less than the block,
439 * or the file doesn't have a whole block more of data,
440 * then use the lesser number.
441 */
442 if (uio->uio_resid < xfersize)
443 xfersize = uio->uio_resid;
444 if (bytesinfile < xfersize)
445 xfersize = bytesinfile;
446
447 if (lblktosize(fs, nextlbn) >= ip->i_size) {
448 /*
449 * Don't do readahead if this is the end of the file.
450 */
451 error = bread(vp, lbn, size, NOCRED, &bp);
452 } else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
453 /*
454 * Otherwise if we are allowed to cluster,
455 * grab as much as we can.
456 *
457 * XXX This may not be a win if we are not
458 * doing sequential access.
459 */
460 error = cluster_read(vp, ip->i_size, lbn,
461 size, NOCRED, uio->uio_resid, seqcount, &bp);
462 } else if (seqcount > 1) {
463 /*
464 * If we are NOT allowed to cluster, then
465 * if we appear to be acting sequentially,
466 * fire off a request for a readahead
467 * as well as a read. Note that the 4th and 5th
468 * arguments point to arrays of the size specified in
469 * the 6th argument.
470 */
471 int nextsize = blksize(fs, ip, nextlbn);
472 error = breadn(vp, lbn,
473 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
474 } else {
475 /*
476 * Failing all of the above, just read what the
477 * user asked for. Interestingly, the same as
478 * the first option above.
479 */
480 error = bread(vp, lbn, size, NOCRED, &bp);
481 }
482 if (error) {
483 brelse(bp);
484 bp = NULL;
485 break;
486 }
487
488 /*
489 * If IO_DIRECT then set B_DIRECT for the buffer. This
490 * will cause us to attempt to release the buffer later on
491 * and will cause the buffer cache to attempt to free the
492 * underlying pages.
493 */
494 if (ioflag & IO_DIRECT)
495 bp->b_flags |= B_DIRECT;
496
497 /*
498 * We should only get non-zero b_resid when an I/O error
499 * has occurred, which should cause us to break above.
500 * However, if the short read did not cause an error,
501 * then we want to ensure that we do not uiomove bad
502 * or uninitialized data.
503 */
504 size -= bp->b_resid;
505 if (size < xfersize) {
506 if (size == 0)
507 break;
508 xfersize = size;
509 }
510
511 {
512 /*
513 * otherwise use the general form
514 */
515 error =
516 uiomove((char *)bp->b_data + blkoffset,
517 (int)xfersize, uio);
518 }
519
520 if (error)
521 break;
522
523 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
524 (LIST_FIRST(&bp->b_dep) == NULL)) {
525 /*
526 * If there are no dependencies, and it's VMIO,
527 * then we don't need the buf, mark it available
528 * for freeing. The VM has the data.
529 */
530 bp->b_flags |= B_RELBUF;
531 brelse(bp);
532 } else {
533 /*
534 * Otherwise let whoever
535 * made the request take care of
536 * freeing it. We just queue
537 * it onto another list.
538 */
539 bqrelse(bp);
540 }
541 }
542
543 /*
544 * This can only happen in the case of an error
545 * because the loop above resets bp to NULL on each iteration
546 * and on normal completion has not set a new value into it.
547 * so it must have come from a 'break' statement
548 */
549 if (bp != NULL) {
550 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
551 (LIST_FIRST(&bp->b_dep) == NULL)) {
552 bp->b_flags |= B_RELBUF;
553 brelse(bp);
554 } else {
555 bqrelse(bp);
556 }
557 }
558
559 if (object) {
560 VM_OBJECT_LOCK(object);
561 vm_object_vndeallocate(object);
562 }
563 if ((error == 0 || uio->uio_resid != orig_resid) &&
564 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
565 ip->i_flag |= IN_ACCESS;
566 return (error);
567}
568
569/*
570 * Vnode op for writing.
571 */
572static int
573ffs_write(ap)
574 struct vop_write_args /* {
575 struct vnode *a_vp;
576 struct uio *a_uio;
577 int a_ioflag;
578 struct ucred *a_cred;
579 } */ *ap;
580{
581 struct vnode *vp;
582 struct uio *uio;
583 struct inode *ip;
584 struct fs *fs;
585 struct buf *bp;
586 struct thread *td;
587 ufs_lbn_t lbn;
588 off_t osize;
589 int seqcount;
590 int blkoffset, error, extended, flags, ioflag, resid, size, xfersize;
591 vm_object_t object;
592
593 vp = ap->a_vp;
594 uio = ap->a_uio;
595 ioflag = ap->a_ioflag;
596 if (ap->a_ioflag & IO_EXT)
597#ifdef notyet
598 return (ffs_extwrite(vp, uio, ioflag, ap->a_cred));
599#else
600 panic("ffs_read+IO_EXT");
601#endif
602
603 GIANT_REQUIRED;
604
605 extended = 0;
606 seqcount = ap->a_ioflag >> 16;
607 ip = VTOI(vp);
608
609 object = vp->v_object;
610 if (object) {
611 vm_object_reference(object);
612 }
613
614#ifdef DIAGNOSTIC
615 if (uio->uio_rw != UIO_WRITE)
616 panic("ffswrite: mode");
617#endif
618
619 switch (vp->v_type) {
620 case VREG:
621 if (ioflag & IO_APPEND)
622 uio->uio_offset = ip->i_size;
623 if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
624 if (object) {
625 VM_OBJECT_LOCK(object);
626 vm_object_vndeallocate(object);
627 }
628 return (EPERM);
629 }
630 /* FALLTHROUGH */
631 case VLNK:
632 break;
633 case VDIR:
634 panic("ffswrite: dir write");
635 break;
636 default:
637 panic("ffswrite: type %p %d (%d,%d)", vp, (int)vp->v_type,
638 (int)uio->uio_offset,
639 (int)uio->uio_resid
640 );
641 }
642
643 fs = ip->i_fs;
644 if (uio->uio_offset < 0 ||
645 (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
646 if (object) {
647 VM_OBJECT_LOCK(object);
648 vm_object_vndeallocate(object);
649 }
650 return (EFBIG);
651 }
652 /*
653 * Maybe this should be above the vnode op call, but so long as
654 * file servers have no limits, I don't think it matters.
655 */
656 td = uio->uio_td;
657 if (vp->v_type == VREG && td &&
658 uio->uio_offset + uio->uio_resid >
659 td->td_proc->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
660 PROC_LOCK(td->td_proc);
661 psignal(td->td_proc, SIGXFSZ);
662 PROC_UNLOCK(td->td_proc);
663 if (object) {
664 VM_OBJECT_LOCK(object);
665 vm_object_vndeallocate(object);
666 }
667 return (EFBIG);
668 }
669
670 resid = uio->uio_resid;
671 osize = ip->i_size;
672 if (seqcount > BA_SEQMAX)
673 flags = BA_SEQMAX << BA_SEQSHIFT;
674 else
675 flags = seqcount << BA_SEQSHIFT;
676 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
677 flags |= IO_SYNC;
678
679 for (error = 0; uio->uio_resid > 0;) {
680 lbn = lblkno(fs, uio->uio_offset);
681 blkoffset = blkoff(fs, uio->uio_offset);
682 xfersize = fs->fs_bsize - blkoffset;
683 if (uio->uio_resid < xfersize)
684 xfersize = uio->uio_resid;
685
686 if (uio->uio_offset + xfersize > ip->i_size)
687 vnode_pager_setsize(vp, uio->uio_offset + xfersize);
688
689 /*
690 * We must perform a read-before-write if the transfer size
691 * does not cover the entire buffer.
692 */
693 if (fs->fs_bsize > xfersize)
694 flags |= BA_CLRBUF;
695 else
696 flags &= ~BA_CLRBUF;
697/* XXX is uio->uio_offset the right thing here? */
698 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
699 ap->a_cred, flags, &bp);
700 if (error != 0)
701 break;
702 /*
703 * If the buffer is not valid we have to clear out any
704 * garbage data from the pages instantiated for the buffer.
705 * If we do not, a failed uiomove() during a write can leave
706 * the prior contents of the pages exposed to a userland
707 * mmap(). XXX deal with uiomove() errors a better way.
708 */
709 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
710 vfs_bio_clrbuf(bp);
711 if (ioflag & IO_DIRECT)
712 bp->b_flags |= B_DIRECT;
713
714 if (uio->uio_offset + xfersize > ip->i_size) {
715 ip->i_size = uio->uio_offset + xfersize;
716 DIP(ip, i_size) = ip->i_size;
717 extended = 1;
718 }
719
720 size = blksize(fs, ip, lbn) - bp->b_resid;
721 if (size < xfersize)
722 xfersize = size;
723
724 error =
725 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
726 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
727 (LIST_FIRST(&bp->b_dep) == NULL)) {
728 bp->b_flags |= B_RELBUF;
729 }
730
731 /*
732 * If IO_SYNC each buffer is written synchronously. Otherwise
733 * if we have a severe page deficiency write the buffer
734 * asynchronously. Otherwise try to cluster, and if that
735 * doesn't do it then either do an async write (if O_DIRECT),
736 * or a delayed write (if not).
737 */
738 if (ioflag & IO_SYNC) {
739 (void)bwrite(bp);
740 } else if (vm_page_count_severe() ||
741 buf_dirty_count_severe() ||
742 (ioflag & IO_ASYNC)) {
743 bp->b_flags |= B_CLUSTEROK;
744 bawrite(bp);
745 } else if (xfersize + blkoffset == fs->fs_bsize) {
746 if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
747 bp->b_flags |= B_CLUSTEROK;
748 cluster_write(bp, ip->i_size, seqcount);
749 } else {
750 bawrite(bp);
751 }
752 } else if (ioflag & IO_DIRECT) {
753 bp->b_flags |= B_CLUSTEROK;
754 bawrite(bp);
755 } else {
756 bp->b_flags |= B_CLUSTEROK;
757 bdwrite(bp);
758 }
759 if (error || xfersize == 0)
760 break;
761 ip->i_flag |= IN_CHANGE | IN_UPDATE;
762 }
763 /*
764 * If we successfully wrote any data, and we are not the superuser
765 * we clear the setuid and setgid bits as a precaution against
766 * tampering.
767 */
768 if (resid > uio->uio_resid && ap->a_cred &&
769 suser_cred(ap->a_cred, PRISON_ROOT)) {
770 ip->i_mode &= ~(ISUID | ISGID);
771 DIP(ip, i_mode) = ip->i_mode;
772 }
773 if (resid > uio->uio_resid)
774 VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
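	/*
	 * On error with IO_UNIT set, undo the partial write: truncate the
	 * file back to its original size and restore the uio so the caller
	 * sees either a complete write or none at all.
	 */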
775 if (error) {
776 if (ioflag & IO_UNIT) {
777 (void)UFS_TRUNCATE(vp, osize,
778 IO_NORMAL | (ioflag & IO_SYNC),
779 ap->a_cred, uio->uio_td);
780 uio->uio_offset -= resid - uio->uio_resid;
781 uio->uio_resid = resid;
782 }
783 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
784 error = UFS_UPDATE(vp, 1);
785
786 if (object) {
787 VM_OBJECT_LOCK(object);
788 vm_object_vndeallocate(object);
789 }
790
791 return (error);
792}
793
794/*
795 * get page routine
796 */
797static int
798ffs_getpages(ap)
799 struct vop_getpages_args *ap;
800{
801 off_t foff, physoffset;
802 int i, size, bsize;
803 struct vnode *dp, *vp;
804 vm_object_t obj;
805 vm_pindex_t pindex;
806 vm_page_t mreq;
807 int bbackwards, bforwards;
808 int pbackwards, pforwards;
809 int firstpage;
810 ufs2_daddr_t reqblkno, reqlblkno;
811 int poff;
812 int pcount;
813 int rtval;
814 int pagesperblock;
815
816 GIANT_REQUIRED;
817
818 pcount = round_page(ap->a_count) / PAGE_SIZE;
819 mreq = ap->a_m[ap->a_reqpage];
820
821 /*
822 * if ANY DEV_BSIZE blocks are valid on a large filesystem block,
823 * then the entire page is valid. Since the page may be mapped,
824 * user programs might reference data beyond the actual end of file
825	 * occurring within the page. We have to zero that data.
826 */
827 if (mreq->valid) {
828 if (mreq->valid != VM_PAGE_BITS_ALL)
829 vm_page_zero_invalid(mreq, TRUE);
830 VM_OBJECT_LOCK(mreq->object);
831 vm_page_lock_queues();
832 for (i = 0; i < pcount; i++) {
833 if (i != ap->a_reqpage) {
834 vm_page_free(ap->a_m[i]);
835 }
836 }
837 vm_page_unlock_queues();
838 VM_OBJECT_UNLOCK(mreq->object);
839 return VM_PAGER_OK;
840 }
841
842 vp = ap->a_vp;
843 obj = vp->v_object;
844 bsize = vp->v_mount->mnt_stat.f_iosize;
845 pindex = mreq->pindex;
846 foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */;
847
848 if (bsize < PAGE_SIZE)
849 return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
850 ap->a_count,
851 ap->a_reqpage);
852
853 /*
854 * foff is the file offset of the required page
855 * reqlblkno is the logical block that contains the page
856 * poff is the index of the page into the logical block
857 */
858 reqlblkno = foff / bsize;
859 poff = (foff % bsize) / PAGE_SIZE;
860
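	/*
	 * Map the logical block to its device block.  ufs_bmaparray() also
	 * reports how many blocks before and after it are physically
	 * contiguous; a reqblkno of -1 means the block is a hole, in which
	 * case the page is simply zero-filled below.
	 */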
861 dp = VTOI(vp)->i_devvp;
862 if (ufs_bmaparray(vp, reqlblkno, &reqblkno, 0, &bforwards, &bbackwards)
863 || (reqblkno == -1)) {
864 VM_OBJECT_LOCK(obj);
865 vm_page_lock_queues();
866 for(i = 0; i < pcount; i++) {
867 if (i != ap->a_reqpage)
868 vm_page_free(ap->a_m[i]);
869 }
870 vm_page_unlock_queues();
871 VM_OBJECT_UNLOCK(obj);
872 if (reqblkno == -1) {
873 if ((mreq->flags & PG_ZERO) == 0)
874 pmap_zero_page(mreq);
875 vm_page_undirty(mreq);
876 mreq->valid = VM_PAGE_BITS_ALL;
877 return VM_PAGER_OK;
878 } else {
879 return VM_PAGER_ERROR;
880 }
881 }
882
883 physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE;
884 pagesperblock = bsize / PAGE_SIZE;
885 /*
886 * find the first page that is contiguous...
887 * note that pbackwards is the number of pages that are contiguous
888 * backwards.
889 */
890 firstpage = 0;
891 if (ap->a_count) {
892 pbackwards = poff + bbackwards * pagesperblock;
893 if (ap->a_reqpage > pbackwards) {
894 firstpage = ap->a_reqpage - pbackwards;
895 VM_OBJECT_LOCK(obj);
896 vm_page_lock_queues();
897 for(i=0;i<firstpage;i++)
898 vm_page_free(ap->a_m[i]);
899 vm_page_unlock_queues();
900 VM_OBJECT_UNLOCK(obj);
901 }
902
903 /*
904 * pforwards is the number of pages that are contiguous
905 * after the current page.
906 */
907 pforwards = (pagesperblock - (poff + 1)) +
908 bforwards * pagesperblock;
909 if (pforwards < (pcount - (ap->a_reqpage + 1))) {
910 VM_OBJECT_LOCK(obj);
911 vm_page_lock_queues();
912 for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++)
913 vm_page_free(ap->a_m[i]);
914 vm_page_unlock_queues();
915 VM_OBJECT_UNLOCK(obj);
916 pcount = ap->a_reqpage + pforwards + 1;
917 }
918
919 /*
920 * number of pages for I/O corrected for the non-contig pages at
921 * the beginning of the array.
922 */
923 pcount -= firstpage;
924 }
925
926 /*
927 * calculate the size of the transfer
928 */
929
930 size = pcount * PAGE_SIZE;
931
932 if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) >
933 obj->un_pager.vnp.vnp_size)
934 size = obj->un_pager.vnp.vnp_size -
935 IDX_TO_OFF(ap->a_m[firstpage]->pindex);
936
937 physoffset -= foff;
938 rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size,
939 (ap->a_reqpage - firstpage), physoffset);
940
941 return (rtval);
942}
943
944/*
945 * Extended attribute area reading.
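 *
 * The UFS2 extended attribute area is addressed with negative logical
 * block numbers: attribute block N is read as lbn (-1 - N), which keeps
 * it distinct from the file's regular data blocks.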
946 */
947static int
948ffs_extread(struct vnode *vp, struct uio *uio, int ioflag)
949{
950 struct inode *ip;
951 struct ufs2_dinode *dp;
952 struct fs *fs;
953 struct buf *bp;
954 ufs_lbn_t lbn, nextlbn;
955 off_t bytesinfile;
956 long size, xfersize, blkoffset;
957 int error, orig_resid;
958
959 GIANT_REQUIRED;
960
961 ip = VTOI(vp);
962 fs = ip->i_fs;
963 dp = ip->i_din2;
964
965#ifdef DIAGNOSTIC
966 if (uio->uio_rw != UIO_READ || fs->fs_magic != FS_UFS2_MAGIC)
967 panic("ffs_extread: mode");
968
969#endif
970 orig_resid = uio->uio_resid;
971 if (orig_resid <= 0)
972 return (0);
973
974 bytesinfile = dp->di_extsize - uio->uio_offset;
975 if (bytesinfile <= 0) {
976 if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
977 ip->i_flag |= IN_ACCESS;
978 return 0;
979 }
980
981 for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
982 if ((bytesinfile = dp->di_extsize - uio->uio_offset) <= 0)
983 break;
984
985 lbn = lblkno(fs, uio->uio_offset);
986 nextlbn = lbn + 1;
987
988 /*
989 * size of buffer. The buffer representing the
990 * end of the file is rounded up to the size of
991 * the block type ( fragment or full block,
992 * depending ).
993 */
994 size = sblksize(fs, dp->di_extsize, lbn);
995 blkoffset = blkoff(fs, uio->uio_offset);
996
997 /*
998 * The amount we want to transfer in this iteration is
999 * one FS block less the amount of the data before
1000 * our startpoint (duh!)
1001 */
1002 xfersize = fs->fs_bsize - blkoffset;
1003
1004 /*
1005 * But if we actually want less than the block,
1006 * or the file doesn't have a whole block more of data,
1007 * then use the lesser number.
1008 */
1009 if (uio->uio_resid < xfersize)
1010 xfersize = uio->uio_resid;
1011 if (bytesinfile < xfersize)
1012 xfersize = bytesinfile;
1013
1014 if (lblktosize(fs, nextlbn) >= dp->di_extsize) {
1015 /*
1016 * Don't do readahead if this is the end of the info.
1017 */
1018 error = bread(vp, -1 - lbn, size, NOCRED, &bp);
1019 } else {
1020 /*
1021 * If we have a second block, then
1022 * fire off a request for a readahead
1023 * as well as a read. Note that the 4th and 5th
1024 * arguments point to arrays of the size specified in
1025 * the 6th argument.
1026 */
1027 int nextsize = sblksize(fs, dp->di_extsize, nextlbn);
1028
1029 nextlbn = -1 - nextlbn;
1030 error = breadn(vp, -1 - lbn,
1031 size, &nextlbn, &nextsize, 1, NOCRED, &bp);
1032 }
1033 if (error) {
1034 brelse(bp);
1035 bp = NULL;
1036 break;
1037 }
1038
1039 /*
1040 * If IO_DIRECT then set B_DIRECT for the buffer. This
1041 * will cause us to attempt to release the buffer later on
1042 * and will cause the buffer cache to attempt to free the
1043 * underlying pages.
1044 */
1045 if (ioflag & IO_DIRECT)
1046 bp->b_flags |= B_DIRECT;
1047
1048 /*
1049 * We should only get non-zero b_resid when an I/O error
1050 * has occurred, which should cause us to break above.
1051 * However, if the short read did not cause an error,
1052 * then we want to ensure that we do not uiomove bad
1053 * or uninitialized data.
1054 */
1055 size -= bp->b_resid;
1056 if (size < xfersize) {
1057 if (size == 0)
1058 break;
1059 xfersize = size;
1060 }
1061
1062 error = uiomove((char *)bp->b_data + blkoffset,
1063 (int)xfersize, uio);
1064 if (error)
1065 break;
1066
1067 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1068 (LIST_FIRST(&bp->b_dep) == NULL)) {
1069 /*
1070 * If there are no dependencies, and it's VMIO,
1071 * then we don't need the buf, mark it available
1072 * for freeing. The VM has the data.
1073 */
1074 bp->b_flags |= B_RELBUF;
1075 brelse(bp);
1076 } else {
1077 /*
1078 * Otherwise let whoever
1079 * made the request take care of
1080 * freeing it. We just queue
1081 * it onto another list.
1082 */
1083 bqrelse(bp);
1084 }
1085 }
1086
1087 /*
1088	 * This can only happen in the case of an error,
1089	 * because the loop above resets bp to NULL on each iteration
1090	 * and on normal completion does not set a new value into it,
1091	 * so it must have come from a 'break' statement.
1092 */
1093 if (bp != NULL) {
1094 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1095 (LIST_FIRST(&bp->b_dep) == NULL)) {
1096 bp->b_flags |= B_RELBUF;
1097 brelse(bp);
1098 } else {
1099 bqrelse(bp);
1100 }
1101 }
1102
1103 if ((error == 0 || uio->uio_resid != orig_resid) &&
1104 (vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1105 ip->i_flag |= IN_ACCESS;
1106 return (error);
1107}
1108
1109/*
1110 * Extended attribute area writing.
1111 */
1112static int
1113ffs_extwrite(struct vnode *vp, struct uio *uio, int ioflag, struct ucred *ucred)
1114{
1115 struct inode *ip;
1116 struct ufs2_dinode *dp;
1117 struct fs *fs;
1118 struct buf *bp;
1119 ufs_lbn_t lbn;
1120 off_t osize;
1121 int blkoffset, error, flags, resid, size, xfersize;
1122
1123 GIANT_REQUIRED;
1124
1125 ip = VTOI(vp);
1126 fs = ip->i_fs;
1127 dp = ip->i_din2;
1128
1129#ifdef DIAGNOSTIC
1130 if (uio->uio_rw != UIO_WRITE || fs->fs_magic != FS_UFS2_MAGIC)
1131 panic("ext_write: mode");
1132#endif
1133
1134 if (ioflag & IO_APPEND)
1135 uio->uio_offset = dp->di_extsize;
1136
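	/*
	 * The extended attribute area occupies at most NXADDR direct
	 * blocks, so refuse writes that would extend it past
	 * NXADDR * fs_bsize bytes.
	 */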
1137 if (uio->uio_offset < 0 ||
1138 (u_int64_t)uio->uio_offset + uio->uio_resid > NXADDR * fs->fs_bsize)
1139 return (EFBIG);
1140
1141 resid = uio->uio_resid;
1142 osize = dp->di_extsize;
1143 flags = IO_EXT;
1144 if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
1145 flags |= IO_SYNC;
1146
1147 for (error = 0; uio->uio_resid > 0;) {
1148 lbn = lblkno(fs, uio->uio_offset);
1149 blkoffset = blkoff(fs, uio->uio_offset);
1150 xfersize = fs->fs_bsize - blkoffset;
1151 if (uio->uio_resid < xfersize)
1152 xfersize = uio->uio_resid;
1153
1154 /*
1155 * We must perform a read-before-write if the transfer size
1156 * does not cover the entire buffer.
1157 */
1158 if (fs->fs_bsize > xfersize)
1159 flags |= BA_CLRBUF;
1160 else
1161 flags &= ~BA_CLRBUF;
1162 error = UFS_BALLOC(vp, uio->uio_offset, xfersize,
1163 ucred, flags, &bp);
1164 if (error != 0)
1165 break;
1166 /*
1167 * If the buffer is not valid we have to clear out any
1168 * garbage data from the pages instantiated for the buffer.
1169 * If we do not, a failed uiomove() during a write can leave
1170 * the prior contents of the pages exposed to a userland
1171 * mmap(). XXX deal with uiomove() errors a better way.
1172 */
1173 if ((bp->b_flags & B_CACHE) == 0 && fs->fs_bsize <= xfersize)
1174 vfs_bio_clrbuf(bp);
1175 if (ioflag & IO_DIRECT)
1176 bp->b_flags |= B_DIRECT;
1177
1178 if (uio->uio_offset + xfersize > dp->di_extsize)
1179 dp->di_extsize = uio->uio_offset + xfersize;
1180
1181 size = sblksize(fs, dp->di_extsize, lbn) - bp->b_resid;
1182 if (size < xfersize)
1183 xfersize = size;
1184
1185 error =
1186 uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio);
1187 if ((ioflag & (IO_VMIO|IO_DIRECT)) &&
1188 (LIST_FIRST(&bp->b_dep) == NULL)) {
1189 bp->b_flags |= B_RELBUF;
1190 }
1191
1192 /*
1193 * If IO_SYNC each buffer is written synchronously. Otherwise
1194 * if we have a severe page deficiency write the buffer
1195 * asynchronously. Otherwise try to cluster, and if that
1196 * doesn't do it then either do an async write (if O_DIRECT),
1197 * or a delayed write (if not).
1198 */
1199 if (ioflag & IO_SYNC) {
1200 (void)bwrite(bp);
1201 } else if (vm_page_count_severe() ||
1202 buf_dirty_count_severe() ||
1203 xfersize + blkoffset == fs->fs_bsize ||
1204 (ioflag & (IO_ASYNC | IO_DIRECT)))
1205 bawrite(bp);
1206 else
1207 bdwrite(bp);
1208 if (error || xfersize == 0)
1209 break;
1210 ip->i_flag |= IN_CHANGE | IN_UPDATE;
1211 }
1212 /*
1213 * If we successfully wrote any data, and we are not the superuser
1214 * we clear the setuid and setgid bits as a precaution against
1215 * tampering.
1216 */
1217 if (resid > uio->uio_resid && ucred &&
1218 suser_cred(ucred, PRISON_ROOT)) {
1219 ip->i_mode &= ~(ISUID | ISGID);
1220 dp->di_mode = ip->i_mode;
1221 }
1222 if (error) {
1223 if (ioflag & IO_UNIT) {
1224 (void)UFS_TRUNCATE(vp, osize,
1225 IO_EXT | (ioflag&IO_SYNC), ucred, uio->uio_td);
1226 uio->uio_offset -= resid - uio->uio_resid;
1227 uio->uio_resid = resid;
1228 }
1229 } else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
1230 error = UFS_UPDATE(vp, 1);
1231 return (error);
1232}
1233
1234
1235/*
1236 * Helper to locate a named extended attribute.
1237 *
1238 * Locate a particular EA (nspace:name) in the area (ptr:length), and return
1239 * the length of the EA, and possibly the pointer to the entry and to the data.
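 *
 * Each record in the area has the layout built by ffs_setextattr() below:
 *	a uint32_t record length (the whole record, padded to 8 bytes),
 *	one byte of attribute namespace, one byte giving the length of the
 *	padding that follows the value (eapad2), one byte of name length,
 *	the name itself padded to an 8-byte boundary (eapad1), and finally
 *	the value followed by eapad2 bytes of zero padding.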
1240 */
1241static int
1242ffs_findextattr(u_char *ptr, uint length, int nspace, const char *name, u_char **eap, u_char **eac)
1242ffs_findextattr(u_char *ptr, u_int length, int nspace, const char *name, u_char **eap, u_char **eac)
1243{
1244 u_char *p, *pe, *pn, *p0;
1245 int eapad1, eapad2, ealength, ealen, nlen;
1246 uint32_t ul;
1247
1248 pe = ptr + length;
1249 nlen = strlen(name);
1250
1251 for (p = ptr; p < pe; p = pn) {
1252 p0 = p;
1253 bcopy(p, &ul, sizeof(ul));
1254 pn = p + ul;
1255 /* make sure this entry is complete */
1256 if (pn > pe)
1257 break;
1258 p += sizeof(uint32_t);
1259 if (*p != nspace)
1260 continue;
1261 p++;
1262 eapad2 = *p++;
1263 if (*p != nlen)
1264 continue;
1265 p++;
1266 if (bcmp(p, name, nlen))
1267 continue;
1268 ealength = sizeof(uint32_t) + 3 + nlen;
1269 eapad1 = 8 - (ealength % 8);
1270 if (eapad1 == 8)
1271 eapad1 = 0;
1272 ealength += eapad1;
1273 ealen = ul - ealength - eapad2;
1274 p += nlen + eapad1;
1275 if (eap != NULL)
1276 *eap = p0;
1277 if (eac != NULL)
1278 *eac = p;
1279 return (ealen);
1280 }
1281 return(-1);
1282}
1283
1284static int
1285ffs_rdextattr(u_char **p, struct vnode *vp, struct thread *td, int extra)
1286{
1287 struct inode *ip;
1288 struct ufs2_dinode *dp;
1289 struct uio luio;
1290 struct iovec liovec;
1291 int easize, error;
1292 u_char *eae;
1293
1294 ip = VTOI(vp);
1295 dp = ip->i_din2;
1296 easize = dp->di_extsize;
1297
1298 eae = malloc(easize + extra, M_TEMP, M_WAITOK);
1299
1300 liovec.iov_base = eae;
1301 liovec.iov_len = easize;
1302 luio.uio_iov = &liovec;
1303 luio.uio_iovcnt = 1;
1304 luio.uio_offset = 0;
1305 luio.uio_resid = easize;
1306 luio.uio_segflg = UIO_SYSSPACE;
1307 luio.uio_rw = UIO_READ;
1308 luio.uio_td = td;
1309
1310 error = ffs_extread(vp, &luio, IO_EXT | IO_SYNC);
1311 if (error) {
1312 free(eae, M_TEMP);
1313 return(error);
1314 }
1315 *p = eae;
1316 return (0);
1317}
1318
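/*
 * Start an extended attribute transaction: read the whole EA area into a
 * malloc'ed buffer hung off the inode (i_ea_area).  The get/set/delete
 * operations below work on this in-core copy until ffs_close_ea() commits
 * it back to disk or discards it.
 */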
1319static int
1320ffs_open_ea(struct vnode *vp, struct ucred *cred, struct thread *td)
1321{
1322 struct inode *ip;
1323 struct ufs2_dinode *dp;
1324 int error;
1325
1326 ip = VTOI(vp);
1327
1328 if (ip->i_ea_area != NULL)
1329 return (EBUSY);
1330 dp = ip->i_din2;
1331 error = ffs_rdextattr(&ip->i_ea_area, vp, td, 0);
1332 if (error)
1333 return (error);
1334 ip->i_ea_len = dp->di_extsize;
1335 ip->i_ea_error = 0;
1336 return (0);
1337}
1338
1339/*
1340 * Vnode extattr transaction commit/abort
1341 */
1342static int
1343ffs_close_ea(struct vnode *vp, int commit, struct ucred *cred, struct thread *td)
1344{
1345 struct inode *ip;
1346 struct uio luio;
1347 struct iovec liovec;
1348 int error;
1349 struct ufs2_dinode *dp;
1350
1351 ip = VTOI(vp);
1352 if (ip->i_ea_area == NULL)
1353 return (EINVAL);
1354 dp = ip->i_din2;
1355 error = ip->i_ea_error;
1356 if (commit && error == 0) {
1357 if (cred == NOCRED)
1358 cred = vp->v_mount->mnt_cred;
1359 liovec.iov_base = ip->i_ea_area;
1360 liovec.iov_len = ip->i_ea_len;
1361 luio.uio_iov = &liovec;
1362 luio.uio_iovcnt = 1;
1363 luio.uio_offset = 0;
1364 luio.uio_resid = ip->i_ea_len;
1365 luio.uio_segflg = UIO_SYSSPACE;
1366 luio.uio_rw = UIO_WRITE;
1367 luio.uio_td = td;
1368 /* XXX: I'm not happy about truncating to zero size */
1369 if (ip->i_ea_len < dp->di_extsize)
1370 error = ffs_truncate(vp, 0, IO_EXT, cred, td);
1371 error = ffs_extwrite(vp, &luio, IO_EXT | IO_SYNC, cred);
1372 }
1373 free(ip->i_ea_area, M_TEMP);
1374 ip->i_ea_area = NULL;
1375 ip->i_ea_len = 0;
1376 ip->i_ea_error = 0;
1377 return (error);
1378}
1379
1380/*
1381 * Vnode extattr strategy routine for special devices and fifos.
1382 *
1383 * We need to check for a read or write of the external attributes.
1384 * Otherwise we just fall through and do the usual thing.
1385 */
1386static int
1387ffsext_strategy(struct vop_strategy_args *ap)
1388/*
1389struct vop_strategy_args {
1390 struct vnodeop_desc *a_desc;
1391 struct vnode *a_vp;
1392 struct buf *a_bp;
1393};
1394*/
1395{
1396 struct vnode *vp;
1397 daddr_t lbn;
1398
1399 KASSERT(ap->a_vp == ap->a_bp->b_vp, ("%s(%p != %p)",
1400 __func__, ap->a_vp, ap->a_bp->b_vp));
1401 vp = ap->a_vp;
1402 lbn = ap->a_bp->b_lblkno;
1403 if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC &&
1404 lbn < 0 && lbn >= -NXADDR)
1405 return (ufs_vnoperate((struct vop_generic_args *)ap));
1406 if (vp->v_type == VFIFO)
1407 return (ufs_vnoperatefifo((struct vop_generic_args *)ap));
1408 return (ufs_vnoperatespec((struct vop_generic_args *)ap));
1409}
1410
1411/*
1412 * Vnode extattr transaction commit/abort
1413 */
1414static int
1415ffs_openextattr(struct vop_openextattr_args *ap)
1416/*
1417struct vop_openextattr_args {
1418 struct vnodeop_desc *a_desc;
1419 struct vnode *a_vp;
1420 IN struct ucred *a_cred;
1421 IN struct thread *a_td;
1422};
1423*/
1424{
1425 struct inode *ip;
1426 struct fs *fs;
1427
1428 ip = VTOI(ap->a_vp);
1429 fs = ip->i_fs;
1430 if (fs->fs_magic == FS_UFS1_MAGIC)
1431 return (ufs_vnoperate((struct vop_generic_args *)ap));
1432
1433 if (ap->a_vp->v_type == VCHR)
1434 return (EOPNOTSUPP);
1435
1436 return (ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td));
1437}
1438
1439
1440/*
1441 * Vnode extattr transaction commit/abort
1442 */
1443static int
1444ffs_closeextattr(struct vop_closeextattr_args *ap)
1445/*
1446struct vop_closeextattr_args {
1447 struct vnodeop_desc *a_desc;
1448 struct vnode *a_vp;
1449 int a_commit;
1450 IN struct ucred *a_cred;
1451 IN struct thread *a_td;
1452};
1453*/
1454{
1455 struct inode *ip;
1456 struct fs *fs;
1457
1458 ip = VTOI(ap->a_vp);
1459 fs = ip->i_fs;
1460 if (fs->fs_magic == FS_UFS1_MAGIC)
1461 return (ufs_vnoperate((struct vop_generic_args *)ap));
1462
1463 if (ap->a_vp->v_type == VCHR)
1464 return (EOPNOTSUPP);
1465
1466 return (ffs_close_ea(ap->a_vp, ap->a_commit, ap->a_cred, ap->a_td));
1467}
1468
1469/*
1470 * Vnode operation to remove a named attribute.
1471 */
1472static int
1473ffs_deleteextattr(struct vop_deleteextattr_args *ap)
1474/*
1475vop_deleteextattr {
1476 IN struct vnode *a_vp;
1477 IN int a_attrnamespace;
1478 IN const char *a_name;
1479 IN struct ucred *a_cred;
1480 IN struct thread *a_td;
1481};
1482*/
1483{
1484 struct inode *ip;
1485 struct fs *fs;
1486 uint32_t ealength, ul;
1487 int ealen, olen, eapad1, eapad2, error, i, easize;
1488 u_char *eae, *p;
1489 int stand_alone;
1490
1491 ip = VTOI(ap->a_vp);
1492 fs = ip->i_fs;
1493
1494 if (fs->fs_magic == FS_UFS1_MAGIC)
1495 return (ufs_vnoperate((struct vop_generic_args *)ap));
1496
1497 if (ap->a_vp->v_type == VCHR)
1498 return (EOPNOTSUPP);
1499
1500 if (strlen(ap->a_name) == 0)
1501 return (EINVAL);
1502
1503 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1504 ap->a_cred, ap->a_td, IWRITE);
1505 if (error) {
1506 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1507 ip->i_ea_error = error;
1508 return (error);
1509 }
1510
1511 if (ip->i_ea_area == NULL) {
1512 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1513 if (error)
1514 return (error);
1515 stand_alone = 1;
1516 } else {
1517 stand_alone = 0;
1518 }
1519
1520 ealength = eapad1 = ealen = eapad2 = 0;
1521
1522 eae = malloc(ip->i_ea_len, M_TEMP, M_WAITOK);
1523 bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1524 easize = ip->i_ea_len;
1525
1526 olen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1527 &p, NULL);
1528 if (olen == -1) {
1529 /* delete but nonexistent */
1530 free(eae, M_TEMP);
1531 if (stand_alone)
1532 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1533 return(ENOATTR);
1534 }
1535 bcopy(p, &ul, sizeof ul);
1536 i = p - eae + ul;
1537 if (ul != ealength) {
1538 bcopy(p + ul, p + ealength, easize - i);
1539 easize += (ealength - ul);
1540 }
1541 if (easize > NXADDR * fs->fs_bsize) {
1542 free(eae, M_TEMP);
1543 if (stand_alone)
1544 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1545 else if (ip->i_ea_error == 0)
1546 ip->i_ea_error = ENOSPC;
1547 return(ENOSPC);
1548 }
1549 p = ip->i_ea_area;
1550 ip->i_ea_area = eae;
1551 ip->i_ea_len = easize;
1552 free(p, M_TEMP);
1553 if (stand_alone)
1554 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1555 return(error);
1556}
1557
1558/*
1559 * Vnode operation to retrieve a named extended attribute.
1560 */
1561static int
1562ffs_getextattr(struct vop_getextattr_args *ap)
1563/*
1564vop_getextattr {
1565 IN struct vnode *a_vp;
1566 IN int a_attrnamespace;
1567 IN const char *a_name;
1568 INOUT struct uio *a_uio;
1569 OUT size_t *a_size;
1570 IN struct ucred *a_cred;
1571 IN struct thread *a_td;
1572};
1573*/
1574{
1575 struct inode *ip;
1576 struct fs *fs;
1577 u_char *eae, *p;
1578 unsigned easize;
1579 int error, ealen, stand_alone;
1580
1581 ip = VTOI(ap->a_vp);
1582 fs = ip->i_fs;
1583
1584 if (fs->fs_magic == FS_UFS1_MAGIC)
1585 return (ufs_vnoperate((struct vop_generic_args *)ap));
1586
1587 if (ap->a_vp->v_type == VCHR)
1588 return (EOPNOTSUPP);
1589
1590 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1591 ap->a_cred, ap->a_td, IREAD);
1592 if (error)
1593 return (error);
1594
1595 if (ip->i_ea_area == NULL) {
1596 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1597 if (error)
1598 return (error);
1599 stand_alone = 1;
1600 } else {
1601 stand_alone = 0;
1602 }
1603 eae = ip->i_ea_area;
1604 easize = ip->i_ea_len;
1605
1606 ealen = ffs_findextattr(eae, easize, ap->a_attrnamespace, ap->a_name,
1607 NULL, &p);
1608 if (ealen >= 0) {
1609 error = 0;
1610 if (ap->a_size != NULL)
1611 *ap->a_size = ealen;
1612 else if (ap->a_uio != NULL)
1613 error = uiomove(p, ealen, ap->a_uio);
1614 } else
1615 error = ENOATTR;
1616 if (stand_alone)
1617 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1618 return(error);
1619}
1620
1621/*
1622 * Vnode operation to retrieve extended attributes on a vnode.
1623 */
1624static int
1625ffs_listextattr(struct vop_listextattr_args *ap)
1626/*
1627vop_listextattr {
1628 IN struct vnode *a_vp;
1629 IN int a_attrnamespace;
1630 INOUT struct uio *a_uio;
1631 OUT size_t *a_size;
1632 IN struct ucred *a_cred;
1633 IN struct thread *a_td;
1634};
1635*/
1636{
1637 struct inode *ip;
1638 struct fs *fs;
1639 u_char *eae, *p, *pe, *pn;
1640 unsigned easize;
1641 uint32_t ul;
1642 int error, ealen, stand_alone;
1643
1644 ip = VTOI(ap->a_vp);
1645 fs = ip->i_fs;
1646
1647 if (fs->fs_magic == FS_UFS1_MAGIC)
1648 return (ufs_vnoperate((struct vop_generic_args *)ap));
1649
1650 if (ap->a_vp->v_type == VCHR)
1651 return (EOPNOTSUPP);
1652
1653 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1654 ap->a_cred, ap->a_td, IREAD);
1655 if (error)
1656 return (error);
1657
1658 if (ip->i_ea_area == NULL) {
1659 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1660 if (error)
1661 return (error);
1662 stand_alone = 1;
1663 } else {
1664 stand_alone = 0;
1665 }
1666 eae = ip->i_ea_area;
1667 easize = ip->i_ea_len;
1668
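	/*
	 * Walk every record in the area; for each attribute in the
	 * requested namespace, report a one-byte name length followed by
	 * the name itself (or just add to the total size).
	 */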
1669 error = 0;
1670 if (ap->a_size != NULL)
1671 *ap->a_size = 0;
1672 pe = eae + easize;
1673 for(p = eae; error == 0 && p < pe; p = pn) {
1674 bcopy(p, &ul, sizeof(ul));
1675 pn = p + ul;
1676 if (pn > pe)
1677 break;
1678 p += sizeof(ul);
1679 if (*p++ != ap->a_attrnamespace)
1680 continue;
1681 p++; /* pad2 */
1682 ealen = *p;
1683 if (ap->a_size != NULL) {
1684 *ap->a_size += ealen + 1;
1685 } else if (ap->a_uio != NULL) {
1686 error = uiomove(p, ealen + 1, ap->a_uio);
1687 }
1688 }
1689 if (stand_alone)
1690 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1691 return(error);
1692}
1693
1694/*
1695 * Vnode operation to set a named attribute.
1696 */
1697static int
1698ffs_setextattr(struct vop_setextattr_args *ap)
1699/*
1700vop_setextattr {
1701 IN struct vnode *a_vp;
1702 IN int a_attrnamespace;
1703 IN const char *a_name;
1704 INOUT struct uio *a_uio;
1705 IN struct ucred *a_cred;
1706 IN struct thread *a_td;
1707};
1708*/
1709{
1710 struct inode *ip;
1711 struct fs *fs;
1712 uint32_t ealength, ul;
1713 int ealen, olen, eapad1, eapad2, error, i, easize;
1714 u_char *eae, *p;
1715 int stand_alone;
1716
1717 ip = VTOI(ap->a_vp);
1718 fs = ip->i_fs;
1719
1720 if (fs->fs_magic == FS_UFS1_MAGIC)
1721 return (ufs_vnoperate((struct vop_generic_args *)ap));
1722
1723 if (ap->a_vp->v_type == VCHR)
1724 return (EOPNOTSUPP);
1725
1726 if (strlen(ap->a_name) == 0)
1727 return (EINVAL);
1728
1729 /* XXX Now unsupported API to delete EAs using NULL uio. */
1730 if (ap->a_uio == NULL)
1731 return (EOPNOTSUPP);
1732
1733 error = extattr_check_cred(ap->a_vp, ap->a_attrnamespace,
1734 ap->a_cred, ap->a_td, IWRITE);
1735 if (error) {
1736 if (ip->i_ea_area != NULL && ip->i_ea_error == 0)
1737 ip->i_ea_error = error;
1738 return (error);
1739 }
1740
1741 if (ip->i_ea_area == NULL) {
1742 error = ffs_open_ea(ap->a_vp, ap->a_cred, ap->a_td);
1743 if (error)
1744 return (error);
1745 stand_alone = 1;
1746 } else {
1747 stand_alone = 0;
1748 }
1749
1750 ealen = ap->a_uio->uio_resid;
1751 ealength = sizeof(uint32_t) + 3 + strlen(ap->a_name);
1752 eapad1 = 8 - (ealength % 8);
1753 if (eapad1 == 8)
1754 eapad1 = 0;
1755 eapad2 = 8 - (ealen % 8);
1756 if (eapad2 == 8)
1757 eapad2 = 0;
1758 ealength += eapad1 + ealen + eapad2;
1759
1760 eae = malloc(ip->i_ea_len + ealength, M_TEMP, M_WAITOK);
1761 bcopy(ip->i_ea_area, eae, ip->i_ea_len);
1762 easize = ip->i_ea_len;
1763
1764 olen = ffs_findextattr(eae, easize,
1765 ap->a_attrnamespace, ap->a_name, &p, NULL);
1766 if (olen == -1) {
1767 /* new, append at end */
1768 p = eae + easize;
1769 easize += ealength;
1770 } else {
1771 bcopy(p, &ul, sizeof ul);
1772 i = p - eae + ul;
1773 if (ul != ealength) {
1774 bcopy(p + ul, p + ealength, easize - i);
1775 easize += (ealength - ul);
1776 }
1777 }
1778 if (easize > NXADDR * fs->fs_bsize) {
1779 free(eae, M_TEMP);
1780 if (stand_alone)
1781 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1782 else if (ip->i_ea_error == 0)
1783 ip->i_ea_error = ENOSPC;
1784 return(ENOSPC);
1785 }
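	/* Build the new record in place: length, namespace, pads, name, value. */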
1786 bcopy(&ealength, p, sizeof(ealength));
1787 p += sizeof(ealength);
1788 *p++ = ap->a_attrnamespace;
1789 *p++ = eapad2;
1790 *p++ = strlen(ap->a_name);
1791 strcpy(p, ap->a_name);
1792 p += strlen(ap->a_name);
1793 bzero(p, eapad1);
1794 p += eapad1;
1795 error = uiomove(p, ealen, ap->a_uio);
1796 if (error) {
1797 free(eae, M_TEMP);
1798 if (stand_alone)
1799 ffs_close_ea(ap->a_vp, 0, ap->a_cred, ap->a_td);
1800 else if (ip->i_ea_error == 0)
1801 ip->i_ea_error = error;
1802 return(error);
1803 }
1804 p += ealen;
1805 bzero(p, eapad2);
1806
1807 p = ip->i_ea_area;
1808 ip->i_ea_area = eae;
1809 ip->i_ea_len = easize;
1810 free(p, M_TEMP);
1811 if (stand_alone)
1812 error = ffs_close_ea(ap->a_vp, 1, ap->a_cred, ap->a_td);
1813 return(error);
1814}