1/* 2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 67 * 68 */ 69/* 70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 71 * support for mandatory and extensible security protections. 
This notice 72 * is included in support of clause 2.2 (b) of the Apple Public License, 73 * Version 2.0. 74 */ 75 76#include <sys/param.h> 77#include <sys/types.h> 78#include <sys/systm.h> 79#include <sys/kernel.h> 80#include <sys/file_internal.h> 81#include <sys/stat.h> 82#include <sys/proc_internal.h> 83#include <sys/kauth.h> 84#include <sys/mount_internal.h> 85#include <sys/namei.h> 86#include <sys/vnode_internal.h> 87#include <sys/ioctl.h> 88#include <sys/tty.h> 89/* Temporary workaround for ubc.h until <rdar://4714366 is resolved */ 90#define ubc_setcred ubc_setcred_deprecated 91#include <sys/ubc.h> 92#undef ubc_setcred 93int ubc_setcred(struct vnode *, struct proc *); 94#include <sys/conf.h> 95#include <sys/disk.h> 96#include <sys/fsevents.h> 97#include <sys/kdebug.h> 98#include <sys/xattr.h> 99#include <sys/ubc_internal.h> 100#include <sys/uio_internal.h> 101#include <sys/resourcevar.h> 102#include <sys/signalvar.h> 103 104#include <vm/vm_kern.h> 105#include <vm/vm_map.h> 106 107#include <miscfs/specfs/specdev.h> 108#include <miscfs/fifofs/fifo.h> 109 110#if CONFIG_MACF 111#include <security/mac_framework.h> 112#endif 113 114#if CONFIG_PROTECT 115#include <sys/cprotect.h> 116#endif 117 118extern void sigpup_attach_vnode(vnode_t); /* XXX */ 119 120static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); 121static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, 122 vfs_context_t ctx); 123static int vn_read(struct fileproc *fp, struct uio *uio, int flags, 124 vfs_context_t ctx); 125static int vn_write(struct fileproc *fp, struct uio *uio, int flags, 126 vfs_context_t ctx); 127static int vn_select( struct fileproc *fp, int which, void * wql, 128 vfs_context_t ctx); 129static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, 130 vfs_context_t ctx); 131static void filt_vndetach(struct knote *kn); 132static int filt_vnode(struct knote *kn, long hint); 133static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx); 134#if 0 
static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident,
		vfs_context_t ctx);
#endif

/* File operations vector for vnode-backed file descriptors (DTYPE_VNODE). */
const struct fileops vnops = {
	DTYPE_VNODE,
	vn_read,
	vn_write,
	vn_ioctl,
	vn_select,
	vn_closefile,
	vn_kqfilt_add,
	NULL
};

/* knote filter operations for EVFILT_VNODE; attach is done in vn_kqfilt_add. */
struct filterops vnode_filtops = {
	.f_isfd = 1,
	.f_attach = NULL,
	.f_detach = filt_vndetach,
	.f_event = filt_vnode
};

/*
 * Common code for vnode open operations.
 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine.
 *
 * XXX the profusion of interfaces here is probably a bad thing.
 */
int
vn_open(struct nameidata *ndp, int fmode, int cmode)
{
	/* fmode is passed by address so vn_open_modflags may update it */
	return(vn_open_modflags(ndp, &fmode, cmode));
}

/*
 * Open variant that takes the creation mode as a plain mode_t-style int;
 * wraps it in a vnode_attr and defers to vn_open_auth().  On success
 * '*fmodep' may have been updated (e.g. O_TRUNC/O_CREAT cleared).
 */
int
vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, cmode);

	return(vn_open_auth(ndp, fmodep, &va));
}

/*
 * Final steps of a successful open: take a usecount reference on the vnode
 * (with O_EVTONLY/other fmode semantics honored by vnode_ref_ext) and fire
 * open notifications.  Returns 0 on success or the vnode_ref_ext error.
 */
static int
vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx)
{
	int error;

	if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) {
		goto bad;
	}

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
#if CONFIG_MACF
	mac_vnode_notify_open(ctx, vp, fmode);
#endif
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
						   (uintptr_t)vp, 0);

	sigpup_attach_vnode(vp);

	return 0;

bad:
	return error;

}

/*
 * Attempt to create (and possibly open, on compound-VNOP filesystems) the
 * file named by 'ndp'.  Sets '*did_create' and '*did_open' so the caller
 * knows whether VNOP_OPEN() and O_TRUNC are still needed.
 *
 * May do nameidone() to allow safely adding an FSEvent.  Cue off of ni_dvp to
 * determine whether that has happened.
 *
 * Iocount contract: on EKEEPLOOKING the dvp iocount is deliberately kept so
 * the lookup can be continued; on all other returns ndp->ni_dvp has been
 * dropped and cleared.
 */
static int
vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx)
{
	uint32_t status = 0;
	vnode_t dvp = ndp->ni_dvp;
	int batched;
	int error;
	vnode_t vp;

	batched = vnode_compound_open_available(ndp->ni_dvp);
	*did_open = FALSE;

	VATTR_SET(vap, va_type, VREG);
	if (fmode & O_EXCL)
		vap->va_vaflags |= VA_EXCLUSIVE;

#if NAMEDRSRCFORK
	/* Resource forks are created as named streams, not via vn_create(). */
	if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) {
		if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
			goto out;
		if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0)
			goto out;
		*did_create = TRUE;
	} else {
#endif
		/* On compound-VNOP filesystems the FS does the authorization itself. */
		if (!batched) {
			if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0)
				goto out;
		}

		error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx);
		if (error != 0) {
			if (batched) {
				/* compound create may have created before failing later */
				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE;
			} else {
				*did_create = FALSE;
			}

			if (error == EKEEPLOOKING) {
				/* sanity-check the compound-lookup continuation invariants */
				if (*did_create) {
					panic("EKEEPLOOKING, but we did a create?");
				}
				if (!batched) {
					panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?");
				}
				if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
					panic("EKEEPLOOKING, but continue flag not set?");
				}

				/*
				 * Do NOT drop the dvp: we need everything to continue the lookup.
				 */
				return error;
			}
		} else {
			if (batched) {
				*did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0;
				/* compound path opened the file as part of the create */
				*did_open = TRUE;
			} else {
				*did_create = TRUE;
			}
		}
#if NAMEDRSRCFORK
	}
#endif

	vp = ndp->ni_vp;

	if (*did_create) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags);

		/* drop the directory iocount before generating the fsevent */
		vnode_put(dvp);
		ndp->ni_dvp = NULLVP;

#if CONFIG_FSE
		if (need_fsevent(FSE_CREATE_FILE, vp)) {
			add_fsevent(FSE_CREATE_FILE, ctx,
						FSE_ARG_VNODE, vp,
						FSE_ARG_DONE);
		}
#endif
	}
out:
	/* drop dvp if it was not already dropped on the create path above */
	if (ndp->ni_dvp != NULLVP) {
		vnode_put(dvp);
		ndp->ni_dvp = NULLVP;
	}

	return error;
}

/*
 * Open a file with authorization, updating the contents of the structures
 * pointed to by ndp, fmodep, and vap as necessary to perform the requested
 * operation.  This function is used for both opens of existing files, and
 * creation of new files.
 *
 * Parameters:	ndp			The nami data pointer describing the
 *					file
 *		fmodep			A pointer to an int containing the mode
 *					information to be used for the open
 *		vap			A pointer to the vnode attribute
 *					descriptor to be used for the open
 *
 * Indirect:	*			Contents of the data structures pointed
 *					to by the parameters are modified as
 *					necessary to the requested operation.
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'vap', if any, is in host byte order.
 *
 *		The contents of '*ndp' will be modified, based on the other
 *		arguments to this function, and to return file and directory
 *		data necessary to satisfy the requested operation.
 *
 *		If the file does not exist and we are creating it, then the
 *		O_TRUNC flag will be cleared in '*fmodep' to indicate to the
 *		caller that the file was not truncated.
 *
 *		If the file exists and the O_EXCL flag was not specified, then
 *		the O_CREAT flag will be cleared in '*fmodep' to indicate to
 *		the caller that the existing file was merely opened rather
 *		than created.
 *
 *		The contents of '*vap' will be modified as necessary to
 *		complete the operation, including setting of supported
 *		attributes, clearing of fields containing unsupported
 *		attributes in the request, if the request proceeds without
 *		them, etc..
 *
 * XXX:		This function is too complicated in acting on its arguments
 *
 * XXX:		We should enumerate the possible errno values here, and where
 *		in the code they originated.
 */
int
vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap)
{
	struct vnode *vp;
	struct vnode *dvp;
	vfs_context_t ctx = ndp->ni_cnd.cn_context;
	int error;
	int fmode;
	uint32_t origcnflags;
	boolean_t did_create;
	boolean_t did_open;
	boolean_t need_vnop_open;
	boolean_t batched;
	boolean_t ref_failed;

again:
	/* full restart point for create/unlink and tty-allocation races */
	vp = NULL;
	dvp = NULL;
	batched = FALSE;
	did_create = FALSE;
	need_vnop_open = TRUE;
	ref_failed = FALSE;
	fmode = *fmodep;
	origcnflags = ndp->ni_cnd.cn_flags;

	/*
	 * O_CREAT
	 */
	if (fmode & O_CREAT) {
		/* O_CREAT together with O_DIRECTORY is rejected */
		if ( (fmode & O_DIRECTORY) ) {
			error = EINVAL;
			goto out;
		}
		ndp->ni_cnd.cn_nameiop = CREATE;
#if CONFIG_TRIGGERS
		ndp->ni_op = OP_LINK;
#endif
		/* Inherit USEDVP, vnode_open() supported flags only */
		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT);
		ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1;
		ndp->ni_flag = NAMEI_COMPOUNDOPEN;
#if NAMEDRSRCFORK
		/* open calls are allowed for resource forks. */
		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
		/* follow a trailing symlink only when neither O_EXCL nor O_NOFOLLOW forbids it */
		if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0)
			ndp->ni_cnd.cn_flags |= FOLLOW;

continue_create_lookup:
		if ( (error = namei(ndp)) )
			goto out;

		dvp = ndp->ni_dvp;
		vp = ndp->ni_vp;

		batched = vnode_compound_open_available(dvp);

		/* not found, create */
		if (vp == NULL) {
			/* must have attributes for a new file */
			if (vap == NULL) {
				vnode_put(dvp);
				error = EINVAL;
				goto out;
			}
			/*
			 * Attempt a create.   For a system supporting compound VNOPs, we may
			 * find an existing file or create one; in either case, we will already
			 * have the file open and no VNOP_OPEN() will be needed.
			 */
			error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx);

			/* do_create may have updated/cleared these */
			dvp = ndp->ni_dvp;
			vp = ndp->ni_vp;

			/*
			 * Detected a node that the filesystem couldn't handle.  Don't call
			 * nameidone() yet, because we need that path buffer.
			 */
			if (error == EKEEPLOOKING) {
				if (!batched) {
					panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?");
				}
				goto continue_create_lookup;
			}

			nameidone(ndp);
			/* do_create's contract is to have dropped dvp on non-EKEEPLOOKING returns */
			if (dvp) {
				panic("Shouldn't have a dvp here.");
			}

			if (error) {
				/*
				 * Check for a creation or unlink race.
				 */
				if (((error == EEXIST) && !(fmode & O_EXCL)) ||
						((error == ENOENT) && (fmode & O_CREAT))){
					if (vp)
						vnode_put(vp);
					goto again;
				}
				goto bad;
			}

			need_vnop_open = !did_open;
		}
		else {
			/* file already exists */
			if (fmode & O_EXCL)
				error = EEXIST;

			/*
			 * We have a vnode.  Use compound open if available
			 * or else fall through to "traditional" path.  Note: can't
			 * do a compound open for root, because the parent belongs
			 * to a different FS.
			 */
			if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) {
				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);

				if (error == 0) {
					vp = ndp->ni_vp;
					need_vnop_open = FALSE;
				} else if (error == EKEEPLOOKING) {
					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
						panic("EKEEPLOOKING, but continue flag not set?");
					}
					goto continue_create_lookup;
				}
			}
			nameidone(ndp);
			vnode_put(dvp);
			ndp->ni_dvp = NULLVP;

			if (error) {
				goto bad;
			}

			/* tell the caller the existing file was opened, not created */
			fmode &= ~O_CREAT;

			/* Fall through */
		}
	}
	else {
		/*
		 * Not O_CREAT
		 */
		ndp->ni_cnd.cn_nameiop = LOOKUP;
		/* Inherit USEDVP, vnode_open() supported flags only */
		ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT);
		ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT;
#if NAMEDRSRCFORK
		/* open calls are allowed for resource forks. */
		ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
		ndp->ni_flag = NAMEI_COMPOUNDOPEN;

		/* preserve NOFOLLOW from vnode_open() */
		if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) {
			ndp->ni_cnd.cn_flags &= ~FOLLOW;
		}

		/* Do a lookup, possibly going directly to filesystem for compound operation */
		do {
			if ( (error = namei(ndp)) )
				goto out;
			vp = ndp->ni_vp;
			dvp = ndp->ni_dvp;

			/* Check for batched lookup-open */
			batched = vnode_compound_open_available(dvp);
			if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) {
				error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx);
				vp = ndp->ni_vp;
				if (error == 0) {
					need_vnop_open = FALSE;
				} else if (error == EKEEPLOOKING) {
					if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) {
						panic("EKEEPLOOKING, but continue flag not set?");
					}
				}
			}
		} while (error == EKEEPLOOKING);

		nameidone(ndp);
		vnode_put(dvp);
		ndp->ni_dvp = NULLVP;

		if (error) {
			goto bad;
		}
	}

	/*
	 * By this point, nameidone() is called, dvp iocount is dropped,
	 * and dvp pointer is cleared.
	 */
	if (ndp->ni_dvp != NULLVP) {
		panic("Haven't cleaned up adequately in vn_open_auth()");
	}

	/*
	 * Expect to use this code for filesystems without compound VNOPs, for the root
	 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(),
	 * and for shadow files, which do not live on the same filesystems as their "parents."
	 */
	if (need_vnop_open) {
		if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) {
			panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?");
		}

		/* a freshly-created file was already authorized by the create path */
		if (!did_create) {
			error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL);
			if (error) {
				goto bad;
			}
		}

#if CONFIG_PROTECT
		/*
		 * Perform any content protection access checks prior to calling
		 * into the filesystem, if the raw encrypted mode was not
		 * requested.
		 *
		 * If the va_dataprotect_flags are NOT active, or if they are,
		 * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need
		 * to perform the checks.
		 */
		if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) ||
				((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) {
			error = cp_handle_open (vp, fmode);
			if (error) {
				goto bad;
			}
		}
#endif

		error = VNOP_OPEN(vp, fmode, ctx);
		if (error) {
			goto bad;
		}
		need_vnop_open = FALSE;
	}

	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	/*
	 * Grab reference, etc.
	 */
	error = vn_open_auth_finish(vp, fmode, ctx);
	if (error) {
		ref_failed = TRUE;
		goto bad;
	}

	/* Compound VNOP open is responsible for doing the truncate */
	if (batched || did_create)
		fmode &= ~O_TRUNC;

	*fmodep = fmode;
	return (0);

bad:
	/* Opened either explicitly or by a batched create */
	if (!need_vnop_open) {
		VNOP_CLOSE(vp, fmode, ctx);
	}

	ndp->ni_vp = NULL;
	if (vp) {
#if NAMEDRSRCFORK
		/* Aggressively recycle shadow files if we error'd out during open() */
		if ((vnode_isnamedstream(vp)) &&
			(vp->v_parent != NULLVP) &&
			(vnode_isshadow(vp))) {
				vnode_recycle(vp);
		}
#endif
		vnode_put(vp);
		/*
		 * Check for a race against unlink.  We had a vnode
		 * but according to vnode_authorize or VNOP_OPEN it
		 * no longer exists.
		 *
		 * EREDRIVEOPEN: means that we were hit by the tty allocation race.
		 */
		if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) {
			goto again;
		}
	}

out:
	return (error);
}

#if vn_access_DEPRECATED
/*
 * Authorize an action against a vnode.  This has been the canonical way to
 * ensure that the credential/process/etc. referenced by a vfs_context
 * is granted the rights called out in 'mode' against the vnode 'vp'.
 *
 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult
 * to add support for more rights.  As such, this interface will be deprecated
 * and callers will use vnode_authorize instead.
 */
int
vn_access(vnode_t vp, int mode, vfs_context_t context)
{
	kauth_action_t	action;

	/* translate legacy VREAD/VWRITE/VEXEC bits to kauth actions */
	action = 0;
	if (mode & VREAD)
		action |= KAUTH_VNODE_READ_DATA;
	if (mode & VWRITE)
		action |= KAUTH_VNODE_WRITE_DATA;
	if (mode & VEXEC)
		action |= KAUTH_VNODE_EXECUTE;

	return(vnode_authorize(vp, NULL, action, context));
}
#endif	/* vn_access_DEPRECATED */

/*
 * Vnode close call
 *
 * Drops the usecount taken at open time (vnode_rele_ext) and calls
 * VNOP_CLOSE().  'flags' is the open-mode flag word, possibly with
 * FWASWRITTEN set when the descriptor was written through.
 */
int
vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
{
	int error;
	int flusherror = 0;

#if NAMEDRSRCFORK
	/* Sync data from resource fork shadow file if needed. */
	if ((vp->v_flag & VISNAMEDSTREAM) &&
	    (vp->v_parent != NULLVP) &&
	    vnode_isshadow(vp)) {
		if (flags & FWASWRITTEN) {
			flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx);
		}
	}
#endif

	/* work around for foxhound */
	/* NOTE(review): special devices drop their usecount BEFORE VNOP_CLOSE;
	 * everything else drops it after (see below) */
	if (vnode_isspec(vp))
		(void)vnode_rele_ext(vp, flags, 0);

	/*
	 * On HFS, we flush when the last writer closes.  We do this
	 * because resource fork vnodes hold a reference on data fork
	 * vnodes and that will prevent them from getting VNOP_INACTIVE
	 * which will delay when we flush cached data.  In future, we
	 * might find it beneficial to do this for all file systems.
	 * Note that it's OK to access v_writecount without the lock
	 * in this context.
	 */
	if (vp->v_tag == VT_HFS && (flags & FWRITE) && vp->v_writecount == 1)
		VNOP_FSYNC(vp, MNT_NOWAIT, ctx);

	error = VNOP_CLOSE(vp, flags, ctx);

#if CONFIG_FSE
	if (flags & FWASWRITTEN) {
	        if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) {
		        add_fsevent(FSE_CONTENT_MODIFIED, ctx,
			            FSE_ARG_VNODE, vp,
			            FSE_ARG_DONE);
		}
	}
#endif

	/* non-special vnodes drop the open reference after the close */
	if (!vnode_isspec(vp))
		(void)vnode_rele_ext(vp, flags, 0);

	/* a named-stream flush failure overrides the VNOP_CLOSE result */
	if (flusherror) {
		error = flusherror;
	}
	return (error);
}

/*
 * Satisfy a read from a swap file by returning zero-filled pages, clipped
 * to the file's size.  Swap contents are never exposed to readers.
 */
static int
vn_read_swapfile(
	struct vnode	*vp,
	uio_t		uio)
{
	int	error;
	off_t	swap_count, this_count;
	off_t	file_end, read_end;
	off_t	prev_resid;
	char	*my_swap_page;

	/*
	 * Reading from a swap file will get you zeroes.
	 */

	my_swap_page = NULL;
	error = 0;
	swap_count = uio_resid(uio);

	file_end = ubc_getsize(vp);
	read_end = uio->uio_offset + uio_resid(uio);
	if (uio->uio_offset >= file_end) {
		/* uio starts after end of file: nothing to read */
		swap_count = 0;
	} else if (read_end > file_end) {
		/* uio extends beyond end of file: stop before that */
		swap_count -= (read_end - file_end);
	}

	while (swap_count > 0) {
		if (my_swap_page == NULL) {
			/* M_WAITOK: allocation blocks rather than failing */
			MALLOC(my_swap_page, char *, PAGE_SIZE,
			       M_TEMP, M_WAITOK);
			memset(my_swap_page, '\0', PAGE_SIZE);
			/* add an end-of-line to keep line counters happy */
			my_swap_page[PAGE_SIZE-1] = '\n';
		}
		/* copy out at most one zero page per iteration */
		this_count = swap_count;
		if (this_count > PAGE_SIZE) {
			this_count = PAGE_SIZE;
		}

		prev_resid = uio_resid(uio);
		error = uiomove((caddr_t) my_swap_page,
				this_count,
				uio);
		if (error) {
			break;
		}
		swap_count -= (prev_resid - uio_resid(uio));
	}
	if (my_swap_page != NULL) {
		FREE(my_swap_page, M_TEMP);
		my_swap_page = NULL;
	}

	return error;
}
/*
 * Package up an I/O request on a vnode into a uio and do it.
 */
int
vn_rdwr(
	enum uio_rw rw,
	struct vnode *vp,
	caddr_t base,
	int len,
	off_t offset,
	enum uio_seg segflg,
	int ioflg,
	kauth_cred_t cred,
	int *aresid,
	proc_t p)
{
	int64_t resid;
	int result;

	/* thin 32-bit-length wrapper around vn_rdwr_64() */
	result = vn_rdwr_64(rw,
			vp,
			(uint64_t)(uintptr_t)base,
			(int64_t)len,
			offset,
			segflg,
			ioflg,
			cred,
			&resid,
			p);

	/* "resid" should be bounded above by "len," which is an int */
	if (aresid != NULL) {
		*aresid = resid;
	}

	return result;
}


/*
 * 64-bit variant of vn_rdwr(): builds a single-iovec uio over [base, base+len)
 * in the caller-specified address space and issues VNOP_READ/VNOP_WRITE with
 * the given credential.  If 'aresid' is NULL, a short transfer is reported
 * as EIO; otherwise the residual count is returned through it.
 */
int
vn_rdwr_64(
	enum uio_rw rw,
	struct vnode *vp,
	uint64_t base,
	int64_t len,
	off_t offset,
	enum uio_seg segflg,
	int ioflg,
	kauth_cred_t cred,
	int64_t *aresid,
	proc_t p)
{
	uio_t auio;
	int spacetype;
	struct vfs_context context;
	int error=0;
	char uio_buf[ UIO_SIZEOF(1) ];

	context.vc_thread = current_thread();
	context.vc_ucred = cred;

	if (UIO_SEG_IS_USER_SPACE(segflg)) {
		spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	}
	else {
		spacetype = UIO_SYSSPACE;
	}
	/* stack-backed uio: no allocation needed for a single iovec */
	auio = uio_createwithbuffer(1, offset, spacetype, rw,
								  &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, base, len);

#if CONFIG_MACF
	/* XXXMAC
	 * 	IO_NOAUTH should be re-examined.
	 *	Likely that mediation should be performed in caller.
	 */
	if ((ioflg & IO_NOAUTH) == 0) {
	/* passed cred is fp->f_cred */
		if (rw == UIO_READ)
			error = mac_vnode_check_read(&context, cred, vp);
		else
			error = mac_vnode_check_write(&context, cred, vp);
	}
#endif

	if (error == 0) {
		if (rw == UIO_READ) {
			/* swap files read back zeroes unless the pager itself is asking */
			if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) {
				error = vn_read_swapfile(vp, auio);
			} else {
				error = VNOP_READ(vp, auio, ioflg, &context);
			}
		} else {
			error = VNOP_WRITE(vp, auio, ioflg, &context);
		}
	}

	if (aresid)
		*aresid = uio_resid(auio);
	else
		if (uio_resid(auio) && error == 0)
			error = EIO;
	return (error);
}

/*
 * Acquire the per-fileglob offset lock, sleeping (spin mutex dropped across
 * msleep) until any current holder releases it.  Serializes fg_offset
 * updates for non-FOF_OFFSET reads/writes on regular files.
 */
static inline void
vn_offset_lock(struct fileglob *fg)
{
	lck_mtx_lock_spin(&fg->fg_lock);
	while (fg->fg_lflags & FG_OFF_LOCKED) {
		fg->fg_lflags |= FG_OFF_LOCKWANT;
		msleep(&fg->fg_lflags, &fg->fg_lock, PVFS | PSPIN,
		       "fg_offset_lock_wait", 0);
	}
	fg->fg_lflags |= FG_OFF_LOCKED;
	lck_mtx_unlock(&fg->fg_lock);
}

/*
 * Release the per-fileglob offset lock and wake any waiters.  The wakeup
 * is issued after dropping fg_lock to keep the wakeup outside the spin
 * mutex.
 */
static inline void
vn_offset_unlock(struct fileglob *fg)
{
	int lock_wanted = 0;

	lck_mtx_lock_spin(&fg->fg_lock);
	if (fg->fg_lflags & FG_OFF_LOCKWANT) {
		lock_wanted = 1;
	}
	fg->fg_lflags &= ~(FG_OFF_LOCKED | FG_OFF_LOCKWANT);
	lck_mtx_unlock(&fg->fg_lock);
	if (lock_wanted) {
		wakeup(&fg->fg_lflags);
	}
}

/*
 * File table vnode read routine.
 */
static int
vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
	struct vnode *vp;
	int error;
	int ioflag;
	off_t count;
	int offset_locked = 0;

	vp = (struct vnode *)fp->f_fglob->fg_data;
	/* take an iocount; fails if the vnode is being reclaimed */
	if ( (error = vnode_getwithref(vp)) ) {
		return(error);
	}

#if CONFIG_MACF
	error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp);
	if (error) {
		(void)vnode_put(vp);
		return (error);
	}
#endif

	/* This signals to VNOP handlers that this read came from a file table read */
	ioflag = IO_SYSCALL_DISPATCH;

	/* map per-descriptor flags onto VNOP io flags */
	if (fp->f_fglob->fg_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
		ioflag |= IO_NOCACHE;
	if (fp->f_fglob->fg_flag & FENCRYPTED) {
		ioflag |= IO_ENCRYPTED;
	}
	if (fp->f_fglob->fg_flag & FNORDAHEAD)
		ioflag |= IO_RAOFF;

	if ((flags & FOF_OFFSET) == 0) {
		/* implicit-offset read: serialize fg_offset use/update */
		if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) {
			vn_offset_lock(fp->f_fglob);
			offset_locked = 1;
		}
		uio->uio_offset = fp->f_fglob->fg_offset;
	}
	count = uio_resid(uio);

	if (vnode_isswap(vp)) {
		/* special case for swap files */
		error = vn_read_swapfile(vp, uio);
	} else {
		error = VNOP_READ(vp, uio, ioflag, ctx);
	}
	if ((flags & FOF_OFFSET) == 0) {
		/* advance the shared file offset by the number of bytes read */
		fp->f_fglob->fg_offset += count - uio_resid(uio);
		if (offset_locked) {
			vn_offset_unlock(fp->f_fglob);
			offset_locked = 0;
		}
	}

	(void)vnode_put(vp);
	return (error);
}


/*
 * File table vnode write routine.
 */
static int
vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx)
{
	struct vnode *vp;
	int error, ioflag;
	off_t count;
	int clippedsize = 0;	/* bytes trimmed from the request for RLIMIT_FSIZE */
	int partialwrite=0;	/* set when the residual was clipped and must be restored */
	int residcount, oldcount;
	int offset_locked = 0;
	proc_t p = vfs_context_proc(ctx);

	count = 0;
	vp = (struct vnode *)fp->f_fglob->fg_data;
	/* take an iocount; fails if the vnode is being reclaimed */
	if ( (error = vnode_getwithref(vp)) ) {
		return(error);
	}

#if CONFIG_MACF
	error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp);
	if (error) {
		(void)vnode_put(vp);
		return (error);
	}
#endif

	/*
	 * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from
	 * a file table write
	 */
	ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH);

	/* map per-descriptor flags onto VNOP io flags */
	if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND))
		ioflag |= IO_APPEND;
	if (fp->f_fglob->fg_flag & FNONBLOCK)
		ioflag |= IO_NDELAY;
	if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp))
		ioflag |= IO_NOCACHE;
	if (fp->f_fglob->fg_flag & FNODIRECT)
		ioflag |= IO_NODIRECT;
	if (fp->f_fglob->fg_flag & FSINGLE_WRITER)
		ioflag |= IO_SINGLE_WRITER;

	/*
	 * Treat synchronous mounts and O_FSYNC on the fd as equivalent.
	 *
	 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay
	 * XXX the non-essential metadata without some additional VFS work;
	 * XXX the intent at this point is to plumb the interface for it.
	 */
	if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) ||
		(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) {
		ioflag |= IO_SYNC;
	}

	if ((flags & FOF_OFFSET) == 0) {
		/* implicit-offset write: serialize fg_offset use/update */
		if ((vnode_vtype(vp) == VREG) && !vnode_isswap(vp)) {
			vn_offset_lock(fp->f_fglob);
			offset_locked = 1;
		}
		uio->uio_offset = fp->f_fglob->fg_offset;
		count = uio_resid(uio);
	}
	/* enforce RLIMIT_FSIZE on implicit-offset writes to regular files */
	if (((flags & FOF_OFFSET) == 0) &&
		vfs_context_proc(ctx) && (vp->v_type == VREG) &&
		(((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) ||
		 ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) {
		/*
		 * If the requested residual would cause us to go past the
		 * administrative limit, then we need to adjust the residual
		 * down to cause fewer bytes than requested to be written.  If
		 * we can't do that (e.g. the residual is already 1 byte),
		 * then we fail the write with EFBIG.
		 */
		residcount = uio_resid(uio);
		if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
			clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
		} else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) {
			clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset);
		}
		if (clippedsize >= residcount) {
			psignal(p, SIGXFSZ);
			error = EFBIG;
			goto error_out;
		}
		partialwrite = 1;
		uio_setresid(uio, residcount-clippedsize);
	}
	if ((flags & FOF_OFFSET) != 0) {
		/* for pwrite, append should be ignored */
		ioflag &= ~IO_APPEND;
		/* explicit-offset write starting at or beyond the limit fails outright */
		if (p && (vp->v_type == VREG) &&
			((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
			psignal(p, SIGXFSZ);
			error = EFBIG;
			goto error_out;
		}
		/* explicit-offset write crossing the limit is clipped like above */
		if (p && (vp->v_type == VREG) &&
			((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
			//Debugger("vn_bwrite:overstepping the bounds");
			residcount = uio_resid(uio);
			clippedsize =  (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur;
			partialwrite = 1;
			uio_setresid(uio, residcount-clippedsize);
		}
	}

	error = VNOP_WRITE(vp, uio, ioflag, ctx);

	if (partialwrite) {
		/* restore the clipped bytes so the caller sees the true residual */
		oldcount = uio_resid(uio);
		uio_setresid(uio, oldcount + clippedsize);
	}

	if ((flags & FOF_OFFSET) == 0) {
		/* O_APPEND writes land wherever the FS placed them */
		if (ioflag & IO_APPEND)
			fp->f_fglob->fg_offset = uio->uio_offset;
		else
			fp->f_fglob->fg_offset += count - uio_resid(uio);
		if (offset_locked) {
			vn_offset_unlock(fp->f_fglob);
			offset_locked = 0;
		}
	}

	/*
	 * Set the credentials on successful writes
	 */
	if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) {
		/*
		 * When called from aio subsystem, we only have the proc from
		 * which to get the credential, at this point, so use that
		 * instead.  This means aio functions are incompatible with
		 * per-thread credentials (aio operations are proxied).  We
		 * can't easily correct the aio vs. settid race in this case
		 * anyway, so we disallow it.
		 */
		if ((flags & FOF_PCRED) == 0) {
			ubc_setthreadcred(vp, p, current_thread());
		} else {
			ubc_setcred(vp, p);
		}
	}
	(void)vnode_put(vp);
	return (error);

error_out:
	if (offset_locked) {
		vn_offset_unlock(fp->f_fglob);
	}
	(void)vnode_put(vp);
	return (error);
}

/*
 * File table vnode stat routine.
 *
 * Returns:	0			Success
 *		EBADF
 *		ENOMEM
 *		vnode_getattr:???
 */
int
vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
{
	struct vnode_attr va;
	int error;
	u_short mode;
	kauth_filesec_t fsec;
	struct stat *sb = (struct stat *)0;	/* warning avoidance ; protected by isstat64 */
	struct stat64 * sb64 = (struct stat64 *)0;	/* warning avoidance ; protected by isstat64 */

	/* Select the output structure flavor based on isstat64. */
	if (isstat64 != 0)
		sb64 = (struct stat64 *)sbptr;
	else
		sb = (struct stat *)sbptr;
	memset(&va, 0, sizeof(va));
	/* Request every attribute the stat structure needs in one getattr call. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);
	VATTR_WANTED(&va, va_mode);
	VATTR_WANTED(&va, va_type);
	VATTR_WANTED(&va, va_nlink);
	VATTR_WANTED(&va, va_uid);
	VATTR_WANTED(&va, va_gid);
	VATTR_WANTED(&va, va_rdev);
	VATTR_WANTED(&va, va_data_size);
	VATTR_WANTED(&va, va_access_time);
	VATTR_WANTED(&va, va_modify_time);
	VATTR_WANTED(&va, va_change_time);
	VATTR_WANTED(&va, va_create_time);
	VATTR_WANTED(&va, va_flags);
	VATTR_WANTED(&va, va_gen);
	VATTR_WANTED(&va, va_iosize);
	/* lower layers will synthesise va_total_alloc from va_data_size if required */
	VATTR_WANTED(&va, va_total_alloc);
	if (xsec != NULL) {
		/* Caller also wants extended security data (ACL, owner/group UUIDs). */
		VATTR_WANTED(&va, va_uuuid);
		VATTR_WANTED(&va, va_guuid);
		VATTR_WANTED(&va, va_acl);
	}
	error = vnode_getattr(vp, &va, ctx);
	if (error)
		goto out;
	/*
	 * Copy from vattr table
	 */
	if (isstat64 != 0) {
		sb64->st_dev = va.va_fsid;
		sb64->st_ino = (ino64_t)va.va_fileid;

	} else {
		sb->st_dev = va.va_fsid;
		sb->st_ino = (ino_t)va.va_fileid;
	}
	/* Fold the vnode type into the permission bits to form st_mode. */
	mode = va.va_mode;
	switch (vp->v_type) {
	case VREG:
		mode |= S_IFREG;
		break;
	case VDIR:
		mode |= S_IFDIR;
		break;
	case VBLK:
		mode |= S_IFBLK;
		break;
	case VCHR:
		mode |= S_IFCHR;
		break;
	case VLNK:
		mode |= S_IFLNK;
		break;
	case VSOCK:
		mode |= S_IFSOCK;
		break;
	case VFIFO:
		mode |= S_IFIFO;
		break;
	default:
		/* VNON/VBAD and friends cannot be represented in st_mode. */
		error = EBADF;
		goto out;
	};
	if (isstat64 != 0) {
		sb64->st_mode = mode;
		/* If the FS does not report a link count, assume 1. */
		sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
		sb64->st_uid = va.va_uid;
		sb64->st_gid = va.va_gid;
		sb64->st_rdev = va.va_rdev;
		sb64->st_size = va.va_data_size;
		sb64->st_atimespec = va.va_access_time;
		sb64->st_mtimespec = va.va_modify_time;
		sb64->st_ctimespec = va.va_change_time;
		/* Fall back to the change time when no birth time is supported. */
		sb64->st_birthtimespec =
		    VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time;
		sb64->st_blksize = va.va_iosize;
		sb64->st_flags = va.va_flags;
		/* st_blocks is reported in 512-byte units, rounded up. */
		sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512;
	} else {
		sb->st_mode = mode;
		sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1;
		sb->st_uid = va.va_uid;
		sb->st_gid = va.va_gid;
		sb->st_rdev = va.va_rdev;
		sb->st_size = va.va_data_size;
		sb->st_atimespec = va.va_access_time;
		sb->st_mtimespec = va.va_modify_time;
		sb->st_ctimespec = va.va_change_time;
		sb->st_blksize = va.va_iosize;
		sb->st_flags = va.va_flags;
		sb->st_blocks = roundup(va.va_total_alloc, 512) / 512;
	}

	/* if we're interested in extended security data and we got an ACL */
	if (xsec != NULL) {
		if (!VATTR_IS_SUPPORTED(&va, va_acl) &&
		    !VATTR_IS_SUPPORTED(&va, va_uuuid) &&
		    !VATTR_IS_SUPPORTED(&va, va_guuid)) {
			*xsec = KAUTH_FILESEC_NONE;
		} else {
			/*
			 * Allocate a filesec big enough for the ACL (if any);
			 * caller owns and must free the returned filesec.
			 */
			if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
				fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount);
			} else {
				fsec = kauth_filesec_alloc(0);
			}
			if (fsec == NULL) {
				error = ENOMEM;
				goto out;
			}
			fsec->fsec_magic = KAUTH_FILESEC_MAGIC;
			if (VATTR_IS_SUPPORTED(&va, va_uuuid)) {
				fsec->fsec_owner = va.va_uuuid;
			} else {
				fsec->fsec_owner = kauth_null_guid;
			}
			if (VATTR_IS_SUPPORTED(&va, va_guuid)) {
				fsec->fsec_group = va.va_guuid;
			} else {
				fsec->fsec_group = kauth_null_guid;
			}
			if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) {
				bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl));
			} else {
				fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL;
			}
			*xsec = fsec;
		}
	}

	/* Do not give the generation number out to unpriviledged users */
	if (va.va_gen && !vfs_context_issuser(ctx)) {
		if (isstat64 != 0)
			sb64->st_gen = 0;
		else
			sb->st_gen = 0;
	} else {
		if (isstat64 != 0)
			sb64->st_gen = va.va_gen;
		else
			sb->st_gen = va.va_gen;
	}

	error = 0;
out:
	/* The ACL returned by getattr is ours to free on every exit path. */
	if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL)
		kauth_acl_free(va.va_acl);
	return (error);
}

/*
 * Authorizing wrapper around vn_stat_noauth(): performs the MAC check
 * and KAUTH read-attributes/read-security authorization before statting.
 */
int
vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx)
{
	int error;

#if CONFIG_MACF
	error = mac_vnode_check_stat(ctx, NOCRED, vp);
	if (error)
		return (error);
#endif

	/* authorize */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0)
		return(error);

	/* actual stat */
	return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx));
}


/*
 * File table vnode ioctl routine.
1356 */ 1357static int 1358vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) 1359{ 1360 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data); 1361 off_t file_size; 1362 int error; 1363 struct vnode *ttyvp; 1364 struct session * sessp; 1365 1366 if ( (error = vnode_getwithref(vp)) ) { 1367 return(error); 1368 } 1369 1370#if CONFIG_MACF 1371 error = mac_vnode_check_ioctl(ctx, vp, com); 1372 if (error) 1373 goto out; 1374#endif 1375 1376 switch (vp->v_type) { 1377 case VREG: 1378 case VDIR: 1379 if (com == FIONREAD) { 1380 if ((error = vnode_size(vp, &file_size, ctx)) != 0) 1381 goto out; 1382 *(int *)data = file_size - fp->f_fglob->fg_offset; 1383 goto out; 1384 } 1385 if (com == FIONBIO || com == FIOASYNC) { /* XXX */ 1386 goto out; 1387 } 1388 /* fall into ... */ 1389 1390 default: 1391 error = ENOTTY; 1392 goto out; 1393 1394 case VFIFO: 1395 case VCHR: 1396 case VBLK: 1397 1398 /* Should not be able to set block size from user space */ 1399 if (com == DKIOCSETBLOCKSIZE) { 1400 error = EPERM; 1401 goto out; 1402 } 1403 1404 if (com == FIODTYPE) { 1405 if (vp->v_type == VBLK) { 1406 if (major(vp->v_rdev) >= nblkdev) { 1407 error = ENXIO; 1408 goto out; 1409 } 1410 *(int *)data = bdevsw[major(vp->v_rdev)].d_type; 1411 1412 } else if (vp->v_type == VCHR) { 1413 if (major(vp->v_rdev) >= nchrdev) { 1414 error = ENXIO; 1415 goto out; 1416 } 1417 *(int *)data = cdevsw[major(vp->v_rdev)].d_type; 1418 } else { 1419 error = ENOTTY; 1420 goto out; 1421 } 1422 goto out; 1423 } 1424 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx); 1425 1426 if (error == 0 && com == TIOCSCTTY) { 1427 sessp = proc_session(vfs_context_proc(ctx)); 1428 1429 session_lock(sessp); 1430 ttyvp = sessp->s_ttyvp; 1431 sessp->s_ttyvp = vp; 1432 sessp->s_ttyvid = vnode_vid(vp); 1433 session_unlock(sessp); 1434 session_rele(sessp); 1435 } 1436 } 1437out: 1438 (void)vnode_put(vp); 1439 return(error); 1440} 1441 1442/* 1443 * File table vnode select routine. 
 */
static int
vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx)
{
	int error;
	struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data;
	struct vfs_context context;

	if ( (error = vnode_getwithref(vp)) == 0 ) {
		/*
		 * NOTE(review): 'context' is populated here but VNOP_SELECT
		 * is passed the caller's 'ctx' -- verify which one is
		 * intended to be used.
		 */
		context.vc_thread = current_thread();
		context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
		/*
		 * XXX We should use a per thread credential here; minimally,
		 * XXX the process credential should have a persistent
		 * XXX reference on it before being passed in here.
		 */
		error = mac_vnode_check_select(ctx, vp, which);
		if (error == 0)
#endif
			error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx);

		(void)vnode_put(vp);
	}
	return(error);

}

/*
 * File table vnode close routine.
 */
static int
vn_closefile(struct fileglob *fg, vfs_context_t ctx)
{
	struct vnode *vp = (struct vnode *)fg->fg_data;
	int error;
	struct flock lf;

	if ( (error = vnode_getwithref(vp)) == 0 ) {

		/*
		 * If this fileglob holds a whole-file flock()-style lock,
		 * release it before closing the vnode.
		 */
		if ((fg->fg_flag & FHASLOCK) &&
		    FILEGLOB_DTYPE(fg) == DTYPE_VNODE) {
			lf.l_whence = SEEK_SET;
			lf.l_start = 0;
			lf.l_len = 0;
			lf.l_type = F_UNLCK;

			(void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL);
		}
		error = vn_close(vp, fg->fg_flag, ctx);

		(void)vnode_put(vp);
	}
	return(error);
}

/*
 * pathconf(2)/fpathconf(2) on a vnode: answer the queries the VFS layer
 * can resolve generically; delegate filesystem-specific names to
 * VNOP_PATHCONF.
 *
 * Returns:	0			Success
 *	VNOP_PATHCONF:???
 */
int
vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx)
{
	int error = 0;
	struct vfs_attr vfa;

	switch(name) {
	case _PC_EXTENDED_SECURITY_NP:
		*retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0;
		break;
	case _PC_AUTH_OPAQUE_NP:
		*retval = vfs_authopaque(vnode_mount(vp));
		break;
	case _PC_2_SYMLINKS:
		*retval = 1;	/* XXX NOTSUP on MSDOS, etc. */
		break;
	case _PC_ALLOC_SIZE_MIN:
		*retval = 1;	/* XXX lie: 1 byte */
		break;
	case _PC_ASYNC_IO:	/* unistd.h: _POSIX_ASYNCHRONUS_IO */
		*retval = 1;	/* [AIO] option is supported */
		break;
	case _PC_PRIO_IO:	/* unistd.h: _POSIX_PRIORITIZED_IO */
		*retval = 0;	/* [PIO] option is not supported */
		break;
	case _PC_REC_INCR_XFER_SIZE:
		*retval = 4096;	/* XXX go from MIN to MAX 4K at a time */
		break;
	case _PC_REC_MIN_XFER_SIZE:
		*retval = 4096;	/* XXX recommend 4K minimum reads/writes */
		break;
	case _PC_REC_MAX_XFER_SIZE:
		*retval = 65536; /* XXX recommend 64K maximum reads/writes */
		break;
	case _PC_REC_XFER_ALIGN:
		*retval = 4096;	/* XXX recommend page aligned buffers */
		break;
	case _PC_SYMLINK_MAX:
		*retval = 255;	/* Minimum acceptable POSIX value */
		break;
	case _PC_SYNC_IO:	/* unistd.h: _POSIX_SYNCHRONIZED_IO */
		*retval = 0;	/* [SIO] option is not supported */
		break;
	case _PC_XATTR_SIZE_BITS:
		/* The number of bits used to store maximum extended
		 * attribute size in bytes.  For example, if the maximum
		 * attribute size supported by a file system is 128K, the
		 * value returned will be 18.  However a value 18 can mean
		 * that the maximum attribute size can be anywhere from
		 * (256KB - 1) to 128KB.  As a special case, the resource
		 * fork can have much larger size, and some file system
		 * specific extended attributes can have smaller and preset
		 * size; for example, Finder Info is always 32 bytes.
		 */
		memset(&vfa, 0, sizeof(vfa));
		VFSATTR_INIT(&vfa);
		VFSATTR_WANTED(&vfa, f_capabilities);
		if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 &&
		    (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) &&
		    (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
		    (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
			/* Supports native extended attributes */
			error = VNOP_PATHCONF(vp, name, retval, ctx);
		} else {
			/* Number of bits used to represent the maximum size of
			 * extended attribute stored in an Apple Double file.
			 */
			*retval = AD_XATTR_SIZE_BITS;
		}
		break;
	default:
		/* Not a VFS-generic name; ask the filesystem. */
		error = VNOP_PATHCONF(vp, name, retval, ctx);
		break;
	}

	return (error);
}

/*
 * Attach a knote to a vnode for kqueue: validates the filter against the
 * vnode type, runs the MAC check, links the knote onto v_knotes, and asks
 * the filesystem to begin monitoring.
 */
static int
vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx)
{
	int error;
	struct vnode *vp;

	vp = (struct vnode *)fp->f_fglob->fg_data;

	/*
	 * Don't attach a knote to a dead vnode.
	 */
	if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) {
		switch (kn->kn_filter) {
		case EVFILT_READ:
		case EVFILT_WRITE:
			if (vnode_isfifo(vp)) {
				/* We'll only watch FIFOs that use our fifofs */
				if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) {
					error = ENOTSUP;
				}

			} else if (!vnode_isreg(vp)) {
				if (vnode_ischr(vp) &&
				    (error = spec_kqfilter(vp, kn)) == 0) {
					/* claimed by a special device */
					vnode_put(vp);
					return 0;
				}

				error = EINVAL;
			}
			break;
		case EVFILT_VNODE:
			break;
		default:
			error = EINVAL;
		}

		if (error) {
			vnode_put(vp);
			return error;
		}

#if CONFIG_MACF
		error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp);
		if (error) {
			vnode_put(vp);
			return error;
		}
#endif

		/* Remember the vnode (and its vid, to detect recycling) in the knote. */
		kn->kn_hook = (void*)vp;
		kn->kn_hookid = vnode_vid(vp);
		kn->kn_fop = &vnode_filtops;

		vnode_lock(vp);
		KNOTE_ATTACH(&vp->v_knotes, kn);
		vnode_unlock(vp);

		/* Ask the filesystem to provide remove notifications, but ignore failure */
		VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx);

		vnode_put(vp);
	}

	return (error);
}

/*
 * Detach a knote from its vnode: unlink it from v_knotes and tell the
 * filesystem monitoring has ended.  A vid mismatch means the vnode was
 * recycled, in which case there is nothing left to detach from.
 */
static void
filt_vndetach(struct knote *kn)
{
	vfs_context_t ctx = vfs_context_current();
	struct vnode *vp;
	vp = (struct vnode *)kn->kn_hook;
	if (vnode_getwithvid(vp, kn->kn_hookid))
		return;

	vnode_lock(vp);
	KNOTE_DETACH(&vp->v_knotes, kn);
	vnode_unlock(vp);

	/*
	 * Tell a (generally networked) filesystem that we're no longer watching
	 * If the FS wants to track contexts, it should still be using the one from
	 * the VNODE_MONITOR_BEGIN.
	 */
	VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx);
	vnode_put(vp);
}


/*
 * Used for EVFILT_READ
 *
 * Takes only VFIFO or VREG.
vnode is locked. We handle the "poll" case 1678 * differently than the regular case for VREG files. If not in poll(), 1679 * then we need to know current fileproc offset for VREG. 1680 */ 1681static intptr_t 1682vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) 1683{ 1684 if (vnode_isfifo(vp)) { 1685#if FIFO 1686 int cnt; 1687 int err = fifo_charcount(vp, &cnt); 1688 if (err == 0) { 1689 return (intptr_t)cnt; 1690 } else 1691#endif 1692 { 1693 return (intptr_t)0; 1694 } 1695 } else if (vnode_isreg(vp)) { 1696 if (ispoll) { 1697 return (intptr_t)1; 1698 } 1699 1700 off_t amount; 1701 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset; 1702 if (amount > (off_t)INTPTR_MAX) { 1703 return INTPTR_MAX; 1704 } else if (amount < (off_t)INTPTR_MIN) { 1705 return INTPTR_MIN; 1706 } else { 1707 return (intptr_t)amount; 1708 } 1709 } else { 1710 panic("Should never have an EVFILT_READ except for reg or fifo."); 1711 return 0; 1712 } 1713} 1714 1715/* 1716 * Used for EVFILT_WRITE. 1717 * 1718 * For regular vnodes, we can always write (1). For named pipes, 1719 * see how much space there is in the buffer. Nothing else is covered. 1720 */ 1721static intptr_t 1722vnode_writable_space_count(vnode_t vp) 1723{ 1724 if (vnode_isfifo(vp)) { 1725#if FIFO 1726 long spc; 1727 int err = fifo_freespace(vp, &spc); 1728 if (err == 0) { 1729 return (intptr_t)spc; 1730 } else 1731#endif 1732 { 1733 return (intptr_t)0; 1734 } 1735 } else if (vnode_isreg(vp)) { 1736 return (intptr_t)1; 1737 } else { 1738 panic("Should never have an EVFILT_READ except for reg or fifo."); 1739 return 0; 1740 } 1741} 1742 1743/* 1744 * Determine whether this knote should be active 1745 * 1746 * This is kind of subtle. 1747 * --First, notice if the vnode has been revoked: in so, override hint 1748 * --EVFILT_READ knotes are checked no matter what the hint is 1749 * --Other knotes activate based on hint. 
1750 * --If hint is revoke, set special flags and activate 1751 */ 1752static int 1753filt_vnode(struct knote *kn, long hint) 1754{ 1755 vnode_t vp = (struct vnode *)kn->kn_hook; 1756 int activate = 0; 1757 long orig_hint = hint; 1758 1759 if (0 == hint) { 1760 vnode_lock(vp); 1761 1762 if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) { 1763 /* Is recycled */ 1764 hint = NOTE_REVOKE; 1765 } 1766 } else { 1767 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); 1768 } 1769 1770 /* Special handling for vnodes that are in recycle or already gone */ 1771 if (NOTE_REVOKE == hint) { 1772 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 1773 activate = 1; 1774 1775 if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) { 1776 kn->kn_fflags |= NOTE_REVOKE; 1777 } 1778 } else { 1779 switch(kn->kn_filter) { 1780 case EVFILT_READ: 1781 kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL)); 1782 1783 if (kn->kn_data != 0) { 1784 activate = 1; 1785 } 1786 break; 1787 case EVFILT_WRITE: 1788 kn->kn_data = vnode_writable_space_count(vp); 1789 1790 if (kn->kn_data != 0) { 1791 activate = 1; 1792 } 1793 break; 1794 case EVFILT_VNODE: 1795 /* Check events this note matches against the hint */ 1796 if (kn->kn_sfflags & hint) { 1797 kn->kn_fflags |= hint; /* Set which event occurred */ 1798 } 1799 if (kn->kn_fflags != 0) { 1800 activate = 1; 1801 } 1802 break; 1803 default: 1804 panic("Invalid knote filter on a vnode!\n"); 1805 } 1806 } 1807 1808 if (orig_hint == 0) { 1809 /* 1810 * Definitely need to unlock, may need to put 1811 */ 1812 if (hint == 0) { 1813 vnode_put_locked(vp); 1814 } 1815 vnode_unlock(vp); 1816 } 1817 1818 return (activate); 1819} 1820