1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 67 * 68 */ 69/* 70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 71 * support for mandatory and extensible security protections. This notice 72 * is included in support of clause 2.2 (b) of the Apple Public License, 73 * Version 2.0. 74 */ 75 76#include <sys/param.h> 77#include <sys/types.h> 78#include <sys/systm.h> 79#include <sys/kernel.h> 80#include <sys/file_internal.h> 81#include <sys/stat.h> 82#include <sys/proc_internal.h> 83#include <sys/kauth.h> 84#include <sys/mount_internal.h> 85#include <sys/namei.h> 86#include <sys/vnode_internal.h> 87#include <sys/ioctl.h> 88#include <sys/tty.h> 89/* Temporary workaround for ubc.h until <rdar://4714366 is resolved */ 90#define ubc_setcred ubc_setcred_deprecated 91#include <sys/ubc.h> 92#undef ubc_setcred 93int ubc_setcred(struct vnode *, struct proc *); 94#include <sys/conf.h> 95#include <sys/disk.h> 96#include <sys/fsevents.h> 97#include <sys/kdebug.h> 98#include <sys/xattr.h> 99#include <sys/ubc_internal.h> 100#include <sys/uio_internal.h> 101#include <sys/resourcevar.h> 102#include <sys/signalvar.h> 103 104#include <vm/vm_kern.h> 105#include <vm/vm_map.h> 106 107#include <miscfs/specfs/specdev.h> 108#include <miscfs/fifofs/fifo.h> 109 110#if CONFIG_MACF 111#include <security/mac_framework.h> 112#endif 113 114#if CONFIG_PROTECT 115#include <sys/cprotect.h> 116#endif 117 118 119static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); 120static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, 121 vfs_context_t ctx); 122static int vn_read(struct fileproc *fp, struct uio *uio, int flags, 123 vfs_context_t ctx); 124static int vn_write(struct fileproc *fp, struct uio *uio, int flags, 125 vfs_context_t ctx); 126static int vn_select( struct fileproc *fp, int which, void * wql, 127 vfs_context_t ctx); 128static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, 129 vfs_context_t ctx); 130static void filt_vndetach(struct knote *kn); 131static int filt_vnode(struct knote *kn, long hint); 132static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx); 133#if 0 134static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, 135 vfs_context_t ctx); 136#endif 137 138struct fileops vnops = 139 { vn_read, vn_write, vn_ioctl, vn_select, vn_closefile, vn_kqfilt_add, NULL }; 140 141struct filterops vnode_filtops = { 142 .f_isfd = 1, 143 .f_attach = NULL, 144 .f_detach = filt_vndetach, 145 .f_event = filt_vnode 146}; 147 148/* 149 * Common code for vnode open operations. 150 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine. 151 * 152 * XXX the profusion of interfaces here is probably a bad thing. 153 */ 154int 155vn_open(struct nameidata *ndp, int fmode, int cmode) 156{ 157 return(vn_open_modflags(ndp, &fmode, cmode)); 158} 159 160int 161vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) 162{ 163 struct vnode_attr va; 164 165 VATTR_INIT(&va); 166 VATTR_SET(&va, va_mode, cmode); 167 168 return(vn_open_auth(ndp, fmodep, &va)); 169} 170 171static int 172vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) 173{ 174 int error; 175 176 if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) { 177 goto bad; 178 } 179 180 /* Call out to allow 3rd party notification of open. 181 * Ignore result of kauth_authorize_fileop call. 182 */ 183#if CONFIG_MACF 184 mac_vnode_notify_open(ctx, vp, fmode); 185#endif 186 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 187 (uintptr_t)vp, 0); 188 189 return 0; 190 191bad: 192 return error; 193 194} 195 196/* 197 * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to 198 * determine whether that has happened. 199 */ 200static int 201vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx) 202{ 203 uint32_t status = 0; 204 vnode_t dvp = ndp->ni_dvp; 205 int batched; 206 int error; 207 vnode_t vp; 208 209 batched = vnode_compound_open_available(ndp->ni_dvp); 210 *did_open = FALSE; 211 212 VATTR_SET(vap, va_type, VREG); 213 if (fmode & O_EXCL) 214 vap->va_vaflags |= VA_EXCLUSIVE; 215 216#if NAMEDRSRCFORK 217 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) { 218 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) 219 goto out; 220 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0) 221 goto out; 222 *did_create = TRUE; 223 } else { 224#endif 225 if (!batched) { 226 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) 227 goto out; 228 } 229 230 error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx); 231 if (error != 0) { 232 if (batched) { 233 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE; 234 } else { 235 *did_create = FALSE; 236 } 237 238 if (error == EKEEPLOOKING) { 239 if (*did_create) { 240 panic("EKEEPLOOKING, but we did a create?"); 241 } 242 if (!batched) { 243 panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?"); 244 } 245 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 246 panic("EKEEPLOOKING, but continue flag not set?"); 247 } 248 249 /* 250 * Do NOT drop the dvp: we need everything to continue the lookup. 251 */ 252 return error; 253 } 254 } else { 255 if (batched) { 256 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0; 257 *did_open = TRUE; 258 } else { 259 *did_create = TRUE; 260 } 261 } 262#if NAMEDRSRCFORK 263 } 264#endif 265 266 /* 267 * Unlock the fsnode (if locked) here so that we are free 268 * to drop the dvp iocount and prevent deadlock in build_path(). 269 * nameidone() will still do the right thing later. 270 */ 271 vp = ndp->ni_vp; 272 namei_unlock_fsnode(ndp); 273 274 if (*did_create) { 275 int update_flags = 0; 276 277 // Make sure the name & parent pointers are hooked up 278 if (vp->v_name == NULL) 279 update_flags |= VNODE_UPDATE_NAME; 280 if (vp->v_parent == NULLVP) 281 update_flags |= VNODE_UPDATE_PARENT; 282 283 if (update_flags) 284 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags); 285 286 vnode_put(dvp); 287 ndp->ni_dvp = NULLVP; 288 289#if CONFIG_FSE 290 if (need_fsevent(FSE_CREATE_FILE, vp)) { 291 add_fsevent(FSE_CREATE_FILE, ctx, 292 FSE_ARG_VNODE, vp, 293 FSE_ARG_DONE); 294 } 295#endif 296 } 297out: 298 if (ndp->ni_dvp != NULLVP) { 299 vnode_put(dvp); 300 ndp->ni_dvp = NULLVP; 301 } 302 303 return error; 304} 305 306/* 307 * Open a file with authorization, updating the contents of the structures 308 * pointed to by ndp, fmodep, and vap as necessary to perform the requested 309 * operation. This function is used for both opens of existing files, and 310 * creation of new files. 311 * 312 * Parameters: ndp The nami data pointer describing the 313 * file 314 * fmodep A pointer to an int containg the mode 315 * information to be used for the open 316 * vap A pointer to the vnode attribute 317 * descriptor to be used for the open 318 * 319 * Indirect: * Contents of the data structures pointed 320 * to by the parameters are modified as 321 * necessary to the requested operation. 322 * 323 * Returns: 0 Success 324 * !0 errno value 325 * 326 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. 327 * 328 * The contents of '*ndp' will be modified, based on the other 329 * arguments to this function, and to return file and directory 330 * data necessary to satisfy the requested operation. 331 * 332 * If the file does not exist and we are creating it, then the 333 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the 334 * caller that the file was not truncated. 335 * 336 * If the file exists and the O_EXCL flag was not specified, then 337 * the O_CREAT flag will be cleared in '*fmodep' to indicate to 338 * the caller that the existing file was merely opened rather 339 * than created. 340 * 341 * The contents of '*vap' will be modified as necessary to 342 * complete the operation, including setting of supported 343 * attribute, clearing of fields containing unsupported attributes 344 * in the request, if the request proceeds without them, etc.. 345 * 346 * XXX: This function is too complicated in actings on its arguments 347 * 348 * XXX: We should enummerate the possible errno values here, and where 349 * in the code they originated. 350 */ 351int 352vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) 353{ 354 struct vnode *vp; 355 struct vnode *dvp; 356 vfs_context_t ctx = ndp->ni_cnd.cn_context; 357 int error; 358 int fmode; 359 uint32_t origcnflags; 360 boolean_t did_create; 361 boolean_t did_open; 362 boolean_t need_vnop_open; 363 boolean_t batched; 364 boolean_t ref_failed; 365 366again: 367 vp = NULL; 368 dvp = NULL; 369 batched = FALSE; 370 did_create = FALSE; 371 need_vnop_open = TRUE; 372 ref_failed = FALSE; 373 fmode = *fmodep; 374 origcnflags = ndp->ni_cnd.cn_flags; 375 376 /* 377 * O_CREAT 378 */ 379 if (fmode & O_CREAT) { 380 if ( (fmode & O_DIRECTORY) ) { 381 error = EINVAL; 382 goto out; 383 } 384 ndp->ni_cnd.cn_nameiop = CREATE; 385#if CONFIG_TRIGGERS 386 ndp->ni_op = OP_LINK; 387#endif 388 /* Inherit USEDVP, vnode_open() supported flags only */ 389 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); 390 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; 391 ndp->ni_flag = NAMEI_COMPOUNDOPEN; 392#if NAMEDRSRCFORK 393 /* open calls are allowed for resource forks. */ 394 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; 395#endif 396 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0) 397 ndp->ni_cnd.cn_flags |= FOLLOW; 398 399continue_create_lookup: 400 if ( (error = namei(ndp)) ) 401 goto out; 402 403 dvp = ndp->ni_dvp; 404 vp = ndp->ni_vp; 405 406 batched = vnode_compound_open_available(dvp); 407 408 /* not found, create */ 409 if (vp == NULL) { 410 /* must have attributes for a new file */ 411 if (vap == NULL) { 412 error = EINVAL; 413 goto out; 414 } 415 /* 416 * Attempt a create. For a system supporting compound VNOPs, we may 417 * find an existing file or create one; in either case, we will already 418 * have the file open and no VNOP_OPEN() will be needed. 419 */ 420 error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx); 421 422 dvp = ndp->ni_dvp; 423 vp = ndp->ni_vp; 424 425 /* 426 * Detected a node that the filesystem couldn't handle. Don't call 427 * nameidone() yet, because we need that path buffer. 428 */ 429 if (error == EKEEPLOOKING) { 430 if (!batched) { 431 panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?"); 432 } 433 goto continue_create_lookup; 434 } 435 436 nameidone(ndp); 437 if (dvp) { 438 panic("Shouldn't have a dvp here."); 439 } 440 441 if (error) { 442 /* 443 * Check for a creation or unlink race. 444 */ 445 if (((error == EEXIST) && !(fmode & O_EXCL)) || 446 ((error == ENOENT) && (fmode & O_CREAT))){ 447 if (vp) 448 vnode_put(vp); 449 goto again; 450 } 451 goto bad; 452 } 453 454 need_vnop_open = !did_open; 455 } 456 else { 457 if (fmode & O_EXCL) 458 error = EEXIST; 459 460 /* 461 * We have a vnode. Use compound open if available 462 * or else fall through to "traditional" path. Note: can't 463 * do a compound open for root, because the parent belongs 464 * to a different FS. 465 */ 466 if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) { 467 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); 468 469 if (error == 0) { 470 vp = ndp->ni_vp; 471 need_vnop_open = FALSE; 472 } else if (error == EKEEPLOOKING) { 473 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 474 panic("EKEEPLOOKING, but continue flag not set?"); 475 } 476 goto continue_create_lookup; 477 } 478 } 479 nameidone(ndp); 480 vnode_put(dvp); 481 ndp->ni_dvp = NULLVP; 482 483 if (error) { 484 goto bad; 485 } 486 487 fmode &= ~O_CREAT; 488 489 /* Fall through */ 490 } 491 } else { 492 /* 493 * Not O_CREAT 494 */ 495 ndp->ni_cnd.cn_nameiop = LOOKUP; 496 /* Inherit USEDVP, vnode_open() supported flags only */ 497 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); 498 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT; 499#if NAMEDRSRCFORK 500 /* open calls are allowed for resource forks. */ 501 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; 502#endif 503 ndp->ni_flag = NAMEI_COMPOUNDOPEN; 504 505 /* preserve NOFOLLOW from vnode_open() */ 506 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) { 507 ndp->ni_cnd.cn_flags &= ~FOLLOW; 508 } 509 510 /* Do a lookup, possibly going directly to filesystem for compound operation */ 511 do { 512 if ( (error = namei(ndp)) ) 513 goto out; 514 vp = ndp->ni_vp; 515 dvp = ndp->ni_dvp; 516 517 /* Check for batched lookup-open */ 518 batched = vnode_compound_open_available(dvp); 519 if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) { 520 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); 521 vp = ndp->ni_vp; 522 if (error == 0) { 523 need_vnop_open = FALSE; 524 } else if (error == EKEEPLOOKING) { 525 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 526 panic("EKEEPLOOKING, but continue flag not set?"); 527 } 528 } 529 } 530 } while (error == EKEEPLOOKING); 531 532 nameidone(ndp); 533 vnode_put(dvp); 534 ndp->ni_dvp = NULLVP; 535 536 if (error) { 537 goto bad; 538 } 539 } 540 541 /* 542 * By this point, nameidone() is called, dvp iocount is dropped, 543 * and dvp pointer is cleared. 544 */ 545 if (ndp->ni_dvp != NULLVP) { 546 panic("Haven't cleaned up adequately in vn_open_auth()"); 547 } 548 549 /* 550 * Expect to use this code for filesystems without compound VNOPs, for the root 551 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(), 552 * and for shadow files, which do not live on the same filesystems as their "parents." 553 */ 554 if (need_vnop_open) { 555 if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) { 556 panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?"); 557 } 558 559 if (!did_create) { 560 error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL); 561 if (error) { 562 goto bad; 563 } 564 } 565 566#if CONFIG_PROTECT 567 /* 568 * Perform any content protection access checks prior to calling 569 * into the filesystem, if the raw encrypted mode was not 570 * requested. 571 * 572 * If the va_dataprotect_flags are NOT active, or if they are, 573 * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need 574 * to perform the checks. 575 */ 576 if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) || 577 ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) { 578 error = cp_handle_open (vp, fmode); 579 if (error) { 580 goto bad; 581 } 582 } 583#endif 584 585 error = VNOP_OPEN(vp, fmode, ctx); 586 if (error) { 587 goto bad; 588 } 589 need_vnop_open = FALSE; 590 } 591 592 // if the vnode is tagged VOPENEVT and the current process 593 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY 594 // flag to the open mode so that this open won't count against 595 // the vnode when carbon delete() does a vnode_isinuse() to see 596 // if a file is currently in use. this allows spotlight 597 // importers to not interfere with carbon apps that depend on 598 // the no-delete-if-busy semantics of carbon delete(). 599 // 600 if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) { 601 fmode |= O_EVTONLY; 602 } 603 604 /* 605 * Grab reference, etc. 606 */ 607 error = vn_open_auth_finish(vp, fmode, ctx); 608 if (error) { 609 ref_failed = TRUE; 610 goto bad; 611 } 612 613 /* Compound VNOP open is responsible for doing the truncate */ 614 if (batched || did_create) 615 fmode &= ~O_TRUNC; 616 617 *fmodep = fmode; 618 return (0); 619 620bad: 621 /* Opened either explicitly or by a batched create */ 622 if (!need_vnop_open) { 623 VNOP_CLOSE(vp, fmode, ctx); 624 } 625 626 ndp->ni_vp = NULL; 627 if (vp) { 628#if NAMEDRSRCFORK 629 /* Aggressively recycle shadow files if we error'd out during open() */ 630 if ((vnode_isnamedstream(vp)) && 631 (vp->v_parent != NULLVP) && 632 (vnode_isshadow(vp))) { 633 vnode_recycle(vp); 634 } 635#endif 636 vnode_put(vp); 637 /* 638 * Check for a race against unlink. We had a vnode 639 * but according to vnode_authorize or VNOP_OPEN it 640 * no longer exists. 641 * 642 * EREDRIVEOPEN: means that we were hit by the tty allocation race. 643 */ 644 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) { 645 goto again; 646 } 647 } 648 649out: 650 return (error); 651} 652 653#if vn_access_DEPRECATED 654/* 655 * Authorize an action against a vnode. This has been the canonical way to 656 * ensure that the credential/process/etc. referenced by a vfs_context 657 * is granted the rights called out in 'mode' against the vnode 'vp'. 658 * 659 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult 660 * to add support for more rights. As such, this interface will be deprecated 661 * and callers will use vnode_authorize instead. 662 */ 663int 664vn_access(vnode_t vp, int mode, vfs_context_t context) 665{ 666 kauth_action_t action; 667 668 action = 0; 669 if (mode & VREAD) 670 action |= KAUTH_VNODE_READ_DATA; 671 if (mode & VWRITE) 672 action |= KAUTH_VNODE_WRITE_DATA; 673 if (mode & VEXEC) 674 action |= KAUTH_VNODE_EXECUTE; 675 676 return(vnode_authorize(vp, NULL, action, context)); 677} 678#endif /* vn_access_DEPRECATED */ 679 680/* 681 * Vnode close call 682 */ 683int 684vn_close(struct vnode *vp, int flags, vfs_context_t ctx) 685{ 686 int error; 687 688#if NAMEDRSRCFORK 689 /* Sync data from resource fork shadow file if needed. */ 690 if ((vp->v_flag & VISNAMEDSTREAM) && 691 (vp->v_parent != NULLVP) && 692 vnode_isshadow(vp)) { 693 if (flags & FWASWRITTEN) { 694 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx); 695 } 696 } 697#endif 698 699 /* work around for foxhound */ 700 if (vnode_isspec(vp)) 701 (void)vnode_rele_ext(vp, flags, 0); 702 703 error = VNOP_CLOSE(vp, flags, ctx); 704 705#if CONFIG_FSE 706 if (flags & FWASWRITTEN) { 707 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) { 708 add_fsevent(FSE_CONTENT_MODIFIED, ctx, 709 FSE_ARG_VNODE, vp, 710 FSE_ARG_DONE); 711 } 712 } 713#endif 714 715 if (!vnode_isspec(vp)) 716 (void)vnode_rele_ext(vp, flags, 0); 717 718 return (error); 719} 720 721static int 722vn_read_swapfile( 723 struct vnode *vp, 724 uio_t uio) 725{ 726 int error; 727 off_t swap_count, this_count; 728 off_t file_end, read_end; 729 off_t prev_resid; 730 char *my_swap_page; 731 732 /* 733 * Reading from a swap file will get you zeroes. 734 */ 735 736 my_swap_page = NULL; 737 error = 0; 738 swap_count = uio_resid(uio); 739 740 file_end = ubc_getsize(vp); 741 read_end = uio->uio_offset + uio_resid(uio); 742 if (uio->uio_offset >= file_end) { 743 /* uio starts after end of file: nothing to read */ 744 swap_count = 0; 745 } else if (read_end > file_end) { 746 /* uio extends beyond end of file: stop before that */ 747 swap_count -= (read_end - file_end); 748 } 749 750 while (swap_count > 0) { 751 if (my_swap_page == NULL) { 752 MALLOC(my_swap_page, char *, PAGE_SIZE, 753 M_TEMP, M_WAITOK); 754 memset(my_swap_page, '\0', PAGE_SIZE); 755 /* add an end-of-line to keep line counters happy */ 756 my_swap_page[PAGE_SIZE-1] = '\n'; 757 } 758 this_count = swap_count; 759 if (this_count > PAGE_SIZE) { 760 this_count = PAGE_SIZE; 761 } 762 763 prev_resid = uio_resid(uio); 764 error = uiomove((caddr_t) my_swap_page, 765 this_count, 766 uio); 767 if (error) { 768 break; 769 } 770 swap_count -= (prev_resid - uio_resid(uio)); 771 } 772 if (my_swap_page != NULL) { 773 FREE(my_swap_page, M_TEMP); 774 my_swap_page = NULL; 775 } 776 777 return error; 778} 779/* 780 * Package up an I/O request on a vnode into a uio and do it. 781 */ 782int 783vn_rdwr( 784 enum uio_rw rw, 785 struct vnode *vp, 786 caddr_t base, 787 int len, 788 off_t offset, 789 enum uio_seg segflg, 790 int ioflg, 791 kauth_cred_t cred, 792 int *aresid, 793 proc_t p) 794{ 795 int64_t resid; 796 int result; 797 798 result = vn_rdwr_64(rw, 799 vp, 800 (uint64_t)(uintptr_t)base, 801 (int64_t)len, 802 offset, 803 segflg, 804 ioflg, 805 cred, 806 &resid, 807 p); 808 809 /* "resid" should be bounded above by "len," which is an int */ 810 if (aresid != NULL) { 811 *aresid = resid; 812 } 813 814 return result; 815} 816 817 818int 819vn_rdwr_64( 820 enum uio_rw rw, 821 struct vnode *vp, 822 uint64_t base, 823 int64_t len, 824 off_t offset, 825 enum uio_seg segflg, 826 int ioflg, 827 kauth_cred_t cred, 828 int64_t *aresid, 829 proc_t p) 830{ 831 uio_t auio; 832 int spacetype; 833 struct vfs_context context; 834 int error=0; 835 char uio_buf[ UIO_SIZEOF(1) ]; 836 837 context.vc_thread = current_thread(); 838 context.vc_ucred = cred; 839 840 if (UIO_SEG_IS_USER_SPACE(segflg)) { 841 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; 842 } 843 else { 844 spacetype = UIO_SYSSPACE; 845 } 846 auio = uio_createwithbuffer(1, offset, spacetype, rw, 847 &uio_buf[0], sizeof(uio_buf)); 848 uio_addiov(auio, base, len); 849 850#if CONFIG_MACF 851 /* XXXMAC 852 * IO_NOAUTH should be re-examined. 853 * Likely that mediation should be performed in caller. 854 */ 855 if ((ioflg & IO_NOAUTH) == 0) { 856 /* passed cred is fp->f_cred */ 857 if (rw == UIO_READ) 858 error = mac_vnode_check_read(&context, cred, vp); 859 else 860 error = mac_vnode_check_write(&context, cred, vp); 861 } 862#endif 863 864 if (error == 0) { 865 if (rw == UIO_READ) { 866 if (vnode_isswap(vp)) { 867 error = vn_read_swapfile(vp, auio); 868 } else { 869 error = VNOP_READ(vp, auio, ioflg, &context); 870 } 871 } else { 872 error = VNOP_WRITE(vp, auio, ioflg, &context); 873 } 874 } 875 876 if (aresid) 877 *aresid = uio_resid(auio); 878 else 879 if (uio_resid(auio) && error == 0) 880 error = EIO; 881 return (error); 882} 883 884/* 885 * File table vnode read routine. 886 */ 887static int 888vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) 889{ 890 struct vnode *vp; 891 int error, ioflag; 892 off_t count; 893 894 vp = (struct vnode *)fp->f_fglob->fg_data; 895 if ( (error = vnode_getwithref(vp)) ) { 896 return(error); 897 } 898 899#if CONFIG_MACF 900 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp); 901 if (error) { 902 (void)vnode_put(vp); 903 return (error); 904 } 905#endif 906 907 /* This signals to VNOP handlers that this read came from a file table read */ 908 ioflag = IO_SYSCALL_DISPATCH; 909 910 if (fp->f_fglob->fg_flag & FNONBLOCK) 911 ioflag |= IO_NDELAY; 912 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) 913 ioflag |= IO_NOCACHE; 914 if (fp->f_fglob->fg_flag & FENCRYPTED) { 915 ioflag |= IO_ENCRYPTED; 916 } 917 if (fp->f_fglob->fg_flag & FNORDAHEAD) 918 ioflag |= IO_RAOFF; 919 920 if ((flags & FOF_OFFSET) == 0) 921 uio->uio_offset = fp->f_fglob->fg_offset; 922 count = uio_resid(uio); 923 924 if (vnode_isswap(vp)) { 925 /* special case for swap files */ 926 error = vn_read_swapfile(vp, uio); 927 } else { 928 error = VNOP_READ(vp, uio, ioflag, ctx); 929 } 930 if ((flags & FOF_OFFSET) == 0) 931 fp->f_fglob->fg_offset += count - uio_resid(uio); 932 933 (void)vnode_put(vp); 934 return (error); 935} 936 937 938/* 939 * File table vnode write routine. 940 */ 941static int 942vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) 943{ 944 struct vnode *vp; 945 int error, ioflag; 946 off_t count; 947 int clippedsize = 0; 948 int partialwrite=0; 949 int residcount, oldcount; 950 proc_t p = vfs_context_proc(ctx); 951 952 count = 0; 953 vp = (struct vnode *)fp->f_fglob->fg_data; 954 if ( (error = vnode_getwithref(vp)) ) { 955 return(error); 956 } 957 958#if CONFIG_MACF 959 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp); 960 if (error) { 961 (void)vnode_put(vp); 962 return (error); 963 } 964#endif 965 966 /* 967 * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write originated 968 * from a file table write. 969 */ 970 ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH); 971 972 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND)) 973 ioflag |= IO_APPEND; 974 if (fp->f_fglob->fg_flag & FNONBLOCK) 975 ioflag |= IO_NDELAY; 976 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) 977 ioflag |= IO_NOCACHE; 978 if (fp->f_fglob->fg_flag & FNODIRECT) 979 ioflag |= IO_NODIRECT; 980 if (fp->f_fglob->fg_flag & FSINGLE_WRITER) 981 ioflag |= IO_SINGLE_WRITER; 982 983 /* 984 * Treat synchronous mounts and O_FSYNC on the fd as equivalent. 985 * 986 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay 987 * XXX the non-essential metadata without some additional VFS work; 988 * XXX the intent at this point is to plumb the interface for it. 989 */ 990 if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) || 991 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) { 992 ioflag |= IO_SYNC; 993 } 994 995 if ((flags & FOF_OFFSET) == 0) { 996 uio->uio_offset = fp->f_fglob->fg_offset; 997 count = uio_resid(uio); 998 } 999 if (((flags & FOF_OFFSET) == 0) && 1000 vfs_context_proc(ctx) && (vp->v_type == VREG) && 1001 (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) || 1002 ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) { 1003 /* 1004 * If the requested residual would cause us to go past the 1005 * administrative limit, then we need to adjust the residual 1006 * down to cause fewer bytes than requested to be written. If 1007 * we can't do that (e.g. the residual is already 1 byte), 1008 * then we fail the write with EFBIG. 1009 */ 1010 residcount = uio_resid(uio); 1011 if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 1012 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; 1013 } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) { 1014 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset); 1015 } 1016 if (clippedsize >= residcount) { 1017 psignal(p, SIGXFSZ); 1018 vnode_put(vp); 1019 return (EFBIG); 1020 } 1021 partialwrite = 1; 1022 uio_setresid(uio, residcount-clippedsize); 1023 } 1024 if ((flags & FOF_OFFSET) != 0) { 1025 /* for pwrite, append should be ignored */ 1026 ioflag &= ~IO_APPEND; 1027 if (p && (vp->v_type == VREG) && 1028 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { 1029 psignal(p, SIGXFSZ); 1030 vnode_put(vp); 1031 return (EFBIG); 1032 } 1033 if (p && (vp->v_type == VREG) && 1034 ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { 1035 //Debugger("vn_bwrite:overstepping the bounds"); 1036 residcount = uio_resid(uio); 1037 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; 1038 partialwrite = 1; 1039 uio_setresid(uio, residcount-clippedsize); 1040 } 1041 } 1042 1043 error = VNOP_WRITE(vp, uio, ioflag, ctx); 1044 1045 if (partialwrite) { 1046 oldcount = uio_resid(uio); 1047 uio_setresid(uio, oldcount + clippedsize); 1048 } 1049 1050 if ((flags & FOF_OFFSET) == 0) { 1051 if (ioflag & IO_APPEND) 1052 fp->f_fglob->fg_offset = uio->uio_offset; 1053 else 1054 fp->f_fglob->fg_offset += count - uio_resid(uio); 1055 } 1056 1057 /* 1058 * Set the credentials on successful writes 1059 */ 1060 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) { 1061 /* 1062 * When called from aio subsystem, we only have the proc from 1063 * which to get the credential, at this point, so use that 1064 * instead. This means aio functions are incompatible with 1065 * per-thread credentials (aio operations are proxied). We 1066 * can't easily correct the aio vs. settid race in this case 1067 * anyway, so we disallow it. 1068 */ 1069 if ((flags & FOF_PCRED) == 0) { 1070 ubc_setthreadcred(vp, p, current_thread()); 1071 } else { 1072 ubc_setcred(vp, p); 1073 } 1074 } 1075 (void)vnode_put(vp); 1076 return (error); 1077} 1078 1079/* 1080 * File table vnode stat routine. 1081 * 1082 * Returns: 0 Success 1083 * EBADF 1084 * ENOMEM 1085 * vnode_getattr:??? 1086 */ 1087int 1088vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) 1089{ 1090 struct vnode_attr va; 1091 int error; 1092 u_short mode; 1093 kauth_filesec_t fsec; 1094 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ 1095 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ 1096 1097 if (isstat64 != 0) 1098 sb64 = (struct stat64 *)sbptr; 1099 else 1100 sb = (struct stat *)sbptr; 1101 memset(&va, 0, sizeof(va)); 1102 VATTR_INIT(&va); 1103 VATTR_WANTED(&va, va_fsid); 1104 VATTR_WANTED(&va, va_fileid); 1105 VATTR_WANTED(&va, va_mode); 1106 VATTR_WANTED(&va, va_type); 1107 VATTR_WANTED(&va, va_nlink); 1108 VATTR_WANTED(&va, va_uid); 1109 VATTR_WANTED(&va, va_gid); 1110 VATTR_WANTED(&va, va_rdev); 1111 VATTR_WANTED(&va, va_data_size); 1112 VATTR_WANTED(&va, va_access_time); 1113 VATTR_WANTED(&va, va_modify_time); 1114 VATTR_WANTED(&va, va_change_time); 1115 VATTR_WANTED(&va, va_create_time); 1116 VATTR_WANTED(&va, va_flags); 1117 VATTR_WANTED(&va, va_gen); 1118 VATTR_WANTED(&va, va_iosize); 1119 /* lower layers will synthesise va_total_alloc from va_data_size if required */ 1120 VATTR_WANTED(&va, va_total_alloc); 1121 if (xsec != NULL) { 1122 VATTR_WANTED(&va, va_uuuid); 1123 VATTR_WANTED(&va, va_guuid); 1124 VATTR_WANTED(&va, va_acl); 1125 } 1126 error = vnode_getattr(vp, &va, ctx); 1127 if (error) 1128 goto out; 1129 /* 1130 * Copy from vattr table 1131 */ 1132 if (isstat64 != 0) { 1133 sb64->st_dev = va.va_fsid; 1134 sb64->st_ino = (ino64_t)va.va_fileid; 1135 1136 } else { 1137 sb->st_dev = va.va_fsid; 1138 sb->st_ino = (ino_t)va.va_fileid; 1139 } 1140 mode = va.va_mode; 1141 switch (vp->v_type) { 1142 case VREG: 1143 mode |= S_IFREG; 1144 break; 1145 case VDIR: 1146 mode |= S_IFDIR; 1147 break; 1148 case VBLK: 1149 mode |= S_IFBLK; 1150 break; 1151 case VCHR: 1152 mode |= S_IFCHR; 1153 break; 1154 case VLNK: 1155 mode |= S_IFLNK; 1156 break; 1157 case VSOCK: 1158 mode |= S_IFSOCK; 1159 break; 1160 case VFIFO: 1161 mode |= S_IFIFO; 1162 break; 1163 default: 1164 error = EBADF; 1165 goto out; 1166 }; 1167 if (isstat64 != 0) { 1168 sb64->st_mode = mode; 1169 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1; 1170 sb64->st_uid = va.va_uid; 1171 sb64->st_gid = va.va_gid; 1172 sb64->st_rdev = va.va_rdev; 1173 sb64->st_size = va.va_data_size; 1174 sb64->st_atimespec = va.va_access_time; 1175 sb64->st_mtimespec = va.va_modify_time; 1176 sb64->st_ctimespec = va.va_change_time; 1177 sb64->st_birthtimespec = 1178 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time; 1179 sb64->st_blksize = va.va_iosize; 1180 sb64->st_flags = va.va_flags; 1181 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512; 1182 } else { 1183 sb->st_mode = mode; 1184 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1; 1185 sb->st_uid = va.va_uid; 1186 sb->st_gid = va.va_gid; 1187 sb->st_rdev = va.va_rdev; 1188 sb->st_size = va.va_data_size; 1189 sb->st_atimespec = va.va_access_time; 1190 sb->st_mtimespec = va.va_modify_time; 1191 sb->st_ctimespec = va.va_change_time; 1192 sb->st_blksize = va.va_iosize; 1193 sb->st_flags = va.va_flags; 1194 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512; 1195 } 1196 1197 /* if we're interested in extended security data and we got an ACL */ 1198 if (xsec != NULL) { 1199 if (!VATTR_IS_SUPPORTED(&va, va_acl) && 1200 !VATTR_IS_SUPPORTED(&va, va_uuuid) && 1201 !VATTR_IS_SUPPORTED(&va, va_guuid)) { 1202 *xsec = KAUTH_FILESEC_NONE; 1203 } else { 1204 1205 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { 1206 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount); 1207 } else { 1208 fsec = kauth_filesec_alloc(0); 1209 } 1210 if (fsec == NULL) { 1211 error = ENOMEM; 1212 goto out; 1213 } 1214 fsec->fsec_magic = KAUTH_FILESEC_MAGIC; 1215 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { 1216 fsec->fsec_owner = va.va_uuuid; 1217 } else { 1218 fsec->fsec_owner = kauth_null_guid; 1219 } 1220 if (VATTR_IS_SUPPORTED(&va, va_guuid)) { 1221 fsec->fsec_group = va.va_guuid; 1222 } else { 1223 fsec->fsec_group = kauth_null_guid; 1224 } 1225 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { 1226 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl)); 1227 } else { 1228 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL; 1229 } 1230 *xsec = fsec; 1231 } 1232 } 1233 1234 /* Do not give the generation number out to unpriviledged users */ 1235 if (va.va_gen && !vfs_context_issuser(ctx)) { 1236 if (isstat64 != 0) 1237 sb64->st_gen = 0; 1238 else 1239 sb->st_gen = 0; 1240 } else { 1241 if (isstat64 != 0) 1242 sb64->st_gen = va.va_gen; 1243 else 1244 sb->st_gen = va.va_gen; 1245 } 1246 1247 error = 0; 1248out: 1249 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) 1250 kauth_acl_free(va.va_acl); 1251 return (error); 1252} 1253 1254int 1255vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) 1256{ 1257 int error; 1258 1259#if CONFIG_MACF 1260 error = mac_vnode_check_stat(ctx, NOCRED, vp); 1261 if (error) 1262 return (error); 1263#endif 1264 1265 /* authorize */ 1266 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0) 1267 return(error); 1268 1269 /* actual stat */ 1270 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx)); 1271} 1272 1273 1274/* 1275 * File table vnode ioctl routine. 1276 */ 1277static int 1278vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) 1279{ 1280 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data); 1281 off_t file_size; 1282 int error; 1283 struct vnode *ttyvp; 1284 int funnel_state; 1285 struct session * sessp; 1286 1287 if ( (error = vnode_getwithref(vp)) ) { 1288 return(error); 1289 } 1290 1291#if CONFIG_MACF 1292 error = mac_vnode_check_ioctl(ctx, vp, com); 1293 if (error) 1294 goto out; 1295#endif 1296 1297 switch (vp->v_type) { 1298 case VREG: 1299 case VDIR: 1300 if (com == FIONREAD) { 1301 if ((error = vnode_size(vp, &file_size, ctx)) != 0) 1302 goto out; 1303 *(int *)data = file_size - fp->f_fglob->fg_offset; 1304 goto out; 1305 } 1306 if (com == FIONBIO || com == FIOASYNC) { /* XXX */ 1307 goto out; 1308 } 1309 /* fall into ... */ 1310 1311 default: 1312 error = ENOTTY; 1313 goto out; 1314 1315 case VFIFO: 1316 case VCHR: 1317 case VBLK: 1318 1319 /* Should not be able to set block size from user space */ 1320 if (com == DKIOCSETBLOCKSIZE) { 1321 error = EPERM; 1322 goto out; 1323 } 1324 1325 if (com == FIODTYPE) { 1326 if (vp->v_type == VBLK) { 1327 if (major(vp->v_rdev) >= nblkdev) { 1328 error = ENXIO; 1329 goto out; 1330 } 1331 *(int *)data = D_TYPEMASK & bdevsw[major(vp->v_rdev)].d_type; 1332 1333 } else if (vp->v_type == VCHR) { 1334 if (major(vp->v_rdev) >= nchrdev) { 1335 error = ENXIO; 1336 goto out; 1337 } 1338 *(int *)data = D_TYPEMASK & cdevsw[major(vp->v_rdev)].d_type; 1339 } else { 1340 error = ENOTTY; 1341 goto out; 1342 } 1343 goto out; 1344 } 1345 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx); 1346 1347 if (error == 0 && com == TIOCSCTTY) { 1348 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE); 1349 if (error != 0) { 1350 panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!"); 1351 } 1352 1353 funnel_state = thread_funnel_set(kernel_flock, TRUE); 1354 sessp = proc_session(vfs_context_proc(ctx)); 1355 1356 session_lock(sessp); 1357 ttyvp = sessp->s_ttyvp; 1358 sessp->s_ttyvp = vp; 1359 sessp->s_ttyvid = vnode_vid(vp); 1360 session_unlock(sessp); 1361 session_rele(sessp); 1362 thread_funnel_set(kernel_flock, funnel_state); 1363 1364 if (ttyvp) 1365 vnode_rele(ttyvp); 1366 } 1367 } 1368out: 1369 (void)vnode_put(vp); 1370 return(error); 1371} 1372 1373/* 1374 * File table vnode select routine. 1375 */ 1376static int 1377vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) 1378{ 1379 int error; 1380 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data; 1381 struct vfs_context context; 1382 1383 if ( (error = vnode_getwithref(vp)) == 0 ) { 1384 context.vc_thread = current_thread(); 1385 context.vc_ucred = fp->f_fglob->fg_cred; 1386 1387#if CONFIG_MACF 1388 /* 1389 * XXX We should use a per thread credential here; minimally, 1390 * XXX the process credential should have a persistent 1391 * XXX reference on it before being passed in here. 1392 */ 1393 error = mac_vnode_check_select(ctx, vp, which); 1394 if (error == 0) 1395#endif 1396 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx); 1397 1398 (void)vnode_put(vp); 1399 } 1400 return(error); 1401 1402} 1403 1404/* 1405 * File table vnode close routine. 1406 */ 1407static int 1408vn_closefile(struct fileglob *fg, vfs_context_t ctx) 1409{ 1410 struct vnode *vp = (struct vnode *)fg->fg_data; 1411 int error; 1412 struct flock lf; 1413 1414 if ( (error = vnode_getwithref(vp)) == 0 ) { 1415 1416 if ((fg->fg_flag & FHASLOCK) && fg->fg_type == DTYPE_VNODE) { 1417 lf.l_whence = SEEK_SET; 1418 lf.l_start = 0; 1419 lf.l_len = 0; 1420 lf.l_type = F_UNLCK; 1421 1422 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx); 1423 } 1424 error = vn_close(vp, fg->fg_flag, ctx); 1425 1426 (void)vnode_put(vp); 1427 } 1428 return(error); 1429} 1430 1431/* 1432 * Returns: 0 Success 1433 * VNOP_PATHCONF:??? 1434 */ 1435int 1436vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx) 1437{ 1438 int error = 0; 1439 struct vfs_attr vfa; 1440 1441 switch(name) { 1442 case _PC_EXTENDED_SECURITY_NP: 1443 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0; 1444 break; 1445 case _PC_AUTH_OPAQUE_NP: 1446 *retval = vfs_authopaque(vnode_mount(vp)); 1447 break; 1448 case _PC_2_SYMLINKS: 1449 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */ 1450 break; 1451 case _PC_ALLOC_SIZE_MIN: 1452 *retval = 1; /* XXX lie: 1 byte */ 1453 break; 1454 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONUS_IO */ 1455 *retval = 1; /* [AIO] option is supported */ 1456 break; 1457 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */ 1458 *retval = 0; /* [PIO] option is not supported */ 1459 break; 1460 case _PC_REC_INCR_XFER_SIZE: 1461 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */ 1462 break; 1463 case _PC_REC_MIN_XFER_SIZE: 1464 *retval = 4096; /* XXX recommend 4K minimum reads/writes */ 1465 break; 1466 case _PC_REC_MAX_XFER_SIZE: 1467 *retval = 65536; /* XXX recommend 64K maximum reads/writes */ 1468 break; 1469 case _PC_REC_XFER_ALIGN: 1470 *retval = 4096; /* XXX recommend page aligned buffers */ 1471 break; 1472 case _PC_SYMLINK_MAX: 1473 *retval = 255; /* Minimum acceptable POSIX value */ 1474 break; 1475 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */ 1476 *retval = 0; /* [SIO] option is not supported */ 1477 break; 1478 case _PC_XATTR_SIZE_BITS: 1479 /* The number of bits used to store maximum extended 1480 * attribute size in bytes. For example, if the maximum 1481 * attribute size supported by a file system is 128K, the 1482 * value returned will be 18. However a value 18 can mean 1483 * that the maximum attribute size can be anywhere from 1484 * (256KB - 1) to 128KB. As a special case, the resource 1485 * fork can have much larger size, and some file system 1486 * specific extended attributes can have smaller and preset 1487 * size; for example, Finder Info is always 32 bytes. 1488 */ 1489 memset(&vfa, 0, sizeof(vfa)); 1490 VFSATTR_INIT(&vfa); 1491 VFSATTR_WANTED(&vfa, f_capabilities); 1492 if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 && 1493 (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) && 1494 (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && 1495 (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) { 1496 /* Supports native extended attributes */ 1497 error = VNOP_PATHCONF(vp, name, retval, ctx); 1498 } else { 1499 /* Number of bits used to represent the maximum size of 1500 * extended attribute stored in an Apple Double file. 1501 */ 1502 *retval = AD_XATTR_SIZE_BITS; 1503 } 1504 break; 1505 default: 1506 error = VNOP_PATHCONF(vp, name, retval, ctx); 1507 break; 1508 } 1509 1510 return (error); 1511} 1512 1513static int 1514vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) 1515{ 1516 int error; 1517 struct vnode *vp; 1518 1519 vp = (struct vnode *)fp->f_fglob->fg_data; 1520 1521 /* 1522 * Don't attach a knote to a dead vnode. 1523 */ 1524 if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) { 1525 switch (kn->kn_filter) { 1526 case EVFILT_READ: 1527 case EVFILT_WRITE: 1528 if (vnode_isfifo(vp)) { 1529 /* We'll only watch FIFOs that use our fifofs */ 1530 if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) { 1531 error = ENOTSUP; 1532 } 1533 1534 } else if (!vnode_isreg(vp)) { 1535 if (vnode_ischr(vp) && 1536 (error = spec_kqfilter(vp, kn)) == 0) { 1537 /* claimed by a special device */ 1538 vnode_put(vp); 1539 return 0; 1540 } 1541 1542 error = EINVAL; 1543 } 1544 break; 1545 case EVFILT_VNODE: 1546 break; 1547 default: 1548 error = EINVAL; 1549 } 1550 1551 if (error) { 1552 vnode_put(vp); 1553 return error; 1554 } 1555 1556#if CONFIG_MACF 1557 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp); 1558 if (error) { 1559 vnode_put(vp); 1560 return error; 1561 } 1562#endif 1563 1564 kn->kn_hook = (void*)vp; 1565 kn->kn_hookid = vnode_vid(vp); 1566 kn->kn_fop = &vnode_filtops; 1567 1568 vnode_lock(vp); 1569 KNOTE_ATTACH(&vp->v_knotes, kn); 1570 vnode_unlock(vp); 1571 1572 /* Ask the filesystem to provide remove notifications, but ignore failure */ 1573 VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx); 1574 1575 vnode_put(vp); 1576 } 1577 1578 return (error); 1579} 1580 1581static void 1582filt_vndetach(struct knote *kn) 1583{ 1584 vfs_context_t ctx = vfs_context_current(); 1585 struct vnode *vp; 1586 vp = (struct vnode *)kn->kn_hook; 1587 if (vnode_getwithvid(vp, kn->kn_hookid)) 1588 return; 1589 1590 vnode_lock(vp); 1591 KNOTE_DETACH(&vp->v_knotes, kn); 1592 vnode_unlock(vp); 1593 1594 /* 1595 * Tell a (generally networked) filesystem that we're no longer watching 1596 * If the FS wants to track contexts, it should still be using the one from 1597 * the VNODE_MONITOR_BEGIN. 1598 */ 1599 VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx); 1600 vnode_put(vp); 1601} 1602 1603 1604/* 1605 * Used for EVFILT_READ 1606 * 1607 * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case 1608 * differently than the regular case for VREG files. If not in poll(), 1609 * then we need to know current fileproc offset for VREG. 1610 */ 1611static intptr_t 1612vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) 1613{ 1614 if (vnode_isfifo(vp)) { 1615 int cnt; 1616 int err = fifo_charcount(vp, &cnt); 1617 if (err == 0) { 1618 return (intptr_t)cnt; 1619 } else { 1620 return (intptr_t)0; 1621 } 1622 } else if (vnode_isreg(vp)) { 1623 if (ispoll) { 1624 return (intptr_t)1; 1625 } 1626 1627 off_t amount; 1628 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset; 1629 if (amount > (off_t)INTPTR_MAX) { 1630 return INTPTR_MAX; 1631 } else if (amount < (off_t)INTPTR_MIN) { 1632 return INTPTR_MIN; 1633 } else { 1634 return (intptr_t)amount; 1635 } 1636 } else { 1637 panic("Should never have an EVFILT_READ except for reg or fifo."); 1638 return 0; 1639 } 1640} 1641 1642/* 1643 * Used for EVFILT_WRITE. 1644 * 1645 * For regular vnodes, we can always write (1). For named pipes, 1646 * see how much space there is in the buffer. Nothing else is covered. 1647 */ 1648static intptr_t 1649vnode_writable_space_count(vnode_t vp) 1650{ 1651 if (vnode_isfifo(vp)) { 1652 long spc; 1653 int err = fifo_freespace(vp, &spc); 1654 if (err == 0) { 1655 return (intptr_t)spc; 1656 } else { 1657 return (intptr_t)0; 1658 } 1659 } else if (vnode_isreg(vp)) { 1660 return (intptr_t)1; 1661 } else { 1662 panic("Should never have an EVFILT_READ except for reg or fifo."); 1663 return 0; 1664 } 1665} 1666 1667/* 1668 * Determine whether this knote should be active 1669 * 1670 * This is kind of subtle. 1671 * --First, notice if the vnode has been revoked: in so, override hint 1672 * --EVFILT_READ knotes are checked no matter what the hint is 1673 * --Other knotes activate based on hint. 1674 * --If hint is revoke, set special flags and activate 1675 */ 1676static int 1677filt_vnode(struct knote *kn, long hint) 1678{ 1679 vnode_t vp = (struct vnode *)kn->kn_hook; 1680 int activate = 0; 1681 long orig_hint = hint; 1682 1683 if (0 == hint) { 1684 vnode_lock(vp); 1685 1686 if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) { 1687 /* Is recycled */ 1688 hint = NOTE_REVOKE; 1689 } 1690 } else { 1691 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); 1692 } 1693 1694 /* Special handling for vnodes that are in recycle or already gone */ 1695 if (NOTE_REVOKE == hint) { 1696 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 1697 activate = 1; 1698 1699 if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) { 1700 kn->kn_fflags |= NOTE_REVOKE; 1701 } 1702 } else { 1703 switch(kn->kn_filter) { 1704 case EVFILT_READ: 1705 kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL)); 1706 1707 if (kn->kn_data != 0) { 1708 activate = 1; 1709 } 1710 break; 1711 case EVFILT_WRITE: 1712 kn->kn_data = vnode_writable_space_count(vp); 1713 1714 if (kn->kn_data != 0) { 1715 activate = 1; 1716 } 1717 break; 1718 case EVFILT_VNODE: 1719 /* Check events this note matches against the hint */ 1720 if (kn->kn_sfflags & hint) { 1721 kn->kn_fflags |= hint; /* Set which event occurred */ 1722 } 1723 if (kn->kn_fflags != 0) { 1724 activate = 1; 1725 } 1726 break; 1727 default: 1728 panic("Invalid knote filter on a vnode!\n"); 1729 } 1730 } 1731 1732 if (orig_hint == 0) { 1733 /* 1734 * Definitely need to unlock, may need to put 1735 */ 1736 if (hint == 0) { 1737 vnode_put_locked(vp); 1738 } 1739 vnode_unlock(vp); 1740 } 1741 1742 return (activate); 1743} 1744