1/* 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_vnops.c 8.14 (Berkeley) 6/15/95 67 * 68 */ 69/* 70 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 71 * support for mandatory and extensible security protections. This notice 72 * is included in support of clause 2.2 (b) of the Apple Public License, 73 * Version 2.0. 74 */ 75 76#include <sys/param.h> 77#include <sys/types.h> 78#include <sys/systm.h> 79#include <sys/kernel.h> 80#include <sys/file_internal.h> 81#include <sys/stat.h> 82#include <sys/proc_internal.h> 83#include <sys/kauth.h> 84#include <sys/mount_internal.h> 85#include <sys/namei.h> 86#include <sys/vnode_internal.h> 87#include <sys/ioctl.h> 88#include <sys/tty.h> 89/* Temporary workaround for ubc.h until <rdar://4714366 is resolved */ 90#define ubc_setcred ubc_setcred_deprecated 91#include <sys/ubc.h> 92#undef ubc_setcred 93int ubc_setcred(struct vnode *, struct proc *); 94#include <sys/conf.h> 95#include <sys/disk.h> 96#include <sys/fsevents.h> 97#include <sys/kdebug.h> 98#include <sys/xattr.h> 99#include <sys/ubc_internal.h> 100#include <sys/uio_internal.h> 101#include <sys/resourcevar.h> 102#include <sys/signalvar.h> 103 104#include <vm/vm_kern.h> 105#include <vm/vm_map.h> 106 107#include <miscfs/specfs/specdev.h> 108#include <miscfs/fifofs/fifo.h> 109 110#if CONFIG_MACF 111#include <security/mac_framework.h> 112#endif 113 114#if CONFIG_PROTECT 115#include <sys/cprotect.h> 116#endif 117 118extern void sigpup_attach_vnode(vnode_t); /* XXX */ 119 120static int vn_closefile(struct fileglob *fp, vfs_context_t ctx); 121static int vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, 122 vfs_context_t ctx); 123static int vn_read(struct fileproc *fp, struct uio *uio, int flags, 124 vfs_context_t ctx); 125static int vn_write(struct fileproc *fp, struct uio *uio, int flags, 126 vfs_context_t ctx); 127static int vn_select( struct fileproc *fp, int which, void * wql, 128 vfs_context_t ctx); 129static int vn_kqfilt_add(struct fileproc *fp, struct knote *kn, 130 vfs_context_t ctx); 131static void filt_vndetach(struct knote *kn); 132static int filt_vnode(struct knote *kn, long hint); 133static int vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx); 134#if 0 135static int vn_kqfilt_remove(struct vnode *vp, uintptr_t ident, 136 vfs_context_t ctx); 137#endif 138 139const struct fileops vnops = { 140 DTYPE_VNODE, 141 vn_read, 142 vn_write, 143 vn_ioctl, 144 vn_select, 145 vn_closefile, 146 vn_kqfilt_add, 147 NULL 148}; 149 150struct filterops vnode_filtops = { 151 .f_isfd = 1, 152 .f_attach = NULL, 153 .f_detach = filt_vndetach, 154 .f_event = filt_vnode 155}; 156 157/* 158 * Common code for vnode open operations. 159 * Check permissions, and call the VNOP_OPEN or VNOP_CREATE routine. 160 * 161 * XXX the profusion of interfaces here is probably a bad thing. 162 */ 163int 164vn_open(struct nameidata *ndp, int fmode, int cmode) 165{ 166 return(vn_open_modflags(ndp, &fmode, cmode)); 167} 168 169int 170vn_open_modflags(struct nameidata *ndp, int *fmodep, int cmode) 171{ 172 struct vnode_attr va; 173 174 VATTR_INIT(&va); 175 VATTR_SET(&va, va_mode, cmode); 176 177 return(vn_open_auth(ndp, fmodep, &va)); 178} 179 180static int 181vn_open_auth_finish(vnode_t vp, int fmode, vfs_context_t ctx) 182{ 183 int error; 184 185 if ((error = vnode_ref_ext(vp, fmode, 0)) != 0) { 186 goto bad; 187 } 188 189 /* Call out to allow 3rd party notification of open. 190 * Ignore result of kauth_authorize_fileop call. 191 */ 192#if CONFIG_MACF 193 mac_vnode_notify_open(ctx, vp, fmode); 194#endif 195 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN, 196 (uintptr_t)vp, 0); 197 198 sigpup_attach_vnode(vp); 199 200 return 0; 201 202bad: 203 return error; 204 205} 206 207/* 208 * May do nameidone() to allow safely adding an FSEvent. Cue off of ni_dvp to 209 * determine whether that has happened. 210 */ 211static int 212vn_open_auth_do_create(struct nameidata *ndp, struct vnode_attr *vap, int fmode, boolean_t *did_create, boolean_t *did_open, vfs_context_t ctx) 213{ 214 uint32_t status = 0; 215 vnode_t dvp = ndp->ni_dvp; 216 int batched; 217 int error; 218 vnode_t vp; 219 220 batched = vnode_compound_open_available(ndp->ni_dvp); 221 *did_open = FALSE; 222 223 VATTR_SET(vap, va_type, VREG); 224 if (fmode & O_EXCL) 225 vap->va_vaflags |= VA_EXCLUSIVE; 226 227#if NAMEDRSRCFORK 228 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) { 229 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) 230 goto out; 231 if ((error = vnode_makenamedstream(dvp, &ndp->ni_vp, XATTR_RESOURCEFORK_NAME, 0, ctx)) != 0) 232 goto out; 233 *did_create = TRUE; 234 } else { 235#endif 236 if (!batched) { 237 if ((error = vn_authorize_create(dvp, &ndp->ni_cnd, vap, ctx, NULL)) != 0) 238 goto out; 239 } 240 241 error = vn_create(dvp, &ndp->ni_vp, ndp, vap, VN_CREATE_DOOPEN, fmode, &status, ctx); 242 if (error != 0) { 243 if (batched) { 244 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? TRUE : FALSE; 245 } else { 246 *did_create = FALSE; 247 } 248 249 if (error == EKEEPLOOKING) { 250 if (*did_create) { 251 panic("EKEEPLOOKING, but we did a create?"); 252 } 253 if (!batched) { 254 panic("EKEEPLOOKING from filesystem that doesn't support compound vnops?"); 255 } 256 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 257 panic("EKEEPLOOKING, but continue flag not set?"); 258 } 259 260 /* 261 * Do NOT drop the dvp: we need everything to continue the lookup. 262 */ 263 return error; 264 } 265 } else { 266 if (batched) { 267 *did_create = (status & COMPOUND_OPEN_STATUS_DID_CREATE) ? 1 : 0; 268 *did_open = TRUE; 269 } else { 270 *did_create = TRUE; 271 } 272 } 273#if NAMEDRSRCFORK 274 } 275#endif 276 277 vp = ndp->ni_vp; 278 279 if (*did_create) { 280 int update_flags = 0; 281 282 // Make sure the name & parent pointers are hooked up 283 if (vp->v_name == NULL) 284 update_flags |= VNODE_UPDATE_NAME; 285 if (vp->v_parent == NULLVP) 286 update_flags |= VNODE_UPDATE_PARENT; 287 288 if (update_flags) 289 vnode_update_identity(vp, dvp, ndp->ni_cnd.cn_nameptr, ndp->ni_cnd.cn_namelen, ndp->ni_cnd.cn_hash, update_flags); 290 291 vnode_put(dvp); 292 ndp->ni_dvp = NULLVP; 293 294#if CONFIG_FSE 295 if (need_fsevent(FSE_CREATE_FILE, vp)) { 296 add_fsevent(FSE_CREATE_FILE, ctx, 297 FSE_ARG_VNODE, vp, 298 FSE_ARG_DONE); 299 } 300#endif 301 } 302out: 303 if (ndp->ni_dvp != NULLVP) { 304 vnode_put(dvp); 305 ndp->ni_dvp = NULLVP; 306 } 307 308 return error; 309} 310 311/* 312 * Open a file with authorization, updating the contents of the structures 313 * pointed to by ndp, fmodep, and vap as necessary to perform the requested 314 * operation. This function is used for both opens of existing files, and 315 * creation of new files. 316 * 317 * Parameters: ndp The nami data pointer describing the 318 * file 319 * fmodep A pointer to an int containg the mode 320 * information to be used for the open 321 * vap A pointer to the vnode attribute 322 * descriptor to be used for the open 323 * 324 * Indirect: * Contents of the data structures pointed 325 * to by the parameters are modified as 326 * necessary to the requested operation. 327 * 328 * Returns: 0 Success 329 * !0 errno value 330 * 331 * Notes: The kauth_filesec_t in 'vap', if any, is in host byte order. 332 * 333 * The contents of '*ndp' will be modified, based on the other 334 * arguments to this function, and to return file and directory 335 * data necessary to satisfy the requested operation. 336 * 337 * If the file does not exist and we are creating it, then the 338 * O_TRUNC flag will be cleared in '*fmodep' to indicate to the 339 * caller that the file was not truncated. 340 * 341 * If the file exists and the O_EXCL flag was not specified, then 342 * the O_CREAT flag will be cleared in '*fmodep' to indicate to 343 * the caller that the existing file was merely opened rather 344 * than created. 345 * 346 * The contents of '*vap' will be modified as necessary to 347 * complete the operation, including setting of supported 348 * attribute, clearing of fields containing unsupported attributes 349 * in the request, if the request proceeds without them, etc.. 350 * 351 * XXX: This function is too complicated in actings on its arguments 352 * 353 * XXX: We should enummerate the possible errno values here, and where 354 * in the code they originated. 355 */ 356int 357vn_open_auth(struct nameidata *ndp, int *fmodep, struct vnode_attr *vap) 358{ 359 struct vnode *vp; 360 struct vnode *dvp; 361 vfs_context_t ctx = ndp->ni_cnd.cn_context; 362 int error; 363 int fmode; 364 uint32_t origcnflags; 365 boolean_t did_create; 366 boolean_t did_open; 367 boolean_t need_vnop_open; 368 boolean_t batched; 369 boolean_t ref_failed; 370 371again: 372 vp = NULL; 373 dvp = NULL; 374 batched = FALSE; 375 did_create = FALSE; 376 need_vnop_open = TRUE; 377 ref_failed = FALSE; 378 fmode = *fmodep; 379 origcnflags = ndp->ni_cnd.cn_flags; 380 381 /* 382 * O_CREAT 383 */ 384 if (fmode & O_CREAT) { 385 if ( (fmode & O_DIRECTORY) ) { 386 error = EINVAL; 387 goto out; 388 } 389 ndp->ni_cnd.cn_nameiop = CREATE; 390#if CONFIG_TRIGGERS 391 ndp->ni_op = OP_LINK; 392#endif 393 /* Inherit USEDVP, vnode_open() supported flags only */ 394 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); 395 ndp->ni_cnd.cn_flags |= LOCKPARENT | LOCKLEAF | AUDITVNPATH1; 396 ndp->ni_flag = NAMEI_COMPOUNDOPEN; 397#if NAMEDRSRCFORK 398 /* open calls are allowed for resource forks. */ 399 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; 400#endif 401 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0 && (origcnflags & FOLLOW) != 0) 402 ndp->ni_cnd.cn_flags |= FOLLOW; 403 404continue_create_lookup: 405 if ( (error = namei(ndp)) ) 406 goto out; 407 408 dvp = ndp->ni_dvp; 409 vp = ndp->ni_vp; 410 411 batched = vnode_compound_open_available(dvp); 412 413 /* not found, create */ 414 if (vp == NULL) { 415 /* must have attributes for a new file */ 416 if (vap == NULL) { 417 error = EINVAL; 418 goto out; 419 } 420 /* 421 * Attempt a create. For a system supporting compound VNOPs, we may 422 * find an existing file or create one; in either case, we will already 423 * have the file open and no VNOP_OPEN() will be needed. 424 */ 425 error = vn_open_auth_do_create(ndp, vap, fmode, &did_create, &did_open, ctx); 426 427 dvp = ndp->ni_dvp; 428 vp = ndp->ni_vp; 429 430 /* 431 * Detected a node that the filesystem couldn't handle. Don't call 432 * nameidone() yet, because we need that path buffer. 433 */ 434 if (error == EKEEPLOOKING) { 435 if (!batched) { 436 panic("EKEEPLOOKING from a filesystem that doesn't support compound VNOPs?"); 437 } 438 goto continue_create_lookup; 439 } 440 441 nameidone(ndp); 442 if (dvp) { 443 panic("Shouldn't have a dvp here."); 444 } 445 446 if (error) { 447 /* 448 * Check for a creation or unlink race. 449 */ 450 if (((error == EEXIST) && !(fmode & O_EXCL)) || 451 ((error == ENOENT) && (fmode & O_CREAT))){ 452 if (vp) 453 vnode_put(vp); 454 goto again; 455 } 456 goto bad; 457 } 458 459 need_vnop_open = !did_open; 460 } 461 else { 462 if (fmode & O_EXCL) 463 error = EEXIST; 464 465 /* 466 * We have a vnode. Use compound open if available 467 * or else fall through to "traditional" path. Note: can't 468 * do a compound open for root, because the parent belongs 469 * to a different FS. 470 */ 471 if (error == 0 && batched && (vnode_mount(dvp) == vnode_mount(vp))) { 472 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); 473 474 if (error == 0) { 475 vp = ndp->ni_vp; 476 need_vnop_open = FALSE; 477 } else if (error == EKEEPLOOKING) { 478 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 479 panic("EKEEPLOOKING, but continue flag not set?"); 480 } 481 goto continue_create_lookup; 482 } 483 } 484 nameidone(ndp); 485 vnode_put(dvp); 486 ndp->ni_dvp = NULLVP; 487 488 if (error) { 489 goto bad; 490 } 491 492 fmode &= ~O_CREAT; 493 494 /* Fall through */ 495 } 496 } 497 else { 498 /* 499 * Not O_CREAT 500 */ 501 ndp->ni_cnd.cn_nameiop = LOOKUP; 502 /* Inherit USEDVP, vnode_open() supported flags only */ 503 ndp->ni_cnd.cn_flags &= (USEDVP | NOCROSSMOUNT | DOWHITEOUT); 504 ndp->ni_cnd.cn_flags |= FOLLOW | LOCKLEAF | AUDITVNPATH1 | WANTPARENT; 505#if NAMEDRSRCFORK 506 /* open calls are allowed for resource forks. */ 507 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; 508#endif 509 ndp->ni_flag = NAMEI_COMPOUNDOPEN; 510 511 /* preserve NOFOLLOW from vnode_open() */ 512 if (fmode & O_NOFOLLOW || fmode & O_SYMLINK || (origcnflags & FOLLOW) == 0) { 513 ndp->ni_cnd.cn_flags &= ~FOLLOW; 514 } 515 516 /* Do a lookup, possibly going directly to filesystem for compound operation */ 517 do { 518 if ( (error = namei(ndp)) ) 519 goto out; 520 vp = ndp->ni_vp; 521 dvp = ndp->ni_dvp; 522 523 /* Check for batched lookup-open */ 524 batched = vnode_compound_open_available(dvp); 525 if (batched && ((vp == NULLVP) || (vnode_mount(dvp) == vnode_mount(vp)))) { 526 error = VNOP_COMPOUND_OPEN(dvp, &ndp->ni_vp, ndp, 0, fmode, NULL, NULL, ctx); 527 vp = ndp->ni_vp; 528 if (error == 0) { 529 need_vnop_open = FALSE; 530 } else if (error == EKEEPLOOKING) { 531 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 532 panic("EKEEPLOOKING, but continue flag not set?"); 533 } 534 } 535 } 536 } while (error == EKEEPLOOKING); 537 538 nameidone(ndp); 539 vnode_put(dvp); 540 ndp->ni_dvp = NULLVP; 541 542 if (error) { 543 goto bad; 544 } 545 } 546 547 /* 548 * By this point, nameidone() is called, dvp iocount is dropped, 549 * and dvp pointer is cleared. 550 */ 551 if (ndp->ni_dvp != NULLVP) { 552 panic("Haven't cleaned up adequately in vn_open_auth()"); 553 } 554 555 /* 556 * Expect to use this code for filesystems without compound VNOPs, for the root 557 * of a filesystem, which can't be "looked up" in the sense of VNOP_LOOKUP(), 558 * and for shadow files, which do not live on the same filesystems as their "parents." 559 */ 560 if (need_vnop_open) { 561 if (batched && !vnode_isvroot(vp) && !vnode_isnamedstream(vp)) { 562 panic("Why am I trying to use VNOP_OPEN() on anything other than the root or a named stream?"); 563 } 564 565 if (!did_create) { 566 error = vn_authorize_open_existing(vp, &ndp->ni_cnd, fmode, ctx, NULL); 567 if (error) { 568 goto bad; 569 } 570 } 571 572#if CONFIG_PROTECT 573 /* 574 * Perform any content protection access checks prior to calling 575 * into the filesystem, if the raw encrypted mode was not 576 * requested. 577 * 578 * If the va_dataprotect_flags are NOT active, or if they are, 579 * but they do not have the VA_DP_RAWENCRYPTED bit set, then we need 580 * to perform the checks. 581 */ 582 if (!(VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) || 583 ((vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) == 0)) { 584 error = cp_handle_open (vp, fmode); 585 if (error) { 586 goto bad; 587 } 588 } 589#endif 590 591 error = VNOP_OPEN(vp, fmode, ctx); 592 if (error) { 593 goto bad; 594 } 595 need_vnop_open = FALSE; 596 } 597 598 // if the vnode is tagged VOPENEVT and the current process 599 // has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY 600 // flag to the open mode so that this open won't count against 601 // the vnode when carbon delete() does a vnode_isinuse() to see 602 // if a file is currently in use. this allows spotlight 603 // importers to not interfere with carbon apps that depend on 604 // the no-delete-if-busy semantics of carbon delete(). 605 // 606 if (!did_create && (vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) { 607 fmode |= O_EVTONLY; 608 } 609 610 /* 611 * Grab reference, etc. 612 */ 613 error = vn_open_auth_finish(vp, fmode, ctx); 614 if (error) { 615 ref_failed = TRUE; 616 goto bad; 617 } 618 619 /* Compound VNOP open is responsible for doing the truncate */ 620 if (batched || did_create) 621 fmode &= ~O_TRUNC; 622 623 *fmodep = fmode; 624 return (0); 625 626bad: 627 /* Opened either explicitly or by a batched create */ 628 if (!need_vnop_open) { 629 VNOP_CLOSE(vp, fmode, ctx); 630 } 631 632 ndp->ni_vp = NULL; 633 if (vp) { 634#if NAMEDRSRCFORK 635 /* Aggressively recycle shadow files if we error'd out during open() */ 636 if ((vnode_isnamedstream(vp)) && 637 (vp->v_parent != NULLVP) && 638 (vnode_isshadow(vp))) { 639 vnode_recycle(vp); 640 } 641#endif 642 vnode_put(vp); 643 /* 644 * Check for a race against unlink. We had a vnode 645 * but according to vnode_authorize or VNOP_OPEN it 646 * no longer exists. 647 * 648 * EREDRIVEOPEN: means that we were hit by the tty allocation race. 649 */ 650 if (((error == ENOENT) && (*fmodep & O_CREAT)) || (error == EREDRIVEOPEN) || ref_failed) { 651 goto again; 652 } 653 } 654 655out: 656 return (error); 657} 658 659#if vn_access_DEPRECATED 660/* 661 * Authorize an action against a vnode. This has been the canonical way to 662 * ensure that the credential/process/etc. referenced by a vfs_context 663 * is granted the rights called out in 'mode' against the vnode 'vp'. 664 * 665 * Unfortunately, the use of VREAD/VWRITE/VEXEC makes it very difficult 666 * to add support for more rights. As such, this interface will be deprecated 667 * and callers will use vnode_authorize instead. 668 */ 669int 670vn_access(vnode_t vp, int mode, vfs_context_t context) 671{ 672 kauth_action_t action; 673 674 action = 0; 675 if (mode & VREAD) 676 action |= KAUTH_VNODE_READ_DATA; 677 if (mode & VWRITE) 678 action |= KAUTH_VNODE_WRITE_DATA; 679 if (mode & VEXEC) 680 action |= KAUTH_VNODE_EXECUTE; 681 682 return(vnode_authorize(vp, NULL, action, context)); 683} 684#endif /* vn_access_DEPRECATED */ 685 686/* 687 * Vnode close call 688 */ 689int 690vn_close(struct vnode *vp, int flags, vfs_context_t ctx) 691{ 692 int error; 693 int flusherror = 0; 694 695#if NAMEDRSRCFORK 696 /* Sync data from resource fork shadow file if needed. */ 697 if ((vp->v_flag & VISNAMEDSTREAM) && 698 (vp->v_parent != NULLVP) && 699 vnode_isshadow(vp)) { 700 if (flags & FWASWRITTEN) { 701 flusherror = vnode_flushnamedstream(vp->v_parent, vp, ctx); 702 } 703 } 704#endif 705 706 /* work around for foxhound */ 707 if (vnode_isspec(vp)) 708 (void)vnode_rele_ext(vp, flags, 0); 709 710 error = VNOP_CLOSE(vp, flags, ctx); 711 712#if CONFIG_FSE 713 if (flags & FWASWRITTEN) { 714 if (need_fsevent(FSE_CONTENT_MODIFIED, vp)) { 715 add_fsevent(FSE_CONTENT_MODIFIED, ctx, 716 FSE_ARG_VNODE, vp, 717 FSE_ARG_DONE); 718 } 719 } 720#endif 721 722 if (!vnode_isspec(vp)) 723 (void)vnode_rele_ext(vp, flags, 0); 724 725 if (flusherror) { 726 error = flusherror; 727 } 728 return (error); 729} 730 731static int 732vn_read_swapfile( 733 struct vnode *vp, 734 uio_t uio) 735{ 736 int error; 737 off_t swap_count, this_count; 738 off_t file_end, read_end; 739 off_t prev_resid; 740 char *my_swap_page; 741 742 /* 743 * Reading from a swap file will get you zeroes. 744 */ 745 746 my_swap_page = NULL; 747 error = 0; 748 swap_count = uio_resid(uio); 749 750 file_end = ubc_getsize(vp); 751 read_end = uio->uio_offset + uio_resid(uio); 752 if (uio->uio_offset >= file_end) { 753 /* uio starts after end of file: nothing to read */ 754 swap_count = 0; 755 } else if (read_end > file_end) { 756 /* uio extends beyond end of file: stop before that */ 757 swap_count -= (read_end - file_end); 758 } 759 760 while (swap_count > 0) { 761 if (my_swap_page == NULL) { 762 MALLOC(my_swap_page, char *, PAGE_SIZE, 763 M_TEMP, M_WAITOK); 764 memset(my_swap_page, '\0', PAGE_SIZE); 765 /* add an end-of-line to keep line counters happy */ 766 my_swap_page[PAGE_SIZE-1] = '\n'; 767 } 768 this_count = swap_count; 769 if (this_count > PAGE_SIZE) { 770 this_count = PAGE_SIZE; 771 } 772 773 prev_resid = uio_resid(uio); 774 error = uiomove((caddr_t) my_swap_page, 775 this_count, 776 uio); 777 if (error) { 778 break; 779 } 780 swap_count -= (prev_resid - uio_resid(uio)); 781 } 782 if (my_swap_page != NULL) { 783 FREE(my_swap_page, M_TEMP); 784 my_swap_page = NULL; 785 } 786 787 return error; 788} 789/* 790 * Package up an I/O request on a vnode into a uio and do it. 791 */ 792int 793vn_rdwr( 794 enum uio_rw rw, 795 struct vnode *vp, 796 caddr_t base, 797 int len, 798 off_t offset, 799 enum uio_seg segflg, 800 int ioflg, 801 kauth_cred_t cred, 802 int *aresid, 803 proc_t p) 804{ 805 int64_t resid; 806 int result; 807 808 result = vn_rdwr_64(rw, 809 vp, 810 (uint64_t)(uintptr_t)base, 811 (int64_t)len, 812 offset, 813 segflg, 814 ioflg, 815 cred, 816 &resid, 817 p); 818 819 /* "resid" should be bounded above by "len," which is an int */ 820 if (aresid != NULL) { 821 *aresid = resid; 822 } 823 824 return result; 825} 826 827 828int 829vn_rdwr_64( 830 enum uio_rw rw, 831 struct vnode *vp, 832 uint64_t base, 833 int64_t len, 834 off_t offset, 835 enum uio_seg segflg, 836 int ioflg, 837 kauth_cred_t cred, 838 int64_t *aresid, 839 proc_t p) 840{ 841 uio_t auio; 842 int spacetype; 843 struct vfs_context context; 844 int error=0; 845 char uio_buf[ UIO_SIZEOF(1) ]; 846 847 context.vc_thread = current_thread(); 848 context.vc_ucred = cred; 849 850 if (UIO_SEG_IS_USER_SPACE(segflg)) { 851 spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; 852 } 853 else { 854 spacetype = UIO_SYSSPACE; 855 } 856 auio = uio_createwithbuffer(1, offset, spacetype, rw, 857 &uio_buf[0], sizeof(uio_buf)); 858 uio_addiov(auio, base, len); 859 860#if CONFIG_MACF 861 /* XXXMAC 862 * IO_NOAUTH should be re-examined. 863 * Likely that mediation should be performed in caller. 864 */ 865 if ((ioflg & IO_NOAUTH) == 0) { 866 /* passed cred is fp->f_cred */ 867 if (rw == UIO_READ) 868 error = mac_vnode_check_read(&context, cred, vp); 869 else 870 error = mac_vnode_check_write(&context, cred, vp); 871 } 872#endif 873 874 if (error == 0) { 875 if (rw == UIO_READ) { 876 if (vnode_isswap(vp) && ((ioflg & IO_SWAP_DISPATCH) == 0)) { 877 error = vn_read_swapfile(vp, auio); 878 } else { 879 error = VNOP_READ(vp, auio, ioflg, &context); 880 } 881 } else { 882 error = VNOP_WRITE(vp, auio, ioflg, &context); 883 } 884 } 885 886 if (aresid) 887 *aresid = uio_resid(auio); 888 else 889 if (uio_resid(auio) && error == 0) 890 error = EIO; 891 return (error); 892} 893 894/* 895 * File table vnode read routine. 896 */ 897static int 898vn_read(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) 899{ 900 struct vnode *vp; 901 int error; 902 int ioflag; 903 off_t count; 904 905 vp = (struct vnode *)fp->f_fglob->fg_data; 906 if ( (error = vnode_getwithref(vp)) ) { 907 return(error); 908 } 909 910#if CONFIG_MACF 911 error = mac_vnode_check_read(ctx, vfs_context_ucred(ctx), vp); 912 if (error) { 913 (void)vnode_put(vp); 914 return (error); 915 } 916#endif 917 918 /* This signals to VNOP handlers that this read came from a file table read */ 919 ioflag = IO_SYSCALL_DISPATCH; 920 921 if (fp->f_fglob->fg_flag & FNONBLOCK) 922 ioflag |= IO_NDELAY; 923 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) 924 ioflag |= IO_NOCACHE; 925 if (fp->f_fglob->fg_flag & FENCRYPTED) { 926 ioflag |= IO_ENCRYPTED; 927 } 928 if (fp->f_fglob->fg_flag & FNORDAHEAD) 929 ioflag |= IO_RAOFF; 930 931 if ((flags & FOF_OFFSET) == 0) 932 uio->uio_offset = fp->f_fglob->fg_offset; 933 count = uio_resid(uio); 934 935 if (vnode_isswap(vp)) { 936 /* special case for swap files */ 937 error = vn_read_swapfile(vp, uio); 938 } else { 939 error = VNOP_READ(vp, uio, ioflag, ctx); 940 } 941 if ((flags & FOF_OFFSET) == 0) 942 fp->f_fglob->fg_offset += count - uio_resid(uio); 943 944 (void)vnode_put(vp); 945 return (error); 946} 947 948 949/* 950 * File table vnode write routine. 951 */ 952static int 953vn_write(struct fileproc *fp, struct uio *uio, int flags, vfs_context_t ctx) 954{ 955 struct vnode *vp; 956 int error, ioflag; 957 off_t count; 958 int clippedsize = 0; 959 int partialwrite=0; 960 int residcount, oldcount; 961 proc_t p = vfs_context_proc(ctx); 962 963 count = 0; 964 vp = (struct vnode *)fp->f_fglob->fg_data; 965 if ( (error = vnode_getwithref(vp)) ) { 966 return(error); 967 } 968 969#if CONFIG_MACF 970 error = mac_vnode_check_write(ctx, vfs_context_ucred(ctx), vp); 971 if (error) { 972 (void)vnode_put(vp); 973 return (error); 974 } 975#endif 976 977 /* 978 * IO_SYSCALL_DISPATCH signals to VNOP handlers that this write came from 979 * a file table write 980 */ 981 ioflag = (IO_UNIT | IO_SYSCALL_DISPATCH); 982 983 if (vp->v_type == VREG && (fp->f_fglob->fg_flag & O_APPEND)) 984 ioflag |= IO_APPEND; 985 if (fp->f_fglob->fg_flag & FNONBLOCK) 986 ioflag |= IO_NDELAY; 987 if ((fp->f_fglob->fg_flag & FNOCACHE) || vnode_isnocache(vp)) 988 ioflag |= IO_NOCACHE; 989 if (fp->f_fglob->fg_flag & FNODIRECT) 990 ioflag |= IO_NODIRECT; 991 if (fp->f_fglob->fg_flag & FSINGLE_WRITER) 992 ioflag |= IO_SINGLE_WRITER; 993 994 /* 995 * Treat synchronous mounts and O_FSYNC on the fd as equivalent. 996 * 997 * XXX We treat O_DSYNC as O_FSYNC for now, since we can not delay 998 * XXX the non-essential metadata without some additional VFS work; 999 * XXX the intent at this point is to plumb the interface for it. 1000 */ 1001 if ((fp->f_fglob->fg_flag & (O_FSYNC|O_DSYNC)) || 1002 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))) { 1003 ioflag |= IO_SYNC; 1004 } 1005 1006 if ((flags & FOF_OFFSET) == 0) { 1007 uio->uio_offset = fp->f_fglob->fg_offset; 1008 count = uio_resid(uio); 1009 } 1010 if (((flags & FOF_OFFSET) == 0) && 1011 vfs_context_proc(ctx) && (vp->v_type == VREG) && 1012 (((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) || 1013 ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)))) { 1014 /* 1015 * If the requested residual would cause us to go past the 1016 * administrative limit, then we need to adjust the residual 1017 * down to cause fewer bytes than requested to be written. If 1018 * we can't do that (e.g. the residual is already 1 byte), 1019 * then we fail the write with EFBIG. 1020 */ 1021 residcount = uio_resid(uio); 1022 if ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { 1023 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; 1024 } else if ((rlim_t)uio_resid(uio) > (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset)) { 1025 clippedsize = (p->p_rlimit[RLIMIT_FSIZE].rlim_cur - uio->uio_offset); 1026 } 1027 if (clippedsize >= residcount) { 1028 psignal(p, SIGXFSZ); 1029 vnode_put(vp); 1030 return (EFBIG); 1031 } 1032 partialwrite = 1; 1033 uio_setresid(uio, residcount-clippedsize); 1034 } 1035 if ((flags & FOF_OFFSET) != 0) { 1036 /* for pwrite, append should be ignored */ 1037 ioflag &= ~IO_APPEND; 1038 if (p && (vp->v_type == VREG) && 1039 ((rlim_t)uio->uio_offset >= p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { 1040 psignal(p, SIGXFSZ); 1041 vnode_put(vp); 1042 return (EFBIG); 1043 } 1044 if (p && (vp->v_type == VREG) && 1045 ((rlim_t)(uio->uio_offset + uio_resid(uio)) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { 1046 //Debugger("vn_bwrite:overstepping the bounds"); 1047 residcount = uio_resid(uio); 1048 clippedsize = (uio->uio_offset + uio_resid(uio)) - p->p_rlimit[RLIMIT_FSIZE].rlim_cur; 1049 partialwrite = 1; 1050 uio_setresid(uio, residcount-clippedsize); 1051 } 1052 } 1053 1054 error = VNOP_WRITE(vp, uio, ioflag, ctx); 1055 1056 if (partialwrite) { 1057 oldcount = uio_resid(uio); 1058 uio_setresid(uio, oldcount + clippedsize); 1059 } 1060 1061 if ((flags & FOF_OFFSET) == 0) { 1062 if (ioflag & IO_APPEND) 1063 fp->f_fglob->fg_offset = uio->uio_offset; 1064 else 1065 fp->f_fglob->fg_offset += count - uio_resid(uio); 1066 } 1067 1068 /* 1069 * Set the credentials on successful writes 1070 */ 1071 if ((error == 0) && (vp->v_tag == VT_NFS) && (UBCINFOEXISTS(vp))) { 1072 /* 1073 * When called from aio subsystem, we only have the proc from 1074 * which to get the credential, at this point, so use that 1075 * instead. This means aio functions are incompatible with 1076 * per-thread credentials (aio operations are proxied). We 1077 * can't easily correct the aio vs. settid race in this case 1078 * anyway, so we disallow it. 1079 */ 1080 if ((flags & FOF_PCRED) == 0) { 1081 ubc_setthreadcred(vp, p, current_thread()); 1082 } else { 1083 ubc_setcred(vp, p); 1084 } 1085 } 1086 (void)vnode_put(vp); 1087 return (error); 1088} 1089 1090/* 1091 * File table vnode stat routine. 1092 * 1093 * Returns: 0 Success 1094 * EBADF 1095 * ENOMEM 1096 * vnode_getattr:??? 1097 */ 1098int 1099vn_stat_noauth(struct vnode *vp, void *sbptr, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) 1100{ 1101 struct vnode_attr va; 1102 int error; 1103 u_short mode; 1104 kauth_filesec_t fsec; 1105 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ 1106 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ 1107 1108 if (isstat64 != 0) 1109 sb64 = (struct stat64 *)sbptr; 1110 else 1111 sb = (struct stat *)sbptr; 1112 memset(&va, 0, sizeof(va)); 1113 VATTR_INIT(&va); 1114 VATTR_WANTED(&va, va_fsid); 1115 VATTR_WANTED(&va, va_fileid); 1116 VATTR_WANTED(&va, va_mode); 1117 VATTR_WANTED(&va, va_type); 1118 VATTR_WANTED(&va, va_nlink); 1119 VATTR_WANTED(&va, va_uid); 1120 VATTR_WANTED(&va, va_gid); 1121 VATTR_WANTED(&va, va_rdev); 1122 VATTR_WANTED(&va, va_data_size); 1123 VATTR_WANTED(&va, va_access_time); 1124 VATTR_WANTED(&va, va_modify_time); 1125 VATTR_WANTED(&va, va_change_time); 1126 VATTR_WANTED(&va, va_create_time); 1127 VATTR_WANTED(&va, va_flags); 1128 VATTR_WANTED(&va, va_gen); 1129 VATTR_WANTED(&va, va_iosize); 1130 /* lower layers will synthesise va_total_alloc from va_data_size if required */ 1131 VATTR_WANTED(&va, va_total_alloc); 1132 if (xsec != NULL) { 1133 VATTR_WANTED(&va, va_uuuid); 1134 VATTR_WANTED(&va, va_guuid); 1135 VATTR_WANTED(&va, va_acl); 1136 } 1137 error = vnode_getattr(vp, &va, ctx); 1138 if (error) 1139 goto out; 1140 /* 1141 * Copy from vattr table 1142 */ 1143 if (isstat64 != 0) { 1144 sb64->st_dev = va.va_fsid; 1145 sb64->st_ino = (ino64_t)va.va_fileid; 1146 1147 } else { 1148 sb->st_dev = va.va_fsid; 1149 sb->st_ino = (ino_t)va.va_fileid; 1150 } 1151 mode = va.va_mode; 1152 switch (vp->v_type) { 1153 case VREG: 1154 mode |= S_IFREG; 1155 break; 1156 case VDIR: 1157 mode |= S_IFDIR; 1158 break; 1159 case VBLK: 1160 mode |= S_IFBLK; 1161 break; 1162 case VCHR: 1163 mode |= S_IFCHR; 1164 break; 1165 case VLNK: 1166 mode |= S_IFLNK; 1167 break; 1168 case VSOCK: 1169 mode |= S_IFSOCK; 1170 break; 1171 case VFIFO: 1172 mode |= S_IFIFO; 1173 break; 1174 default: 1175 error = EBADF; 1176 goto out; 1177 }; 1178 if (isstat64 != 0) { 1179 sb64->st_mode = mode; 1180 sb64->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1; 1181 sb64->st_uid = va.va_uid; 1182 sb64->st_gid = va.va_gid; 1183 sb64->st_rdev = va.va_rdev; 1184 sb64->st_size = va.va_data_size; 1185 sb64->st_atimespec = va.va_access_time; 1186 sb64->st_mtimespec = va.va_modify_time; 1187 sb64->st_ctimespec = va.va_change_time; 1188 sb64->st_birthtimespec = 1189 VATTR_IS_SUPPORTED(&va, va_create_time) ? va.va_create_time : va.va_change_time; 1190 sb64->st_blksize = va.va_iosize; 1191 sb64->st_flags = va.va_flags; 1192 sb64->st_blocks = roundup(va.va_total_alloc, 512) / 512; 1193 } else { 1194 sb->st_mode = mode; 1195 sb->st_nlink = VATTR_IS_SUPPORTED(&va, va_nlink) ? (u_int16_t)va.va_nlink : 1; 1196 sb->st_uid = va.va_uid; 1197 sb->st_gid = va.va_gid; 1198 sb->st_rdev = va.va_rdev; 1199 sb->st_size = va.va_data_size; 1200 sb->st_atimespec = va.va_access_time; 1201 sb->st_mtimespec = va.va_modify_time; 1202 sb->st_ctimespec = va.va_change_time; 1203 sb->st_blksize = va.va_iosize; 1204 sb->st_flags = va.va_flags; 1205 sb->st_blocks = roundup(va.va_total_alloc, 512) / 512; 1206 } 1207 1208 /* if we're interested in extended security data and we got an ACL */ 1209 if (xsec != NULL) { 1210 if (!VATTR_IS_SUPPORTED(&va, va_acl) && 1211 !VATTR_IS_SUPPORTED(&va, va_uuuid) && 1212 !VATTR_IS_SUPPORTED(&va, va_guuid)) { 1213 *xsec = KAUTH_FILESEC_NONE; 1214 } else { 1215 1216 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { 1217 fsec = kauth_filesec_alloc(va.va_acl->acl_entrycount); 1218 } else { 1219 fsec = kauth_filesec_alloc(0); 1220 } 1221 if (fsec == NULL) { 1222 error = ENOMEM; 1223 goto out; 1224 } 1225 fsec->fsec_magic = KAUTH_FILESEC_MAGIC; 1226 if (VATTR_IS_SUPPORTED(&va, va_uuuid)) { 1227 fsec->fsec_owner = va.va_uuuid; 1228 } else { 1229 fsec->fsec_owner = kauth_null_guid; 1230 } 1231 if (VATTR_IS_SUPPORTED(&va, va_guuid)) { 1232 fsec->fsec_group = va.va_guuid; 1233 } else { 1234 fsec->fsec_group = kauth_null_guid; 1235 } 1236 if (VATTR_IS_SUPPORTED(&va, va_acl) && (va.va_acl != NULL)) { 1237 bcopy(va.va_acl, &(fsec->fsec_acl), KAUTH_ACL_COPYSIZE(va.va_acl)); 1238 } else { 1239 fsec->fsec_acl.acl_entrycount = KAUTH_FILESEC_NOACL; 1240 } 1241 *xsec = fsec; 1242 } 1243 } 1244 1245 /* Do not give the generation number out to unpriviledged users */ 1246 if (va.va_gen && !vfs_context_issuser(ctx)) { 1247 if (isstat64 != 0) 1248 sb64->st_gen = 0; 1249 else 1250 sb->st_gen = 0; 1251 } else { 1252 if (isstat64 != 0) 1253 sb64->st_gen = va.va_gen; 1254 else 1255 sb->st_gen = va.va_gen; 1256 } 1257 1258 error = 0; 1259out: 1260 if (VATTR_IS_SUPPORTED(&va, va_acl) && va.va_acl != NULL) 1261 kauth_acl_free(va.va_acl); 1262 return (error); 1263} 1264 1265int 1266vn_stat(struct vnode *vp, void *sb, kauth_filesec_t *xsec, int isstat64, vfs_context_t ctx) 1267{ 1268 int error; 1269 1270#if CONFIG_MACF 1271 error = mac_vnode_check_stat(ctx, NOCRED, vp); 1272 if (error) 1273 return (error); 1274#endif 1275 1276 /* authorize */ 1277 if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_ATTRIBUTES | KAUTH_VNODE_READ_SECURITY, ctx)) != 0) 1278 return(error); 1279 1280 /* actual stat */ 1281 return(vn_stat_noauth(vp, sb, xsec, isstat64, ctx)); 1282} 1283 1284 1285/* 1286 * File table vnode ioctl routine. 1287 */ 1288static int 1289vn_ioctl(struct fileproc *fp, u_long com, caddr_t data, vfs_context_t ctx) 1290{ 1291 struct vnode *vp = ((struct vnode *)fp->f_fglob->fg_data); 1292 off_t file_size; 1293 int error; 1294 struct vnode *ttyvp; 1295 int funnel_state; 1296 struct session * sessp; 1297 1298 if ( (error = vnode_getwithref(vp)) ) { 1299 return(error); 1300 } 1301 1302#if CONFIG_MACF 1303 error = mac_vnode_check_ioctl(ctx, vp, com); 1304 if (error) 1305 goto out; 1306#endif 1307 1308 switch (vp->v_type) { 1309 case VREG: 1310 case VDIR: 1311 if (com == FIONREAD) { 1312 if ((error = vnode_size(vp, &file_size, ctx)) != 0) 1313 goto out; 1314 *(int *)data = file_size - fp->f_fglob->fg_offset; 1315 goto out; 1316 } 1317 if (com == FIONBIO || com == FIOASYNC) { /* XXX */ 1318 goto out; 1319 } 1320 /* fall into ... */ 1321 1322 default: 1323 error = ENOTTY; 1324 goto out; 1325 1326 case VFIFO: 1327 case VCHR: 1328 case VBLK: 1329 1330 /* Should not be able to set block size from user space */ 1331 if (com == DKIOCSETBLOCKSIZE) { 1332 error = EPERM; 1333 goto out; 1334 } 1335 1336 if (com == FIODTYPE) { 1337 if (vp->v_type == VBLK) { 1338 if (major(vp->v_rdev) >= nblkdev) { 1339 error = ENXIO; 1340 goto out; 1341 } 1342 *(int *)data = bdevsw[major(vp->v_rdev)].d_type; 1343 1344 } else if (vp->v_type == VCHR) { 1345 if (major(vp->v_rdev) >= nchrdev) { 1346 error = ENXIO; 1347 goto out; 1348 } 1349 *(int *)data = cdevsw[major(vp->v_rdev)].d_type; 1350 } else { 1351 error = ENOTTY; 1352 goto out; 1353 } 1354 goto out; 1355 } 1356 error = VNOP_IOCTL(vp, com, data, fp->f_fglob->fg_flag, ctx); 1357 1358 if (error == 0 && com == TIOCSCTTY) { 1359 error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE); 1360 if (error != 0) { 1361 panic("vnode_ref_ext() failed despite VNODE_REF_FORCE?!"); 1362 } 1363 1364 funnel_state = thread_funnel_set(kernel_flock, TRUE); 1365 sessp = proc_session(vfs_context_proc(ctx)); 1366 1367 session_lock(sessp); 1368 ttyvp = sessp->s_ttyvp; 1369 sessp->s_ttyvp = vp; 1370 sessp->s_ttyvid = vnode_vid(vp); 1371 session_unlock(sessp); 1372 session_rele(sessp); 1373 thread_funnel_set(kernel_flock, funnel_state); 1374 1375 if (ttyvp) 1376 vnode_rele(ttyvp); 1377 } 1378 } 1379out: 1380 (void)vnode_put(vp); 1381 return(error); 1382} 1383 1384/* 1385 * File table vnode select routine. 1386 */ 1387static int 1388vn_select(struct fileproc *fp, int which, void *wql, __unused vfs_context_t ctx) 1389{ 1390 int error; 1391 struct vnode * vp = (struct vnode *)fp->f_fglob->fg_data; 1392 struct vfs_context context; 1393 1394 if ( (error = vnode_getwithref(vp)) == 0 ) { 1395 context.vc_thread = current_thread(); 1396 context.vc_ucred = fp->f_fglob->fg_cred; 1397 1398#if CONFIG_MACF 1399 /* 1400 * XXX We should use a per thread credential here; minimally, 1401 * XXX the process credential should have a persistent 1402 * XXX reference on it before being passed in here. 1403 */ 1404 error = mac_vnode_check_select(ctx, vp, which); 1405 if (error == 0) 1406#endif 1407 error = VNOP_SELECT(vp, which, fp->f_fglob->fg_flag, wql, ctx); 1408 1409 (void)vnode_put(vp); 1410 } 1411 return(error); 1412 1413} 1414 1415/* 1416 * File table vnode close routine. 1417 */ 1418static int 1419vn_closefile(struct fileglob *fg, vfs_context_t ctx) 1420{ 1421 struct vnode *vp = (struct vnode *)fg->fg_data; 1422 int error; 1423 struct flock lf; 1424 1425 if ( (error = vnode_getwithref(vp)) == 0 ) { 1426 1427 if ((fg->fg_flag & FHASLOCK) && 1428 FILEGLOB_DTYPE(fg) == DTYPE_VNODE) { 1429 lf.l_whence = SEEK_SET; 1430 lf.l_start = 0; 1431 lf.l_len = 0; 1432 lf.l_type = F_UNLCK; 1433 1434 (void)VNOP_ADVLOCK(vp, (caddr_t)fg, F_UNLCK, &lf, F_FLOCK, ctx, NULL); 1435 } 1436 error = vn_close(vp, fg->fg_flag, ctx); 1437 1438 (void)vnode_put(vp); 1439 } 1440 return(error); 1441} 1442 1443/* 1444 * Returns: 0 Success 1445 * VNOP_PATHCONF:??? 1446 */ 1447int 1448vn_pathconf(vnode_t vp, int name, int32_t *retval, vfs_context_t ctx) 1449{ 1450 int error = 0; 1451 struct vfs_attr vfa; 1452 1453 switch(name) { 1454 case _PC_EXTENDED_SECURITY_NP: 1455 *retval = vfs_extendedsecurity(vnode_mount(vp)) ? 1 : 0; 1456 break; 1457 case _PC_AUTH_OPAQUE_NP: 1458 *retval = vfs_authopaque(vnode_mount(vp)); 1459 break; 1460 case _PC_2_SYMLINKS: 1461 *retval = 1; /* XXX NOTSUP on MSDOS, etc. */ 1462 break; 1463 case _PC_ALLOC_SIZE_MIN: 1464 *retval = 1; /* XXX lie: 1 byte */ 1465 break; 1466 case _PC_ASYNC_IO: /* unistd.h: _POSIX_ASYNCHRONUS_IO */ 1467 *retval = 1; /* [AIO] option is supported */ 1468 break; 1469 case _PC_PRIO_IO: /* unistd.h: _POSIX_PRIORITIZED_IO */ 1470 *retval = 0; /* [PIO] option is not supported */ 1471 break; 1472 case _PC_REC_INCR_XFER_SIZE: 1473 *retval = 4096; /* XXX go from MIN to MAX 4K at a time */ 1474 break; 1475 case _PC_REC_MIN_XFER_SIZE: 1476 *retval = 4096; /* XXX recommend 4K minimum reads/writes */ 1477 break; 1478 case _PC_REC_MAX_XFER_SIZE: 1479 *retval = 65536; /* XXX recommend 64K maximum reads/writes */ 1480 break; 1481 case _PC_REC_XFER_ALIGN: 1482 *retval = 4096; /* XXX recommend page aligned buffers */ 1483 break; 1484 case _PC_SYMLINK_MAX: 1485 *retval = 255; /* Minimum acceptable POSIX value */ 1486 break; 1487 case _PC_SYNC_IO: /* unistd.h: _POSIX_SYNCHRONIZED_IO */ 1488 *retval = 0; /* [SIO] option is not supported */ 1489 break; 1490 case _PC_XATTR_SIZE_BITS: 1491 /* The number of bits used to store maximum extended 1492 * attribute size in bytes. For example, if the maximum 1493 * attribute size supported by a file system is 128K, the 1494 * value returned will be 18. However a value 18 can mean 1495 * that the maximum attribute size can be anywhere from 1496 * (256KB - 1) to 128KB. As a special case, the resource 1497 * fork can have much larger size, and some file system 1498 * specific extended attributes can have smaller and preset 1499 * size; for example, Finder Info is always 32 bytes. 1500 */ 1501 memset(&vfa, 0, sizeof(vfa)); 1502 VFSATTR_INIT(&vfa); 1503 VFSATTR_WANTED(&vfa, f_capabilities); 1504 if (vfs_getattr(vnode_mount(vp), &vfa, ctx) == 0 && 1505 (VFSATTR_IS_SUPPORTED(&vfa, f_capabilities)) && 1506 (vfa.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) && 1507 (vfa.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) { 1508 /* Supports native extended attributes */ 1509 error = VNOP_PATHCONF(vp, name, retval, ctx); 1510 } else { 1511 /* Number of bits used to represent the maximum size of 1512 * extended attribute stored in an Apple Double file. 1513 */ 1514 *retval = AD_XATTR_SIZE_BITS; 1515 } 1516 break; 1517 default: 1518 error = VNOP_PATHCONF(vp, name, retval, ctx); 1519 break; 1520 } 1521 1522 return (error); 1523} 1524 1525static int 1526vn_kqfilt_add(struct fileproc *fp, struct knote *kn, vfs_context_t ctx) 1527{ 1528 int error; 1529 struct vnode *vp; 1530 1531 vp = (struct vnode *)fp->f_fglob->fg_data; 1532 1533 /* 1534 * Don't attach a knote to a dead vnode. 1535 */ 1536 if ((error = vget_internal(vp, 0, VNODE_NODEAD)) == 0) { 1537 switch (kn->kn_filter) { 1538 case EVFILT_READ: 1539 case EVFILT_WRITE: 1540 if (vnode_isfifo(vp)) { 1541 /* We'll only watch FIFOs that use our fifofs */ 1542 if (!(vp->v_fifoinfo && vp->v_fifoinfo->fi_readsock)) { 1543 error = ENOTSUP; 1544 } 1545 1546 } else if (!vnode_isreg(vp)) { 1547 if (vnode_ischr(vp) && 1548 (error = spec_kqfilter(vp, kn)) == 0) { 1549 /* claimed by a special device */ 1550 vnode_put(vp); 1551 return 0; 1552 } 1553 1554 error = EINVAL; 1555 } 1556 break; 1557 case EVFILT_VNODE: 1558 break; 1559 default: 1560 error = EINVAL; 1561 } 1562 1563 if (error) { 1564 vnode_put(vp); 1565 return error; 1566 } 1567 1568#if CONFIG_MACF 1569 error = mac_vnode_check_kqfilter(ctx, fp->f_fglob->fg_cred, kn, vp); 1570 if (error) { 1571 vnode_put(vp); 1572 return error; 1573 } 1574#endif 1575 1576 kn->kn_hook = (void*)vp; 1577 kn->kn_hookid = vnode_vid(vp); 1578 kn->kn_fop = &vnode_filtops; 1579 1580 vnode_lock(vp); 1581 KNOTE_ATTACH(&vp->v_knotes, kn); 1582 vnode_unlock(vp); 1583 1584 /* Ask the filesystem to provide remove notifications, but ignore failure */ 1585 VNOP_MONITOR(vp, 0, VNODE_MONITOR_BEGIN, (void*) kn, ctx); 1586 1587 vnode_put(vp); 1588 } 1589 1590 return (error); 1591} 1592 1593static void 1594filt_vndetach(struct knote *kn) 1595{ 1596 vfs_context_t ctx = vfs_context_current(); 1597 struct vnode *vp; 1598 vp = (struct vnode *)kn->kn_hook; 1599 if (vnode_getwithvid(vp, kn->kn_hookid)) 1600 return; 1601 1602 vnode_lock(vp); 1603 KNOTE_DETACH(&vp->v_knotes, kn); 1604 vnode_unlock(vp); 1605 1606 /* 1607 * Tell a (generally networked) filesystem that we're no longer watching 1608 * If the FS wants to track contexts, it should still be using the one from 1609 * the VNODE_MONITOR_BEGIN. 1610 */ 1611 VNOP_MONITOR(vp, 0, VNODE_MONITOR_END, (void*)kn, ctx); 1612 vnode_put(vp); 1613} 1614 1615 1616/* 1617 * Used for EVFILT_READ 1618 * 1619 * Takes only VFIFO or VREG. vnode is locked. We handle the "poll" case 1620 * differently than the regular case for VREG files. If not in poll(), 1621 * then we need to know current fileproc offset for VREG. 1622 */ 1623static intptr_t 1624vnode_readable_data_count(vnode_t vp, off_t current_offset, int ispoll) 1625{ 1626 if (vnode_isfifo(vp)) { 1627#if FIFO 1628 int cnt; 1629 int err = fifo_charcount(vp, &cnt); 1630 if (err == 0) { 1631 return (intptr_t)cnt; 1632 } else 1633#endif 1634 { 1635 return (intptr_t)0; 1636 } 1637 } else if (vnode_isreg(vp)) { 1638 if (ispoll) { 1639 return (intptr_t)1; 1640 } 1641 1642 off_t amount; 1643 amount = vp->v_un.vu_ubcinfo->ui_size - current_offset; 1644 if (amount > (off_t)INTPTR_MAX) { 1645 return INTPTR_MAX; 1646 } else if (amount < (off_t)INTPTR_MIN) { 1647 return INTPTR_MIN; 1648 } else { 1649 return (intptr_t)amount; 1650 } 1651 } else { 1652 panic("Should never have an EVFILT_READ except for reg or fifo."); 1653 return 0; 1654 } 1655} 1656 1657/* 1658 * Used for EVFILT_WRITE. 1659 * 1660 * For regular vnodes, we can always write (1). For named pipes, 1661 * see how much space there is in the buffer. Nothing else is covered. 1662 */ 1663static intptr_t 1664vnode_writable_space_count(vnode_t vp) 1665{ 1666 if (vnode_isfifo(vp)) { 1667#if FIFO 1668 long spc; 1669 int err = fifo_freespace(vp, &spc); 1670 if (err == 0) { 1671 return (intptr_t)spc; 1672 } else 1673#endif 1674 { 1675 return (intptr_t)0; 1676 } 1677 } else if (vnode_isreg(vp)) { 1678 return (intptr_t)1; 1679 } else { 1680 panic("Should never have an EVFILT_READ except for reg or fifo."); 1681 return 0; 1682 } 1683} 1684 1685/* 1686 * Determine whether this knote should be active 1687 * 1688 * This is kind of subtle. 1689 * --First, notice if the vnode has been revoked: in so, override hint 1690 * --EVFILT_READ knotes are checked no matter what the hint is 1691 * --Other knotes activate based on hint. 1692 * --If hint is revoke, set special flags and activate 1693 */ 1694static int 1695filt_vnode(struct knote *kn, long hint) 1696{ 1697 vnode_t vp = (struct vnode *)kn->kn_hook; 1698 int activate = 0; 1699 long orig_hint = hint; 1700 1701 if (0 == hint) { 1702 vnode_lock(vp); 1703 1704 if (vnode_getiocount(vp, kn->kn_hookid, VNODE_NODEAD | VNODE_WITHID) != 0) { 1705 /* Is recycled */ 1706 hint = NOTE_REVOKE; 1707 } 1708 } else { 1709 lck_mtx_assert(&vp->v_lock, LCK_MTX_ASSERT_OWNED); 1710 } 1711 1712 /* Special handling for vnodes that are in recycle or already gone */ 1713 if (NOTE_REVOKE == hint) { 1714 kn->kn_flags |= (EV_EOF | EV_ONESHOT); 1715 activate = 1; 1716 1717 if ((kn->kn_filter == EVFILT_VNODE) && (kn->kn_sfflags & NOTE_REVOKE)) { 1718 kn->kn_fflags |= NOTE_REVOKE; 1719 } 1720 } else { 1721 switch(kn->kn_filter) { 1722 case EVFILT_READ: 1723 kn->kn_data = vnode_readable_data_count(vp, kn->kn_fp->f_fglob->fg_offset, (kn->kn_flags & EV_POLL)); 1724 1725 if (kn->kn_data != 0) { 1726 activate = 1; 1727 } 1728 break; 1729 case EVFILT_WRITE: 1730 kn->kn_data = vnode_writable_space_count(vp); 1731 1732 if (kn->kn_data != 0) { 1733 activate = 1; 1734 } 1735 break; 1736 case EVFILT_VNODE: 1737 /* Check events this note matches against the hint */ 1738 if (kn->kn_sfflags & hint) { 1739 kn->kn_fflags |= hint; /* Set which event occurred */ 1740 } 1741 if (kn->kn_fflags != 0) { 1742 activate = 1; 1743 } 1744 break; 1745 default: 1746 panic("Invalid knote filter on a vnode!\n"); 1747 } 1748 } 1749 1750 if (orig_hint == 0) { 1751 /* 1752 * Definitely need to unlock, may need to put 1753 */ 1754 if (hint == 0) { 1755 vnode_put_locked(vp); 1756 } 1757 vnode_unlock(vp); 1758 } 1759 1760 return (activate); 1761} 1762