1/* 2 * Copyright (c) 1995-2012 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1989, 1993 30 * The Regents of the University of California. All rights reserved. 31 * (c) UNIX System Laboratories, Inc. 32 * All or some portions of this file are derived from material licensed 33 * to the University of California by American Telephone and Telegraph 34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 35 * the permission of UNIX System Laboratories, Inc. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. 
Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)vfs_syscalls.c 8.41 (Berkeley) 6/15/95 66 */ 67/* 68 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 69 * support for mandatory and extensible security protections. This notice 70 * is included in support of clause 2.2 (b) of the Apple Public License, 71 * Version 2.0. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/namei.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/vnode_internal.h>
#include <sys/mount_internal.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/uio_internal.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/dirent.h>
#include <sys/attr.h>
#include <sys/sysctl.h>
#include <sys/ubc.h>
#include <sys/quota.h>
#include <sys/kdebug.h>
#include <sys/fsevents.h>
#include <sys/imgsrc.h>
#include <sys/sysproto.h>
#include <sys/xattr.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/ubc_internal.h>
#include <sys/disk.h>
#include <machine/cons.h>
#include <machine/limits.h>
#include <miscfs/specfs/specdev.h>

#include <security/audit/audit.h>
#include <bsm/audit_kevents.h>

#include <mach/mach_types.h>
#include <kern/kern_types.h>
#include <kern/kalloc.h>
#include <kern/task.h>

#include <vm/vm_pageout.h>

#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>

#if CONFIG_MACF
#include <security/mac.h>
#include <security/mac_framework.h>
#endif

/*
 * Scratch MAXPATHLEN pathname buffer helpers: when fsevents support is
 * compiled in, the fsevents pathbuff pool is used; otherwise a plain
 * M_NAMEI zone allocation is made.  Callers must pair GET_PATH with
 * RELEASE_PATH on every path.
 */
#if CONFIG_FSE
#define GET_PATH(x) \
	(x) = get_pathbuff();
#define RELEASE_PATH(x) \
	release_pathbuff(x);
#else
#define GET_PATH(x)	\
	MALLOC_ZONE((x), char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
#define RELEASE_PATH(x) \
	FREE_ZONE((x), MAXPATHLEN, M_NAMEI);
#endif /* CONFIG_FSE */

/* struct for checkdirs iteration */
struct cdirargs {
	vnode_t olddp;
	vnode_t newdp;
};
/* callback for checkdirs iteration */
static int checkdirs_callback(proc_t p, void * arg);

/* forward declarations for the VFS syscall implementation helpers below */
static int change_dir(struct nameidata *ndp, vfs_context_t ctx);
static int checkdirs(vnode_t olddp, vfs_context_t ctx);
void enablequotas(struct mount *mp, vfs_context_t ctx);
static int getfsstat_callback(mount_t mp, void * arg);
static int getutimes(user_addr_t usrtvp, struct timespec *tsp);
static int setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, int nullflag);
static int sync_callback(mount_t, void *);
static int munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy);
static int statfs64_common(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp);
static int fsync_common(proc_t p, struct fsync_args *uap, int flags);
static int mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
    struct componentname *cnp, user_addr_t fsmountargs,
    int flags, uint32_t internal_flags, char *labelstr, boolean_t kernelmount,
    vfs_context_t ctx);
void vfs_notify_mount(vnode_t pdvp);

int prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth);

#ifdef CONFIG_IMGSRC_ACCESS
static int authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx);
static int place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx);
static void undo_place_on_covered_vp(mount_t mp, vnode_t vp);
static int mount_begin_update(mount_t mp, vfs_context_t ctx, int flags);
static void mount_end_update(mount_t mp);
static int relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, const char *fsname, vfs_context_t ctx, boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index);
#endif /* CONFIG_IMGSRC_ACCESS */

/* hook installed by the union filesystem (if loaded); NULL otherwise */
int (*union_dircheckp)(struct vnode **, struct fileproc *, vfs_context_t);

__private_extern__
int sync_internal(void);

__private_extern__
int unlink1(vfs_context_t, struct nameidata *, int);

/*
 * incremented each time a mount or unmount operation occurs
 * used to invalidate the cached value of the rootvp in the
 *
  mount structure utilized by cache_lookup_path
 */
uint32_t mount_generation = 0;

/* counts number of mount and unmount operations */
unsigned int vfs_nummntops=0;

extern const struct fileops vnops;
#if CONFIG_APPLEDOUBLE
extern errno_t rmdir_remove_orphaned_appleDouble(vnode_t, vfs_context_t, int *);
#endif /* CONFIG_APPLEDOUBLE */

/*
 * Virtual File System System Calls
 */

#if NFSCLIENT
/*
 * Private in-kernel mounting spi (NFS only, not exported)
 */
 __private_extern__
boolean_t
vfs_iskernelmount(mount_t mp)
{
	/* TRUE iff this mount was created via kernel_mount() below */
	return ((mp->mnt_kern_flag & MNTK_KERNEL_MOUNT) ? TRUE : FALSE);
}

/*
 * kernel_mount: in-kernel mount entry point (NFS only).
 *
 * If 'vp' is NULLVP, the covered vnode and its parent are looked up
 * from 'path' via namei(); otherwise the caller supplies vp/pvp and
 * 'path' is only used to seed the component name passed down to
 * mount_common().  Iocounts taken by namei() here are dropped before
 * returning; caller-supplied vp/pvp references are left untouched.
 */
 __private_extern__
int
kernel_mount(char *fstype, vnode_t pvp, vnode_t vp, const char *path,
             void *data, __unused size_t datalen, int syscall_flags, __unused uint32_t kern_flags, vfs_context_t ctx)
{
	struct nameidata nd;
	boolean_t did_namei;
	int error;

	NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_SYSSPACE, CAST_USER_ADDR_T(path), ctx);

	/*
	 * Get the vnode to be covered if it's not supplied
	 */
	if (vp == NULLVP) {
		error = namei(&nd);
		if (error)
			return (error);
		vp = nd.ni_vp;
		pvp = nd.ni_dvp;
		did_namei = TRUE;
	} else {
		char *pnbuf = CAST_DOWN(char *, path);

		/* fake up just enough of the componentname for mount_common() */
		nd.ni_cnd.cn_pnbuf = pnbuf;
		nd.ni_cnd.cn_pnlen = strlen(pnbuf) + 1;
		did_namei = FALSE;
	}

	error = mount_common(fstype, pvp, vp, &nd.ni_cnd, CAST_USER_ADDR_T(data),
	                     syscall_flags, kern_flags, NULL, TRUE, ctx);

	if (did_namei) {
		/* drop the iocounts namei() gave us on vp and its parent */
		vnode_put(vp);
		vnode_put(pvp);
		nameidone(&nd);
	}

	return (error);
}
#endif /* NFSCLIENT */

/*
 * Mount a file system.
 */
/* ARGSUSED */
int
mount(proc_t p, struct mount_args *uap, __unused int32_t *retval)
{
	/*
	 * mount(2) is a thin shim over __mac_mount() with no MAC label
	 * argument (mac_p = USER_ADDR_NULL).
	 */
	struct __mac_mount_args muap;

	muap.type = uap->type;
	muap.path = uap->path;
	muap.flags = uap->flags;
	muap.data = uap->data;
	muap.mac_p = USER_ADDR_NULL;
	return (__mac_mount(p, &muap, retval));
}

/*
 * Broadcast a VQ_MOUNT vfs event and post a NOTE_WRITE knote on the
 * parent directory of the new mount point.
 */
void
vfs_notify_mount(vnode_t pdvp)
{
	vfs_event_signal(NULL, VQ_MOUNT, (intptr_t)NULL);
	lock_vnode_and_post(pdvp, NOTE_WRITE);
}

/*
 * __mac_mount:
 *	Mount a file system taking into account MAC label behavior.
 *	See mount(2) man page for more information
 *
 * Parameters:    p                        Process requesting the mount
 *                uap                      User argument descriptor (see below)
 *                retval                   (ignored)
 *
 * Indirect:      uap->type                Filesystem type
 *                uap->path                Path to mount
 *                uap->data                Mount arguments
 *                uap->mac_p               MAC info
 *                uap->flags               Mount flags
 *
 *
 * Returns:        0                       Success
 *                !0                       Not success
 */
/* set when someone attempts to mount the root FS read/write; see below */
boolean_t root_fs_upgrade_try = FALSE;

int
__mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused int32_t *retval)
{
	vnode_t pvp = NULL;
	vnode_t vp = NULL;
	int need_nameidone = 0;
	vfs_context_t ctx = vfs_context_current();
	char fstypename[MFSNAMELEN];
	struct nameidata nd;
	size_t dummy=0;
	char *labelstr = NULL;
	int flags = uap->flags;
	int error;
#if CONFIG_IMGSRC_ACCESS || CONFIG_MACF
	boolean_t is_64bit = IS_64BIT_PROCESS(p);
#else
#pragma unused(p)
#endif
	/*
	 * Get the fs type name from user space
	 */
	error = copyinstr(uap->type, fstypename, MFSNAMELEN, &dummy);
	if (error)
		return (error);

	/*
	 * Get the vnode to be covered
	 */
	NDINIT(&nd, LOOKUP, OP_MOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1 | WANTPARENT,
	       UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error) {
		goto out;
	}
	need_nameidone = 1;
	vp = nd.ni_vp;
	pvp = nd.ni_dvp;

/* NOTE(review): #ifdef here vs "#if CONFIG_IMGSRC_ACCESS" elsewhere in this
 * file - confirm the config macro is defined/undefined rather than 0/1. */
#ifdef CONFIG_IMGSRC_ACCESS
	/* Mounting image source cannot be batched with other operations */
	if (flags == MNT_IMGSRC_BY_INDEX) {
		error = relocate_imageboot_source(pvp, vp, &nd.ni_cnd, fstypename,
				ctx, is_64bit, uap->data, (flags == MNT_IMGSRC_BY_INDEX));
		goto out;
	}
#endif /* CONFIG_IMGSRC_ACCESS */

#if CONFIG_MACF
	/*
	 * Get the label string (if any) from user space
	 */
	if (uap->mac_p != USER_ADDR_NULL) {
		struct user_mac mac;
		size_t ulen = 0;

		/* normalize the 32/64-bit user struct into 'mac' */
		if (is_64bit) {
			struct user64_mac mac64;
			error = copyin(uap->mac_p, &mac64, sizeof(mac64));
			mac.m_buflen = mac64.m_buflen;
			mac.m_string = mac64.m_string;
		} else {
			struct user32_mac mac32;
			error = copyin(uap->mac_p, &mac32, sizeof(mac32));
			mac.m_buflen = mac32.m_buflen;
			mac.m_string = mac32.m_string;
		}
		if (error)
			goto out;
		/* reject absurd label lengths before allocating */
		if ((mac.m_buflen > MAC_MAX_LABEL_BUF_LEN) ||
		    (mac.m_buflen < 2)) {
			error = EINVAL;
			goto out;
		}
		MALLOC(labelstr, char *, mac.m_buflen, M_MACTEMP, M_WAITOK);
		error = copyinstr(mac.m_string, labelstr, mac.m_buflen, &ulen);
		if (error) {
			goto out;
		}
		AUDIT_ARG(mac_string, labelstr);
	}
#endif /* CONFIG_MACF */

	AUDIT_ARG(fflags, flags);

	if ((vp->v_flag & VROOT) &&
	    (vp->v_mount->mnt_flag & MNT_ROOTFS)) {
		/* mounting over '/': normally treated as an update ... */
		if (!(flags & MNT_UNION)) {
			flags |= MNT_UPDATE;
		}
		else {
			/*
			 * For a union mount on '/', treat it as fresh
			 * mount instead of update.
			 * Otherwise, union mouting on '/' used to panic the
			 * system before, since mnt_vnodecovered was found to
			 * be NULL for '/' which is required for unionlookup
			 * after it gets ENOENT on union mount.
			 */
			flags = (flags & ~(MNT_UPDATE));
		}

#if 0
//#ifdef SECURE_KERNEL
		if ((flags & MNT_RDONLY) == 0) {
			/* Release kernels are not allowed to mount "/" as rw */
			error = EPERM;
			goto out;
		}
//#endif
#endif
		/*
		 * See 7392553 for more details on why this check exists.
		 * Suffice to say: If this check is ON and something tries
		 * to mount the rootFS RW, we'll turn off the codesign
		 * bitmap optimization.
		 */
#if CHECK_CS_VALIDATION_BITMAP
		if ((flags & MNT_RDONLY) == 0 ) {
			root_fs_upgrade_try = TRUE;
		}
#endif
	}

	error = mount_common(fstypename, pvp, vp, &nd.ni_cnd, uap->data, flags, 0,
	                     labelstr, FALSE, ctx);

out:
	/* common exit: release the label buffer, iocounts and nameidata */

#if CONFIG_MACF
	if (labelstr)
		FREE(labelstr, M_MACTEMP);
#endif /* CONFIG_MACF */

	if (vp) {
		vnode_put(vp);
	}
	if (pvp) {
		vnode_put(pvp);
	}
	if (need_nameidone) {
		nameidone(&nd);
	}

	return (error);
}

/*
 * common mount implementation (final stage of mounting)
 *
 * Arguments:
 *  fstypename	file system type (ie it's vfs name)
 *  pvp		parent of covered vnode
 *  vp		covered vnode
 *  cnp		component name (ie path) of covered vnode
 *  flags	generic mount flags
 *  fsmountargs	file system specific data
 *  labelstr	optional MAC label
 *  kernelmount	TRUE for mounts initiated from inside the kernel
 *  ctx		caller's context
 */
static int
mount_common(char *fstypename, vnode_t pvp, vnode_t vp,
             struct componentname *cnp, user_addr_t fsmountargs, int flags, uint32_t internal_flags,
             char *labelstr, boolean_t kernelmount, vfs_context_t ctx)
{
#if !CONFIG_MACF
#pragma unused(labelstr)
#endif
	struct vnode *devvp = NULLVP;
	struct vnode *device_vnode = NULLVP;
#if CONFIG_MACF
	struct vnode *rvp;
#endif
	struct mount *mp;
	struct vfstable *vfsp = (struct vfstable *)0;
	struct proc *p = vfs_context_proc(ctx);
	int error, flag
	    = 0;
	user_addr_t devpath = USER_ADDR_NULL;
	int ronly = 0;
	int mntalloc = 0;
	boolean_t vfsp_ref = FALSE;		/* took a ref on vfsp->vfc_refcount */
	boolean_t is_rwlock_locked = FALSE;	/* holding mp->mnt_rwlock exclusive */
	boolean_t did_rele = FALSE;		/* already dropped devvp's usecount */
	boolean_t have_usecount = FALSE;	/* took a usecount on the covered vp */

	/*
	 * Process an update for an existing mount
	 */
	if (flags & MNT_UPDATE) {
		/* updates may only target the root vnode of a mounted FS */
		if ((vp->v_flag & VROOT) == 0) {
			error = EINVAL;
			goto out1;
		}
		mp = vp->v_mount;

		/* unmount in progress return error */
		mount_lock_spin(mp);
		if (mp->mnt_lflag & MNT_LUNMOUNT) {
			mount_unlock(mp);
			error = EBUSY;
			goto out1;
		}
		mount_unlock(mp);
		lck_rw_lock_exclusive(&mp->mnt_rwlock);
		is_rwlock_locked = TRUE;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			error = ENOTSUP;
			goto out1;
		}

		/*
		 * If content protection is enabled, update mounts are not
		 * allowed to turn it off.
		 */
		if ((mp->mnt_flag & MNT_CPROTECT) &&
		    ((flags & MNT_CPROTECT) == 0)) {
			error = EINVAL;
			goto out1;
		}

#ifdef CONFIG_IMGSRC_ACCESS
		/* Can't downgrade the backer of the root FS */
		if ((mp->mnt_kern_flag & MNTK_BACKS_ROOT) &&
		    (!vfs_isrdonly(mp)) && (flags & MNT_RDONLY)) {
			error = ENOTSUP;
			goto out1;
		}
#endif /* CONFIG_IMGSRC_ACCESS */

		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) &&
		    (error = suser(vfs_context_ucred(ctx), &p->p_acflag))) {
			goto out1;
		}
#if CONFIG_MACF
		error = mac_mount_check_remount(ctx, mp);
		if (error != 0) {
			goto out1;
		}
#endif
		/*
		 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV,
		 * and MNT_NOEXEC if mount point is already MNT_NOEXEC.
		 */
		if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
			flags |= MNT_NOSUID | MNT_NODEV;
			if (mp->mnt_flag & MNT_NOEXEC)
				flags |= MNT_NOEXEC;
		}
		/* remember the pre-update flags so a failed update can restore them */
		flag = mp->mnt_flag;



		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);

		vfsp = mp->mnt_vtable;
		goto update;
	}
	/*
	 * For non-root users, silently enforce MNT_NOSUID and MNT_NODEV, and
	 * MNT_NOEXEC if mount point is already MNT_NOEXEC.
	 */
	if ((!kernelmount) && suser(vfs_context_ucred(ctx), NULL)) {
		flags |= MNT_NOSUID | MNT_NODEV;
		if (vp->v_mount->mnt_flag & MNT_NOEXEC)
			flags |= MNT_NOEXEC;
	}

	/* XXXAUDIT: Should we capture the type on the error path as well? */
	AUDIT_ARG(text, fstypename);
	/* find the vfstable entry for the requested fs type, taking a ref */
	mount_list_lock();
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strncmp(vfsp->vfc_name, fstypename, MFSNAMELEN)) {
			vfsp->vfc_refcount++;
			vfsp_ref = TRUE;
			break;
		}
	mount_list_unlock();
	if (vfsp == NULL) {
		error = ENODEV;
		goto out1;
	}

	/*
	 * VFC_VFSLOCALARGS is not currently supported for kernel mounts
	 */
	if (kernelmount && (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS)) {
		error = EINVAL;	/* unsupported request */
		goto out1;
	}

	/* authorize, flush, and mark the covered vnode (sets VMOUNT) */
	error = prepare_coveredvp(vp, ctx, cnp, fstypename, ((internal_flags & KERNEL_MOUNT_NOAUTH) != 0));
	if (error != 0) {
		goto out1;
	}

	/*
	 * Allocate and initialize the filesystem (mount_t)
	 */
	MALLOC_ZONE(mp, struct mount *, (u_int32_t)sizeof(struct mount),
		M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_int32_t)sizeof(struct mount));
	mntalloc = 1;

	/* Initialize the default IO constraints */
	mp->mnt_maxreadcnt = mp->mnt_maxwritecnt = MAXPHYS;
	mp->mnt_segreadcnt = mp->mnt_segwritecnt = 32;
	mp->mnt_maxsegreadsize = mp->mnt_maxreadcnt;
	mp->mnt_maxsegwritesize = mp->mnt_maxwritecnt;
	mp->mnt_devblocksize = DEV_BSIZE;
	mp->mnt_alignmentmask = PAGE_MASK;
	mp->mnt_ioqueue_depth = MNT_DEFAULT_IOQUEUE_DEPTH;
	mp->mnt_ioscale = 1;
	mp->mnt_ioflags = 0;
	mp->mnt_realrootvp = NULLVP;
	mp->mnt_authcache_ttl = CACHED_LOOKUP_RIGHT_TTL;

	TAILQ_INIT(&mp->mnt_vnodelist);
	TAILQ_INIT(&mp->mnt_workerqueue);
	TAILQ_INIT(&mp->mnt_newvnodes);
	mount_lock_init(mp);
	lck_rw_lock_exclusive(&mp->mnt_rwlock);
	is_rwlock_locked = TRUE;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vtable = vfsp;
	//mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_vfsstat.f_fstypename, vfsp->vfc_name, MFSTYPENAMELEN);
	strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
	mp->mnt_throttle_mask = LOWPRI_MAX_NUM_DEV - 1;
	mp->mnt_devbsdunit = 0;

	/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
	vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);

#if NFSCLIENT
	if (kernelmount)
		mp->mnt_kern_flag |= MNTK_KERNEL_MOUNT;
	if ((internal_flags & KERNEL_MOUNT_PERMIT_UNMOUNT) != 0)
		mp->mnt_kern_flag |= MNTK_PERMIT_UNMOUNT;
#endif /* NFSCLIENT */

update:
	/*
	 * Set the mount level flags.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY) {
		// disallow read/write upgrades of file systems that
		// had the TYPENAME_OVERRIDE feature set.
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			error = EPERM;
			goto out1;
		}
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	}
	/* clear then re-apply the user-settable flag set from 'flags' */
	mp->mnt_flag &= ~(MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
			  MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
			  MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
			  MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
			  MNT_QUARANTINE | MNT_CPROTECT);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
			  MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC |
			  MNT_UNKNOWNPERMISSIONS | MNT_DONTBROWSE |
			  MNT_AUTOMOUNTED | MNT_DEFWRITE | MNT_NOATIME |
			  MNT_QUARANTINE | MNT_CPROTECT);

#if CONFIG_MACF
	if (flags & MNT_MULTILABEL) {
		if (vfsp->vfc_vfsflags & VFC_VFSNOMACLABEL) {
			error = EINVAL;
			goto out1;
		}
		mp->mnt_flag |= MNT_MULTILABEL;
	}
#endif
	/*
	 * Process device path for local file systems if requested
	 */
	if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) {
		/* first word of fsmountargs is the user pointer to the device path */
		if (vfs_context_is64bit(ctx)) {
			if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) )
				goto out1;
			fsmountargs += sizeof(devpath);
		} else {
			user32_addr_t tmp;
			if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) )
				goto out1;
			/* munge into LP64 addr */
			devpath = CAST_USER_ADDR_T(tmp);
			fsmountargs += sizeof(tmp);
		}

		/* Lookup device and authorize access to it */
		if ((devpath)) {
			struct nameidata nd;

			NDINIT(&nd, LOOKUP, OP_MOUNT, FOLLOW, UIO_USERSPACE, devpath, ctx);
			if ( (error = namei(&nd)) )
				goto out1;

			strncpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
			devvp = nd.ni_vp;

			nameidone(&nd);

			if (devvp->v_type != VBLK) {
				error = ENOTBLK;
				goto out2;
			}
			if (major(devvp->v_rdev) >= nblkdev) {
				error = ENXIO;
				goto out2;
			}
			/*
			 * If mount by non-root, then verify that user has necessary
			 * permissions on the device.
			 */
			if (suser(vfs_context_ucred(ctx), NULL) != 0) {
				mode_t accessmode = KAUTH_VNODE_READ_DATA;

				if ((mp->mnt_flag & MNT_RDONLY) == 0)
					accessmode |= KAUTH_VNODE_WRITE_DATA;
				if ((error = vnode_authorize(devvp, NULL, accessmode, ctx)) != 0)
					goto out2;
			}
		}
		/* On first mount, preflight and open device */
		if (devpath && ((flags & MNT_UPDATE) == 0)) {
			if ( (error = vnode_ref(devvp)) )
				goto out2;
			/*
			 * Disallow multiple mounts of the same device.
			 * Disallow mounting of a device that is currently in use
			 * (except for root, which might share swap device for miniroot).
			 * Flush out any old buffers remaining from a previous use.
			 */
			if ( (error = vfs_mountedon(devvp)) )
				goto out3;

			if (vcount(devvp) > 1 && !(vfs_flags(mp) & MNT_ROOTFS)) {
				error = EBUSY;
				goto out3;
			}
			if ( (error = VNOP_FSYNC(devvp, MNT_WAIT, ctx)) ) {
				error = ENOTBLK;
				goto out3;
			}
			if ( (error = buf_invalidateblks(devvp, BUF_WRITE_DATA, 0, 0)) )
				goto out3;

			ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
#if CONFIG_MACF
			error = mac_vnode_check_open(ctx,
			    devvp,
			    ronly ? FREAD : FREAD|FWRITE);
			if (error)
				goto out3;
#endif /* MAC */
			if ( (error = VNOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, ctx)) )
				goto out3;

			mp->mnt_devvp = devvp;
			device_vnode = devvp;

		} else if ((mp->mnt_flag & MNT_RDONLY) &&
		           (mp->mnt_kern_flag & MNTK_WANTRDWR) &&
		           (device_vnode = mp->mnt_devvp)) {
			dev_t dev;
			int maj;
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vnode_getalways(device_vnode);

			if (suser(vfs_context_ucred(ctx), NULL) &&
			    (error = vnode_authorize(device_vnode, NULL,
			     KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA,
			     ctx)) != 0) {
				vnode_put(device_vnode);
				goto out2;
			}

			/* Tell the device that we're upgrading */
			dev = (dev_t)device_vnode->v_rdev;
			maj = major(dev);

			if ((u_int)maj >= (u_int)nblkdev)
				panic("Volume mounted on a device with invalid major number.");

			error = bdevsw[maj].d_open(dev, FREAD | FWRITE, S_IFBLK, p);
			vnode_put(device_vnode);
			device_vnode = NULLVP;
			if (error != 0) {
				goto out2;
			}
		}
	}
#if CONFIG_MACF
	if ((flags & MNT_UPDATE) == 0) {
		mac_mount_label_init(mp);
		mac_mount_label_associate(ctx, mp);
	}
	if (labelstr) {
		if ((flags & MNT_UPDATE) != 0) {
			error = mac_mount_check_label_update(ctx, mp);
			if (error != 0)
				goto out3;
		}
	}
#endif
	/*
	 * Mount the filesystem.
	 */
	error = VFS_MOUNT(mp, device_vnode, fsmountargs, ctx);

	if (flags & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~
		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error)
			mp->mnt_flag = flag;	/* restore flag value */
		vfs_event_signal(NULL, VQ_UPDATE, (intptr_t)NULL);
		lck_rw_done(&mp->mnt_rwlock);
		is_rwlock_locked = FALSE;
		if (!error)
			enablequotas(mp, ctx);
		goto exit;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	if (error == 0) {
		struct vfs_attr	vfsattr;
#if CONFIG_MACF
		if (vfs_flags(mp) & MNT_MULTILABEL) {
			error = VFS_ROOT(mp, &rvp, ctx);
			if (error) {
				printf("%s() VFS_ROOT returned %d\n", __func__, error);
				goto out3;
			}
			error = vnode_label(mp, NULL, rvp, NULL, 0, ctx);
			/*
			 * drop reference provided by VFS_ROOT
			 */
			vnode_put(rvp);

			if (error)
				goto out3;
		}
#endif	/* MAC */

		/* hook the new mount onto the covered vnode */
		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vp->v_mountedhere = mp;
		vnode_unlock(vp);

		/*
		 * taking the name_cache_lock exclusively will
		 * insure that everyone is out of the fast path who
		 * might be trying to use a now stale copy of
		 * vp->v_mountedhere->mnt_realrootvp
		 * bumping mount_generation causes the cached values
		 * to be invalidated
		 */
		name_cache_lock();
		mount_generation++;
		name_cache_unlock();

		error = vnode_ref(vp);
		if (error != 0) {
			goto out4;
		}

		have_usecount = TRUE;

		error = checkdirs(vp, ctx);
		if (error != 0)  {
			/* Unmount the filesystem as cdir/rdirs cannot be updated */
			goto out4;
		}
		/*
		 * there is no cleanup code here so I have made it void
		 * we need to revisit this
		 */
		(void)VFS_START(mp, 0, ctx);

		if (mount_list_add(mp) != 0) {
			/*
			 * The system is shutting down trying to umount
			 * everything, so fail with a plausible errno.
			 */
			error = EBUSY;
			goto out4;
		}
		lck_rw_done(&mp->mnt_rwlock);
		is_rwlock_locked = FALSE;

		/* Check if this mounted file system supports EAs or named streams. */
		/* Skip WebDAV file systems for now since they hang in VFS_GETATTR here. */
		VFSATTR_INIT(&vfsattr);
		VFSATTR_WANTED(&vfsattr, f_capabilities);
		if (strncmp(mp->mnt_vfsstat.f_fstypename, "webdav", sizeof("webdav")) != 0 &&
		    vfs_getattr(mp, &vfsattr, ctx) == 0 &&
		    VFSATTR_IS_SUPPORTED(&vfsattr, f_capabilities)) {
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_EXTENDED_ATTR)) {
				mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
			}
#if NAMEDSTREAMS
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_INTERFACES] & VOL_CAP_INT_NAMEDSTREAMS)) {
				mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
			}
#endif
			/* Check if this file system supports path from id lookups. */
			if ((vfsattr.f_capabilities.capabilities[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID) &&
			    (vfsattr.f_capabilities.valid[VOL_CAPABILITIES_FORMAT] & VOL_CAP_FMT_PATH_FROM_ID)) {
				mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
			} else if (mp->mnt_flag & MNT_DOVOLFS) {
				/* Legacy MNT_DOVOLFS flag also implies path from id lookups. */
				mp->mnt_kern_flag |= MNTK_PATH_FROM_ID;
			}
		}
		if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSNATIVEXATTR) {
			mp->mnt_kern_flag |= MNTK_EXTENDED_ATTRS;
		}
		if (mp->mnt_vtable->vfc_vfsflags & VFC_VFSPREFLIGHT) {
			mp->mnt_kern_flag |= MNTK_UNMOUNT_PREFLIGHT;
		}
		/* increment the operations count */
		OSAddAtomic(1, &vfs_nummntops);
		enablequotas(mp, ctx);

		if (device_vnode) {
			device_vnode->v_specflags |= SI_MOUNTEDON;

			/*
			 * cache the IO attributes for the underlying physical media...
			 * an error return indicates the underlying driver doesn't
			 * support all the queries necessary... however, reasonable
			 * defaults will have been set, so no reason to bail or care
			 */
			vfs_init_io_attributes(device_vnode, mp);
		}

		/* Now that mount is setup, notify the listeners */
		vfs_notify_mount(pvp);
	} else {
		/* If we fail a fresh mount, there should be no vnodes left hooked into the mountpoint. */
		if (mp->mnt_vnodelist.tqh_first != NULL) {
			panic("mount_common(): mount of %s filesystem failed with %d, but vnode list is not empty.",
					mp->mnt_vtable->vfc_name, error);
		}

		vnode_lock_spin(vp);
		CLR(vp->v_flag, VMOUNT);
		vnode_unlock(vp);
		mount_list_lock();
		mp->mnt_vtable->vfc_refcount--;
		mount_list_unlock();

		if (device_vnode ) {
			vnode_rele(device_vnode);
			VNOP_CLOSE(device_vnode, ronly ? FREAD : FREAD|FWRITE, ctx);
		}
		lck_rw_done(&mp->mnt_rwlock);
		is_rwlock_locked = FALSE;

		/*
		 * if we get here, we have a mount structure that needs to be freed,
		 * but since the coveredvp hasn't yet been updated to point at it,
		 * no need to worry about other threads holding a crossref on this mp
		 * so it's ok to just free it
		 */
		mount_lock_destroy(mp);
#if CONFIG_MACF
		mac_mount_label_destroy(mp);
#endif
		FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
	}
exit:
	/*
	 * drop I/O count on the device vp if there was one
	 */
	if (devpath && devvp)
		vnode_put(devvp);

	return(error);

/* Error condition exits */
out4:
	/* mount was already visible on the covered vp: unwind like an unmount */
	(void)VFS_UNMOUNT(mp, MNT_FORCE, ctx);

	/*
	 * If the mount has been placed on the covered vp,
	 * it may have been discovered by now, so we have
	 * to treat this just like an unmount
	 */
	mount_lock_spin(mp);
	mp->mnt_lflag |= MNT_LDEAD;
	mount_unlock(mp);

	if (device_vnode != NULLVP) {
		vnode_rele(device_vnode);
		VNOP_CLOSE(device_vnode, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
		    ctx);
		did_rele = TRUE;
	}

	vnode_lock_spin(vp);

	mp->mnt_crossref++;
	vp->v_mountedhere = (mount_t) 0;

	vnode_unlock(vp);

	if (have_usecount) {
		vnode_rele(vp);
	}
out3:
	/* drop the usecount taken by vnode_ref(devvp), unless out4 already did */
	if (devpath && ((flags & MNT_UPDATE) == 0) && (!did_rele))
		vnode_rele(devvp);
out2:
	/* drop the iocount namei() left on the device vnode */
	if (devpath && devvp)
		vnode_put(devvp);
out1:
	/* Release mnt_rwlock only when it was taken */
	if (is_rwlock_locked == TRUE) {
		lck_rw_done(&mp->mnt_rwlock);
	}

	if (mntalloc) {
		if (mp->mnt_crossref)
			mount_dropcrossref(mp, vp, 0);
		else {
			mount_lock_destroy(mp);
#if CONFIG_MACF
			mac_mount_label_destroy(mp);
#endif
			FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT);
		}
	}
	if (vfsp_ref) {
		mount_list_lock();
		vfsp->vfc_refcount--;
		mount_list_unlock();
	}

	return(error);
}

/*
 * Flush in-core data, check for competing mount attempts,
 * and set VMOUNT
 */
int
prepare_coveredvp(vnode_t vp, vfs_context_t ctx, struct componentname *cnp, const char *fsname, boolean_t skip_auth)
{
#if !CONFIG_MACF
#pragma unused(cnp,fsname)
#endif
	struct vnode_attr va;
	int error;

	if (!skip_auth) {
		/*
		 * If the user is not root, ensure that they own the directory
		 * onto which we are attempting to mount.
1084 */ 1085 VATTR_INIT(&va); 1086 VATTR_WANTED(&va, va_uid); 1087 if ((error = vnode_getattr(vp, &va, ctx)) || 1088 (va.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)) && 1089 (!vfs_context_issuser(ctx)))) { 1090 error = EPERM; 1091 goto out; 1092 } 1093 } 1094 1095 if ( (error = VNOP_FSYNC(vp, MNT_WAIT, ctx)) ) 1096 goto out; 1097 1098 if ( (error = buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0)) ) 1099 goto out; 1100 1101 if (vp->v_type != VDIR) { 1102 error = ENOTDIR; 1103 goto out; 1104 } 1105 1106 if (ISSET(vp->v_flag, VMOUNT) && (vp->v_mountedhere != NULL)) { 1107 error = EBUSY; 1108 goto out; 1109 } 1110 1111#if CONFIG_MACF 1112 error = mac_mount_check_mount(ctx, vp, 1113 cnp, fsname); 1114 if (error != 0) 1115 goto out; 1116#endif 1117 1118 vnode_lock_spin(vp); 1119 SET(vp->v_flag, VMOUNT); 1120 vnode_unlock(vp); 1121 1122out: 1123 return error; 1124} 1125 1126#if CONFIG_IMGSRC_ACCESS 1127 1128#if DEBUG 1129#define IMGSRC_DEBUG(args...) printf(args) 1130#else 1131#define IMGSRC_DEBUG(args...) 
do { } while(0) 1132#endif 1133 1134static int 1135authorize_devpath_and_update_mntfromname(mount_t mp, user_addr_t devpath, vnode_t *devvpp, vfs_context_t ctx) 1136{ 1137 struct nameidata nd; 1138 vnode_t vp, realdevvp; 1139 mode_t accessmode; 1140 int error; 1141 1142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW, UIO_USERSPACE, devpath, ctx); 1143 if ( (error = namei(&nd)) ) { 1144 IMGSRC_DEBUG("namei() failed with %d\n", error); 1145 return error; 1146 } 1147 1148 vp = nd.ni_vp; 1149 1150 if (!vnode_isblk(vp)) { 1151 IMGSRC_DEBUG("Not block device.\n"); 1152 error = ENOTBLK; 1153 goto out; 1154 } 1155 1156 realdevvp = mp->mnt_devvp; 1157 if (realdevvp == NULLVP) { 1158 IMGSRC_DEBUG("No device backs the mount.\n"); 1159 error = ENXIO; 1160 goto out; 1161 } 1162 1163 error = vnode_getwithref(realdevvp); 1164 if (error != 0) { 1165 IMGSRC_DEBUG("Coudn't get iocount on device.\n"); 1166 goto out; 1167 } 1168 1169 if (vnode_specrdev(vp) != vnode_specrdev(realdevvp)) { 1170 IMGSRC_DEBUG("Wrong dev_t.\n"); 1171 error = ENXIO; 1172 goto out1; 1173 } 1174 1175 strlcpy(mp->mnt_vfsstat.f_mntfromname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN); 1176 1177 /* 1178 * If mount by non-root, then verify that user has necessary 1179 * permissions on the device. 
1180 */ 1181 if (!vfs_context_issuser(ctx)) { 1182 accessmode = KAUTH_VNODE_READ_DATA; 1183 if ((mp->mnt_flag & MNT_RDONLY) == 0) 1184 accessmode |= KAUTH_VNODE_WRITE_DATA; 1185 if ((error = vnode_authorize(vp, NULL, accessmode, ctx)) != 0) { 1186 IMGSRC_DEBUG("Access denied.\n"); 1187 goto out1; 1188 } 1189 } 1190 1191 *devvpp = vp; 1192 1193out1: 1194 vnode_put(realdevvp); 1195out: 1196 nameidone(&nd); 1197 if (error) { 1198 vnode_put(vp); 1199 } 1200 1201 return error; 1202} 1203 1204/* 1205 * Clear VMOUNT, set v_mountedhere, and mnt_vnodecovered, ref the vnode, 1206 * and call checkdirs() 1207 */ 1208static int 1209place_mount_and_checkdirs(mount_t mp, vnode_t vp, vfs_context_t ctx) 1210{ 1211 int error; 1212 1213 mp->mnt_vnodecovered = vp; /* XXX This is normally only set at init-time ... */ 1214 1215 vnode_lock_spin(vp); 1216 CLR(vp->v_flag, VMOUNT); 1217 vp->v_mountedhere = mp; 1218 vnode_unlock(vp); 1219 1220 /* 1221 * taking the name_cache_lock exclusively will 1222 * insure that everyone is out of the fast path who 1223 * might be trying to use a now stale copy of 1224 * vp->v_mountedhere->mnt_realrootvp 1225 * bumping mount_generation causes the cached values 1226 * to be invalidated 1227 */ 1228 name_cache_lock(); 1229 mount_generation++; 1230 name_cache_unlock(); 1231 1232 error = vnode_ref(vp); 1233 if (error != 0) { 1234 goto out; 1235 } 1236 1237 error = checkdirs(vp, ctx); 1238 if (error != 0) { 1239 /* Unmount the filesystem as cdir/rdirs cannot be updated */ 1240 vnode_rele(vp); 1241 goto out; 1242 } 1243 1244out: 1245 if (error != 0) { 1246 mp->mnt_vnodecovered = NULLVP; 1247 } 1248 return error; 1249} 1250 1251static void 1252undo_place_on_covered_vp(mount_t mp, vnode_t vp) 1253{ 1254 vnode_rele(vp); 1255 vnode_lock_spin(vp); 1256 vp->v_mountedhere = (mount_t)NULL; 1257 vnode_unlock(vp); 1258 1259 mp->mnt_vnodecovered = NULLVP; 1260} 1261 1262static int 1263mount_begin_update(mount_t mp, vfs_context_t ctx, int flags) 1264{ 1265 int error; 1266 
1267 /* unmount in progress return error */ 1268 mount_lock_spin(mp); 1269 if (mp->mnt_lflag & MNT_LUNMOUNT) { 1270 mount_unlock(mp); 1271 return EBUSY; 1272 } 1273 mount_unlock(mp); 1274 lck_rw_lock_exclusive(&mp->mnt_rwlock); 1275 1276 /* 1277 * We only allow the filesystem to be reloaded if it 1278 * is currently mounted read-only. 1279 */ 1280 if ((flags & MNT_RELOAD) && 1281 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 1282 error = ENOTSUP; 1283 goto out; 1284 } 1285 1286 /* 1287 * Only root, or the user that did the original mount is 1288 * permitted to update it. 1289 */ 1290 if (mp->mnt_vfsstat.f_owner != kauth_cred_getuid(vfs_context_ucred(ctx)) && 1291 (!vfs_context_issuser(ctx))) { 1292 error = EPERM; 1293 goto out; 1294 } 1295#if CONFIG_MACF 1296 error = mac_mount_check_remount(ctx, mp); 1297 if (error != 0) { 1298 goto out; 1299 } 1300#endif 1301 1302out: 1303 if (error) { 1304 lck_rw_done(&mp->mnt_rwlock); 1305 } 1306 1307 return error; 1308} 1309 1310static void 1311mount_end_update(mount_t mp) 1312{ 1313 lck_rw_done(&mp->mnt_rwlock); 1314} 1315 1316static int 1317get_imgsrc_rootvnode(uint32_t height, vnode_t *rvpp) 1318{ 1319 vnode_t vp; 1320 1321 if (height >= MAX_IMAGEBOOT_NESTING) { 1322 return EINVAL; 1323 } 1324 1325 vp = imgsrc_rootvnodes[height]; 1326 if ((vp != NULLVP) && (vnode_get(vp) == 0)) { 1327 *rvpp = vp; 1328 return 0; 1329 } else { 1330 return ENOENT; 1331 } 1332} 1333 1334static int 1335relocate_imageboot_source(vnode_t pvp, vnode_t vp, struct componentname *cnp, 1336 const char *fsname, vfs_context_t ctx, 1337 boolean_t is64bit, user_addr_t fsmountargs, boolean_t by_index) 1338{ 1339 int error; 1340 mount_t mp; 1341 boolean_t placed = FALSE; 1342 vnode_t devvp = NULLVP; 1343 struct vfstable *vfsp; 1344 user_addr_t devpath; 1345 char *old_mntonname; 1346 vnode_t rvp; 1347 uint32_t height; 1348 uint32_t flags; 1349 1350 /* If we didn't imageboot, nothing to move */ 1351 if (imgsrc_rootvnodes[0] == NULLVP) { 1352 return EINVAL; 1353 } 1354 
1355 /* Only root can do this */ 1356 if (!vfs_context_issuser(ctx)) { 1357 return EPERM; 1358 } 1359 1360 IMGSRC_DEBUG("looking for root vnode.\n"); 1361 1362 /* 1363 * Get root vnode of filesystem we're moving. 1364 */ 1365 if (by_index) { 1366 if (is64bit) { 1367 struct user64_mnt_imgsrc_args mia64; 1368 error = copyin(fsmountargs, &mia64, sizeof(mia64)); 1369 if (error != 0) { 1370 IMGSRC_DEBUG("Failed to copy in arguments.\n"); 1371 return error; 1372 } 1373 1374 height = mia64.mi_height; 1375 flags = mia64.mi_flags; 1376 devpath = mia64.mi_devpath; 1377 } else { 1378 struct user32_mnt_imgsrc_args mia32; 1379 error = copyin(fsmountargs, &mia32, sizeof(mia32)); 1380 if (error != 0) { 1381 IMGSRC_DEBUG("Failed to copy in arguments.\n"); 1382 return error; 1383 } 1384 1385 height = mia32.mi_height; 1386 flags = mia32.mi_flags; 1387 devpath = mia32.mi_devpath; 1388 } 1389 } else { 1390 /* 1391 * For binary compatibility--assumes one level of nesting. 1392 */ 1393 if (is64bit) { 1394 if ( (error = copyin(fsmountargs, (caddr_t)&devpath, sizeof(devpath))) ) 1395 return error; 1396 } else { 1397 user32_addr_t tmp; 1398 if ( (error = copyin(fsmountargs, (caddr_t)&tmp, sizeof(tmp))) ) 1399 return error; 1400 1401 /* munge into LP64 addr */ 1402 devpath = CAST_USER_ADDR_T(tmp); 1403 } 1404 1405 height = 0; 1406 flags = 0; 1407 } 1408 1409 if (flags != 0) { 1410 IMGSRC_DEBUG("%s: Got nonzero flags.\n", __FUNCTION__); 1411 return EINVAL; 1412 } 1413 1414 error = get_imgsrc_rootvnode(height, &rvp); 1415 if (error != 0) { 1416 IMGSRC_DEBUG("getting root vnode failed with %d\n", error); 1417 return error; 1418 } 1419 1420 IMGSRC_DEBUG("got root vnode.\n"); 1421 1422 MALLOC(old_mntonname, char*, MAXPATHLEN, M_TEMP, M_WAITOK); 1423 1424 /* Can only move once */ 1425 mp = vnode_mount(rvp); 1426 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) { 1427 IMGSRC_DEBUG("Already moved.\n"); 1428 error = EBUSY; 1429 goto out0; 1430 } 1431 1432 IMGSRC_DEBUG("Starting 
updated.\n"); 1433 1434 /* Get exclusive rwlock on mount, authorize update on mp */ 1435 error = mount_begin_update(mp , ctx, 0); 1436 if (error != 0) { 1437 IMGSRC_DEBUG("Starting updated failed with %d\n", error); 1438 goto out0; 1439 } 1440 1441 /* 1442 * It can only be moved once. Flag is set under the rwlock, 1443 * so we're now safe to proceed. 1444 */ 1445 if ((mp->mnt_kern_flag & MNTK_HAS_MOVED) == MNTK_HAS_MOVED) { 1446 IMGSRC_DEBUG("Already moved [2]\n"); 1447 goto out1; 1448 } 1449 1450 1451 IMGSRC_DEBUG("Preparing coveredvp.\n"); 1452 1453 /* Mark covered vnode as mount in progress, authorize placing mount on top */ 1454 error = prepare_coveredvp(vp, ctx, cnp, fsname, FALSE); 1455 if (error != 0) { 1456 IMGSRC_DEBUG("Preparing coveredvp failed with %d.\n", error); 1457 goto out1; 1458 } 1459 1460 IMGSRC_DEBUG("Covered vp OK.\n"); 1461 1462 /* Sanity check the name caller has provided */ 1463 vfsp = mp->mnt_vtable; 1464 if (strncmp(vfsp->vfc_name, fsname, MFSNAMELEN) != 0) { 1465 IMGSRC_DEBUG("Wrong fs name.\n"); 1466 error = EINVAL; 1467 goto out2; 1468 } 1469 1470 /* Check the device vnode and update mount-from name, for local filesystems */ 1471 if (vfsp->vfc_vfsflags & VFC_VFSLOCALARGS) { 1472 IMGSRC_DEBUG("Local, doing device validation.\n"); 1473 1474 if (devpath != USER_ADDR_NULL) { 1475 error = authorize_devpath_and_update_mntfromname(mp, devpath, &devvp, ctx); 1476 if (error) { 1477 IMGSRC_DEBUG("authorize_devpath_and_update_mntfromname() failed.\n"); 1478 goto out2; 1479 } 1480 1481 vnode_put(devvp); 1482 } 1483 } 1484 1485 /* 1486 * Place mp on top of vnode, ref the vnode, call checkdirs(), 1487 * and increment the name cache's mount generation 1488 */ 1489 1490 IMGSRC_DEBUG("About to call place_mount_and_checkdirs().\n"); 1491 error = place_mount_and_checkdirs(mp, vp, ctx); 1492 if (error != 0) { 1493 goto out2; 1494 } 1495 1496 placed = TRUE; 1497 1498 strncpy(old_mntonname, mp->mnt_vfsstat.f_mntonname, MAXPATHLEN); 1499 
strncpy(mp->mnt_vfsstat.f_mntonname, cnp->cn_pnbuf, MAXPATHLEN); 1500 1501 /* Forbid future moves */ 1502 mount_lock(mp); 1503 mp->mnt_kern_flag |= MNTK_HAS_MOVED; 1504 mount_unlock(mp); 1505 1506 /* Finally, add to mount list, completely ready to go */ 1507 if (mount_list_add(mp) != 0) { 1508 /* 1509 * The system is shutting down trying to umount 1510 * everything, so fail with a plausible errno. 1511 */ 1512 error = EBUSY; 1513 goto out3; 1514 } 1515 1516 mount_end_update(mp); 1517 vnode_put(rvp); 1518 FREE(old_mntonname, M_TEMP); 1519 1520 vfs_notify_mount(pvp); 1521 1522 return 0; 1523out3: 1524 strncpy(mp->mnt_vfsstat.f_mntonname, old_mntonname, MAXPATHLEN); 1525 1526 mount_lock(mp); 1527 mp->mnt_kern_flag &= ~(MNTK_HAS_MOVED); 1528 mount_unlock(mp); 1529 1530out2: 1531 /* 1532 * Placing the mp on the vnode clears VMOUNT, 1533 * so cleanup is different after that point 1534 */ 1535 if (placed) { 1536 /* Rele the vp, clear VMOUNT and v_mountedhere */ 1537 undo_place_on_covered_vp(mp, vp); 1538 } else { 1539 vnode_lock_spin(vp); 1540 CLR(vp->v_flag, VMOUNT); 1541 vnode_unlock(vp); 1542 } 1543out1: 1544 mount_end_update(mp); 1545 1546out0: 1547 vnode_put(rvp); 1548 FREE(old_mntonname, M_TEMP); 1549 return error; 1550} 1551 1552#endif /* CONFIG_IMGSRC_ACCESS */ 1553 1554void 1555enablequotas(struct mount *mp, vfs_context_t ctx) 1556{ 1557 struct nameidata qnd; 1558 int type; 1559 char qfpath[MAXPATHLEN]; 1560 const char *qfname = QUOTAFILENAME; 1561 const char *qfopsname = QUOTAOPSNAME; 1562 const char *qfextension[] = INITQFNAMES; 1563 1564 /* XXX Shoulkd be an MNTK_ flag, instead of strncmp()'s */ 1565 if (strncmp(mp->mnt_vfsstat.f_fstypename, "hfs", sizeof("hfs")) != 0 ) { 1566 return; 1567 } 1568 /* 1569 * Enable filesystem disk quotas if necessary. 
1570 * We ignore errors as this should not interfere with final mount 1571 */ 1572 for (type=0; type < MAXQUOTAS; type++) { 1573 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfopsname, qfextension[type]); 1574 NDINIT(&qnd, LOOKUP, OP_MOUNT, FOLLOW, UIO_SYSSPACE, 1575 CAST_USER_ADDR_T(qfpath), ctx); 1576 if (namei(&qnd) != 0) 1577 continue; /* option file to trigger quotas is not present */ 1578 vnode_put(qnd.ni_vp); 1579 nameidone(&qnd); 1580 snprintf(qfpath, sizeof(qfpath), "%s/%s.%s", mp->mnt_vfsstat.f_mntonname, qfname, qfextension[type]); 1581 1582 (void) VFS_QUOTACTL(mp, QCMD(Q_QUOTAON, type), 0, qfpath, ctx); 1583 } 1584 return; 1585} 1586 1587 1588static int 1589checkdirs_callback(proc_t p, void * arg) 1590{ 1591 struct cdirargs * cdrp = (struct cdirargs * )arg; 1592 vnode_t olddp = cdrp->olddp; 1593 vnode_t newdp = cdrp->newdp; 1594 struct filedesc *fdp; 1595 vnode_t tvp; 1596 vnode_t fdp_cvp; 1597 vnode_t fdp_rvp; 1598 int cdir_changed = 0; 1599 int rdir_changed = 0; 1600 1601 /* 1602 * XXX Also needs to iterate each thread in the process to see if it 1603 * XXX is using a per-thread current working directory, and, if so, 1604 * XXX update that as well. 
1605 */ 1606 1607 proc_fdlock(p); 1608 fdp = p->p_fd; 1609 if (fdp == (struct filedesc *)0) { 1610 proc_fdunlock(p); 1611 return(PROC_RETURNED); 1612 } 1613 fdp_cvp = fdp->fd_cdir; 1614 fdp_rvp = fdp->fd_rdir; 1615 proc_fdunlock(p); 1616 1617 if (fdp_cvp == olddp) { 1618 vnode_ref(newdp); 1619 tvp = fdp->fd_cdir; 1620 fdp_cvp = newdp; 1621 cdir_changed = 1; 1622 vnode_rele(tvp); 1623 } 1624 if (fdp_rvp == olddp) { 1625 vnode_ref(newdp); 1626 tvp = fdp->fd_rdir; 1627 fdp_rvp = newdp; 1628 rdir_changed = 1; 1629 vnode_rele(tvp); 1630 } 1631 if (cdir_changed || rdir_changed) { 1632 proc_fdlock(p); 1633 fdp->fd_cdir = fdp_cvp; 1634 fdp->fd_rdir = fdp_rvp; 1635 proc_fdunlock(p); 1636 } 1637 return(PROC_RETURNED); 1638} 1639 1640 1641 1642/* 1643 * Scan all active processes to see if any of them have a current 1644 * or root directory onto which the new filesystem has just been 1645 * mounted. If so, replace them with the new mount point. 1646 */ 1647static int 1648checkdirs(vnode_t olddp, vfs_context_t ctx) 1649{ 1650 vnode_t newdp; 1651 vnode_t tvp; 1652 int err; 1653 struct cdirargs cdr; 1654 struct uthread * uth = get_bsdthread_info(current_thread()); 1655 1656 if (olddp->v_usecount == 1) 1657 return(0); 1658 if (uth != (struct uthread *)0) 1659 uth->uu_notrigger = 1; 1660 err = VFS_ROOT(olddp->v_mountedhere, &newdp, ctx); 1661 if (uth != (struct uthread *)0) 1662 uth->uu_notrigger = 0; 1663 1664 if (err != 0) { 1665#if DIAGNOSTIC 1666 panic("mount: lost mount: error %d", err); 1667#endif 1668 return(err); 1669 } 1670 1671 cdr.olddp = olddp; 1672 cdr.newdp = newdp; 1673 /* do not block for exec/fork trans as the vp in cwd & rootdir are not changing */ 1674 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS, checkdirs_callback, (void *)&cdr, NULL, NULL); 1675 1676 if (rootvnode == olddp) { 1677 vnode_ref(newdp); 1678 tvp = rootvnode; 1679 rootvnode = newdp; 1680 vnode_rele(tvp); 1681 } 1682 1683 vnode_put(newdp); 1684 return(0); 1685} 1686 1687/* 1688 * Unmount a file 
system. 1689 * 1690 * Note: unmount takes a path to the vnode mounted on as argument, 1691 * not special file (as before). 1692 */ 1693/* ARGSUSED */ 1694int 1695unmount(__unused proc_t p, struct unmount_args *uap, __unused int32_t *retval) 1696{ 1697 vnode_t vp; 1698 struct mount *mp; 1699 int error; 1700 struct nameidata nd; 1701 vfs_context_t ctx = vfs_context_current(); 1702 1703 NDINIT(&nd, LOOKUP, OP_UNMOUNT, NOTRIGGER | FOLLOW | AUDITVNPATH1, 1704 UIO_USERSPACE, uap->path, ctx); 1705 error = namei(&nd); 1706 if (error) 1707 return (error); 1708 vp = nd.ni_vp; 1709 mp = vp->v_mount; 1710 nameidone(&nd); 1711 1712#if CONFIG_MACF 1713 error = mac_mount_check_umount(ctx, mp); 1714 if (error != 0) { 1715 vnode_put(vp); 1716 return (error); 1717 } 1718#endif 1719 /* 1720 * Must be the root of the filesystem 1721 */ 1722 if ((vp->v_flag & VROOT) == 0) { 1723 vnode_put(vp); 1724 return (EINVAL); 1725 } 1726 mount_ref(mp, 0); 1727 vnode_put(vp); 1728 /* safedounmount consumes the mount ref */ 1729 return (safedounmount(mp, uap->flags, ctx)); 1730} 1731 1732int 1733vfs_unmountbyfsid(fsid_t * fsid, int flags, vfs_context_t ctx) 1734{ 1735 mount_t mp; 1736 1737 mp = mount_list_lookupby_fsid(fsid, 0, 1); 1738 if (mp == (mount_t)0) { 1739 return(ENOENT); 1740 } 1741 mount_ref(mp, 0); 1742 mount_iterdrop(mp); 1743 /* safedounmount consumes the mount ref */ 1744 return(safedounmount(mp, flags, ctx)); 1745} 1746 1747 1748/* 1749 * The mount struct comes with a mount ref which will be consumed. 1750 * Do the actual file system unmount, prevent some common foot shooting. 1751 */ 1752int 1753safedounmount(struct mount *mp, int flags, vfs_context_t ctx) 1754{ 1755 int error; 1756 proc_t p = vfs_context_proc(ctx); 1757 1758 /* 1759 * If the file system is not responding and MNT_NOBLOCK 1760 * is set and not a forced unmount then return EBUSY. 
1761 */ 1762 if ((mp->mnt_kern_flag & MNT_LNOTRESP) && 1763 (flags & MNT_NOBLOCK) && ((flags & MNT_FORCE) == 0)) { 1764 error = EBUSY; 1765 goto out; 1766 } 1767 1768 /* 1769 * Skip authorization if the mount is tagged as permissive and 1770 * this is not a forced-unmount attempt. 1771 */ 1772 if (!(((mp->mnt_kern_flag & MNTK_PERMIT_UNMOUNT) != 0) && ((flags & MNT_FORCE) == 0))) { 1773 /* 1774 * Only root, or the user that did the original mount is 1775 * permitted to unmount this filesystem. 1776 */ 1777 if ((mp->mnt_vfsstat.f_owner != kauth_cred_getuid(kauth_cred_get())) && 1778 (error = suser(kauth_cred_get(), &p->p_acflag))) 1779 goto out; 1780 } 1781 /* 1782 * Don't allow unmounting the root file system. 1783 */ 1784 if (mp->mnt_flag & MNT_ROOTFS) { 1785 error = EBUSY; /* the root is always busy */ 1786 goto out; 1787 } 1788 1789#ifdef CONFIG_IMGSRC_ACCESS 1790 if (mp->mnt_kern_flag & MNTK_BACKS_ROOT) { 1791 error = EBUSY; 1792 goto out; 1793 } 1794#endif /* CONFIG_IMGSRC_ACCESS */ 1795 1796 return (dounmount(mp, flags, 1, ctx)); 1797 1798out: 1799 mount_drop(mp, 0); 1800 return(error); 1801} 1802 1803/* 1804 * Do the actual file system unmount. 
1805 */ 1806int 1807dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx) 1808{ 1809 vnode_t coveredvp = (vnode_t)0; 1810 int error; 1811 int needwakeup = 0; 1812 int forcedunmount = 0; 1813 int lflags = 0; 1814 struct vnode *devvp = NULLVP; 1815#if CONFIG_TRIGGERS 1816 proc_t p = vfs_context_proc(ctx); 1817 int did_vflush = 0; 1818 int pflags_save = 0; 1819#endif /* CONFIG_TRIGGERS */ 1820 1821 if (flags & MNT_FORCE) 1822 forcedunmount = 1; 1823 1824 mount_lock(mp); 1825 /* XXX post jaguar fix LK_DRAIN - then clean this up */ 1826 if ((flags & MNT_FORCE)) { 1827 mp->mnt_kern_flag |= MNTK_FRCUNMOUNT; 1828 mp->mnt_lflag |= MNT_LFORCE; 1829 } 1830 if (mp->mnt_lflag & MNT_LUNMOUNT) { 1831 mp->mnt_lflag |= MNT_LWAIT; 1832 if(withref != 0) 1833 mount_drop(mp, 1); 1834 msleep((caddr_t)mp, &mp->mnt_mlock, (PVFS | PDROP), "dounmount", NULL); 1835 /* 1836 * The prior unmount attempt has probably succeeded. 1837 * Do not dereference mp here - returning EBUSY is safest. 1838 */ 1839 return (EBUSY); 1840 } 1841 1842#if CONFIG_TRIGGERS 1843 if (flags & MNT_NOBLOCK && p != kernproc) 1844 pflags_save = OSBitOrAtomic(P_NOREMOTEHANG, &p->p_flag); 1845#endif 1846 1847 mp->mnt_kern_flag |= MNTK_UNMOUNT; 1848 mp->mnt_lflag |= MNT_LUNMOUNT; 1849 mp->mnt_flag &=~ MNT_ASYNC; 1850 /* 1851 * anyone currently in the fast path that 1852 * trips over the cached rootvp will be 1853 * dumped out and forced into the slow path 1854 * to regenerate a new cached value 1855 */ 1856 mp->mnt_realrootvp = NULLVP; 1857 mount_unlock(mp); 1858 1859 /* 1860 * taking the name_cache_lock exclusively will 1861 * insure that everyone is out of the fast path who 1862 * might be trying to use a now stale copy of 1863 * vp->v_mountedhere->mnt_realrootvp 1864 * bumping mount_generation causes the cached values 1865 * to be invalidated 1866 */ 1867 name_cache_lock(); 1868 mount_generation++; 1869 name_cache_unlock(); 1870 1871 1872 lck_rw_lock_exclusive(&mp->mnt_rwlock); 1873 if (withref != 0) 1874 
mount_drop(mp, 0); 1875#if CONFIG_FSE 1876 fsevent_unmount(mp); /* has to come first! */ 1877#endif 1878 error = 0; 1879 if (forcedunmount == 0) { 1880 ubc_umount(mp); /* release cached vnodes */ 1881 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1882 error = VFS_SYNC(mp, MNT_WAIT, ctx); 1883 if (error) { 1884 mount_lock(mp); 1885 mp->mnt_kern_flag &= ~MNTK_UNMOUNT; 1886 mp->mnt_lflag &= ~MNT_LUNMOUNT; 1887 mp->mnt_lflag &= ~MNT_LFORCE; 1888 goto out; 1889 } 1890 } 1891 } 1892 1893#if CONFIG_TRIGGERS 1894 vfs_nested_trigger_unmounts(mp, flags, ctx); 1895 did_vflush = 1; 1896#endif 1897 if (forcedunmount) 1898 lflags |= FORCECLOSE; 1899 error = vflush(mp, NULLVP, SKIPSWAP | SKIPSYSTEM | SKIPROOT | lflags); 1900 if ((forcedunmount == 0) && error) { 1901 mount_lock(mp); 1902 mp->mnt_kern_flag &= ~MNTK_UNMOUNT; 1903 mp->mnt_lflag &= ~MNT_LUNMOUNT; 1904 mp->mnt_lflag &= ~MNT_LFORCE; 1905 goto out; 1906 } 1907 1908 /* make sure there are no one in the mount iterations or lookup */ 1909 mount_iterdrain(mp); 1910 1911 error = VFS_UNMOUNT(mp, flags, ctx); 1912 if (error) { 1913 mount_iterreset(mp); 1914 mount_lock(mp); 1915 mp->mnt_kern_flag &= ~MNTK_UNMOUNT; 1916 mp->mnt_lflag &= ~MNT_LUNMOUNT; 1917 mp->mnt_lflag &= ~MNT_LFORCE; 1918 goto out; 1919 } 1920 1921 /* increment the operations count */ 1922 if (!error) 1923 OSAddAtomic(1, &vfs_nummntops); 1924 1925 if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) { 1926 /* hold an io reference and drop the usecount before close */ 1927 devvp = mp->mnt_devvp; 1928 vnode_getalways(devvp); 1929 vnode_rele(devvp); 1930 VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? 
FREAD : FREAD|FWRITE, 1931 ctx); 1932 vnode_clearmountedon(devvp); 1933 vnode_put(devvp); 1934 } 1935 lck_rw_done(&mp->mnt_rwlock); 1936 mount_list_remove(mp); 1937 lck_rw_lock_exclusive(&mp->mnt_rwlock); 1938 1939 /* mark the mount point hook in the vp but not drop the ref yet */ 1940 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 1941 vnode_getwithref(coveredvp); 1942 vnode_lock_spin(coveredvp); 1943 1944 mp->mnt_crossref++; 1945 coveredvp->v_mountedhere = (struct mount *)0; 1946 1947 vnode_unlock(coveredvp); 1948 vnode_put(coveredvp); 1949 } 1950 1951 mount_list_lock(); 1952 mp->mnt_vtable->vfc_refcount--; 1953 mount_list_unlock(); 1954 1955 cache_purgevfs(mp); /* remove cache entries for this file sys */ 1956 vfs_event_signal(NULL, VQ_UNMOUNT, (intptr_t)NULL); 1957 mount_lock(mp); 1958 mp->mnt_lflag |= MNT_LDEAD; 1959 1960 if (mp->mnt_lflag & MNT_LWAIT) { 1961 /* 1962 * do the wakeup here 1963 * in case we block in mount_refdrain 1964 * which will drop the mount lock 1965 * and allow anyone blocked in vfs_busy 1966 * to wakeup and see the LDEAD state 1967 */ 1968 mp->mnt_lflag &= ~MNT_LWAIT; 1969 wakeup((caddr_t)mp); 1970 } 1971 mount_refdrain(mp); 1972out: 1973 if (mp->mnt_lflag & MNT_LWAIT) { 1974 mp->mnt_lflag &= ~MNT_LWAIT; 1975 needwakeup = 1; 1976 } 1977 1978#if CONFIG_TRIGGERS 1979 if (flags & MNT_NOBLOCK && p != kernproc) { 1980 // Restore P_NOREMOTEHANG bit to its previous value 1981 if ((pflags_save & P_NOREMOTEHANG) == 0) 1982 OSBitAndAtomic(~((uint32_t) P_NOREMOTEHANG), &p->p_flag); 1983 } 1984 1985 /* 1986 * Callback and context are set together under the mount lock, and 1987 * never cleared, so we're safe to examine them here, drop the lock, 1988 * and call out. 
1989 */ 1990 if (mp->mnt_triggercallback != NULL) { 1991 mount_unlock(mp); 1992 if (error == 0) { 1993 mp->mnt_triggercallback(mp, VTC_RELEASE, mp->mnt_triggerdata, ctx); 1994 } else if (did_vflush) { 1995 mp->mnt_triggercallback(mp, VTC_REPLACE, mp->mnt_triggerdata, ctx); 1996 } 1997 } else { 1998 mount_unlock(mp); 1999 } 2000#else 2001 mount_unlock(mp); 2002#endif /* CONFIG_TRIGGERS */ 2003 2004 lck_rw_done(&mp->mnt_rwlock); 2005 2006 if (needwakeup) 2007 wakeup((caddr_t)mp); 2008 2009 if (!error) { 2010 if ((coveredvp != NULLVP)) { 2011 vnode_t pvp; 2012 2013 vnode_getwithref(coveredvp); 2014 pvp = vnode_getparent(coveredvp); 2015 vnode_rele(coveredvp); 2016 2017 mount_dropcrossref(mp, coveredvp, 0); 2018#if CONFIG_TRIGGERS 2019 if (coveredvp->v_resolve) 2020 vnode_trigger_rearm(coveredvp, ctx); 2021#endif 2022 vnode_put(coveredvp); 2023 2024 if (pvp) { 2025 lock_vnode_and_post(pvp, NOTE_WRITE); 2026 vnode_put(pvp); 2027 } 2028 } else if (mp->mnt_flag & MNT_ROOTFS) { 2029 mount_lock_destroy(mp); 2030#if CONFIG_MACF 2031 mac_mount_label_destroy(mp); 2032#endif 2033 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); 2034 } else 2035 panic("dounmount: no coveredvp"); 2036 } 2037 return (error); 2038} 2039 2040void 2041mount_dropcrossref(mount_t mp, vnode_t dp, int need_put) 2042{ 2043 vnode_lock(dp); 2044 mp->mnt_crossref--; 2045 2046 if (mp->mnt_crossref < 0) 2047 panic("mount cross refs -ve"); 2048 2049 if ((mp != dp->v_mountedhere) && (mp->mnt_crossref == 0)) { 2050 2051 if (need_put) 2052 vnode_put_locked(dp); 2053 vnode_unlock(dp); 2054 2055 mount_lock_destroy(mp); 2056#if CONFIG_MACF 2057 mac_mount_label_destroy(mp); 2058#endif 2059 FREE_ZONE((caddr_t)mp, sizeof (struct mount), M_MOUNT); 2060 return; 2061 } 2062 if (need_put) 2063 vnode_put_locked(dp); 2064 vnode_unlock(dp); 2065} 2066 2067 2068/* 2069 * Sync each mounted filesystem. 
2070 */ 2071#if DIAGNOSTIC 2072int syncprt = 0; 2073struct ctldebug debug0 = { "syncprt", &syncprt }; 2074#endif 2075 2076int print_vmpage_stat=0; 2077 2078static int 2079sync_callback(mount_t mp, void * arg) 2080{ 2081 int asyncflag; 2082 2083 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 2084 asyncflag = mp->mnt_flag & MNT_ASYNC; 2085 mp->mnt_flag &= ~MNT_ASYNC; 2086 VFS_SYNC(mp, arg ? MNT_WAIT : MNT_NOWAIT, vfs_context_current()); 2087 if (asyncflag) 2088 mp->mnt_flag |= MNT_ASYNC; 2089 } 2090 return(VFS_RETURNED); 2091} 2092 2093 2094/* ARGSUSED */ 2095int 2096sync(__unused proc_t p, __unused struct sync_args *uap, __unused int32_t *retval) 2097{ 2098 vfs_iterate(LK_NOWAIT, sync_callback, (void *)0); 2099 2100 if(print_vmpage_stat) { 2101 vm_countdirtypages(); 2102 } 2103 2104#if DIAGNOSTIC 2105 if (syncprt) 2106 vfs_bufstats(); 2107#endif /* DIAGNOSTIC */ 2108 return (0); 2109} 2110 2111/* 2112 * Change filesystem quotas. 2113 */ 2114#if QUOTA 2115static int quotactl_funneled(proc_t p, struct quotactl_args *uap, int32_t *retval); 2116 2117int 2118quotactl(proc_t p, struct quotactl_args *uap, int32_t *retval) 2119{ 2120 boolean_t funnel_state; 2121 int error; 2122 2123 funnel_state = thread_funnel_set(kernel_flock, TRUE); 2124 error = quotactl_funneled(p, uap, retval); 2125 thread_funnel_set(kernel_flock, funnel_state); 2126 return(error); 2127} 2128 2129static int 2130quotactl_funneled(proc_t p, struct quotactl_args *uap, __unused int32_t *retval) 2131{ 2132 struct mount *mp; 2133 int error, quota_cmd, quota_status; 2134 caddr_t datap; 2135 size_t fnamelen; 2136 struct nameidata nd; 2137 vfs_context_t ctx = vfs_context_current(); 2138 struct dqblk my_dqblk; 2139 2140 AUDIT_ARG(uid, uap->uid); 2141 AUDIT_ARG(cmd, uap->cmd); 2142 NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, 2143 uap->path, ctx); 2144 error = namei(&nd); 2145 if (error) 2146 return (error); 2147 mp = nd.ni_vp->v_mount; 2148 vnode_put(nd.ni_vp); 2149 nameidone(&nd); 2150 2151 /* 
copyin any data we will need for downstream code */ 2152 quota_cmd = uap->cmd >> SUBCMDSHIFT; 2153 2154 switch (quota_cmd) { 2155 case Q_QUOTAON: 2156 /* uap->arg specifies a file from which to take the quotas */ 2157 fnamelen = MAXPATHLEN; 2158 datap = kalloc(MAXPATHLEN); 2159 error = copyinstr(uap->arg, datap, MAXPATHLEN, &fnamelen); 2160 break; 2161 case Q_GETQUOTA: 2162 /* uap->arg is a pointer to a dqblk structure. */ 2163 datap = (caddr_t) &my_dqblk; 2164 break; 2165 case Q_SETQUOTA: 2166 case Q_SETUSE: 2167 /* uap->arg is a pointer to a dqblk structure. */ 2168 datap = (caddr_t) &my_dqblk; 2169 if (proc_is64bit(p)) { 2170 struct user_dqblk my_dqblk64; 2171 error = copyin(uap->arg, (caddr_t)&my_dqblk64, sizeof (my_dqblk64)); 2172 if (error == 0) { 2173 munge_dqblk(&my_dqblk, &my_dqblk64, FALSE); 2174 } 2175 } 2176 else { 2177 error = copyin(uap->arg, (caddr_t)&my_dqblk, sizeof (my_dqblk)); 2178 } 2179 break; 2180 case Q_QUOTASTAT: 2181 /* uap->arg is a pointer to an integer */ 2182 datap = (caddr_t) "a_status; 2183 break; 2184 default: 2185 datap = NULL; 2186 break; 2187 } /* switch */ 2188 2189 if (error == 0) { 2190 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, datap, ctx); 2191 } 2192 2193 switch (quota_cmd) { 2194 case Q_QUOTAON: 2195 if (datap != NULL) 2196 kfree(datap, MAXPATHLEN); 2197 break; 2198 case Q_GETQUOTA: 2199 /* uap->arg is a pointer to a dqblk structure we need to copy out to */ 2200 if (error == 0) { 2201 if (proc_is64bit(p)) { 2202 struct user_dqblk my_dqblk64; 2203 munge_dqblk(&my_dqblk, &my_dqblk64, TRUE); 2204 error = copyout((caddr_t)&my_dqblk64, uap->arg, sizeof (my_dqblk64)); 2205 } 2206 else { 2207 error = copyout(datap, uap->arg, sizeof (struct dqblk)); 2208 } 2209 } 2210 break; 2211 case Q_QUOTASTAT: 2212 /* uap->arg is a pointer to an integer */ 2213 if (error == 0) { 2214 error = copyout(datap, uap->arg, sizeof(quota_status)); 2215 } 2216 break; 2217 default: 2218 break; 2219 } /* switch */ 2220 2221 return (error); 2222} 
2223#else 2224int 2225quotactl(__unused proc_t p, __unused struct quotactl_args *uap, __unused int32_t *retval) 2226{ 2227 return (EOPNOTSUPP); 2228} 2229#endif /* QUOTA */ 2230 2231/* 2232 * Get filesystem statistics. 2233 * 2234 * Returns: 0 Success 2235 * namei:??? 2236 * vfs_update_vfsstat:??? 2237 * munge_statfs:EFAULT 2238 */ 2239/* ARGSUSED */ 2240int 2241statfs(__unused proc_t p, struct statfs_args *uap, __unused int32_t *retval) 2242{ 2243 struct mount *mp; 2244 struct vfsstatfs *sp; 2245 int error; 2246 struct nameidata nd; 2247 vfs_context_t ctx = vfs_context_current(); 2248 vnode_t vp; 2249 2250 NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1, 2251 UIO_USERSPACE, uap->path, ctx); 2252 error = namei(&nd); 2253 if (error) 2254 return (error); 2255 vp = nd.ni_vp; 2256 mp = vp->v_mount; 2257 sp = &mp->mnt_vfsstat; 2258 nameidone(&nd); 2259 2260 error = vfs_update_vfsstat(mp, ctx, VFS_USER_EVENT); 2261 if (error != 0) { 2262 vnode_put(vp); 2263 return (error); 2264 } 2265 2266 error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE); 2267 vnode_put(vp); 2268 return (error); 2269} 2270 2271/* 2272 * Get filesystem statistics. 
 */
/*
 * fstatfs(2): get statistics for the filesystem containing the vnode
 * referenced by file descriptor uap->fd.
 */
/* ARGSUSED */
int
fstatfs(__unused proc_t p, struct fstatfs_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	/* fd -> vnode; holds a fileproc reference until file_drop() */
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		/* vnode detached from its mount (e.g. forced unmount) */
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp,vfs_context_current(),VFS_USER_EVENT)) != 0) {
		goto out;
	}

	/* munge_statfs() handles the 32-bit vs 64-bit user struct layouts */
	error = munge_statfs(mp, sp, uap->buf, NULL, IS_64BIT_PROCESS(p), TRUE);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}

/*
 * Common routine to handle copying of statfs64 data to user space:
 * builds a zero-initialized struct statfs64 from the mount's cached
 * vfsstatfs and copies it to the user address bufp.  The layout of
 * struct statfs64 is identical for 32- and 64-bit callers, so no
 * munging is needed.
 */
static int
statfs64_common(struct mount *mp, struct vfsstatfs *sfsp, user_addr_t bufp)
{
	int error;
	struct statfs64 sfs;

	/* zero first so pad bytes never leak kernel stack to user space */
	bzero(&sfs, sizeof(sfs));

	sfs.f_bsize = sfsp->f_bsize;
	sfs.f_iosize = (int32_t)sfsp->f_iosize;
	sfs.f_blocks = sfsp->f_blocks;
	sfs.f_bfree = sfsp->f_bfree;
	sfs.f_bavail = sfsp->f_bavail;
	sfs.f_files = sfsp->f_files;
	sfs.f_ffree = sfsp->f_ffree;
	sfs.f_fsid = sfsp->f_fsid;
	sfs.f_owner = sfsp->f_owner;
	sfs.f_type = mp->mnt_vtable->vfc_typenum;
	sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	sfs.f_fssubtype = sfsp->f_fssubtype;
	/* a mount may advertise an overridden fs type name (MNTK_TYPENAME_OVERRIDE) */
	if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
		strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
	} else {
		strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSTYPENAMELEN);
	}
	strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MAXPATHLEN);
	strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MAXPATHLEN);

	error = copyout((caddr_t)&sfs, bufp, sizeof(sfs));

	return(error);
}

/*
 * Get file system statistics in 64-bit mode (statfs64(2)).
 * Same flow as statfs() but copies out the fixed-layout struct statfs64.
 */
int
statfs64(__unused struct proc *p, struct statfs64_args *uap, __unused int32_t *retval)
{
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;
	struct nameidata nd;
	vfs_context_t ctxp = vfs_context_current();
	vnode_t vp;

	NDINIT(&nd, LOOKUP, OP_STATFS, NOTRIGGER | FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctxp);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	mp = vp->v_mount;
	sp = &mp->mnt_vfsstat;
	nameidone(&nd);

	error = vfs_update_vfsstat(mp, ctxp, VFS_USER_EVENT);
	if (error != 0) {
		vnode_put(vp);
		return (error);
	}

	error = statfs64_common(mp, sp, uap->buf);
	vnode_put(vp);

	return (error);
}

/*
 * Get file system statistics in 64-bit mode (fstatfs64(2)).
 * Same flow as fstatfs() but copies out the fixed-layout struct statfs64.
 */
int
fstatfs64(__unused struct proc *p, struct fstatfs64_args *uap, __unused int32_t *retval)
{
	struct vnode *vp;
	struct mount *mp;
	struct vfsstatfs *sp;
	int error;

	AUDIT_ARG(fd, uap->fd);

	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	error = vnode_getwithref(vp);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}

	AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);

	mp = vp->v_mount;
	if (!mp) {
		error = EBADF;
		goto out;
	}
	sp = &mp->mnt_vfsstat;
	if ((error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT)) != 0) {
		goto out;
	}

	error = statfs64_common(mp, sp, uap->buf);

out:
	file_drop(uap->fd);
	vnode_put(vp);

	return (error);
}

/*
 * Shared iteration state for the getfsstat family of syscalls;
 * passed as the opaque arg to vfs_iterate() callbacks.
 */
struct getfsstat_struct {
	user_addr_t	sfsp;		/* next user buffer slot to fill (0 = count only) */
	user_addr_t	*mp;		/* per-mount MAC label buffers, or NULL */
	int		count;		/* mounts visited so far */
	int		maxcount;	/* capacity of the user buffer, in records */
	int		flags;		/* MNT_NOWAIT / MNT_WAIT / MNT_DWAIT */
	int		error;		/* first error encountered, if any */
};

/*
 * vfs_iterate() callback for __mac_getfsstat(): appends one munged
 * statfs record per mount to the user buffer described by arg, and
 * unconditionally bumps the mount count.
 */
static int
getfsstat_callback(mount_t mp, void * arg)
{

	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error, my_size;
	vfs_context_t ctx = vfs_context_current();

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the
		 * fsstat cache. MNT_WAIT/MNT_DWAIT overrides MNT_NOWAIT.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 || (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, ctx,
		    VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			/* skip this mount but keep iterating the rest */
			return(VFS_RETURNED);
		}

		/*
		 * Need to handle LP64 version of struct statfs
		 */
		error = munge_statfs(mp, sp, fstp->sfsp, &my_size, IS_64BIT_PROCESS(vfs_context_proc(ctx)), FALSE);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		/* my_size is the per-record size munge_statfs actually wrote */
		fstp->sfsp += my_size;

		if (fstp->mp) {
#if CONFIG_MACF
			error = mac_mount_label_get(mp, *fstp->mp);
			if (error) {
				fstp->error = error;
				return(VFS_RETURNED_DONE);
			}
#endif
			fstp->mp++;
		}
	}
	/* always count every mount so the caller learns the full total */
	fstp->count++;
	return(VFS_RETURNED);
}

/*
 * Get statistics on all filesystems.
 */
/*
 * getfsstat(2): thin wrapper that forwards to __mac_getfsstat() with
 * no MAC label buffer.
 */
int
getfsstat(__unused proc_t p, struct getfsstat_args *uap, int *retval)
{
	struct __mac_getfsstat_args muap;

	muap.buf = uap->buf;
	muap.bufsize = uap->bufsize;
	muap.mac = USER_ADDR_NULL;
	muap.macsize = 0;
	muap.flags = uap->flags;

	return (__mac_getfsstat(p, &muap, retval));
}

/*
 * __mac_getfsstat: Get MAC-related file system statistics
 *
 * Parameters:    p        (ignored)
 *                uap      User argument descriptor (see below)
 *                retval   Count of file system statistics (N stats)
 *
 * Indirect:      uap->bufsize   Buffer size
 *                uap->macsize   MAC info size
 *                uap->buf       Buffer where information will be returned
 *                uap->mac       MAC info
 *                uap->flags     File system flags
 *
 *
 * Returns:        0              Success
 *                !0              Not success
 *
 */
int
__mac_getfsstat(__unused proc_t p, struct __mac_getfsstat_args *uap, int *retval)
{
	user_addr_t sfsp;
	user_addr_t *mp;
	size_t count, maxcount, bufsize, macsize;
	struct getfsstat_struct fst;

	bufsize = (size_t) uap->bufsize;
	macsize = (size_t) uap->macsize;

	/* record size depends on the caller's ABI */
	if (IS_64BIT_PROCESS(p)) {
		maxcount = bufsize / sizeof(struct user64_statfs);
	}
	else {
		maxcount = bufsize / sizeof(struct user32_statfs);
	}
	sfsp = uap->buf;
	count = 0;

	mp = NULL;

#if CONFIG_MACF
	if (uap->mac != USER_ADDR_NULL) {
		u_int32_t *mp0;
		int error;
		unsigned int i;

		/* one user pointer (4 or 8 bytes) per statfs record expected */
		count = (macsize / (IS_64BIT_PROCESS(p) ? 8 : 4));
		if (count != maxcount)
			return (EINVAL);

		/* Copy in the array */
		MALLOC(mp0, u_int32_t *, macsize, M_MACTEMP, M_WAITOK);
		if (mp0 == NULL) {
			return (ENOMEM);
		}

		error = copyin(uap->mac, mp0, macsize);
		if (error) {
			FREE(mp0, M_MACTEMP);
			return (error);
		}

		/* Normalize to an array of user_addr_t */
		MALLOC(mp, user_addr_t *, count * sizeof(user_addr_t), M_MACTEMP, M_WAITOK);
		if (mp == NULL) {
			FREE(mp0, M_MACTEMP);
			return (ENOMEM);
		}

		for (i = 0; i < count; i++) {
			if (IS_64BIT_PROCESS(p))
				mp[i] = ((user_addr_t *)mp0)[i];
			else
				mp[i] = (user_addr_t)mp0[i];
		}
		FREE(mp0, M_MACTEMP);
	}
#endif


	fst.sfsp = sfsp;
	fst.mp = mp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	/*
	 * NOTE(review): maxcount is a size_t narrowed into the int
	 * fst.maxcount with no clamp; a huge uap->bufsize could wrap it
	 * negative.  Consider clamping before assignment — TODO confirm
	 * against the syscall argument width actually reachable here.
	 */
	fst.maxcount = maxcount;


	vfs_iterate(0, getfsstat_callback, &fst);

	if (mp)
		FREE(mp, M_MACTEMP);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	/* if the buffer filled up, report only what fit; else the true count */
	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;
	return (0);
}

/*
 * vfs_iterate() callback for getfsstat64(): appends one fixed-layout
 * struct statfs64 per mount to the user buffer described by arg.
 */
static int
getfsstat64_callback(mount_t mp, void * arg)
{
	struct getfsstat_struct *fstp = (struct getfsstat_struct *)arg;
	struct vfsstatfs *sp;
	int error;

	if (fstp->sfsp && fstp->count < fstp->maxcount) {
		sp = &mp->mnt_vfsstat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh the fsstat
		 * cache. MNT_WAIT overrides MNT_NOWAIT.
		 *
		 * We treat MNT_DWAIT as MNT_WAIT for all instances of
		 * getfsstat, since the constants are out of the same
		 * namespace.
		 */
		if (((fstp->flags & MNT_NOWAIT) == 0 ||
		    (fstp->flags & (MNT_WAIT | MNT_DWAIT))) &&
		    (error = vfs_update_vfsstat(mp, vfs_context_current(), VFS_USER_EVENT))) {
			KAUTH_DEBUG("vfs_update_vfsstat returned %d", error);
			/* skip this mount but keep iterating the rest */
			return(VFS_RETURNED);
		}

		error = statfs64_common(mp, sp, fstp->sfsp);
		if (error) {
			fstp->error = error;
			return(VFS_RETURNED_DONE);
		}
		fstp->sfsp += sizeof(struct statfs64);
	}
	/* always count every mount so the caller learns the full total */
	fstp->count++;
	return(VFS_RETURNED);
}

/*
 * Get statistics on all file systems in 64 bit mode (getfsstat64(2)).
 */
int
getfsstat64(__unused proc_t p, struct getfsstat64_args *uap, int *retval)
{
	user_addr_t sfsp;
	int count, maxcount;
	struct getfsstat_struct fst;

	maxcount = uap->bufsize / sizeof(struct statfs64);

	sfsp = uap->buf;
	count = 0;

	/* fst.mp is never read by getfsstat64_callback, so it is left unset */
	fst.sfsp = sfsp;
	fst.flags = uap->flags;
	fst.count = 0;
	fst.error = 0;
	fst.maxcount = maxcount;

	vfs_iterate(0, getfsstat64_callback, &fst);

	if (fst.error ) {
		KAUTH_DEBUG("ERROR - %s gets %d", p->p_comm, fst.error);
		return(fst.error);
	}

	if (fst.sfsp && fst.count > fst.maxcount)
		*retval = fst.maxcount;
	else
		*retval = fst.count;

	return (0);
}

/*
 * Change current working directory to a given file descriptor.
 * With per_thread set this services __pthread_fchdir(); fd == -1 then
 * means "revert this thread to the per-process cwd".
 */
/* ARGSUSED */
static int
common_fchdir(proc_t p, struct fchdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	vnode_t vp;
	vnode_t tdp;
	vnode_t tvp;
	struct mount *mp;
	int error;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	if (per_thread && uap->fd == -1) {
		/*
		 * Switching back from per-thread to per process CWD; verify we
		 * in fact have one before proceeding.  The only success case
		 * for this code path is to return 0 preemptively after zapping
		 * the thread structure contents.
		 */
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = NULLVP;
			if (tvp != NULLVP) {
				vnode_rele(tvp);
				return (0);
			}
		}
		return (EBADF);
	}

	if ( (error = file_vnode(uap->fd, &vp)) )
		return(error);
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_chdir(ctx, vp);
	if (error)
		goto out;
#endif
	error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx);
	if (error)
		goto out;

	/* if a filesystem is mounted here, descend to its root vnode */
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EACCES;
			goto out;
		}
		error = VFS_ROOT(mp, &tdp, ctx);
		vfs_unbusy(mp);
		if (error)
			break;
		/* swap the iocount from the cover vnode to the root vnode */
		vnode_put(vp);
		vp = tdp;
	}
	if (error)
		goto out;
	/* take a long-term usecount for the cwd, then drop the iocount */
	if ( (error = vnode_ref(vp)) )
		goto out;
	vnode_put(vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			/*
			 * NOTE(review): this path returns without
			 * file_drop(uap->fd), apparently leaking the fd
			 * reference; believed unreachable in practice
			 * (current thread always has a uthread) — verify.
			 */
			vnode_rele(vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = vp;
		proc_fdunlock(p);
	}

	/* release the previous cwd, if any */
	if (tvp)
		vnode_rele(tvp);
	file_drop(uap->fd);

	return (0);
out:
	vnode_put(vp);
	file_drop(uap->fd);

	return(error);
}

/*
 * fchdir(2): change the per-process working directory.
 */
int
fchdir(proc_t p, struct fchdir_args *uap, __unused int32_t *retval)
{
	return common_fchdir(p, uap, 0);
}

/*
 * __pthread_fchdir: change the calling thread's working directory.
 */
int
__pthread_fchdir(proc_t p, struct __pthread_fchdir_args *uap, __unused int32_t *retval)
{
	/* arg structs are layout-compatible; see common_fchdir() */
	return common_fchdir(p, (void *)uap, 1);
}

/*
 * Change current working directory (".").
 * With per_thread set this services __pthread_chdir() and changes only
 * the calling thread's cwd.
 *
 * Returns:	0			Success
 *		change_dir:ENOTDIR
 *		change_dir:???
 *		vnode_ref:ENOENT	No such file or directory
 */
/* ARGSUSED */
static int
common_chdir(proc_t p, struct chdir_args *uap, int per_thread)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_CHDIR, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = change_dir(&nd, ctx);
	if (error)
		return (error);
	/* take a long-term usecount for the cwd */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	/*
	 * drop the iocount we picked up in change_dir
	 */
	vnode_put(nd.ni_vp);

	if (per_thread) {
		thread_t th = vfs_context_thread(ctx);
		if (th) {
			uthread_t uth = get_bsdthread_info(th);
			tvp = uth->uu_cdir;
			uth->uu_cdir = nd.ni_vp;
			OSBitOrAtomic(P_THCWD, &p->p_flag);
		} else {
			vnode_rele(nd.ni_vp);
			return (ENOENT);
		}
	} else {
		proc_fdlock(p);
		tvp = fdp->fd_cdir;
		fdp->fd_cdir = nd.ni_vp;
		proc_fdunlock(p);
	}

	/* release the previous cwd, if any */
	if (tvp)
		vnode_rele(tvp);

	return (0);
}


/*
 * chdir
 *
 * Change current working directory (".") for the entire process
 *
 * Parameters:  p       Process requesting the call
 *              uap     User argument descriptor (see below)
 *              retval  (ignored)
 *
 * Indirect parameters: uap->path       Directory path
 *
 * Returns:     0       Success
 *              common_chdir: ENOTDIR
 *              common_chdir: ENOENT   No such file or directory
 *              common_chdir: ???
 *
 */
int
chdir(proc_t p, struct chdir_args *uap, __unused int32_t *retval)
{
	return common_chdir(p, (void *)uap, 0);
}

/*
 * __pthread_chdir
 *
 * Change current working directory (".") for a single thread
 *
 * Parameters:  p       Process requesting the call
 *              uap     User argument descriptor (see below)
 *              retval  (ignored)
 *
 * Indirect parameters: uap->path       Directory path
 *
 * Returns:     0       Success
 *              common_chdir: ENOTDIR
 *              common_chdir: ENOENT   No such file or directory
 *              common_chdir: ???
 *
 */
int
__pthread_chdir(proc_t p, struct __pthread_chdir_args *uap, __unused int32_t *retval)
{
	return common_chdir(p, (void *)uap, 1);
}


/*
 * Change notion of root (``/'') directory (chroot(2)).
 * Requires superuser; takes a usecount on the new root and releases
 * the old one.
 */
/* ARGSUSED */
int
chroot(proc_t p, struct chroot_args *uap, __unused int32_t *retval)
{
	struct filedesc *fdp = p->p_fd;
	int error;
	struct nameidata nd;
	vnode_t tvp;
	vfs_context_t ctx = vfs_context_current();

	if ((error = suser(kauth_cred_get(), &p->p_acflag)))
		return (error);

	NDINIT(&nd, LOOKUP, OP_CHROOT, FOLLOW | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	/* change_dir() validates VDIR, MAC, and search permission */
	error = change_dir(&nd, ctx);
	if (error)
		return (error);

#if CONFIG_MACF
	error = mac_vnode_check_chroot(ctx, nd.ni_vp,
	    &nd.ni_cnd);
	if (error) {
		vnode_put(nd.ni_vp);
		return (error);
	}
#endif

	/* hold a long-term usecount for fd_rdir; drop the iocount */
	if ( (error = vnode_ref(nd.ni_vp)) ) {
		vnode_put(nd.ni_vp);
		return (error);
	}
	vnode_put(nd.ni_vp);

	proc_fdlock(p);
	tvp = fdp->fd_rdir;
	fdp->fd_rdir = nd.ni_vp;
	fdp->fd_flags |= FD_CHROOT;
	proc_fdunlock(p);

	/* release the previous root, if the process had one */
	if (tvp != NULL)
		vnode_rele(tvp);

	return (0);
}

/*
 * Common routine for chroot and chdir.
 *
 * Returns:	0			Success
 *		ENOTDIR			Not a directory
 *		namei:???
[anything namei can return] 2957 * vnode_authorize:??? [anything vnode_authorize can return] 2958 */ 2959static int 2960change_dir(struct nameidata *ndp, vfs_context_t ctx) 2961{ 2962 vnode_t vp; 2963 int error; 2964 2965 if ((error = namei(ndp))) 2966 return (error); 2967 nameidone(ndp); 2968 vp = ndp->ni_vp; 2969 2970 if (vp->v_type != VDIR) { 2971 vnode_put(vp); 2972 return (ENOTDIR); 2973 } 2974 2975#if CONFIG_MACF 2976 error = mac_vnode_check_chdir(ctx, vp); 2977 if (error) { 2978 vnode_put(vp); 2979 return (error); 2980 } 2981#endif 2982 2983 error = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, ctx); 2984 if (error) { 2985 vnode_put(vp); 2986 return (error); 2987 } 2988 2989 return (error); 2990} 2991 2992/* 2993 * Check permissions, allocate an open file structure, 2994 * and call the device open routine if any. 2995 * 2996 * Returns: 0 Success 2997 * EINVAL 2998 * EINTR 2999 * falloc:ENFILE 3000 * falloc:EMFILE 3001 * falloc:ENOMEM 3002 * vn_open_auth:??? 3003 * dupfdopen:??? 3004 * VNOP_ADVLOCK:??? 3005 * vnode_setsize:??? 
 *
 * XXX Need to implement uid, gid
 */
int
open1(vfs_context_t ctx, struct nameidata *ndp, int uflags,
    struct vnode_attr *vap, fp_allocfn_t fp_zalloc, void *cra,
    int32_t *retval)
{
	proc_t p = vfs_context_proc(ctx);
	uthread_t uu = get_bsdthread_info(vfs_context_thread(ctx));
	struct fileproc *fp;
	vnode_t vp;
	int flags, oflags;
	int type, indx, error;
	struct flock lf;
	int no_controlling_tty = 0;
	int deny_controlling_tty = 0;
	struct session *sessp = SESSION_NULL;

	oflags = uflags;

	/* O_RDONLY|O_WRONLY|O_RDWR set together is invalid */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return(EINVAL);
	flags = FFLAGS(uflags);

	AUDIT_ARG(fflags, oflags);
	AUDIT_ARG(mode, vap->va_mode);

	/* reserve an fd slot and fileproc up front */
	if ((error = falloc_withalloc(p,
	    &fp, &indx, ctx, fp_zalloc, cra)) != 0) {
		return (error);
	}
	/* stash the reserved fd for the /dev/fd dupfdopen() protocol */
	uu->uu_dupfd = -indx - 1;

	if (!(p->p_flag & P_CONTROLT)) {
		sessp = proc_session(p);
		no_controlling_tty = 1;
		/*
		 * If conditions would warrant getting a controlling tty if
		 * the device being opened is a tty (see ttyopen in tty.c),
		 * but the open flags deny it, set a flag in the session to
		 * prevent it.
		 */
		if (SESS_LEADER(p, sessp) &&
		    sessp->s_ttyvp == NULL &&
		    (flags & O_NOCTTY)) {
			session_lock(sessp);
			sessp->s_flags |= S_NOCTTY;
			session_unlock(sessp);
			deny_controlling_tty = 1;
		}
	}

	if ((error = vn_open_auth(ndp, &flags, vap))) {
		if ((error == ENODEV || error == ENXIO) && (uu->uu_dupfd >= 0)){	/* XXX from fdopen */
			/* a /dev/fd open: dup the fd named by uu_dupfd instead */
			if ((error = dupfdopen(p->p_fd, indx, uu->uu_dupfd, flags, error)) == 0) {
				fp_drop(p, indx, NULL, 0);
				*retval = indx;
				if (deny_controlling_tty) {
					session_lock(sessp);
					sessp->s_flags &= ~S_NOCTTY;
					session_unlock(sessp);
				}
				if (sessp != SESSION_NULL)
					session_rele(sessp);
				return (0);
			}
		}
		if (error == ERESTART)
			error = EINTR;
		fp_free(p, indx, fp);

		if (deny_controlling_tty) {
			session_lock(sessp);
			sessp->s_flags &= ~S_NOCTTY;
			session_unlock(sessp);
		}
		if (sessp != SESSION_NULL)
			session_rele(sessp);
		return (error);
	}
	uu->uu_dupfd = 0;
	vp = ndp->ni_vp;

	fp->f_fglob->fg_flag = flags & (FMASK | O_EVTONLY);
	fp->f_fglob->fg_ops = &vnops;
	fp->f_fglob->fg_data = (caddr_t)vp;

#if CONFIG_PROTECT
	if (VATTR_IS_ACTIVE (vap, va_dataprotect_flags)) {
		if (vap->va_dataprotect_flags & VA_DP_RAWENCRYPTED) {
			/* opened for raw-encrypted reads; see open_dprotected_np */
			fp->f_fglob->fg_flag |= FENCRYPTED;
		}
	}
#endif

	/* O_EXLOCK/O_SHLOCK: take a whole-file flock-style advisory lock */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((flags & FNONBLOCK) == 0)
			type |= F_WAIT;
#if CONFIG_MACF
		error = mac_file_check_lock(vfs_context_ucred(ctx), fp->f_fglob,
		    F_SETLK, &lf);
		if (error)
			goto bad;
#endif
		if ((error = VNOP_ADVLOCK(vp, (caddr_t)fp->f_fglob, F_SETLK, &lf, type, ctx, NULL)))
			goto bad;
		fp->f_fglob->fg_flag |= FHASLOCK;
	}

	/* try to truncate by setting the size attribute */
	if ((flags & O_TRUNC) && ((error = vnode_setsize(vp, (off_t)0, 0, ctx)) != 0))
		goto bad;

	/*
	 * If the open flags denied the acquisition of a controlling tty,
	 * clear the flag in the session structure that prevented the lower
	 * level code from assigning one.
	 */
	if (deny_controlling_tty) {
		session_lock(sessp);
		sessp->s_flags &= ~S_NOCTTY;
		session_unlock(sessp);
	}

	/*
	 * If a controlling tty was set by the tty line discipline, then we
	 * want to set the vp of the tty into the session structure. We have
	 * a race here because we can't get to the vp for the tp in ttyopen,
	 * because it's not passed as a parameter in the open path.
	 */
	if (no_controlling_tty && (p->p_flag & P_CONTROLT)) {
		vnode_t ttyvp;

		/*
		 * We already have a ref from vn_open_auth(), so we can demand another reference.
		 */
		error = vnode_ref_ext(vp, 0, VNODE_REF_FORCE);
		if (error != 0) {
			panic("vnode_ref_ext() with VNODE_REF_FORCE failed?!");
		}

		session_lock(sessp);
		ttyvp = sessp->s_ttyvp;
		sessp->s_ttyvp = vp;
		sessp->s_ttyvid = vnode_vid(vp);
		session_unlock(sessp);
		if (ttyvp != NULLVP)
			vnode_rele(ttyvp);
	}

	vnode_put(vp);

	/* publish the fd: set close-on-exec/fork flags, then unreserve it */
	proc_fdlock(p);
	if (flags & O_CLOEXEC)
		*fdflags(p, indx) |= UF_EXCLOSE;
	if (flags & O_CLOFORK)
		*fdflags(p, indx) |= UF_FORKCLOSE;
	procfdtbl_releasefd(p, indx, NULL);
	fp_drop(p, indx, fp, 1);
	proc_fdunlock(p);

	*retval = indx;

	if (sessp != SESSION_NULL)
		session_rele(sessp);
	return (0);
bad:
	if (deny_controlling_tty) {
		session_lock(sessp);
		sessp->s_flags &= ~S_NOCTTY;
		session_unlock(sessp);
	}
	if (sessp != SESSION_NULL)
		session_rele(sessp);

	/* close with the credentials the file was opened with */
	struct vfs_context context = *vfs_context_current();
	context.vc_ucred = fp->f_fglob->fg_cred;

	vn_close(vp, fp->f_fglob->fg_flag, &context);
	vnode_put(vp);
	fp_free(p, indx, fp);

	return (error);
}

/*
 * open_extended: open a file given a path name; with extended argument list (including extended security (ACL)).
 *
 * Parameters:  p       Process requesting the open
 *              uap     User argument descriptor (see below)
 *              retval  Pointer to an area to receive the
 *                      return value from the system call
 *
 * Indirect:    uap->path       Path to open (same as 'open')
 *              uap->flags      Flags to open (same as 'open')
 *              uap->uid        UID to set, if creating
 *              uap->gid        GID to set, if creating
 *              uap->mode       File mode, if creating (same as 'open')
 *              uap->xsecurity  ACL to set, if creating
 *
 * Returns:     0       Success
 *              !0      errno value
 *
 * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:         We should enumerate the possible errno values here, and where
 *              in the code they originated.
3222 */ 3223int 3224open_extended(proc_t p, struct open_extended_args *uap, int32_t *retval) 3225{ 3226 struct filedesc *fdp = p->p_fd; 3227 int ciferror; 3228 kauth_filesec_t xsecdst; 3229 struct vnode_attr va; 3230 struct nameidata nd; 3231 int cmode; 3232 3233 AUDIT_ARG(owner, uap->uid, uap->gid); 3234 3235 xsecdst = NULL; 3236 if ((uap->xsecurity != USER_ADDR_NULL) && 3237 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) 3238 return ciferror; 3239 3240 VATTR_INIT(&va); 3241 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 3242 VATTR_SET(&va, va_mode, cmode); 3243 if (uap->uid != KAUTH_UID_NONE) 3244 VATTR_SET(&va, va_uid, uap->uid); 3245 if (uap->gid != KAUTH_GID_NONE) 3246 VATTR_SET(&va, va_gid, uap->gid); 3247 if (xsecdst != NULL) 3248 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); 3249 3250 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, 3251 uap->path, vfs_context_current()); 3252 3253 ciferror = open1(vfs_context_current(), &nd, uap->flags, &va, 3254 fileproc_alloc_init, NULL, retval); 3255 if (xsecdst != NULL) 3256 kauth_filesec_free(xsecdst); 3257 3258 return ciferror; 3259} 3260 3261/* 3262 * Go through the data-protected atomically controlled open (2) 3263 * 3264 * int open_dprotected_np(user_addr_t path, int flags, int class, int dpflags, int mode) 3265 */ 3266int open_dprotected_np (__unused proc_t p, struct open_dprotected_np_args *uap, int32_t *retval) { 3267 int flags = uap->flags; 3268 int class = uap->class; 3269 int dpflags = uap->dpflags; 3270 3271 /* 3272 * Follow the same path as normal open(2) 3273 * Look up the item if it exists, and acquire the vnode. 
3274 */ 3275 struct filedesc *fdp = p->p_fd; 3276 struct vnode_attr va; 3277 struct nameidata nd; 3278 int cmode; 3279 int error; 3280 3281 VATTR_INIT(&va); 3282 /* Mask off all but regular access permissions */ 3283 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 3284 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); 3285 3286 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, 3287 uap->path, vfs_context_current()); 3288 3289 /* 3290 * Initialize the extra fields in vnode_attr to pass down our 3291 * extra fields. 3292 * 1. target cprotect class. 3293 * 2. set a flag to mark it as requiring open-raw-encrypted semantics. 3294 */ 3295 if (flags & O_CREAT) { 3296 VATTR_SET(&va, va_dataprotect_class, class); 3297 } 3298 3299 if (dpflags & O_DP_GETRAWENCRYPTED) { 3300 if ( flags & (O_RDWR | O_WRONLY)) { 3301 /* Not allowed to write raw encrypted bytes */ 3302 return EINVAL; 3303 } 3304 VATTR_SET(&va, va_dataprotect_flags, VA_DP_RAWENCRYPTED); 3305 } 3306 3307 error = open1(vfs_context_current(), &nd, uap->flags, &va, 3308 fileproc_alloc_init, NULL, retval); 3309 3310 return error; 3311} 3312 3313 3314int 3315open(proc_t p, struct open_args *uap, int32_t *retval) 3316{ 3317 __pthread_testcancel(1); 3318 return(open_nocancel(p, (struct open_nocancel_args *)uap, retval)); 3319} 3320 3321int 3322open_nocancel(proc_t p, struct open_nocancel_args *uap, int32_t *retval) 3323{ 3324 struct filedesc *fdp = p->p_fd; 3325 struct vnode_attr va; 3326 struct nameidata nd; 3327 int cmode; 3328 3329 VATTR_INIT(&va); 3330 /* Mask off all but regular access permissions */ 3331 cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 3332 VATTR_SET(&va, va_mode, cmode & ACCESSPERMS); 3333 3334 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW | AUDITVNPATH1, UIO_USERSPACE, 3335 uap->path, vfs_context_current()); 3336 3337 return (open1(vfs_context_current(), &nd, uap->flags, &va, 3338 fileproc_alloc_init, NULL, retval)); 3339} 3340 3341 3342/* 3343 * Create a special 
 * file.
 */
static int mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap);

/*
 * mknod(2): create a device special file (or FIFO, or VBAD marker).
 * Requires superuser except for the FIFO case, which is delegated to
 * mkfifo1().
 */
int
mknod(proc_t p, struct mknod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;
	vnode_t vp, dvp;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	VATTR_SET(&va, va_rdev, uap->dev);

	/* If it's a mknod() of a FIFO, call mkfifo1() instead */
	if ((uap->mode & S_IFMT) == S_IFIFO)
		return(mkfifo1(ctx, uap->path, &va));

	AUDIT_ARG(mode, uap->mode);
	AUDIT_ARG(value32, uap->dev);

	/* only the superuser may create device nodes */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);
	NDINIT(&nd, CREATE, OP_MKNOD, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* the target must not already exist */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	switch (uap->mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		VATTR_SET(&va, va_type, VBAD);
		break;
	case S_IFCHR:
		VATTR_SET(&va, va_type, VCHR);
		break;
	case S_IFBLK:
		VATTR_SET(&va, va_type, VBLK);
		break;
	default:
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_create(ctx,
	    nd.ni_dvp, &nd.ni_cnd, &va);
	if (error)
		goto out;
#endif

	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out;

	if ((error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx)) != 0)
		goto out;

	if (vp) {
		int	update_flags = 0;

		// Make sure the name & parent pointers are hooked up
		if (vp->v_name == NULL)
			update_flags |= VNODE_UPDATE_NAME;
		if (vp->v_parent == NULLVP)
			update_flags |= VNODE_UPDATE_PARENT;

		if (update_flags)
			vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
		add_fsevent(FSE_CREATE_FILE, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	}

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return (error);
}

/*
 * Create a named pipe.
 *
 * Returns:	0			Success
 *		EEXIST
 *		namei:???
 *		vnode_authorize:???
 *		vn_create:???
 */
static int
mkfifo1(vfs_context_t ctx, user_addr_t upath, struct vnode_attr *vap)
{
	vnode_t	vp, dvp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, CREATE, OP_MKFIFO, LOCKPARENT | AUDITVNPATH1,
	    UIO_USERSPACE, upath, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* check that this is a new file and authorize addition */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}
	VATTR_SET(vap, va_type, VFIFO);

	if ((error = vn_authorize_create(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0)
		goto out;

	error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx);
out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	vnode_put(dvp);

	return error;
}


/*
 * mkfifo_extended: Create a named pipe; with extended argument list (including extended security (ACL)).
 *
 * Parameters:  p       Process requesting the open
 *              uap     User argument descriptor (see below)
 *              retval  (Ignored)
 *
 * Indirect:    uap->path       Path to fifo (same as 'mkfifo')
 *              uap->uid        UID to set
 *              uap->gid        GID to set
 *              uap->mode       File mode to set (same as 'mkfifo')
 *              uap->xsecurity  ACL to set, if creating
 *
 * Returns:     0       Success
 *              !0      errno value
 *
 * Notes:       The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:         We should enumerate the possible errno values here, and where
 *              in the code they originated.
 */
int
mkfifo_extended(proc_t p, struct mkfifo_extended_args *uap, __unused int32_t *retval)
{
	int ciferror;
	kauth_filesec_t xsecdst;
	struct vnode_attr va;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	xsecdst = KAUTH_FILESEC_NONE;
	/* copy in the caller-supplied filesec (ACL), if any */
	if (uap->xsecurity != USER_ADDR_NULL) {
		if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return ciferror;
	}

	VATTR_INIT(&va);
	/* creation mode: apply the process umask */
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);
	if (xsecdst != KAUTH_FILESEC_NONE)
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);

	ciferror = mkfifo1(vfs_context_current(), uap->path, &va);

	if (xsecdst != KAUTH_FILESEC_NONE)
		kauth_filesec_free(xsecdst);
	return ciferror;
}

/*
 * mkfifo(2): create a named pipe with umask-masked mode.
 */
/* ARGSUSED */
int
mkfifo(proc_t p, struct mkfifo_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, (uap->mode & ALLPERMS) & ~p->p_fd->fd_cmask);

	return(mkfifo1(vfs_context_current(), uap->path, &va));
}


/*
 * Local strrchr(): return a pointer to the last occurrence of ch in
 * the NUL-terminated string p, or NULL if it does not occur.
 */
static char *
my_strrchr(char *p, int ch)
{
	char *save;

	for (save = NULL;; ++p) {
		if (*p == ch)
			save = p;
		/* the terminating NUL ends the scan */
		if (!*p)
			return(save);
	}
	/* NOTREACHED */
}

extern int safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path);

/*
 * Build a path string for dvp (optionally with leafname appended) into
 * the caller's buffer of size _len, never failing outright: on
 * truncation or vn_getpath() error it falls back to progressively
 * shorter ancestor paths (ultimately "/") and sets *truncated_path.
 * Returns the length of the string placed in path, including the NUL.
 */
int
safe_getpath(struct vnode *dvp, char *leafname, char *path, int _len, int *truncated_path)
{
	int ret, len = _len;

	*truncated_path = 0;
	ret = vn_getpath(dvp, path, &len);
	if (ret == 0 && len < (MAXPATHLEN - 1)) {
		if (leafname) {
			/* overwrite the NUL with '/' and append the leaf name */
			path[len-1] = '/';
			len += strlcpy(&path[len], leafname, MAXPATHLEN-len) + 1;
			if (len > MAXPATHLEN) {
				char *ptr;

				// the string got truncated!
				*truncated_path = 1;
				ptr = my_strrchr(path, '/');
				if (ptr) {
					*ptr = '\0';   // chop off the string at the last directory component
				}
				len = strlen(path) + 1;
			}
		}
	} else if (ret == 0) {
		*truncated_path = 1;
	} else if (ret != 0) {
		struct vnode *mydvp=dvp;

		if (ret != ENOSPC) {
			printf("safe_getpath: failed to get the path for vp %p (%s) : err %d\n",
			    dvp, dvp->v_name ? dvp->v_name : "no-name", ret);
		}
		*truncated_path = 1;

		/* walk up the parent chain until some ancestor's path fits */
		do {
			if (mydvp->v_parent != NULL) {
				mydvp = mydvp->v_parent;
			} else if (mydvp->v_mount) {
				strlcpy(path, mydvp->v_mount->mnt_vfsstat.f_mntonname, _len);
				break;
			} else {
				// no parent and no mount point?  only thing is to punt and say "/" changed
				strlcpy(path, "/", _len);
				len = 2;
				mydvp = NULL;
			}

			if (mydvp == NULL) {
				break;
			}

			len = _len;
			ret = vn_getpath(mydvp, path, &len);
		} while (ret == ENOSPC);
	}

	return len;
}


/*
 * Make a hard file link.
 *
 * Returns:	0			Success
 *		EPERM
 *		EEXIST
 *		EXDEV
 *		namei:???
 *		vnode_authorize:???
 *		VNOP_LINK:???
 */
/* ARGSUSED */
int
link(__unused proc_t p, struct link_args *uap, __unused int32_t *retval)
{
	vnode_t vp, dvp, lvp;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	int error;
#if CONFIG_FSE
	fse_info finfo;
#endif
	int need_event, has_listeners;
	char *target_path = NULL;
	int truncated=0;

	vp = dvp = lvp = NULLVP;

	/* look up the object we are linking to */
	NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/*
	 * Normally, linking to directories is not supported.
	 * However, some file systems may have limited support.
	 */
	if (vp->v_type == VDIR) {
		if (!(vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSDIRLINKS)) {
			error = EPERM;   /* POSIX */
			goto out;
		}
		/* Linking to a directory requires ownership. */
		if (!kauth_cred_issuser(vfs_context_ucred(ctx))) {
			struct vnode_attr dva;

			VATTR_INIT(&dva);
			VATTR_WANTED(&dva, va_uid);
			if (vnode_getattr(vp, &dva, ctx) != 0 ||
			    !VATTR_IS_SUPPORTED(&dva, va_uid) ||
			    (dva.va_uid != kauth_cred_getuid(vfs_context_ucred(ctx)))) {
				error = EACCES;
				goto out;
			}
		}
	}

	/*
	 * Lookup the target node: reuse 'nd', switching it to a CREATE
	 * operation on uap->link with the parent held (LOCKPARENT).
	 */
#if CONFIG_TRIGGERS
	nd.ni_op = OP_LINK;
#endif
	nd.ni_cnd.cn_nameiop = CREATE;
	nd.ni_cnd.cn_flags = LOCKPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK;
	nd.ni_dirp = uap->link;
	error = namei(&nd);
	if (error != 0)
		goto out;
	dvp = nd.ni_dvp;
	lvp = nd.ni_vp;

#if CONFIG_MACF
	if ((error = mac_vnode_check_link(ctx, dvp, vp, &nd.ni_cnd)) != 0)
		goto out2;
#endif

	/* or to anything that kauth doesn't want us to (eg. immutable items) */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_LINKTARGET, ctx)) != 0)
		goto out2;

	/* target node must not exist */
	if (lvp != NULLVP) {
		error = EEXIST;
		goto out2;
	}
	/* cannot link across mountpoints */
	if (vnode_mount(vp) != vnode_mount(dvp)) {
		error = EXDEV;
		goto out2;
	}

	/* authorize creation of the target node */
	if ((error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0)
		goto out2;

	/* and finally make the link */
	error = VNOP_LINK(vp, dvp, &nd.ni_cnd, ctx);
	if (error)
		goto out2;

#if CONFIG_MACF
	(void)mac_vnode_notify_link(ctx, vp, dvp, &nd.ni_cnd);
#endif

#if CONFIG_FSE
	need_event = need_fsevent(FSE_CREATE_FILE, dvp);
#else
	need_event = 0;
#endif
	has_listeners = kauth_authorize_fileop_has_listeners();

	/* Post-link notifications: kauth fileop listeners and fsevents. */
	if (need_event || has_listeners) {
		char *link_to_path = NULL;
		int len, link_name_len;

		/* build the path to the new link file */
		GET_PATH(target_path);
		if (target_path == NULL) {
			error = ENOMEM;
			goto out2;
		}

		len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, target_path, MAXPATHLEN, &truncated);

		if (has_listeners) {
			/* build the path to file we are linking to */
			GET_PATH(link_to_path);
			if (link_to_path == NULL) {
				error = ENOMEM;
				goto out2;
			}

			link_name_len = MAXPATHLEN;
			vn_getpath(vp, link_to_path, &link_name_len);

			/*
			 * Call out to allow 3rd party notification of the link.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_LINK,
					       (uintptr_t)link_to_path, (uintptr_t)target_path);
			if (link_to_path != NULL) {
				RELEASE_PATH(link_to_path);
			}
		}
#if CONFIG_FSE
		if (need_event) {
			/* construct fsevent */
			if (get_fse_info(vp, &finfo, ctx) == 0) {
				if (truncated) {
					finfo.mode |= FSE_TRUNCATED_PATH;
				}

				// build the path to the destination of the link
				add_fsevent(FSE_CREATE_FILE, ctx,
					    FSE_ARG_STRING, len, target_path,
					    FSE_ARG_FINFO, &finfo,
					    FSE_ARG_DONE);
			}
			/* the target's link count changed; report a stat change on it */
			if (vp->v_parent) {
				add_fsevent(FSE_STAT_CHANGED, ctx,
					    FSE_ARG_VNODE, vp->v_parent,
					    FSE_ARG_DONE);
			}
		}
#endif
	}
out2:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);
	if (target_path != NULL) {
		RELEASE_PATH(target_path);
	}
out:
	if (lvp)
		vnode_put(lvp);
	if (dvp)
		vnode_put(dvp);
	vnode_put(vp);
	return (error);
}

/*
 * Make a symbolic link.
 *
 * We could add support for ACLs here too...
3827 */ 3828/* ARGSUSED */ 3829int 3830symlink(proc_t p, struct symlink_args *uap, __unused int32_t *retval) 3831{ 3832 struct vnode_attr va; 3833 char *path; 3834 int error; 3835 struct nameidata nd; 3836 vfs_context_t ctx = vfs_context_current(); 3837 vnode_t vp, dvp; 3838 size_t dummy=0; 3839 3840 MALLOC_ZONE(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK); 3841 error = copyinstr(uap->path, path, MAXPATHLEN, &dummy); 3842 if (error) 3843 goto out; 3844 AUDIT_ARG(text, path); /* This is the link string */ 3845 3846 NDINIT(&nd, CREATE, OP_SYMLINK, LOCKPARENT | AUDITVNPATH1, 3847 UIO_USERSPACE, uap->link, ctx); 3848 error = namei(&nd); 3849 if (error) 3850 goto out; 3851 dvp = nd.ni_dvp; 3852 vp = nd.ni_vp; 3853 3854 VATTR_INIT(&va); 3855 VATTR_SET(&va, va_type, VLNK); 3856 VATTR_SET(&va, va_mode, ACCESSPERMS & ~p->p_fd->fd_cmask); 3857#if CONFIG_MACF 3858 error = mac_vnode_check_create(ctx, 3859 dvp, &nd.ni_cnd, &va); 3860#endif 3861 if (error != 0) { 3862 goto skipit; 3863 } 3864 3865 if (vp != NULL) { 3866 error = EEXIST; 3867 goto skipit; 3868 } 3869 3870 /* authorize */ 3871 if (error == 0) 3872 error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx); 3873 /* get default ownership, etc. 
*/ 3874 if (error == 0) 3875 error = vnode_authattr_new(dvp, &va, 0, ctx); 3876 if (error == 0) 3877 error = VNOP_SYMLINK(dvp, &vp, &nd.ni_cnd, &va, path, ctx); 3878 3879#if CONFIG_MACF 3880 if (error == 0) 3881 error = vnode_label(vnode_mount(vp), dvp, vp, &nd.ni_cnd, VNODE_LABEL_CREATE, ctx); 3882#endif 3883 3884 /* do fallback attribute handling */ 3885 if (error == 0) 3886 error = vnode_setattr_fallback(vp, &va, ctx); 3887 3888 if (error == 0) { 3889 int update_flags = 0; 3890 3891 if (vp == NULL) { 3892 nd.ni_cnd.cn_nameiop = LOOKUP; 3893#if CONFIG_TRIGGERS 3894 nd.ni_op = OP_LOOKUP; 3895#endif 3896 nd.ni_cnd.cn_flags = 0; 3897 error = namei(&nd); 3898 vp = nd.ni_vp; 3899 3900 if (vp == NULL) 3901 goto skipit; 3902 } 3903 3904#if 0 /* XXX - kauth_todo - is KAUTH_FILEOP_SYMLINK needed? */ 3905 /* call out to allow 3rd party notification of rename. 3906 * Ignore result of kauth_authorize_fileop call. 3907 */ 3908 if (kauth_authorize_fileop_has_listeners() && 3909 namei(&nd) == 0) { 3910 char *new_link_path = NULL; 3911 int len; 3912 3913 /* build the path to the new link file */ 3914 new_link_path = get_pathbuff(); 3915 len = MAXPATHLEN; 3916 vn_getpath(dvp, new_link_path, &len); 3917 if ((len + 1 + nd.ni_cnd.cn_namelen + 1) < MAXPATHLEN) { 3918 new_link_path[len - 1] = '/'; 3919 strlcpy(&new_link_path[len], nd.ni_cnd.cn_nameptr, MAXPATHLEN-len); 3920 } 3921 3922 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_SYMLINK, 3923 (uintptr_t)path, (uintptr_t)new_link_path); 3924 if (new_link_path != NULL) 3925 release_pathbuff(new_link_path); 3926 } 3927#endif 3928 // Make sure the name & parent pointers are hooked up 3929 if (vp->v_name == NULL) 3930 update_flags |= VNODE_UPDATE_NAME; 3931 if (vp->v_parent == NULLVP) 3932 update_flags |= VNODE_UPDATE_PARENT; 3933 3934 if (update_flags) 3935 vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags); 3936 3937#if CONFIG_FSE 3938 add_fsevent(FSE_CREATE_FILE, 
ctx, 3939 FSE_ARG_VNODE, vp, 3940 FSE_ARG_DONE); 3941#endif 3942 } 3943 3944skipit: 3945 /* 3946 * nameidone has to happen before we vnode_put(dvp) 3947 * since it may need to release the fs_nodelock on the dvp 3948 */ 3949 nameidone(&nd); 3950 3951 if (vp) 3952 vnode_put(vp); 3953 vnode_put(dvp); 3954out: 3955 FREE_ZONE(path, MAXPATHLEN, M_NAMEI); 3956 3957 return (error); 3958} 3959 3960/* 3961 * Delete a whiteout from the filesystem. 3962 * XXX authorization not implmented for whiteouts 3963 */ 3964int 3965undelete(__unused proc_t p, struct undelete_args *uap, __unused int32_t *retval) 3966{ 3967 int error; 3968 struct nameidata nd; 3969 vfs_context_t ctx = vfs_context_current(); 3970 vnode_t vp, dvp; 3971 3972 NDINIT(&nd, DELETE, OP_UNLINK, LOCKPARENT | DOWHITEOUT | AUDITVNPATH1, 3973 UIO_USERSPACE, uap->path, ctx); 3974 error = namei(&nd); 3975 if (error) 3976 return (error); 3977 dvp = nd.ni_dvp; 3978 vp = nd.ni_vp; 3979 3980 if (vp == NULLVP && (nd.ni_cnd.cn_flags & ISWHITEOUT)) { 3981 error = VNOP_WHITEOUT(dvp, &nd.ni_cnd, DELETE, ctx); 3982 } else 3983 error = EEXIST; 3984 3985 /* 3986 * nameidone has to happen before we vnode_put(dvp) 3987 * since it may need to release the fs_nodelock on the dvp 3988 */ 3989 nameidone(&nd); 3990 3991 if (vp) 3992 vnode_put(vp); 3993 vnode_put(dvp); 3994 3995 return (error); 3996} 3997 3998 3999/* 4000 * Delete a name from the filesystem. 
4001 */ 4002/* ARGSUSED */ 4003int 4004unlink1(vfs_context_t ctx, struct nameidata *ndp, int unlink_flags) 4005{ 4006 vnode_t vp, dvp; 4007 int error; 4008 struct componentname *cnp; 4009 char *path = NULL; 4010 int len=0; 4011#if CONFIG_FSE 4012 fse_info finfo; 4013 struct vnode_attr va; 4014#endif 4015 int flags = 0; 4016 int need_event = 0; 4017 int has_listeners = 0; 4018 int truncated_path=0; 4019 int batched; 4020 struct vnode_attr *vap = NULL; 4021 4022#if NAMEDRSRCFORK 4023 /* unlink or delete is allowed on rsrc forks and named streams */ 4024 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; 4025#endif 4026 4027 ndp->ni_cnd.cn_flags |= LOCKPARENT; 4028 ndp->ni_flag |= NAMEI_COMPOUNDREMOVE; 4029 cnp = &ndp->ni_cnd; 4030 4031lookup_continue: 4032 error = namei(ndp); 4033 if (error) 4034 return (error); 4035 4036 dvp = ndp->ni_dvp; 4037 vp = ndp->ni_vp; 4038 4039 4040 /* With Carbon delete semantics, busy files cannot be deleted */ 4041 if (unlink_flags & VNODE_REMOVE_NODELETEBUSY) { 4042 flags |= VNODE_REMOVE_NODELETEBUSY; 4043 } 4044 4045 /* Skip any potential upcalls if told to. */ 4046 if (unlink_flags & VNODE_REMOVE_SKIP_NAMESPACE_EVENT) { 4047 flags |= VNODE_REMOVE_SKIP_NAMESPACE_EVENT; 4048 } 4049 4050 if (vp) { 4051 batched = vnode_compound_remove_available(vp); 4052 /* 4053 * The root of a mounted filesystem cannot be deleted. 
4054 */ 4055 if (vp->v_flag & VROOT) { 4056 error = EBUSY; 4057 } 4058 4059 if (!batched) { 4060 error = vn_authorize_unlink(dvp, vp, cnp, ctx, NULL); 4061 if (error) { 4062 goto out; 4063 } 4064 } 4065 } else { 4066 batched = 1; 4067 4068 if (!vnode_compound_remove_available(dvp)) { 4069 panic("No vp, but no compound remove?"); 4070 } 4071 } 4072 4073#if CONFIG_FSE 4074 need_event = need_fsevent(FSE_DELETE, dvp); 4075 if (need_event) { 4076 if (!batched) { 4077 if ((vp->v_flag & VISHARDLINK) == 0) { 4078 /* XXX need to get these data in batched VNOP */ 4079 get_fse_info(vp, &finfo, ctx); 4080 } 4081 } else { 4082 error = vfs_get_notify_attributes(&va); 4083 if (error) { 4084 goto out; 4085 } 4086 4087 vap = &va; 4088 } 4089 } 4090#endif 4091 has_listeners = kauth_authorize_fileop_has_listeners(); 4092 if (need_event || has_listeners) { 4093 if (path == NULL) { 4094 GET_PATH(path); 4095 if (path == NULL) { 4096 error = ENOMEM; 4097 goto out; 4098 } 4099 } 4100 len = safe_getpath(dvp, ndp->ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated_path); 4101 } 4102 4103#if NAMEDRSRCFORK 4104 if (ndp->ni_cnd.cn_flags & CN_WANTSRSRCFORK) 4105 error = vnode_removenamedstream(dvp, vp, XATTR_RESOURCEFORK_NAME, 0, ctx); 4106 else 4107#endif 4108 { 4109 error = vn_remove(dvp, &ndp->ni_vp, ndp, flags, vap, ctx); 4110 vp = ndp->ni_vp; 4111 if (error == EKEEPLOOKING) { 4112 if (!batched) { 4113 panic("EKEEPLOOKING, but not a filesystem that supports compound VNOPs?"); 4114 } 4115 4116 if ((ndp->ni_flag & NAMEI_CONTLOOKUP) == 0) { 4117 panic("EKEEPLOOKING, but continue flag not set?"); 4118 } 4119 4120 if (vnode_isdir(vp)) { 4121 error = EISDIR; 4122 goto out; 4123 } 4124 goto lookup_continue; 4125 } 4126 } 4127 4128 /* 4129 * Call out to allow 3rd party notification of delete. 4130 * Ignore result of kauth_authorize_fileop call. 
4131 */ 4132 if (!error) { 4133 if (has_listeners) { 4134 kauth_authorize_fileop(vfs_context_ucred(ctx), 4135 KAUTH_FILEOP_DELETE, 4136 (uintptr_t)vp, 4137 (uintptr_t)path); 4138 } 4139 4140 if (vp->v_flag & VISHARDLINK) { 4141 // 4142 // if a hardlink gets deleted we want to blow away the 4143 // v_parent link because the path that got us to this 4144 // instance of the link is no longer valid. this will 4145 // force the next call to get the path to ask the file 4146 // system instead of just following the v_parent link. 4147 // 4148 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT); 4149 } 4150 4151#if CONFIG_FSE 4152 if (need_event) { 4153 if (vp->v_flag & VISHARDLINK) { 4154 get_fse_info(vp, &finfo, ctx); 4155 } else if (vap) { 4156 vnode_get_fse_info_from_vap(vp, &finfo, vap); 4157 } 4158 if (truncated_path) { 4159 finfo.mode |= FSE_TRUNCATED_PATH; 4160 } 4161 add_fsevent(FSE_DELETE, ctx, 4162 FSE_ARG_STRING, len, path, 4163 FSE_ARG_FINFO, &finfo, 4164 FSE_ARG_DONE); 4165 } 4166#endif 4167 } 4168 4169out: 4170 if (path != NULL) 4171 RELEASE_PATH(path); 4172 4173#if NAMEDRSRCFORK 4174 /* recycle the deleted rsrc fork vnode to force a reclaim, which 4175 * will cause its shadow file to go away if necessary. 4176 */ 4177 if (vp && (vnode_isnamedstream(vp)) && 4178 (vp->v_parent != NULLVP) && 4179 vnode_isshadow(vp)) { 4180 vnode_recycle(vp); 4181 } 4182#endif 4183 /* 4184 * nameidone has to happen before we vnode_put(dvp) 4185 * since it may need to release the fs_nodelock on the dvp 4186 */ 4187 nameidone(ndp); 4188 vnode_put(dvp); 4189 if (vp) { 4190 vnode_put(vp); 4191 } 4192 return (error); 4193} 4194 4195/* 4196 * Delete a name from the filesystem using POSIX semantics. 
 */
int
unlink(__unused proc_t p, struct unlink_args *uap, __unused int32_t *retval)
{
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	return unlink1(ctx, &nd, 0);
}

/*
 * Delete a name from the filesystem using Carbon semantics.
 * (Differs from unlink(2) in that busy files may not be deleted.)
 */
int
delete(__unused proc_t p, struct delete_args *uap, __unused int32_t *retval)
{
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	return unlink1(ctx, &nd, VNODE_REMOVE_NODELETEBUSY);
}

/*
 * Reposition read/write file offset.
 */
int
lseek(proc_t p, struct lseek_args *uap, off_t *retval)
{
	struct fileproc *fp;
	vnode_t vp;
	struct vfs_context *ctx;
	off_t offset = uap->offset, file_size;
	int error;

	/* fp_getfvp fails with ENOTSUP for non-vnode files (eg. pipes) */
	if ( (error = fp_getfvp(p,uap->fd, &fp, &vp)) ) {
		if (error == ENOTSUP)
			return (ESPIPE);
		return (error);
	}
	if (vnode_isfifo(vp)) {
		file_drop(uap->fd);
		return(ESPIPE);
	}


	ctx = vfs_context_current();
#if CONFIG_MACF
	/* SEEK_CUR with offset 0 is a pure query; check "get" not "change" */
	if (uap->whence == L_INCR && uap->offset == 0)
		error = mac_file_check_get_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	else
		error = mac_file_check_change_offset(vfs_context_ucred(ctx),
		    fp->f_fglob);
	if (error) {
		file_drop(uap->fd);
		return (error);
	}
#endif
	if ( (error = vnode_getwithref(vp)) ) {
		file_drop(uap->fd);
		return(error);
	}

	/* compute the absolute target offset according to 'whence' */
	switch (uap->whence) {
	case L_INCR:
		offset += fp->f_fglob->fg_offset;
		break;
	case L_XTND:
		if ((error = vnode_size(vp, &file_size, ctx)) != 0)
			break;
		offset += file_size;
		break;
	case L_SET:
		break;
	default:
		error = EINVAL;
	}
	if (error == 0) {
		if (uap->offset > 0 && offset < 0) {
			/* Incremented/relative move past max size */
			error = EOVERFLOW;
		} else {
			/*
			 * Allow negative offsets on character devices, per
			 * POSIX 1003.1-2001.  Most likely for writing disk
			 * labels.
			 */
			if (offset < 0 && vp->v_type != VCHR) {
				/* Decremented/relative move before start */
				error = EINVAL;
			} else {
				/* Success */
				fp->f_fglob->fg_offset = offset;
				*retval = fp->f_fglob->fg_offset;
			}
		}
	}

	/*
	 * An lseek can affect whether data is "available to read."  Use
	 * hint of NOTE_NONE so no EVFILT_VNODE events fire
	 */
	post_event_if_success(vp, error, NOTE_NONE);
	(void)vnode_put(vp);
	file_drop(uap->fd);
	return (error);
}


/*
 * Check access permissions.
 *
 * Maps the access(2)-style 'uflags' onto a kauth action and authorizes it
 * against 'vp' (with parent 'dvp' available for delete checks).
 *
 * Returns:	0			Success
 *	vnode_authorize:???
 */
static int
access1(vnode_t vp, vnode_t dvp, int uflags, vfs_context_t ctx)
{
	kauth_action_t action;
	int error;

	/*
	 * If just the regular access bits, convert them to something
	 * that vnode_authorize will understand.
	 */
	if (!(uflags & _ACCESS_EXTENDED_MASK)) {
		action = 0;
		if (uflags & R_OK)
			action |= KAUTH_VNODE_READ_DATA;	/* aka KAUTH_VNODE_LIST_DIRECTORY */
		if (uflags & W_OK) {
			if (vnode_isdir(vp)) {
				action |= KAUTH_VNODE_ADD_FILE |
				    KAUTH_VNODE_ADD_SUBDIRECTORY;
				/* might want delete rights here too */
			} else {
				action |= KAUTH_VNODE_WRITE_DATA;
			}
		}
		if (uflags & X_OK) {
			if (vnode_isdir(vp)) {
				action |= KAUTH_VNODE_SEARCH;
			} else {
				action |= KAUTH_VNODE_EXECUTE;
			}
		}
	} else {
		/* take advantage of definition of uflags */
		action = uflags >> 8;
	}

#if CONFIG_MACF
	error = mac_vnode_check_access(ctx, vp, uflags);
	if (error)
		return (error);
#endif /* MAC */

	/* action == 0 means only check for existence */
	if (action != 0) {
		error = vnode_authorize(vp, dvp, action | KAUTH_VNODE_ACCESS, ctx);
	} else {
		error = 0;
	}

	return(error);
}



/*
 * access_extended: Check access permissions in bulk.
 *
 * Description:	uap->entries		Pointer to an array of accessx
 *					descriptor structs, plus one or
 *					more NULL terminated strings (see
 *					"Notes" section below).
 *		uap->size		Size of the area pointed to by
 *					uap->entries.
 *		uap->results		Pointer to the results array.
 *
 * Returns:	0			Success
 *		ENOMEM			Insufficient memory
 *		EINVAL			Invalid arguments
 *	namei:EFAULT			Bad address
 *	namei:ENAMETOOLONG		Filename too long
 *	namei:ENOENT			No such file or directory
 *	namei:ELOOP			Too many levels of symbolic links
 *	namei:EBADF			Bad file descriptor
 *	namei:ENOTDIR			Not a directory
 *	namei:???
 *	access1:
 *
 * Implicit returns:
 *		uap->results		Array contents modified
 *
 * Notes:	The uap->entries are structured as an arbitrary length array
 *		of accessx descriptors, followed by one or more NULL terminated
 *		strings
 *
 *			struct accessx_descriptor[0]
 *			...
 *			struct accessx_descriptor[n]
 *			char name_data[0];
 *
 *		We determine the entry count by walking the buffer containing
 *		the uap->entries argument descriptor.  For each descriptor we
 *		see, the valid values for the offset ad_name_offset will be
 *		in the byte range:
 *
 *			[ uap->entries + sizeof(struct accessx_descriptor) ]
 *						to
 *				[ uap->entries + uap->size - 2 ]
 *
 *		since we must have at least one string, and the string must
 *		be at least one character plus the NULL terminator in length.
 *
 * XXX:	Need to support the check-as uid argument
 */
int
access_extended(__unused proc_t p, struct access_extended_args *uap, __unused int32_t *retval)
{
	struct accessx_descriptor *input = NULL;
	errno_t *result = NULL;
	errno_t error = 0;
	int wantdelete = 0;
	unsigned int desc_max, desc_actual, i, j;
	struct vfs_context context;
	struct nameidata nd;
	int niopts;
	vnode_t vp = NULL;
	vnode_t dvp = NULL;
#define ACCESSX_MAX_DESCR_ON_STACK 10
	struct accessx_descriptor stack_input[ACCESSX_MAX_DESCR_ON_STACK];

	context.vc_ucred = NULL;

	/*
	 * Validate parameters; if valid, copy the descriptor array and string
	 * arguments into local memory.  Before proceeding, the following
	 * conditions must have been met:
	 *
	 * o	The total size is not permitted to exceed ACCESSX_MAX_TABLESIZE
	 * o	There must be sufficient room in the request for at least one
	 *	descriptor and a one byte NUL terminated string.
	 * o	The allocation of local storage must not fail.
	 */
	if (uap->size > ACCESSX_MAX_TABLESIZE)
		return(ENOMEM);
	if (uap->size < (sizeof(struct accessx_descriptor) + 2))
		return(EINVAL);
	/* small requests fit on the stack; larger ones are heap-allocated */
	if (uap->size <= sizeof (stack_input)) {
		input = stack_input;
	} else {
		MALLOC(input, struct accessx_descriptor *, uap->size, M_TEMP, M_WAITOK);
		if (input == NULL) {
			error = ENOMEM;
			goto out;
		}
	}
	error = copyin(uap->entries, input, uap->size);
	if (error)
		goto out;

	AUDIT_ARG(opaque, input, uap->size);

	/*
	 * Force NUL termination of the copyin buffer to avoid namei() running
	 * off the end.  If the caller passes us bogus data, they may get a
	 * bogus result.
	 */
	((char *)input)[uap->size - 1] = 0;

	/*
	 * Access is defined as checking against the process' real identity,
	 * even if operations are checking the effective identity.  This
	 * requires that we use a local vfs context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	/*
	 * Find out how many entries we have, so we can allocate the result
	 * array by walking the list and adjusting the count downward by the
	 * earliest string offset we see.
	 */
	desc_max = (uap->size - 2) / sizeof(struct accessx_descriptor);
	desc_actual = desc_max;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * Take the offset to the name string for this entry and
		 * convert to an input array index, which would be one off
		 * the end of the array if this entry was the lowest-addressed
		 * name string.
		 */
		j = input[i].ad_name_offset / sizeof(struct accessx_descriptor);

		/*
		 * An offset greater than the max allowable offset is an error.
		 * It is also an error for any valid entry to point
		 * to a location prior to the end of the current entry, if
		 * it's not a reference to the string of the previous entry.
		 */
		if (j > desc_max || (j != 0 && j <= i)) {
			error = EINVAL;
			goto out;
		}

		/*
		 * An offset of 0 means use the previous descriptor's offset;
		 * this is used to chain multiple requests for the same file
		 * to avoid multiple lookups.
		 */
		if (j == 0) {
			/* This is not valid for the first entry */
			if (i == 0) {
				error = EINVAL;
				goto out;
			}
			continue;
		}

		/*
		 * If the offset of the string for this descriptor is before
		 * what we believe is the current actual last descriptor,
		 * then we need to adjust our estimate downward; this permits
		 * the string table following the last descriptor to be out
		 * of order relative to the descriptor list.
		 */
		if (j < desc_actual)
			desc_actual = j;
	}

	/*
	 * We limit the actual number of descriptors we are willing to process
	 * to a hard maximum of ACCESSX_MAX_DESCRIPTORS.  If the number being
	 * requested does not exceed this limit,
	 */
	if (desc_actual > ACCESSX_MAX_DESCRIPTORS) {
		error = ENOMEM;
		goto out;
	}
	MALLOC(result, errno_t *, desc_actual * sizeof(errno_t), M_TEMP, M_WAITOK);
	if (result == NULL) {
		error = ENOMEM;
		goto out;
	}

	/*
	 * Do the work by iterating over the descriptor entries we know to
	 * at least appear to contain valid data.
	 */
	error = 0;
	for (i = 0; i < desc_actual; i++) {
		/*
		 * If the ad_name_offset is 0, then we use the previous
		 * results to make the check; otherwise, we are looking up
		 * a new file name.
		 */
		if (input[i].ad_name_offset != 0) {
			/* discard old vnodes */
			if (vp) {
				vnode_put(vp);
				vp = NULL;
			}
			if (dvp) {
				vnode_put(dvp);
				dvp = NULL;
			}

			/*
			 * Scan forward in the descriptor list to see if we
			 * need the parent vnode.  We will need it if we are
			 * deleting, since we must have rights  to remove
			 * entries in the parent directory, as well as the
			 * rights to delete the object itself.
			 */
			wantdelete = input[i].ad_flags & _DELETE_OK;
			for (j = i + 1; (j < desc_actual) && (input[j].ad_name_offset == 0); j++)
				if (input[j].ad_flags & _DELETE_OK)
					wantdelete = 1;

			niopts = FOLLOW | AUDITVNPATH1;

			/* need parent for vnode_authorize for deletion test */
			if (wantdelete)
				niopts |= WANTPARENT;

			/* do the lookup */
			NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_SYSSPACE,
			       CAST_USER_ADDR_T(((const char *)input) + input[i].ad_name_offset),
			       &context);
			error = namei(&nd);
			if (!error) {
				vp = nd.ni_vp;
				if (wantdelete)
					dvp = nd.ni_dvp;
			}
			nameidone(&nd);
		}

		/*
		 * Handle lookup errors.  "Expected" lookup failures are
		 * recorded per-entry; anything else aborts the whole call.
		 */
		switch(error) {
		case ENOENT:
		case EACCES:
		case EPERM:
		case ENOTDIR:
			result[i] = error;
			break;
		case 0:
			/* run this access check */
			result[i] = access1(vp, dvp, input[i].ad_flags, &context);
			break;
		default:
			/* fatal lookup error */

			goto out;
		}
	}

	AUDIT_ARG(data, result, sizeof(errno_t), desc_actual);

	/* copy out results */
	error = copyout(result, uap->results, desc_actual * sizeof(errno_t));

out:
	if (input && input != stack_input)
		FREE(input, M_TEMP);
	if (result)
		FREE(result, M_TEMP);
	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);
	if (IS_VALID_CRED(context.vc_ucred))
		kauth_cred_unref(&context.vc_ucred);
	return(error);
}


/*
 * Returns:	0			Success
 *	namei:EFAULT			Bad address
 *	namei:ENAMETOOLONG		Filename too long
 *	namei:ENOENT			No such file or directory
 *	namei:ELOOP			Too many levels of symbolic links
 *	namei:EBADF			Bad file descriptor
 *	namei:ENOTDIR			Not a directory
 *	namei:???
 *	access1:
 */
int
access(__unused proc_t p, struct access_args *uap, __unused int32_t *retval)
{
	int error;
	struct nameidata nd;
	int niopts;
	struct vfs_context context;
#if NAMEDRSRCFORK
	int is_namedstream = 0;
#endif

	/*
	 * Access is defined as checking against the process'
	 * real identity, even if operations are checking the
	 * effective identity.  So we need to tweak the credential
	 * in the context.
	 */
	context.vc_ucred = kauth_cred_copy_real(kauth_cred_get());
	context.vc_thread = current_thread();

	niopts = FOLLOW | AUDITVNPATH1;
	/* need parent for vnode_authorize for deletion test */
	if (uap->flags & _DELETE_OK)
		niopts |= WANTPARENT;
	NDINIT(&nd, LOOKUP, OP_ACCESS, niopts, UIO_USERSPACE,
	       uap->path, &context);

#if NAMEDRSRCFORK
	/* access(F_OK) calls are allowed for resource forks. */
	if (uap->flags == F_OK)
		nd.ni_cnd.cn_flags |= CN_ALLOWRSRCFORK;
#endif
	error = namei(&nd);
	if (error)
		goto out;

#if NAMEDRSRCFORK
	/* Grab reference on the shadow stream file vnode to
	 * force an inactive on release which will mark it
	 * for recycle.
	 */
	if (vnode_isnamedstream(nd.ni_vp) &&
	    (nd.ni_vp->v_parent != NULLVP) &&
	    vnode_isshadow(nd.ni_vp)) {
		is_namedstream = 1;
		vnode_ref(nd.ni_vp);
	}
#endif

	error = access1(nd.ni_vp, nd.ni_dvp, uap->flags, &context);

#if NAMEDRSRCFORK
	if (is_namedstream) {
		vnode_rele(nd.ni_vp);
	}
#endif

	vnode_put(nd.ni_vp);
	/* the parent was only held when the delete test required it */
	if (uap->flags & _DELETE_OK)
		vnode_put(nd.ni_dvp);
	nameidone(&nd);

out:
	kauth_cred_unref(&context.vc_ucred);
	return(error);
}


/*
 * Returns:	0			Success
 *		EFAULT
 *	copyout:EFAULT
 *	namei:???
 *	vn_stat:???
4726 */ 4727static int 4728stat2(vfs_context_t ctx, struct nameidata *ndp, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) 4729{ 4730 union { 4731 struct stat sb; 4732 struct stat64 sb64; 4733 } source; 4734 union { 4735 struct user64_stat user64_sb; 4736 struct user32_stat user32_sb; 4737 struct user64_stat64 user64_sb64; 4738 struct user32_stat64 user32_sb64; 4739 } dest; 4740 caddr_t sbp; 4741 int error, my_size; 4742 kauth_filesec_t fsec; 4743 size_t xsecurity_bufsize; 4744 void * statptr; 4745 4746#if NAMEDRSRCFORK 4747 int is_namedstream = 0; 4748 /* stat calls are allowed for resource forks. */ 4749 ndp->ni_cnd.cn_flags |= CN_ALLOWRSRCFORK; 4750#endif 4751 error = namei(ndp); 4752 if (error) 4753 return (error); 4754 fsec = KAUTH_FILESEC_NONE; 4755 4756 statptr = (void *)&source; 4757 4758#if NAMEDRSRCFORK 4759 /* Grab reference on the shadow stream file vnode to 4760 * force an inactive on release which will mark it 4761 * for recycle. 4762 */ 4763 if (vnode_isnamedstream(ndp->ni_vp) && 4764 (ndp->ni_vp->v_parent != NULLVP) && 4765 vnode_isshadow(ndp->ni_vp)) { 4766 is_namedstream = 1; 4767 vnode_ref(ndp->ni_vp); 4768 } 4769#endif 4770 4771 error = vn_stat(ndp->ni_vp, statptr, (xsecurity != USER_ADDR_NULL ? 
&fsec : NULL), isstat64, ctx); 4772 4773#if NAMEDRSRCFORK 4774 if (is_namedstream) { 4775 vnode_rele(ndp->ni_vp); 4776 } 4777#endif 4778 vnode_put(ndp->ni_vp); 4779 nameidone(ndp); 4780 4781 if (error) 4782 return (error); 4783 /* Zap spare fields */ 4784 if (isstat64 != 0) { 4785 source.sb64.st_lspare = 0; 4786 source.sb64.st_qspare[0] = 0LL; 4787 source.sb64.st_qspare[1] = 0LL; 4788 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { 4789 munge_user64_stat64(&source.sb64, &dest.user64_sb64); 4790 my_size = sizeof(dest.user64_sb64); 4791 sbp = (caddr_t)&dest.user64_sb64; 4792 } else { 4793 munge_user32_stat64(&source.sb64, &dest.user32_sb64); 4794 my_size = sizeof(dest.user32_sb64); 4795 sbp = (caddr_t)&dest.user32_sb64; 4796 } 4797 /* 4798 * Check if we raced (post lookup) against the last unlink of a file. 4799 */ 4800 if ((source.sb64.st_nlink == 0) && S_ISREG(source.sb64.st_mode)) { 4801 source.sb64.st_nlink = 1; 4802 } 4803 } else { 4804 source.sb.st_lspare = 0; 4805 source.sb.st_qspare[0] = 0LL; 4806 source.sb.st_qspare[1] = 0LL; 4807 if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) { 4808 munge_user64_stat(&source.sb, &dest.user64_sb); 4809 my_size = sizeof(dest.user64_sb); 4810 sbp = (caddr_t)&dest.user64_sb; 4811 } else { 4812 munge_user32_stat(&source.sb, &dest.user32_sb); 4813 my_size = sizeof(dest.user32_sb); 4814 sbp = (caddr_t)&dest.user32_sb; 4815 } 4816 4817 /* 4818 * Check if we raced (post lookup) against the last unlink of a file. 4819 */ 4820 if ((source.sb.st_nlink == 0) && S_ISREG(source.sb.st_mode)) { 4821 source.sb.st_nlink = 1; 4822 } 4823 } 4824 if ((error = copyout(sbp, ub, my_size)) != 0) 4825 goto out; 4826 4827 /* caller wants extended security information? */ 4828 if (xsecurity != USER_ADDR_NULL) { 4829 4830 /* did we get any? 
*/ 4831 if (fsec == KAUTH_FILESEC_NONE) { 4832 if (susize(xsecurity_size, 0) != 0) { 4833 error = EFAULT; 4834 goto out; 4835 } 4836 } else { 4837 /* find the user buffer size */ 4838 xsecurity_bufsize = fusize(xsecurity_size); 4839 4840 /* copy out the actual data size */ 4841 if (susize(xsecurity_size, KAUTH_FILESEC_COPYSIZE(fsec)) != 0) { 4842 error = EFAULT; 4843 goto out; 4844 } 4845 4846 /* if the caller supplied enough room, copy out to it */ 4847 if (xsecurity_bufsize >= KAUTH_FILESEC_COPYSIZE(fsec)) 4848 error = copyout(fsec, xsecurity, KAUTH_FILESEC_COPYSIZE(fsec)); 4849 } 4850 } 4851out: 4852 if (fsec != KAUTH_FILESEC_NONE) 4853 kauth_filesec_free(fsec); 4854 return (error); 4855} 4856 4857/* 4858 * Get file status; this version follows links. 4859 * 4860 * Returns: 0 Success 4861 * stat2:??? [see stat2() in this file] 4862 */ 4863static int 4864stat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) 4865{ 4866 struct nameidata nd; 4867 vfs_context_t ctx = vfs_context_current(); 4868 4869 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | FOLLOW | AUDITVNPATH1, 4870 UIO_USERSPACE, path, ctx); 4871 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); 4872} 4873 4874/* 4875 * stat_extended: Get file status; with extended security (ACL). 4876 * 4877 * Parameters: p (ignored) 4878 * uap User argument descriptor (see below) 4879 * retval (ignored) 4880 * 4881 * Indirect: uap->path Path of file to get status from 4882 * uap->ub User buffer (holds file status info) 4883 * uap->xsecurity ACL to get (extended security) 4884 * uap->xsecurity_size Size of ACL 4885 * 4886 * Returns: 0 Success 4887 * !0 errno value 4888 * 4889 */ 4890int 4891stat_extended(__unused proc_t p, struct stat_extended_args *uap, __unused int32_t *retval) 4892{ 4893 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); 4894} 4895 4896/* 4897 * Returns: 0 Success 4898 * stat1:??? 
[see stat1() in this file] 4899 */ 4900int 4901stat(__unused proc_t p, struct stat_args *uap, __unused int32_t *retval) 4902{ 4903 return(stat1(uap->path, uap->ub, 0, 0, 0)); 4904} 4905 4906int 4907stat64(__unused proc_t p, struct stat64_args *uap, __unused int32_t *retval) 4908{ 4909 return(stat1(uap->path, uap->ub, 0, 0, 1)); 4910} 4911 4912/* 4913 * stat64_extended: Get file status; can handle large inode numbers; with extended security (ACL). 4914 * 4915 * Parameters: p (ignored) 4916 * uap User argument descriptor (see below) 4917 * retval (ignored) 4918 * 4919 * Indirect: uap->path Path of file to get status from 4920 * uap->ub User buffer (holds file status info) 4921 * uap->xsecurity ACL to get (extended security) 4922 * uap->xsecurity_size Size of ACL 4923 * 4924 * Returns: 0 Success 4925 * !0 errno value 4926 * 4927 */ 4928int 4929stat64_extended(__unused proc_t p, struct stat64_extended_args *uap, __unused int32_t *retval) 4930{ 4931 return (stat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); 4932} 4933/* 4934 * Get file status; this version does not follow links. 4935 */ 4936static int 4937lstat1(user_addr_t path, user_addr_t ub, user_addr_t xsecurity, user_addr_t xsecurity_size, int isstat64) 4938{ 4939 struct nameidata nd; 4940 vfs_context_t ctx = vfs_context_current(); 4941 4942 NDINIT(&nd, LOOKUP, OP_GETATTR, NOTRIGGER | NOFOLLOW | AUDITVNPATH1, 4943 UIO_USERSPACE, path, ctx); 4944 4945 return(stat2(ctx, &nd, ub, xsecurity, xsecurity_size, isstat64)); 4946} 4947 4948/* 4949 * lstat_extended: Get file status; does not follow links; with extended security (ACL). 
4950 * 4951 * Parameters: p (ignored) 4952 * uap User argument descriptor (see below) 4953 * retval (ignored) 4954 * 4955 * Indirect: uap->path Path of file to get status from 4956 * uap->ub User buffer (holds file status info) 4957 * uap->xsecurity ACL to get (extended security) 4958 * uap->xsecurity_size Size of ACL 4959 * 4960 * Returns: 0 Success 4961 * !0 errno value 4962 * 4963 */ 4964int 4965lstat_extended(__unused proc_t p, struct lstat_extended_args *uap, __unused int32_t *retval) 4966{ 4967 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 0)); 4968} 4969 4970int 4971lstat(__unused proc_t p, struct lstat_args *uap, __unused int32_t *retval) 4972{ 4973 return(lstat1(uap->path, uap->ub, 0, 0, 0)); 4974} 4975 4976int 4977lstat64(__unused proc_t p, struct lstat64_args *uap, __unused int32_t *retval) 4978{ 4979 return(lstat1(uap->path, uap->ub, 0, 0, 1)); 4980} 4981 4982/* 4983 * lstat64_extended: Get file status; can handle large inode numbers; does not 4984 * follow links; with extended security (ACL). 4985 * 4986 * Parameters: p (ignored) 4987 * uap User argument descriptor (see below) 4988 * retval (ignored) 4989 * 4990 * Indirect: uap->path Path of file to get status from 4991 * uap->ub User buffer (holds file status info) 4992 * uap->xsecurity ACL to get (extended security) 4993 * uap->xsecurity_size Size of ACL 4994 * 4995 * Returns: 0 Success 4996 * !0 errno value 4997 * 4998 */ 4999int 5000lstat64_extended(__unused proc_t p, struct lstat64_extended_args *uap, __unused int32_t *retval) 5001{ 5002 return (lstat1(uap->path, uap->ub, uap->xsecurity, uap->xsecurity_size, 1)); 5003} 5004 5005/* 5006 * Get configurable pathname variables. 5007 * 5008 * Returns: 0 Success 5009 * namei:??? 5010 * vn_pathconf:??? 
 *
 * Notes:	Global implementation constants are intended to be
 *		implemented in this function directly; all other constants
 *		are per-FS implementation, and therefore must be handled in
 *		each respective FS, instead.
 *
 * XXX We implement some things globally right now that should actually be
 * XXX per-FS; we will need to deal with this at some point.
 */
/* ARGSUSED */
int
pathconf(__unused proc_t p, struct pathconf_args *uap, int32_t *retval)
{
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();

	NDINIT(&nd, LOOKUP, OP_PATHCONF, FOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);

	/* per-FS (or global) pathconf value for uap->name */
	error = vn_pathconf(nd.ni_vp, uap->name, retval, ctx);

	vnode_put(nd.ni_vp);
	nameidone(&nd);
	return (error);
}

/*
 * Return target name of a symbolic link.
 *
 * Reads the link contents into uap->buf (at most uap->count bytes) and
 * returns the number of bytes transferred via *retval.
 */
/* ARGSUSED */
int
readlink(proc_t p, struct readlink_args *uap, int32_t *retval)
{
	vnode_t vp;
	uio_t auio;
	/* Address width of the caller determines the uio segment flag. */
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	int error;
	struct nameidata nd;
	vfs_context_t ctx = vfs_context_current();
	char uio_buf[ UIO_SIZEOF(1) ];

	/* NOFOLLOW: we want the link itself, not its target */
	NDINIT(&nd, LOOKUP, OP_READLINK, NOFOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ,
								  &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buf, uap->count);
	if (vp->v_type != VLNK)
		error = EINVAL;
	else {
#if CONFIG_MACF
		error = mac_vnode_check_readlink(ctx,
		    vp);
#endif
		/* error is 0 here (from namei) when CONFIG_MACF is disabled */
		if (error == 0)
			error = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, ctx);
		if (error == 0)
			error = VNOP_READLINK(vp, auio, ctx);
	}
	vnode_put(vp);

	/* Safe: uio_resid() is bounded above by "count", and "count" is an int */
	*retval = uap->count - (int)uio_resid(auio);
	return (error);
}

/*
 * Change file flags.
 *
 * Sets va_flags on 'vp' after MAC and kauth authorization.
 * NOTE: consumes the iocount on 'vp' (vnode_put() on all paths), so the
 * caller must not vnode_put() it again.
 */
static int
chflags1(vnode_t vp, int flags, vfs_context_t ctx)
{
	struct vnode_attr va;
	kauth_action_t action;
	int error;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_flags, flags);

#if CONFIG_MACF
	error = mac_vnode_check_setflags(ctx, vp, flags);
	if (error)
		goto out;
#endif

	/* request authorisation, disregard immutability */
	if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0)
		goto out;
	/*
	 * Request that the auth layer disregard those file flags it's allowed to when
	 * authorizing this operation; we need to do this in order to be able to
	 * clear immutable flags.
	 */
	if (action && ((error = vnode_authorize(vp, NULL, action | KAUTH_VNODE_NOIMMUTABLE, ctx)) != 0))
		goto out;
	error = vnode_setattr(vp, &va, ctx);

	/* setattr that silently ignored va_flags is treated as unsupported */
	if ((error == 0) && !VATTR_IS_SUPPORTED(&va, va_flags)) {
		error = ENOTSUP;
	}
out:
	vnode_put(vp);
	return(error);
}

/*
 * Change flags of a file given a path name.
 */
/* ARGSUSED */
int
chflags(__unused proc_t p, struct chflags_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	AUDIT_ARG(fflags, uap->flags);
	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	nameidone(&nd);

	/* chflags1() drops the iocount on vp */
	error = chflags1(vp, uap->flags, ctx);

	return(error);
}

/*
 * Change flags of a file given a file descriptor.
 */
/* ARGSUSED */
int
fchflags(__unused proc_t p, struct fchflags_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	int error;

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(fflags, uap->flags);
	if ( (error = file_vnode(uap->fd, &vp)) )
		return (error);

	if ((error = vnode_getwithref(vp))) {
		file_drop(uap->fd);
		return(error);
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/* chflags1() drops the iocount taken above */
	error = chflags1(vp, uap->flags, vfs_context_current());

	file_drop(uap->fd);
	return (error);
}

/*
 * Change security information on a filesystem object.
 *
 * Returns:	0			Success
 *		EPERM			Operation not permitted
 *	vnode_authattr:???		[anything vnode_authattr can return]
 *	vnode_authorize:???		[anything vnode_authorize can return]
 *	vnode_setattr:???
 *					[anything vnode_setattr can return]
 *
 * Notes:	If vnode_authattr or vnode_authorize return EACCES, it will be
 *		translated to EPERM before being returned.
 */
static int
chmod2(vfs_context_t ctx, vnode_t vp, struct vnode_attr *vap)
{
	kauth_action_t action;
	int error;

	AUDIT_ARG(mode, vap->va_mode);
	/* XXX audit new args */

#if NAMEDSTREAMS
	/* chmod calls are not allowed for resource forks. */
	if (vp->v_flag & VISNAMEDSTREAM) {
		return (EPERM);
	}
#endif

#if CONFIG_MACF
	/* only consult MAC if the caller is actually changing the mode */
	if (VATTR_IS_ACTIVE(vap, va_mode) &&
	    (error = mac_vnode_check_setmode(ctx, vp, (mode_t)vap->va_mode)) != 0)
		return (error);
#endif

	/* make sure that the caller is allowed to set this security information */
	if (((error = vnode_authattr(vp, vap, &action, ctx)) != 0) ||
	    ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) {
		/* see Notes above: permission failure is EPERM, not EACCES */
		if (error == EACCES)
			error = EPERM;
		return(error);
	}

	error = vnode_setattr(vp, vap, ctx);

	return (error);
}


/*
 * Change mode of a file given a path name.
 *
 * Returns:	0			Success
 *	namei:???			[anything namei can return]
 *	chmod2:???			[anything chmod2 can return]
 */
static int
chmod1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1,
		UIO_USERSPACE, path, ctx);
	if ((error = namei(&nd)))
		return (error);
	error = chmod2(ctx, nd.ni_vp, vap);
	vnode_put(nd.ni_vp);
	nameidone(&nd);
	return(error);
}

/*
 * chmod_extended: Change the mode of a file given a path name; with extended
 * argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting the open
 *		uap			User argument descriptor (see below)
 *		retval			(ignored)
 *
 * Indirect:	uap->path		Path to object (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->mode		File mode to set (same as 'chmod')
 *		uap->xsecurity		ACL to set (or delete)
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 * Notes:	The kauth_filesec_t in 'va', if any, is in host byte order.
 *
 * XXX:		We should enummerate the possible errno values here, and where
 *		in the code they originated.
 */
int
chmod_extended(__unused proc_t p, struct chmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	/* -1 / KAUTH_*_NONE mean "leave this attribute unchanged" */
	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/* explicit remove request */
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case USER_ADDR_NULL:
		break;
	default:
		/* copy in and set the caller-supplied ACL */
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
		KAUTH_DEBUG("CHMOD - setting ACL with %d entries", va.va_acl->acl_entrycount);
	}

	error = chmod1(vfs_context_current(), uap->path, &va);

	/* xsecdst was allocated by kauth_copyinfilesec(); we own it */
	if (xsecdst != NULL)
		kauth_filesec_free(xsecdst);
	return(error);
}

/*
 * Returns:	0			Success
 *	chmod1:???
 *					[anything chmod1 can return]
 */
int
chmod(__unused proc_t p, struct chmod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);

	return(chmod1(vfs_context_current(), uap->path, &va));
}

/*
 * Change mode of a file given a file descriptor.
 */
static int
fchmod1(__unused proc_t p, int fd, struct vnode_attr *vap)
{
	vnode_t vp;
	int error;

	AUDIT_ARG(fd, fd);

	if ((error = file_vnode(fd, &vp)) != 0)
		return (error);
	if ((error = vnode_getwithref(vp)) != 0) {
		file_drop(fd);
		return(error);
	}
	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	error = chmod2(vfs_context_current(), vp, vap);
	(void)vnode_put(vp);
	file_drop(fd);

	return (error);
}

/*
 * fchmod_extended: Change mode of a file given a file descriptor; with
 * extended argument list (including extended security (ACL)).
 *
 * Parameters:	p			Process requesting to change file mode
 *		uap			User argument descriptor (see below)
 *		retval			(ignored)
 *
 * Indirect:	uap->mode		File mode to set (same as 'chmod')
 *		uap->uid		UID to set
 *		uap->gid		GID to set
 *		uap->xsecurity		ACL to set (or delete)
 *		uap->fd			File descriptor of file to change mode
 *
 * Returns:	0			Success
 *		!0			errno value
 *
 */
int
fchmod_extended(proc_t p, struct fchmod_extended_args *uap, __unused int32_t *retval)
{
	int error;
	struct vnode_attr va;
	kauth_filesec_t xsecdst;

	AUDIT_ARG(owner, uap->uid, uap->gid);

	VATTR_INIT(&va);
	if (uap->mode != -1)
		VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);
	if (uap->uid != KAUTH_UID_NONE)
		VATTR_SET(&va, va_uid, uap->uid);
	if (uap->gid != KAUTH_GID_NONE)
		VATTR_SET(&va, va_gid, uap->gid);

	xsecdst = NULL;
	switch(uap->xsecurity) {
	/*
	 * NOTE: unlike chmod_extended(), USER_ADDR_NULL here is treated as
	 * an explicit ACL delete and -1 means "not being set".
	 */
	case USER_ADDR_NULL:
		VATTR_SET(&va, va_acl, NULL);
		break;
	case CAST_USER_ADDR_T((void *)1):	/* _FILESEC_REMOVE_ACL */
		VATTR_SET(&va, va_acl, NULL);
		break;
	/* not being set */
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if ((error = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)
			return(error);
		VATTR_SET(&va, va_acl, &xsecdst->fsec_acl);
	}

	error = fchmod1(p, uap->fd, &va);


	/* free the filesec only in the cases where one was copied in */
	switch(uap->xsecurity) {
	case USER_ADDR_NULL:
	case CAST_USER_ADDR_T(-1):
		break;
	default:
		if (xsecdst != NULL)
			kauth_filesec_free(xsecdst);
	}
	return(error);
}

int
fchmod(proc_t p, struct fchmod_args *uap, __unused int32_t *retval)
{
	struct vnode_attr va;

	VATTR_INIT(&va);
	VATTR_SET(&va, va_mode, uap->mode & ALLPERMS);

	return(fchmod1(p, uap->fd, &va));
}


/*
 * Set ownership given a path name.
5434 */ 5435/* ARGSUSED */ 5436static int 5437chown1(vfs_context_t ctx, struct chown_args *uap, __unused int32_t *retval, int follow) 5438{ 5439 vnode_t vp; 5440 struct vnode_attr va; 5441 int error; 5442 struct nameidata nd; 5443 kauth_action_t action; 5444 5445 AUDIT_ARG(owner, uap->uid, uap->gid); 5446 5447 NDINIT(&nd, LOOKUP, OP_SETATTR, 5448 (follow ? FOLLOW : 0) | NOTRIGGER | AUDITVNPATH1, 5449 UIO_USERSPACE, uap->path, ctx); 5450 error = namei(&nd); 5451 if (error) 5452 return (error); 5453 vp = nd.ni_vp; 5454 5455 nameidone(&nd); 5456 5457 VATTR_INIT(&va); 5458 if (uap->uid != VNOVAL) 5459 VATTR_SET(&va, va_uid, uap->uid); 5460 if (uap->gid != VNOVAL) 5461 VATTR_SET(&va, va_gid, uap->gid); 5462 5463#if CONFIG_MACF 5464 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid); 5465 if (error) 5466 goto out; 5467#endif 5468 5469 /* preflight and authorize attribute changes */ 5470 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) 5471 goto out; 5472 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) 5473 goto out; 5474 error = vnode_setattr(vp, &va, ctx); 5475 5476out: 5477 /* 5478 * EACCES is only allowed from namei(); permissions failure should 5479 * return EPERM, so we need to translate the error code. 5480 */ 5481 if (error == EACCES) 5482 error = EPERM; 5483 5484 vnode_put(vp); 5485 return (error); 5486} 5487 5488int 5489chown(__unused proc_t p, struct chown_args *uap, int32_t *retval) 5490{ 5491 return chown1(vfs_context_current(), uap, retval, 1); 5492} 5493 5494int 5495lchown(__unused proc_t p, struct lchown_args *uap, int32_t *retval) 5496{ 5497 /* Argument list identical, but machine generated; cast for chown1() */ 5498 return chown1(vfs_context_current(), (struct chown_args *)uap, retval, 0); 5499} 5500 5501/* 5502 * Set ownership given a file descriptor. 
5503 */ 5504/* ARGSUSED */ 5505int 5506fchown(__unused proc_t p, struct fchown_args *uap, __unused int32_t *retval) 5507{ 5508 struct vnode_attr va; 5509 vfs_context_t ctx = vfs_context_current(); 5510 vnode_t vp; 5511 int error; 5512 kauth_action_t action; 5513 5514 AUDIT_ARG(owner, uap->uid, uap->gid); 5515 AUDIT_ARG(fd, uap->fd); 5516 5517 if ( (error = file_vnode(uap->fd, &vp)) ) 5518 return (error); 5519 5520 if ( (error = vnode_getwithref(vp)) ) { 5521 file_drop(uap->fd); 5522 return(error); 5523 } 5524 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 5525 5526 VATTR_INIT(&va); 5527 if (uap->uid != VNOVAL) 5528 VATTR_SET(&va, va_uid, uap->uid); 5529 if (uap->gid != VNOVAL) 5530 VATTR_SET(&va, va_gid, uap->gid); 5531 5532#if NAMEDSTREAMS 5533 /* chown calls are not allowed for resource forks. */ 5534 if (vp->v_flag & VISNAMEDSTREAM) { 5535 error = EPERM; 5536 goto out; 5537 } 5538#endif 5539 5540#if CONFIG_MACF 5541 error = mac_vnode_check_setowner(ctx, vp, uap->uid, uap->gid); 5542 if (error) 5543 goto out; 5544#endif 5545 5546 /* preflight and authorize attribute changes */ 5547 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) 5548 goto out; 5549 if (action && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { 5550 if (error == EACCES) 5551 error = EPERM; 5552 goto out; 5553 } 5554 error = vnode_setattr(vp, &va, ctx); 5555 5556out: 5557 (void)vnode_put(vp); 5558 file_drop(uap->fd); 5559 return (error); 5560} 5561 5562static int 5563getutimes(user_addr_t usrtvp, struct timespec *tsp) 5564{ 5565 int error; 5566 5567 if (usrtvp == USER_ADDR_NULL) { 5568 struct timeval old_tv; 5569 /* XXX Y2038 bug because of microtime argument */ 5570 microtime(&old_tv); 5571 TIMEVAL_TO_TIMESPEC(&old_tv, &tsp[0]); 5572 tsp[1] = tsp[0]; 5573 } else { 5574 if (IS_64BIT_PROCESS(current_proc())) { 5575 struct user64_timeval tv[2]; 5576 error = copyin(usrtvp, (void *)tv, sizeof(tv)); 5577 if (error) 5578 return (error); 5579 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 5580 
TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); 5581 } else { 5582 struct user32_timeval tv[2]; 5583 error = copyin(usrtvp, (void *)tv, sizeof(tv)); 5584 if (error) 5585 return (error); 5586 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 5587 TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); 5588 } 5589 } 5590 return 0; 5591} 5592 5593static int 5594setutimes(vfs_context_t ctx, vnode_t vp, const struct timespec *ts, 5595 int nullflag) 5596{ 5597 int error; 5598 struct vnode_attr va; 5599 kauth_action_t action; 5600 5601 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 5602 5603 VATTR_INIT(&va); 5604 VATTR_SET(&va, va_access_time, ts[0]); 5605 VATTR_SET(&va, va_modify_time, ts[1]); 5606 if (nullflag) 5607 va.va_vaflags |= VA_UTIMES_NULL; 5608 5609#if NAMEDSTREAMS 5610 /* utimes calls are not allowed for resource forks. */ 5611 if (vp->v_flag & VISNAMEDSTREAM) { 5612 error = EPERM; 5613 goto out; 5614 } 5615#endif 5616 5617#if CONFIG_MACF 5618 error = mac_vnode_check_setutimes(ctx, vp, ts[0], ts[1]); 5619 if (error) 5620 goto out; 5621#endif 5622 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) { 5623 if (!nullflag && error == EACCES) 5624 error = EPERM; 5625 goto out; 5626 } 5627 5628 /* since we may not need to auth anything, check here */ 5629 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) { 5630 if (!nullflag && error == EACCES) 5631 error = EPERM; 5632 goto out; 5633 } 5634 error = vnode_setattr(vp, &va, ctx); 5635 5636out: 5637 return error; 5638} 5639 5640/* 5641 * Set the access and modification times of a file. 5642 */ 5643/* ARGSUSED */ 5644int 5645utimes(__unused proc_t p, struct utimes_args *uap, __unused int32_t *retval) 5646{ 5647 struct timespec ts[2]; 5648 user_addr_t usrtvp; 5649 int error; 5650 struct nameidata nd; 5651 vfs_context_t ctx = vfs_context_current(); 5652 5653 /* 5654 * AUDIT: Needed to change the order of operations to do the 5655 * name lookup first because auditing wants the path. 
5656 */ 5657 NDINIT(&nd, LOOKUP, OP_SETATTR, FOLLOW | AUDITVNPATH1, 5658 UIO_USERSPACE, uap->path, ctx); 5659 error = namei(&nd); 5660 if (error) 5661 return (error); 5662 nameidone(&nd); 5663 5664 /* 5665 * Fetch the user-supplied time. If usrtvp is USER_ADDR_NULL, we fetch 5666 * the current time instead. 5667 */ 5668 usrtvp = uap->tptr; 5669 if ((error = getutimes(usrtvp, ts)) != 0) 5670 goto out; 5671 5672 error = setutimes(ctx, nd.ni_vp, ts, usrtvp == USER_ADDR_NULL); 5673 5674out: 5675 vnode_put(nd.ni_vp); 5676 return (error); 5677} 5678 5679/* 5680 * Set the access and modification times of a file. 5681 */ 5682/* ARGSUSED */ 5683int 5684futimes(__unused proc_t p, struct futimes_args *uap, __unused int32_t *retval) 5685{ 5686 struct timespec ts[2]; 5687 vnode_t vp; 5688 user_addr_t usrtvp; 5689 int error; 5690 5691 AUDIT_ARG(fd, uap->fd); 5692 usrtvp = uap->tptr; 5693 if ((error = getutimes(usrtvp, ts)) != 0) 5694 return (error); 5695 if ((error = file_vnode(uap->fd, &vp)) != 0) 5696 return (error); 5697 if((error = vnode_getwithref(vp))) { 5698 file_drop(uap->fd); 5699 return(error); 5700 } 5701 5702 error = setutimes(vfs_context_current(), vp, ts, usrtvp == 0); 5703 vnode_put(vp); 5704 file_drop(uap->fd); 5705 return(error); 5706} 5707 5708/* 5709 * Truncate a file given its path name. 
5710 */ 5711/* ARGSUSED */ 5712int 5713truncate(__unused proc_t p, struct truncate_args *uap, __unused int32_t *retval) 5714{ 5715 vnode_t vp; 5716 struct vnode_attr va; 5717 vfs_context_t ctx = vfs_context_current(); 5718 int error; 5719 struct nameidata nd; 5720 kauth_action_t action; 5721 5722 if (uap->length < 0) 5723 return(EINVAL); 5724 NDINIT(&nd, LOOKUP, OP_TRUNCATE, FOLLOW | AUDITVNPATH1, 5725 UIO_USERSPACE, uap->path, ctx); 5726 if ((error = namei(&nd))) 5727 return (error); 5728 vp = nd.ni_vp; 5729 5730 nameidone(&nd); 5731 5732 VATTR_INIT(&va); 5733 VATTR_SET(&va, va_data_size, uap->length); 5734 5735#if CONFIG_MACF 5736 error = mac_vnode_check_truncate(ctx, NOCRED, vp); 5737 if (error) 5738 goto out; 5739#endif 5740 5741 if ((error = vnode_authattr(vp, &va, &action, ctx)) != 0) 5742 goto out; 5743 if ((action != 0) && ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)) 5744 goto out; 5745 error = vnode_setattr(vp, &va, ctx); 5746out: 5747 vnode_put(vp); 5748 return (error); 5749} 5750 5751/* 5752 * Truncate a file given a file descriptor. 
5753 */ 5754/* ARGSUSED */ 5755int 5756ftruncate(proc_t p, struct ftruncate_args *uap, int32_t *retval) 5757{ 5758 vfs_context_t ctx = vfs_context_current(); 5759 struct vnode_attr va; 5760 vnode_t vp; 5761 struct fileproc *fp; 5762 int error ; 5763 int fd = uap->fd; 5764 5765 AUDIT_ARG(fd, uap->fd); 5766 if (uap->length < 0) 5767 return(EINVAL); 5768 5769 if ( (error = fp_lookup(p,fd,&fp,0)) ) { 5770 return(error); 5771 } 5772 5773 switch (FILEGLOB_DTYPE(fp->f_fglob)) { 5774 case DTYPE_PSXSHM: 5775 error = pshm_truncate(p, fp, uap->fd, uap->length, retval); 5776 goto out; 5777 case DTYPE_VNODE: 5778 break; 5779 default: 5780 error = EINVAL; 5781 goto out; 5782 } 5783 5784 vp = (vnode_t)fp->f_fglob->fg_data; 5785 5786 if ((fp->f_fglob->fg_flag & FWRITE) == 0) { 5787 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); 5788 error = EINVAL; 5789 goto out; 5790 } 5791 5792 if ((error = vnode_getwithref(vp)) != 0) { 5793 goto out; 5794 } 5795 5796 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 5797 5798#if CONFIG_MACF 5799 error = mac_vnode_check_truncate(ctx, 5800 fp->f_fglob->fg_cred, vp); 5801 if (error) { 5802 (void)vnode_put(vp); 5803 goto out; 5804 } 5805#endif 5806 VATTR_INIT(&va); 5807 VATTR_SET(&va, va_data_size, uap->length); 5808 error = vnode_setattr(vp, &va, ctx); 5809 (void)vnode_put(vp); 5810out: 5811 file_drop(fd); 5812 return (error); 5813} 5814 5815 5816/* 5817 * Sync an open file with synchronized I/O _file_ integrity completion 5818 */ 5819/* ARGSUSED */ 5820int 5821fsync(proc_t p, struct fsync_args *uap, __unused int32_t *retval) 5822{ 5823 __pthread_testcancel(1); 5824 return(fsync_common(p, uap, MNT_WAIT)); 5825} 5826 5827 5828/* 5829 * Sync an open file with synchronized I/O _file_ integrity completion 5830 * 5831 * Notes: This is a legacy support function that does not test for 5832 * thread cancellation points. 
5833 */ 5834/* ARGSUSED */ 5835int 5836fsync_nocancel(proc_t p, struct fsync_nocancel_args *uap, __unused int32_t *retval) 5837{ 5838 return(fsync_common(p, (struct fsync_args *)uap, MNT_WAIT)); 5839} 5840 5841 5842/* 5843 * Sync an open file with synchronized I/O _data_ integrity completion 5844 */ 5845/* ARGSUSED */ 5846int 5847fdatasync(proc_t p, struct fdatasync_args *uap, __unused int32_t *retval) 5848{ 5849 __pthread_testcancel(1); 5850 return(fsync_common(p, (struct fsync_args *)uap, MNT_DWAIT)); 5851} 5852 5853 5854/* 5855 * fsync_common 5856 * 5857 * Common fsync code to support both synchronized I/O file integrity completion 5858 * (normal fsync) and synchronized I/O data integrity completion (fdatasync). 5859 * 5860 * If 'flags' is MNT_DWAIT, the caller is requesting data integrity, which 5861 * will only guarantee that the file data contents are retrievable. If 5862 * 'flags' is MNT_WAIT, the caller is rewuesting file integrity, which also 5863 * includes additional metadata unnecessary for retrieving the file data 5864 * contents, such as atime, mtime, ctime, etc., also be committed to stable 5865 * storage. 5866 * 5867 * Parameters: p The process 5868 * uap->fd The descriptor to synchronize 5869 * flags The data integrity flags 5870 * 5871 * Returns: int Success 5872 * fp_getfvp:EBADF Bad file descriptor 5873 * fp_getfvp:ENOTSUP fd does not refer to a vnode 5874 * VNOP_FSYNC:??? unspecified 5875 * 5876 * Notes: We use struct fsync_args because it is a short name, and all 5877 * caller argument structures are otherwise identical. 
5878 */ 5879static int 5880fsync_common(proc_t p, struct fsync_args *uap, int flags) 5881{ 5882 vnode_t vp; 5883 struct fileproc *fp; 5884 vfs_context_t ctx = vfs_context_current(); 5885 int error; 5886 5887 AUDIT_ARG(fd, uap->fd); 5888 5889 if ( (error = fp_getfvp(p, uap->fd, &fp, &vp)) ) 5890 return (error); 5891 if ( (error = vnode_getwithref(vp)) ) { 5892 file_drop(uap->fd); 5893 return(error); 5894 } 5895 5896 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 5897 5898 error = VNOP_FSYNC(vp, flags, ctx); 5899 5900#if NAMEDRSRCFORK 5901 /* Sync resource fork shadow file if necessary. */ 5902 if ((error == 0) && 5903 (vp->v_flag & VISNAMEDSTREAM) && 5904 (vp->v_parent != NULLVP) && 5905 vnode_isshadow(vp) && 5906 (fp->f_flags & FP_WRITTEN)) { 5907 (void) vnode_flushnamedstream(vp->v_parent, vp, ctx); 5908 } 5909#endif 5910 5911 (void)vnode_put(vp); 5912 file_drop(uap->fd); 5913 return (error); 5914} 5915 5916/* 5917 * Duplicate files. Source must be a file, target must be a file or 5918 * must not exist. 5919 * 5920 * XXX Copyfile authorisation checking is woefully inadequate, and will not 5921 * perform inheritance correctly. 5922 */ 5923/* ARGSUSED */ 5924int 5925copyfile(__unused proc_t p, struct copyfile_args *uap, __unused int32_t *retval) 5926{ 5927 vnode_t tvp, fvp, tdvp, sdvp; 5928 struct nameidata fromnd, tond; 5929 int error; 5930 vfs_context_t ctx = vfs_context_current(); 5931 5932 /* Check that the flags are valid. 
*/ 5933 5934 if (uap->flags & ~CPF_MASK) { 5935 return(EINVAL); 5936 } 5937 5938 NDINIT(&fromnd, LOOKUP, OP_COPYFILE, SAVESTART | AUDITVNPATH1, 5939 UIO_USERSPACE, uap->from, ctx); 5940 if ((error = namei(&fromnd))) 5941 return (error); 5942 fvp = fromnd.ni_vp; 5943 5944 NDINIT(&tond, CREATE, OP_LINK, 5945 LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNPATH2 | CN_NBMOUNTLOOK, 5946 UIO_USERSPACE, uap->to, ctx); 5947 if ((error = namei(&tond))) { 5948 goto out1; 5949 } 5950 tdvp = tond.ni_dvp; 5951 tvp = tond.ni_vp; 5952 5953 if (tvp != NULL) { 5954 if (!(uap->flags & CPF_OVERWRITE)) { 5955 error = EEXIST; 5956 goto out; 5957 } 5958 } 5959 if (fvp->v_type == VDIR || (tvp && tvp->v_type == VDIR)) { 5960 error = EISDIR; 5961 goto out; 5962 } 5963 5964 if ((error = vnode_authorize(tdvp, NULL, KAUTH_VNODE_ADD_FILE, ctx)) != 0) 5965 goto out; 5966 5967 if (fvp == tdvp) 5968 error = EINVAL; 5969 /* 5970 * If source is the same as the destination (that is the 5971 * same inode number) then there is nothing to do. 5972 * (fixed to have POSIX semantics - CSM 3/2/98) 5973 */ 5974 if (fvp == tvp) 5975 error = -1; 5976 if (!error) 5977 error = VNOP_COPYFILE(fvp, tdvp, tvp, &tond.ni_cnd, uap->mode, uap->flags, ctx); 5978out: 5979 sdvp = tond.ni_startdir; 5980 /* 5981 * nameidone has to happen before we vnode_put(tdvp) 5982 * since it may need to release the fs_nodelock on the tdvp 5983 */ 5984 nameidone(&tond); 5985 5986 if (tvp) 5987 vnode_put(tvp); 5988 vnode_put(tdvp); 5989 vnode_put(sdvp); 5990out1: 5991 vnode_put(fvp); 5992 5993 if (fromnd.ni_startdir) 5994 vnode_put(fromnd.ni_startdir); 5995 nameidone(&fromnd); 5996 5997 if (error == -1) 5998 return (0); 5999 return (error); 6000} 6001 6002 6003/* 6004 * Rename files. Source and destination must either both be directories, 6005 * or both not be directories. If target is a directory, it must be empty. 
 */
/* ARGSUSED */
int
rename(__unused proc_t p, struct rename_args *uap, __unused int32_t *retval)
{
	vnode_t tvp, tdvp;
	vnode_t fvp, fdvp;
	struct nameidata *fromnd, *tond;
	vfs_context_t ctx = vfs_context_current();
	int error;
	int do_retry;		/* re-drive the whole rename from 'retry' */
	int mntrename;		/* TRUE when renaming a mount point's covered vnode */
	int need_event;
	const char *oname = NULL;
	char *from_name = NULL, *to_name = NULL;
	int from_len=0, to_len=0;
	int holding_mntlock;	/* holding the per-mount rename serialization lock */
	mount_t locked_mp = NULL;
	vnode_t oparent = NULLVP;
#if CONFIG_FSE
	fse_info from_finfo, to_finfo;
#endif
	int from_truncated=0, to_truncated;
	int batched = 0;	/* FS supports the compound rename VNOP */
	struct vnode_attr *fvap, *tvap;
	int continuing = 0;
	/* carving out a chunk for structs that are too big to be on stack. */
	struct {
		struct nameidata from_node, to_node;
		struct vnode_attr fv_attr, tv_attr;
	} * __rename_data;
	MALLOC(__rename_data, void *, sizeof(*__rename_data), M_TEMP, M_WAITOK);
	fromnd = &__rename_data->from_node;
	tond = &__rename_data->to_node;

	holding_mntlock = 0;
	do_retry = 0;
retry:
	fvp = tvp = NULL;
	fdvp = tdvp = NULL;
	fvap = tvap = NULL;
	mntrename = FALSE;

	NDINIT(fromnd, DELETE, OP_UNLINK, WANTPARENT | AUDITVNPATH1,
	       UIO_USERSPACE, uap->from, ctx);
	fromnd->ni_flag = NAMEI_COMPOUNDRENAME;

	NDINIT(tond, RENAME, OP_RENAME, WANTPARENT | AUDITVNPATH2 | CN_NBMOUNTLOOK,
	       UIO_USERSPACE, uap->to, ctx);
	tond->ni_flag = NAMEI_COMPOUNDRENAME;

continue_lookup:
	/*
	 * On the first pass both lookups run; on an EKEEPLOOKING continuation
	 * only the namei whose NAMEI_CONTLOOKUP flag is set is re-driven.
	 */
	if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = namei(fromnd)) )
			goto out1;
		fdvp = fromnd->ni_dvp;
		fvp  = fromnd->ni_vp;

		if (fvp && fvp->v_type == VDIR)
			tond->ni_cnd.cn_flags |= WILLBEDIR;
	}

	if ((tond->ni_flag & NAMEI_CONTLOOKUP) != 0 || !continuing) {
		if ( (error = namei(tond)) ) {
			/*
			 * Translate error code for rename("dir1", "dir2/.").
			 */
			if (error == EISDIR && fvp->v_type == VDIR)
				error = EINVAL;
			goto out1;
		}
		tdvp = tond->ni_dvp;
		tvp  = tond->ni_vp;
	}

	batched = vnode_compound_rename_available(fdvp);
	if (!fvp) {
		/*
		 * Claim: this check will never reject a valid rename.
		 * For success, either fvp must be on the same mount as tdvp, or fvp must sit atop a vnode on the same mount as tdvp.
		 * Suppose fdvp and tdvp are not on the same mount.
		 * If fvp is on the same mount as tdvp, then fvp is not on the same mount as fdvp, so fvp is the root of its filesystem.  If fvp is the root,
		 * 	then you can't move it to within another dir on the same mountpoint.
		 * If fvp sits atop a vnode on the same mount as fdvp, then that vnode must be part of the same mount as fdvp, which is a contradiction.
		 *
		 * If this check passes, then we are safe to pass these vnodes to the same FS.
		 */
		if (fdvp->v_mount != tdvp->v_mount) {
			error = EXDEV;
			goto out1;
		}
		goto skipped_lookup;
	}

	if (!batched) {
		error = vn_authorize_rename(fdvp, fvp, &fromnd->ni_cnd, tdvp, tvp, &tond->ni_cnd, ctx, NULL);
		if (error) {
			if (error == ENOENT) {
				/*
				 * We encountered a race where after doing the namei, tvp stops
				 * being valid. If so, simply re-drive the rename call from the
				 * top.
				 */
				do_retry = 1;
			}
			goto out1;
		}
	}

	/*
	 * If the source and destination are the same (i.e. they're
	 * links to the same vnode) and the target file system is
	 * case sensitive, then there is nothing to do.
	 *
	 * XXX Come back to this.
	 */
	if (fvp == tvp) {
		int pathconf_val;

		/*
		 * Note: if _PC_CASE_SENSITIVE selector isn't supported,
		 * then assume that this file system is case sensitive.
		 */
		if (VNOP_PATHCONF(fvp, _PC_CASE_SENSITIVE, &pathconf_val, ctx) != 0 ||
		    pathconf_val != 0) {
			goto out1;
		}
	}

	/*
	 * Allow the renaming of mount points.
	 * - target must not exist
	 * - target must reside in the same directory as source
	 * - union mounts cannot be renamed
	 * - "/" cannot be renamed
	 *
	 * XXX Handle this in VFS after a continued lookup (if we missed
	 * in the cache to start off)
	 */
	if ((fvp->v_flag & VROOT) &&
	    (fvp->v_type == VDIR) &&
	    (tvp == NULL)  &&
	    (fvp->v_mountedhere == NULL)  &&
	    (fdvp == tdvp)  &&
	    ((fvp->v_mount->mnt_flag & (MNT_UNION | MNT_ROOTFS)) == 0) &&
	    (fvp->v_mount->mnt_vnodecovered != NULLVP)) {
		vnode_t coveredvp;

		/* switch fvp to the covered vnode */
		coveredvp = fvp->v_mount->mnt_vnodecovered;
		if ( (vnode_getwithref(coveredvp)) ) {
			error = ENOENT;
			goto out1;
		}
		vnode_put(fvp);

		fvp = coveredvp;
		mntrename = TRUE;
	}
	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		goto out1;
	}

	/*
	 * If source is the same as the destination (that is the
	 * same inode number) then there is nothing to do...
	 * EXCEPT if the underlying file system supports case
	 * insensitivity and is case preserving.  In this case
	 * the file system needs to handle the special case of
	 * getting the same vnode as target (fvp) and source (tvp).
	 *
	 * Only file systems that support pathconf selectors _PC_CASE_SENSITIVE
	 * and _PC_CASE_PRESERVING can have this exception, and they need to
	 * handle the special case of getting the same vnode as target and
	 * source.  NOTE: Then the target is unlocked going into vnop_rename,
	 * so not to cause locking problems. There is a single reference on tvp.
	 *
	 * NOTE - that fvp == tvp also occurs if they are hard linked and
	 * that correct behaviour then is just to return success without doing
	 * anything.
	 *
	 * XXX filesystem should take care of this itself, perhaps...
	 */
	if (fvp == tvp && fdvp == tdvp) {
		if (fromnd->ni_cnd.cn_namelen == tond->ni_cnd.cn_namelen &&
		    !bcmp(fromnd->ni_cnd.cn_nameptr, tond->ni_cnd.cn_nameptr,
			  fromnd->ni_cnd.cn_namelen)) {
			goto out1;
		}
	}

	if (holding_mntlock && fvp->v_mount != locked_mp) {
		/*
		 * we're holding a reference and lock
		 * on locked_mp, but it no longer matches
		 * what we want to do... so drop our hold
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp != fdvp && fvp->v_type == VDIR) {
		/*
		 * serialize renames that re-shape
		 * the tree... if holding_mntlock is
		 * set, then we're ready to go...
		 * otherwise we
		 * first need to drop the iocounts
		 * we picked up, second take the
		 * lock to serialize the access,
		 * then finally start the lookup
		 * process over with the lock held
		 */
		if (!holding_mntlock) {
			/*
			 * need to grab a reference on
			 * the mount point before we
			 * drop all the iocounts... once
			 * the iocounts are gone, the mount
			 * could follow
			 */
			locked_mp = fvp->v_mount;
			mount_ref(locked_mp, 0);

			/*
			 * nameidone has to happen before we vnode_put(tvp)
			 * since it may need to release the fs_nodelock on the tvp
			 */
			nameidone(tond);

			if (tvp)
				vnode_put(tvp);
			vnode_put(tdvp);

			/*
			 * nameidone has to happen before we vnode_put(fdvp)
			 * since it may need to release the fs_nodelock on the fvp
			 */
			nameidone(fromnd);

			vnode_put(fvp);
			vnode_put(fdvp);

			mount_lock_renames(locked_mp);
			holding_mntlock = 1;

			goto retry;
		}
	} else {
		/*
		 * when we dropped the iocounts to take
		 * the lock, we allowed the identity of
		 * the various vnodes to change... if they did,
		 * we may no longer be dealing with a rename
		 * that reshapes the tree... once we're holding
		 * the iocounts, the vnodes can't change type
		 * so we're free to drop the lock at this point
		 * and continue on
		 */
		if (holding_mntlock) {
			mount_unlock_renames(locked_mp);
			mount_drop(locked_mp, 0);
			holding_mntlock = 0;
		}
	}

	// save these off so we can later verify that fvp is the same
	oname   = fvp->v_name;
	oparent = fvp->v_parent;

skipped_lookup:
#if CONFIG_FSE
	need_event = need_fsevent(FSE_RENAME, fdvp);
	if (need_event) {
		if (fvp) {
			get_fse_info(fvp, &from_finfo, ctx);
		} else {
			error = vfs_get_notify_attributes(&__rename_data->fv_attr);
			if (error) {
				goto out1;
			}

			fvap = &__rename_data->fv_attr;
		}

		if (tvp) {
			get_fse_info(tvp, &to_finfo, ctx);
		} else if (batched) {
			error = vfs_get_notify_attributes(&__rename_data->tv_attr);
			if (error) {
				goto out1;
			}

			tvap = &__rename_data->tv_attr;
		}
	}
#else
	need_event = 0;
#endif /* CONFIG_FSE */

	if (need_event || kauth_authorize_fileop_has_listeners()) {
		if (from_name == NULL) {
			GET_PATH(from_name);
			if (from_name == NULL) {
				error = ENOMEM;
				goto out1;
			}
		}

		from_len = safe_getpath(fdvp, fromnd->ni_cnd.cn_nameptr, from_name, MAXPATHLEN, &from_truncated);

		if (to_name == NULL) {
			GET_PATH(to_name);
			if (to_name == NULL) {
				error = ENOMEM;
				goto out1;
			}
		}

		to_len = safe_getpath(tdvp, tond->ni_cnd.cn_nameptr, to_name, MAXPATHLEN, &to_truncated);
	}

	error = vn_rename(fdvp, &fvp, &fromnd->ni_cnd, fvap,
			  tdvp, &tvp, &tond->ni_cnd, tvap,
			  0, ctx);

	if (holding_mntlock) {
		/*
		 * we can drop our serialization
		 * lock now
		 */
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (error) {
		if (error == EKEEPLOOKING) {
			if ((fromnd->ni_flag & NAMEI_CONTLOOKUP) == 0) {
				if ((tond->ni_flag & NAMEI_CONTLOOKUP) == 0) {
					panic("EKEEPLOOKING without NAMEI_CONTLOOKUP on either ndp?");
				}
			}

			fromnd->ni_vp = fvp;
			tond->ni_vp = tvp;

			goto continue_lookup;
		}

		/*
		 * We may encounter a race in the VNOP where the destination didn't
		 * exist when we did the namei, but it does by the time we go and
		 * try to create the entry. In this case, we should re-drive this rename
		 * call from the top again.  Currently, only HFS bubbles out ERECYCLE,
		 * but other filesystems susceptible to this race could return it, too.
		 */
		if (error == ERECYCLE) {
			do_retry = 1;
		}

		goto out1;
	}

	/* call out to allow 3rd party notification of rename.
	 * Ignore result of kauth_authorize_fileop call.
	 *
	 * NOTE(review): from_name/to_name may be NULL here when neither an
	 * fsevent nor a fileop listener was present at path-capture time;
	 * presumably the listeners tolerate NULL paths -- confirm.
	 */
	kauth_authorize_fileop(vfs_context_ucred(ctx),
			KAUTH_FILEOP_RENAME,
			(uintptr_t)from_name, (uintptr_t)to_name);

#if CONFIG_FSE
	if (from_name != NULL && to_name != NULL) {
		if (from_truncated || to_truncated) {
			// set it here since only the from_finfo gets reported up to user space
			from_finfo.mode |= FSE_TRUNCATED_PATH;
		}

		if (tvap && tvp) {
			vnode_get_fse_info_from_vap(tvp, &to_finfo, tvap);
		}
		if (fvap) {
			vnode_get_fse_info_from_vap(fvp, &from_finfo, fvap);
		}

		if (tvp) {
			add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_FINFO, &to_finfo,
				    FSE_ARG_DONE);
		} else {
			add_fsevent(FSE_RENAME, ctx,
				    FSE_ARG_STRING, from_len, from_name,
				    FSE_ARG_FINFO, &from_finfo,
				    FSE_ARG_STRING, to_len, to_name,
				    FSE_ARG_DONE);
		}
	}
#endif /* CONFIG_FSE */

	/*
	 * update filesystem's mount point data
	 */
	if (mntrename) {
		char *cp, *pathend, *mpname;
		char * tobuf;
		struct mount *mp;
		int maxlen;
		size_t len = 0;

		mp = fvp->v_mountedhere;

		if (vfs_busy(mp, LK_NOWAIT)) {
			error = EBUSY;
			goto out1;
		}
		MALLOC_ZONE(tobuf, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);

		error = copyinstr(uap->to, tobuf, MAXPATHLEN, &len);
		if (!error) {
			/* find current mount point prefix */
			pathend = &mp->mnt_vfsstat.f_mntonname[0];
			for (cp = pathend; *cp != '\0'; ++cp) {
				if (*cp == '/')
					pathend = cp + 1;
			}
			/* find last component of target name */
			for (mpname = cp = tobuf; *cp != '\0'; ++cp) {
				if (*cp == '/')
					mpname = cp + 1;
			}
			/* append name to prefix */
			maxlen = MAXPATHLEN - (pathend - mp->mnt_vfsstat.f_mntonname);
			bzero(pathend, maxlen);
			strlcpy(pathend, mpname, maxlen);
		}
		FREE_ZONE(tobuf, MAXPATHLEN, M_NAMEI);

		vfs_unbusy(mp);
	}
	/*
	 * fix up name & parent pointers.  note that we first
	 * check that fvp has the same name/parent pointers it
	 * had before the rename call... this is a 'weak' check
	 * at best...
	 *
	 * XXX oparent and oname may not be set in the compound vnop case
	 */
	if (batched || (oname == fvp->v_name && oparent == fvp->v_parent)) {
		int update_flags;

		update_flags = VNODE_UPDATE_NAME;

		if (fdvp != tdvp)
			update_flags |= VNODE_UPDATE_PARENT;

		vnode_update_identity(fvp, tdvp, tond->ni_cnd.cn_nameptr, tond->ni_cnd.cn_namelen, tond->ni_cnd.cn_hash, update_flags);
	}
out1:
	if (to_name != NULL) {
		RELEASE_PATH(to_name);
		to_name = NULL;
	}
	if (from_name != NULL) {
		RELEASE_PATH(from_name);
		from_name = NULL;
	}
	if (holding_mntlock) {
		mount_unlock_renames(locked_mp);
		mount_drop(locked_mp, 0);
		holding_mntlock = 0;
	}
	if (tdvp) {
		/*
		 * nameidone has to happen before we vnode_put(tdvp)
		 * since it may need to release the fs_nodelock on the tdvp
		 */
		nameidone(tond);

		if (tvp)
			vnode_put(tvp);
		vnode_put(tdvp);
	}
	if (fdvp) {
		/*
		 * nameidone has to happen before we vnode_put(fdvp)
		 * since it may need to release the fs_nodelock on the fdvp
		 */
		nameidone(fromnd);

		if (fvp)
			vnode_put(fvp);
		vnode_put(fdvp);
	}


	/*
	 * If things changed after we did the namei, then we will re-drive
	 * this rename call from the top.
	 */
	if (do_retry) {
		do_retry = 0;
		goto retry;
	}

	FREE(__rename_data, M_TEMP);
	return (error);
}

/*
 * Make a directory file.
 *
 * Returns:	0			Success
 *		EEXIST
 *	namei:???
 *	vnode_authorize:???
 *	vn_create:???
 */
/* ARGSUSED */
static int
mkdir1(vfs_context_t ctx, user_addr_t path, struct vnode_attr *vap)
{
	vnode_t	vp, dvp;
	int error;
	int update_flags = 0;
	int batched;		/* FS supports the compound mkdir VNOP */
	struct nameidata nd;

	AUDIT_ARG(mode, vap->va_mode);
	NDINIT(&nd, CREATE, OP_MKDIR, LOCKPARENT | AUDITVNPATH1, UIO_USERSPACE,
	       path, ctx);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	nd.ni_flag = NAMEI_COMPOUNDMKDIR;

continue_lookup:
	error = namei(&nd);
	if (error)
		return (error);
	dvp = nd.ni_dvp;
	vp = nd.ni_vp;

	/* A non-NULL vp means the last path component already exists. */
	if (vp != NULL) {
		error = EEXIST;
		goto out;
	}

	batched = vnode_compound_mkdir_available(dvp);

	VATTR_SET(vap, va_type, VDIR);

	/*
	 * XXX
	 * Don't authorize in VFS for compound VNOP.... mkdir -p today assumes that it will
	 * only get EXISTS or EISDIR for existing path components, and not that it could see
	 * EACCESS/EPERM--so if we authorize for mkdir on "/" for "mkdir -p /tmp/foo/bar/baz"
	 * it will fail in a spurious  manner.  Need to figure out if this is valid behavior.
	 */
	if ((error = vn_authorize_mkdir(dvp, &nd.ni_cnd, vap, ctx, NULL)) != 0) {
		if (error == EACCES || error == EPERM) {
			int error2;

			/* drop the CREATE-mode lookup state before re-looking up */
			nameidone(&nd);
			vnode_put(dvp);
			dvp = NULLVP;

			/*
			 * Try a lookup without "NAMEI_COMPOUNDVNOP" to make sure we return EEXIST
			 * rather than EACCESS if the target exists.
			 */
			NDINIT(&nd, LOOKUP, OP_MKDIR, AUDITVNPATH1, UIO_USERSPACE,
			       path, ctx);
			error2 = namei(&nd);
			if (error2) {
				goto out;
			} else {
				vp = nd.ni_vp;
				error = EEXIST;
				goto out;
			}
		}

		goto out;
	}

	/*
	 * make the directory
	 */
	if ((error = vn_create(dvp, &vp, &nd, vap, 0, 0, NULL, ctx)) != 0) {
		if (error == EKEEPLOOKING) {
			/* compound VNOP asked for a continued lookup */
			nd.ni_vp = vp;
			goto continue_lookup;
		}

		goto out;
	}

	// Make sure the name & parent pointers are hooked up
	if (vp->v_name == NULL)
		update_flags |= VNODE_UPDATE_NAME;
	if (vp->v_parent == NULLVP)
		update_flags |= VNODE_UPDATE_PARENT;

	if (update_flags)
		vnode_update_identity(vp, dvp, nd.ni_cnd.cn_nameptr, nd.ni_cnd.cn_namelen, nd.ni_cnd.cn_hash, update_flags);

#if CONFIG_FSE
	add_fsevent(FSE_CREATE_DIR, ctx, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
#endif

out:
	/*
	 * nameidone has to happen before we vnode_put(dvp)
	 * since it may need to release the fs_nodelock on the dvp
	 */
	nameidone(&nd);

	if (vp)
		vnode_put(vp);
	if (dvp)
		vnode_put(dvp);

	return (error);
}

/*
 * mkdir_extended: Create a directory; with extended security (ACL).
6638 * 6639 * Parameters: p Process requesting to create the directory 6640 * uap User argument descriptor (see below) 6641 * retval (ignored) 6642 * 6643 * Indirect: uap->path Path of directory to create 6644 * uap->mode Access permissions to set 6645 * uap->xsecurity ACL to set 6646 * 6647 * Returns: 0 Success 6648 * !0 Not success 6649 * 6650 */ 6651int 6652mkdir_extended(proc_t p, struct mkdir_extended_args *uap, __unused int32_t *retval) 6653{ 6654 int ciferror; 6655 kauth_filesec_t xsecdst; 6656 struct vnode_attr va; 6657 6658 AUDIT_ARG(owner, uap->uid, uap->gid); 6659 6660 xsecdst = NULL; 6661 if ((uap->xsecurity != USER_ADDR_NULL) && 6662 ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0)) 6663 return ciferror; 6664 6665 VATTR_INIT(&va); 6666 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); 6667 if (xsecdst != NULL) 6668 VATTR_SET(&va, va_acl, &xsecdst->fsec_acl); 6669 6670 ciferror = mkdir1(vfs_context_current(), uap->path, &va); 6671 if (xsecdst != NULL) 6672 kauth_filesec_free(xsecdst); 6673 return ciferror; 6674} 6675 6676int 6677mkdir(proc_t p, struct mkdir_args *uap, __unused int32_t *retval) 6678{ 6679 struct vnode_attr va; 6680 6681 VATTR_INIT(&va); 6682 VATTR_SET(&va, va_mode, (uap->mode & ACCESSPERMS) & ~p->p_fd->fd_cmask); 6683 6684 return(mkdir1(vfs_context_current(), uap->path, &va)); 6685} 6686 6687/* 6688 * Remove a directory file. 
 */
/* ARGSUSED */
int
rmdir(__unused proc_t p, struct rmdir_args *uap, __unused int32_t *retval)
{
	vnode_t vp, dvp;
	int error;
	struct nameidata nd;
	char     *path = NULL;
	int       len=0;
	int has_listeners = 0;
	int need_event = 0;
	int truncated = 0;
	vfs_context_t ctx = vfs_context_current();
#if CONFIG_FSE
	struct vnode_attr va;
#endif /* CONFIG_FSE */
	struct vnode_attr *vap = NULL;
	int batched;		/* FS supports the compound rmdir VNOP */

	int restart_flag;

	/*
	 * This loop exists to restart rmdir in the unlikely case that two
	 * processes are simultaneously trying to remove the same directory
	 * containing orphaned appleDouble files.
	 */
	do {
		NDINIT(&nd, DELETE, OP_RMDIR, LOCKPARENT | AUDITVNPATH1,
		       UIO_USERSPACE, uap->path, ctx);
		nd.ni_flag = NAMEI_COMPOUNDRMDIR;
continue_lookup:
		restart_flag = 0;
		vap = NULL;

		error = namei(&nd);
		if (error)
			return (error);

		dvp = nd.ni_dvp;
		vp = nd.ni_vp;

		if (vp) {
			batched = vnode_compound_rmdir_available(vp);

			if (vp->v_flag & VROOT) {
				/*
				 * The root of a mounted filesystem cannot be deleted.
				 */
				error = EBUSY;
				goto out;
			}

			/*
			 * Removed a check here; we used to abort if vp's vid
			 * was not the same as what we'd seen the last time around.
			 * I do not think that check was valid, because if we retry
			 * and all dirents are gone, the directory could legitimately
			 * be recycled but still be present in a situation where we would
			 * have had permission to delete.  Therefore, we won't make
			 * an effort to preserve that check now that we may not have a
			 * vp here.
			 */

			if (!batched) {
				error = vn_authorize_rmdir(dvp, vp, &nd.ni_cnd, ctx, NULL);
				if (error) {
					goto out;
				}
			}
		} else {
			/* No vp: the compound VNOP will do lookup + rmdir in one shot. */
			batched = 1;

			if (!vnode_compound_rmdir_available(dvp)) {
				panic("No error, but no compound rmdir?");
			}
		}

#if CONFIG_FSE
		fse_info  finfo;

		need_event = need_fsevent(FSE_DELETE, dvp);
		if (need_event) {
			if (!batched) {
				get_fse_info(vp, &finfo, ctx);
			} else {
				error = vfs_get_notify_attributes(&va);
				if (error) {
					goto out;
				}

				vap = &va;
			}
		}
#endif
		has_listeners = kauth_authorize_fileop_has_listeners();
		if (need_event || has_listeners) {
			if (path == NULL) {
				GET_PATH(path);
				if (path == NULL) {
					error = ENOMEM;
					goto out;
				}
			}

			len = safe_getpath(dvp, nd.ni_cnd.cn_nameptr, path, MAXPATHLEN, &truncated);
#if CONFIG_FSE
			if (truncated) {
				finfo.mode |= FSE_TRUNCATED_PATH;
			}
#endif
		}

		error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		nd.ni_vp = vp;
		if (vp == NULLVP) {
			/* Couldn't find a vnode */
			goto out;
		}

		if (error == EKEEPLOOKING) {
			/* compound VNOP asked for a continued lookup */
			goto continue_lookup;
		}
#if CONFIG_APPLEDOUBLE
		/*
		 * Special case to remove orphaned AppleDouble
		 * files. I don't like putting this in the kernel,
		 * but carbon does not like putting this in carbon either,
		 * so here we are.
		 */
		if (error == ENOTEMPTY) {
			error = rmdir_remove_orphaned_appleDouble(vp, ctx, &restart_flag);
			if (error == EBUSY) {
				goto out;
			}


			/*
			 * Assuming everything went well, we will try the RMDIR again
			 */
			if (!error)
				error = vn_rmdir(dvp, &vp, &nd, vap, ctx);
		}
#endif /* CONFIG_APPLEDOUBLE */
		/*
		 * Call out to allow 3rd party notification of delete.
		 * Ignore result of kauth_authorize_fileop call.
6836 */ 6837 if (!error) { 6838 if (has_listeners) { 6839 kauth_authorize_fileop(vfs_context_ucred(ctx), 6840 KAUTH_FILEOP_DELETE, 6841 (uintptr_t)vp, 6842 (uintptr_t)path); 6843 } 6844 6845 if (vp->v_flag & VISHARDLINK) { 6846 // see the comment in unlink1() about why we update 6847 // the parent of a hard link when it is removed 6848 vnode_update_identity(vp, NULL, NULL, 0, 0, VNODE_UPDATE_PARENT); 6849 } 6850 6851#if CONFIG_FSE 6852 if (need_event) { 6853 if (vap) { 6854 vnode_get_fse_info_from_vap(vp, &finfo, vap); 6855 } 6856 add_fsevent(FSE_DELETE, ctx, 6857 FSE_ARG_STRING, len, path, 6858 FSE_ARG_FINFO, &finfo, 6859 FSE_ARG_DONE); 6860 } 6861#endif 6862 } 6863 6864out: 6865 if (path != NULL) { 6866 RELEASE_PATH(path); 6867 path = NULL; 6868 } 6869 /* 6870 * nameidone has to happen before we vnode_put(dvp) 6871 * since it may need to release the fs_nodelock on the dvp 6872 */ 6873 nameidone(&nd); 6874 vnode_put(dvp); 6875 6876 if (vp) 6877 vnode_put(vp); 6878 6879 if (restart_flag == 0) { 6880 wakeup_one((caddr_t)vp); 6881 return (error); 6882 } 6883 tsleep(vp, PVFS, "rm AD", 1); 6884 6885 } while (restart_flag != 0); 6886 6887 return (error); 6888 6889} 6890 6891/* Get direntry length padded to 8 byte alignment */ 6892#define DIRENT64_LEN(namlen) \ 6893 ((sizeof(struct direntry) + (namlen) - (MAXPATHLEN-1) + 7) & ~7) 6894 6895static errno_t 6896vnode_readdir64(struct vnode *vp, struct uio *uio, int flags, int *eofflag, 6897 int *numdirent, vfs_context_t ctxp) 6898{ 6899 /* Check if fs natively supports VNODE_READDIR_EXTENDED */ 6900 if ((vp->v_mount->mnt_vtable->vfc_vfsflags & VFC_VFSREADDIR_EXTENDED) && 6901 ((vp->v_mount->mnt_kern_flag & MNTK_DENY_READDIREXT) == 0)) { 6902 return VNOP_READDIR(vp, uio, flags, eofflag, numdirent, ctxp); 6903 } else { 6904 size_t bufsize; 6905 void * bufptr; 6906 uio_t auio; 6907 struct direntry *entry64; 6908 struct dirent *dep; 6909 int bytesread; 6910 int error; 6911 6912 /* 6913 * Our kernel buffer needs to be smaller 
since re-packing 6914 * will expand each dirent. The worse case (when the name 6915 * length is 3) corresponds to a struct direntry size of 32 6916 * bytes (8-byte aligned) and a struct dirent size of 12 bytes 6917 * (4-byte aligned). So having a buffer that is 3/8 the size 6918 * will prevent us from reading more than we can pack. 6919 * 6920 * Since this buffer is wired memory, we will limit the 6921 * buffer size to a maximum of 32K. We would really like to 6922 * use 32K in the MIN(), but we use magic number 87371 to 6923 * prevent uio_resid() * 3 / 8 from overflowing. 6924 */ 6925 bufsize = 3 * MIN((user_size_t)uio_resid(uio), 87371u) / 8; 6926 MALLOC(bufptr, void *, bufsize, M_TEMP, M_WAITOK); 6927 if (bufptr == NULL) { 6928 return ENOMEM; 6929 } 6930 6931 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ); 6932 uio_addiov(auio, (uintptr_t)bufptr, bufsize); 6933 auio->uio_offset = uio->uio_offset; 6934 6935 error = VNOP_READDIR(vp, auio, 0, eofflag, numdirent, ctxp); 6936 6937 dep = (struct dirent *)bufptr; 6938 bytesread = bufsize - uio_resid(auio); 6939 6940 MALLOC(entry64, struct direntry *, sizeof(struct direntry), 6941 M_TEMP, M_WAITOK); 6942 /* 6943 * Convert all the entries and copy them out to user's buffer. 6944 */ 6945 while (error == 0 && (char *)dep < ((char *)bufptr + bytesread)) { 6946 size_t enbufsize = DIRENT64_LEN(dep->d_namlen); 6947 6948 bzero(entry64, enbufsize); 6949 /* Convert a dirent to a dirent64. */ 6950 entry64->d_ino = dep->d_ino; 6951 entry64->d_seekoff = 0; 6952 entry64->d_reclen = enbufsize; 6953 entry64->d_namlen = dep->d_namlen; 6954 entry64->d_type = dep->d_type; 6955 bcopy(dep->d_name, entry64->d_name, dep->d_namlen + 1); 6956 6957 /* Move to next entry. */ 6958 dep = (struct dirent *)((char *)dep + dep->d_reclen); 6959 6960 /* Copy entry64 to user's buffer. */ 6961 error = uiomove((caddr_t)entry64, entry64->d_reclen, uio); 6962 } 6963 6964 /* Update the real offset using the offset we got from VNOP_READDIR. 
*/ 6965 if (error == 0) { 6966 uio->uio_offset = auio->uio_offset; 6967 } 6968 uio_free(auio); 6969 FREE(bufptr, M_TEMP); 6970 FREE(entry64, M_TEMP); 6971 return (error); 6972 } 6973} 6974 6975#define GETDIRENTRIES_MAXBUFSIZE (128 * 1024 * 1024U) 6976 6977/* 6978 * Read a block of directory entries in a file system independent format. 6979 */ 6980static int 6981getdirentries_common(int fd, user_addr_t bufp, user_size_t bufsize, ssize_t *bytesread, 6982 off_t *offset, int flags) 6983{ 6984 vnode_t vp; 6985 struct vfs_context context = *vfs_context_current(); /* local copy */ 6986 struct fileproc *fp; 6987 uio_t auio; 6988 int spacetype = proc_is64bit(vfs_context_proc(&context)) ? UIO_USERSPACE64 : UIO_USERSPACE32; 6989 off_t loff; 6990 int error, eofflag, numdirent; 6991 char uio_buf[ UIO_SIZEOF(1) ]; 6992 6993 error = fp_getfvp(vfs_context_proc(&context), fd, &fp, &vp); 6994 if (error) { 6995 return (error); 6996 } 6997 if ((fp->f_fglob->fg_flag & FREAD) == 0) { 6998 AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1); 6999 error = EBADF; 7000 goto out; 7001 } 7002 7003 if (bufsize > GETDIRENTRIES_MAXBUFSIZE) 7004 bufsize = GETDIRENTRIES_MAXBUFSIZE; 7005 7006#if CONFIG_MACF 7007 error = mac_file_check_change_offset(vfs_context_ucred(&context), fp->f_fglob); 7008 if (error) 7009 goto out; 7010#endif 7011 if ( (error = vnode_getwithref(vp)) ) { 7012 goto out; 7013 } 7014 AUDIT_ARG(vnpath, vp, ARG_VNODE1); 7015 7016unionread: 7017 if (vp->v_type != VDIR) { 7018 (void)vnode_put(vp); 7019 error = EINVAL; 7020 goto out; 7021 } 7022 7023#if CONFIG_MACF 7024 error = mac_vnode_check_readdir(&context, vp); 7025 if (error != 0) { 7026 (void)vnode_put(vp); 7027 goto out; 7028 } 7029#endif /* MAC */ 7030 7031 loff = fp->f_fglob->fg_offset; 7032 auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); 7033 uio_addiov(auio, bufp, bufsize); 7034 7035 if (flags & VNODE_READDIR_EXTENDED) { 7036 error = vnode_readdir64(vp, auio, flags, &eofflag, &numdirent, 
		    &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	} else {
		error = VNOP_READDIR(vp, auio, 0, &eofflag, &numdirent, &context);
		fp->f_fglob->fg_offset = uio_offset(auio);
	}
	if (error) {
		(void)vnode_put(vp);
		goto out;
	}

	/* Nothing was produced: possibly at EOF of the upper layer of a union. */
	if ((user_ssize_t)bufsize == uio_resid(auio)){
		if (union_dircheckp) {
			error = union_dircheckp(&vp, fp, &context);
			if (error == -1)
				goto unionread;
			if (error)
				goto out;
		}

		if ((vp->v_mount->mnt_flag & MNT_UNION)) {
			/* switch the fd to the lower vnode and re-read from offset 0 */
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, &context) == 0) {
				vnode_ref(vp);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0;
				vnode_rele(tvp);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	vnode_put(vp);
	if (offset) {
		*offset = loff;	/* directory offset at which this read began */
	}

	*bytesread = bufsize - uio_resid(auio);
out:
	file_drop(fd);
	return (error);
}


/*
 * getdirentries: read dirents in the classic format; the base directory
 * offset is copied out to uap->basep as a 32- or 64-bit long.
 *
 * NOTE(review): p is marked __unused but IS consulted via proc_is64bit(p);
 * the attribute is merely "possibly unused" so this is harmless, but the
 * annotation is misleading.
 */
int
getdirentries(__unused struct proc *p, struct getdirentries_args *uap, int32_t *retval)
{
	off_t offset;
	ssize_t bytesread;
	int error;

	AUDIT_ARG(fd, uap->fd);
	error = getdirentries_common(uap->fd, uap->buf, uap->count, &bytesread, &offset, 0);

	if (error == 0) {
		if (proc_is64bit(p)) {
			user64_long_t base = (user64_long_t)offset;
			error = copyout((caddr_t)&base, uap->basep, sizeof(user64_long_t));
		} else {
			user32_long_t base = (user32_long_t)offset;
			error = copyout((caddr_t)&base, uap->basep, sizeof(user32_long_t));
		}
		*retval = bytesread;
	}
	return (error);
}

/*
 * getdirentries64: read dirents in the extended (struct direntry) format,
 * reporting the starting directory offset through uap->position.
 */
int
getdirentries64(__unused struct proc *p, struct getdirentries64_args *uap, user_ssize_t *retval)
{
	off_t offset;
	ssize_t bytesread;
	int error;

	AUDIT_ARG(fd, uap->fd);
	error = getdirentries_common(uap->fd, uap->buf, uap->bufsize, &bytesread, &offset,
VNODE_READDIR_EXTENDED); 7114 7115 if (error == 0) { 7116 *retval = bytesread; 7117 error = copyout((caddr_t)&offset, uap->position, sizeof(off_t)); 7118 } 7119 return (error); 7120} 7121 7122 7123/* 7124 * Set the mode mask for creation of filesystem nodes. 7125 * XXX implement xsecurity 7126 */ 7127#define UMASK_NOXSECURITY (void *)1 /* leave existing xsecurity alone */ 7128static int 7129umask1(proc_t p, int newmask, __unused kauth_filesec_t fsec, int32_t *retval) 7130{ 7131 struct filedesc *fdp; 7132 7133 AUDIT_ARG(mask, newmask); 7134 proc_fdlock(p); 7135 fdp = p->p_fd; 7136 *retval = fdp->fd_cmask; 7137 fdp->fd_cmask = newmask & ALLPERMS; 7138 proc_fdunlock(p); 7139 return (0); 7140} 7141 7142/* 7143 * umask_extended: Set the mode mask for creation of filesystem nodes; with extended security (ACL). 7144 * 7145 * Parameters: p Process requesting to set the umask 7146 * uap User argument descriptor (see below) 7147 * retval umask of the process (parameter p) 7148 * 7149 * Indirect: uap->newmask umask to set 7150 * uap->xsecurity ACL to set 7151 * 7152 * Returns: 0 Success 7153 * !0 Not success 7154 * 7155 */ 7156int 7157umask_extended(proc_t p, struct umask_extended_args *uap, int32_t *retval) 7158{ 7159 int ciferror; 7160 kauth_filesec_t xsecdst; 7161 7162 xsecdst = KAUTH_FILESEC_NONE; 7163 if (uap->xsecurity != USER_ADDR_NULL) { 7164 if ((ciferror = kauth_copyinfilesec(uap->xsecurity, &xsecdst)) != 0) 7165 return ciferror; 7166 } else { 7167 xsecdst = KAUTH_FILESEC_NONE; 7168 } 7169 7170 ciferror = umask1(p, uap->newmask, xsecdst, retval); 7171 7172 if (xsecdst != KAUTH_FILESEC_NONE) 7173 kauth_filesec_free(xsecdst); 7174 return ciferror; 7175} 7176 7177int 7178umask(proc_t p, struct umask_args *uap, int32_t *retval) 7179{ 7180 return(umask1(p, uap->newmask, UMASK_NOXSECURITY, retval)); 7181} 7182 7183/* 7184 * Void all references to file by ripping underlying filesystem 7185 * away from vnode. 
 */
/* ARGSUSED */
int
revoke(proc_t p, struct revoke_args *uap, __unused int32_t *retval)
{
	vnode_t vp;
	struct vnode_attr va;
	vfs_context_t ctx = vfs_context_current();
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, OP_REVOKE, FOLLOW | AUDITVNPATH1, UIO_USERSPACE,
	       uap->path, ctx);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	nameidone(&nd);

	/* Only character and block special files may be revoked. */
	if (!(vnode_ischr(vp) || vnode_isblk(vp))) {
		error = ENOTSUP;
		goto out;
	}

	/* Refuse to revoke a block device that backs a mounted filesystem. */
	if (vnode_isblk(vp) && vnode_ismountedon(vp)) {
		error = EBUSY;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_revoke(ctx, vp);
	if (error)
		goto out;
#endif

	/* Caller must own the node or be superuser. */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	if ((error = vnode_getattr(vp, &va, ctx)))
		goto out;
	if (kauth_cred_getuid(vfs_context_ucred(ctx)) != va.va_uid &&
	    (error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		goto out;
	/* Only bother if somebody actually holds the node open or aliased. */
	if (vp->v_usecount > 0 || (vnode_isaliased(vp)))
		VNOP_REVOKE(vp, REVOKEALL, ctx);
out:
	vnode_put(vp);
	return (error);
}


/*
 * HFS/HFS Plus SPECIFIC SYSTEM CALLS
 * The following system calls are designed to support features
 * which are specific to the HFS & HFS Plus volume formats
 */


/*
 * Obtain attribute information on objects in a directory while enumerating
 * the directory.
 */
/* ARGSUSED */
int
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
{
	vnode_t vp;
	struct fileproc *fp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ?
/*
 * Obtain attribute information on objects in a directory while enumerating
 * the directory.
 *
 * Reads directory entries plus their requested attributes from the open
 * directory fd in uap->fd into uap->buffer via VNOP_READDIRATTR.  On
 * success, copies out the entry count (uap->count), a directory-state
 * cookie (uap->newstate) and the pre-read offset (uap->basep), and returns
 * the EOF flag in *retval.  For union mounts, transparently descends to
 * the lower layer when the upper layer is exhausted.
 */
/* ARGSUSED */
int
getdirentriesattr (proc_t p, struct getdirentriesattr_args *uap, int32_t *retval)
{
	vnode_t vp;
	struct fileproc *fp;
	uio_t auio = NULL;
	int spacetype = proc_is64bit(p) ? UIO_USERSPACE64 : UIO_USERSPACE32;
	uint32_t count, savecount;
	uint32_t newstate;
	int error, eofflag;
	uint32_t loff;
	struct attrlist attributelist;
	vfs_context_t ctx = vfs_context_current();
	int fd = uap->fd;
	char uio_buf[ UIO_SIZEOF(1) ];
	kauth_action_t action;

	AUDIT_ARG(fd, fd);

	/* Get the attributes into kernel space */
	if ((error = copyin(uap->alist, (caddr_t)&attributelist, sizeof(attributelist)))) {
		return(error);
	}
	if ((error = copyin(uap->count, (caddr_t)&count, sizeof(count)))) {
		return(error);
	}
	/* remember the caller's count so it can be re-armed when we descend
	 * into a union lower layer */
	savecount = count;
	if ( (error = fp_getfvp(p, fd, &fp, &vp)) ) {
		return (error);
	}
	/* fd must have been opened for reading */
	if ((fp->f_fglob->fg_flag & FREAD) == 0) {
		AUDIT_ARG(vnpath_withref, vp, ARG_VNODE1);
		error = EBADF;
		goto out;
	}


#if CONFIG_MACF
	error = mac_file_check_change_offset(vfs_context_ucred(ctx),
	    fp->f_fglob);
	if (error)
		goto out;
#endif


	if ( (error = vnode_getwithref(vp)) )
		goto out;

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

unionread:
	if (vp->v_type != VDIR) {
		(void)vnode_put(vp);
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_readdir(ctx, vp);
	if (error != 0) {
		(void)vnode_put(vp);
		goto out;
	}
#endif /* MAC */

	/* set up the uio structure which will contain the users return buffer */
	loff = fp->f_fglob->fg_offset;
	auio = uio_createwithbuffer(1, loff, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, uap->buffer, uap->buffersize);

	/*
	 * If the only item requested is file names, we can let that past with
	 * just LIST_DIRECTORY.  If they want any other attributes, that means
	 * they need SEARCH as well.
	 */
	action = KAUTH_VNODE_LIST_DIRECTORY;
	if ((attributelist.commonattr & ~ATTR_CMN_NAME) ||
	    attributelist.fileattr || attributelist.dirattr)
		action |= KAUTH_VNODE_SEARCH;

	if ((error = vnode_authorize(vp, NULL, action, ctx)) == 0) {

		/* Believe it or not, uap->options only has 32-bits of valid
		 * info, so truncate before extending again */

		error = VNOP_READDIRATTR(vp, &attributelist, auio, count,
		    (u_long)(uint32_t)uap->options, &newstate, &eofflag, &count, ctx);
	}

	if (error) {
		(void) vnode_put(vp);
		goto out;
	}

	/*
	 * If we've got the last entry of a directory in a union mount
	 * then reset the eofflag and pretend there's still more to come.
	 * The next call will again set eofflag and the buffer will be empty,
	 * so traverse to the underlying directory and do the directory
	 * read there.
	 */
	if (eofflag && vp->v_mount->mnt_flag & MNT_UNION) {
		if (uio_resid(auio) < (user_ssize_t) uap->buffersize) { // Got some entries
			eofflag = 0;
		} else { // Empty buffer
			struct vnode *tvp = vp;
			if (lookup_traverse_union(tvp, &vp, ctx) == 0) {
				/* swap the fd's backing vnode for the lower
				 * layer and restart the read from offset 0 */
				vnode_ref_ext(vp, fp->f_fglob->fg_flag & O_EVTONLY, 0);
				fp->f_fglob->fg_data = (caddr_t) vp;
				fp->f_fglob->fg_offset = 0; // reset index for new dir
				count = savecount;
				vnode_rele_internal(tvp, fp->f_fglob->fg_flag & O_EVTONLY, 0, 0);
				vnode_put(tvp);
				goto unionread;
			}
			vp = tvp;
		}
	}

	(void)vnode_put(vp);

	if (error)
		goto out;
	fp->f_fglob->fg_offset = uio_offset(auio); /* should be multiple of dirent, not variable */

	if ((error = copyout((caddr_t) &count, uap->count, sizeof(count))))
		goto out;
	if ((error = copyout((caddr_t) &newstate, uap->newstate, sizeof(newstate))))
		goto out;
	if ((error = copyout((caddr_t) &loff, uap->basep, sizeof(loff))))
		goto out;

	*retval = eofflag;  /* similar to getdirentries */
	error = 0;
out:
	file_drop(fd);
	return (error); /* return error earlier, an retval of 0 or 1 now */

} /* end of getdirentriesattr system call */
/*
* Exchange data between two files
*/

/* ARGSUSED */
/*
 * exchangedata(2): atomically swap the data forks of two regular files on
 * the same volume (historically an HFS/HFS+ feature, dispatched via
 * VNOP_EXCHANGE).  On success the cached v_name/v_parent of the two
 * vnodes are also swapped so the name cache stays coherent, and an
 * FSE_EXCHANGE fsevent / fileop notification is emitted when anyone is
 * listening.
 *
 * Errors: EINVAL (same file, or not regular files), EXDEV (different
 * volumes), ENOMEM (path buffers), or errors from namei/MAC/authorize/
 * VNOP_EXCHANGE.
 */
int
exchangedata (__unused proc_t p, struct exchangedata_args *uap, __unused int32_t *retval)
{

	struct nameidata fnd, snd;
	vfs_context_t ctx = vfs_context_current();
	vnode_t fvp;
	vnode_t svp;
	int error;
	u_int32_t nameiflags;
	char *fpath = NULL;
	char *spath = NULL;
	int flen=0, slen=0;
	int from_truncated=0, to_truncated=0;
#if CONFIG_FSE
	fse_info f_finfo, s_finfo;
#endif

	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;

	NDINIT(&fnd, LOOKUP, OP_EXCHANGEDATA, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path1, ctx);

	error = namei(&fnd);
	if (error)
		goto out2;

	nameidone(&fnd);
	fvp = fnd.ni_vp;

	NDINIT(&snd, LOOKUP, OP_EXCHANGEDATA, CN_NBMOUNTLOOK | nameiflags | AUDITVNPATH2,
	       UIO_USERSPACE, uap->path2, ctx);

	error = namei(&snd);
	if (error) {
		vnode_put(fvp);
		goto out2;
	}
	nameidone(&snd);
	svp = snd.ni_vp;

	/*
	 * if the files are the same, return an inval error
	 */
	if (svp == fvp) {
		error = EINVAL;
		goto out;
	}

	/*
	 * if the files are on different volumes, return an error
	 */
	if (svp->v_mount != fvp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* If they're not files, return an error */
	if ( (vnode_isreg(fvp) == 0) || (vnode_isreg(svp) == 0)) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_vnode_check_exchangedata(ctx,
	    fvp, svp);
	if (error)
		goto out;
#endif
	/* caller needs read AND write access to both files */
	if (((error = vnode_authorize(fvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0) ||
	    ((error = vnode_authorize(svp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, ctx)) != 0))
		goto out;

	/* only pay the cost of building both paths if somebody will consume
	 * them (fsevents or a fileop listener) */
	if (
#if CONFIG_FSE
	    need_fsevent(FSE_EXCHANGE, fvp) ||
#endif
	    kauth_authorize_fileop_has_listeners()) {
		GET_PATH(fpath);
		GET_PATH(spath);
		if (fpath == NULL || spath == NULL) {
			error = ENOMEM;
			goto out;
		}

		flen = safe_getpath(fvp, NULL, fpath, MAXPATHLEN, &from_truncated);
		slen = safe_getpath(svp, NULL, spath, MAXPATHLEN, &to_truncated);

#if CONFIG_FSE
		get_fse_info(fvp, &f_finfo, ctx);
		get_fse_info(svp, &s_finfo, ctx);
		if (from_truncated || to_truncated) {
			// set it here since only the f_finfo gets reported up to user space
			f_finfo.mode |= FSE_TRUNCATED_PATH;
		}
#endif
	}
	/* Ok, make the call */
	error = VNOP_EXCHANGE(fvp, svp, 0, ctx);

	if (error == 0) {
		const char *tmpname;

		if (fpath != NULL && spath != NULL) {
			/* call out to allow 3rd party notification of exchangedata.
			 * Ignore result of kauth_authorize_fileop call.
			 */
			kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE,
					       (uintptr_t)fpath, (uintptr_t)spath);
		}

		/* the data forks were swapped underneath the vnodes, so swap
		 * the cached names/parents too, under the name cache lock */
		name_cache_lock();

		tmpname     = fvp->v_name;
		fvp->v_name = svp->v_name;
		svp->v_name = tmpname;

		if (fvp->v_parent != svp->v_parent) {
			vnode_t tmp;

			tmp           = fvp->v_parent;
			fvp->v_parent = svp->v_parent;
			svp->v_parent = tmp;
		}
		name_cache_unlock();

#if CONFIG_FSE
		if (fpath != NULL && spath != NULL) {
			add_fsevent(FSE_EXCHANGE, ctx,
				    FSE_ARG_STRING, flen, fpath,
				    FSE_ARG_FINFO, &f_finfo,
				    FSE_ARG_STRING, slen, spath,
				    FSE_ARG_FINFO, &s_finfo,
				    FSE_ARG_DONE);
		}
#endif
	}

out:
	if (fpath != NULL)
		RELEASE_PATH(fpath);
	if (spath != NULL)
		RELEASE_PATH(spath);
	vnode_put(svp);
	vnode_put(fvp);
out2:
	return (error);
}
7501 */ 7502 kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_EXCHANGE, 7503 (uintptr_t)fpath, (uintptr_t)spath); 7504 } 7505 name_cache_lock(); 7506 7507 tmpname = fvp->v_name; 7508 fvp->v_name = svp->v_name; 7509 svp->v_name = tmpname; 7510 7511 if (fvp->v_parent != svp->v_parent) { 7512 vnode_t tmp; 7513 7514 tmp = fvp->v_parent; 7515 fvp->v_parent = svp->v_parent; 7516 svp->v_parent = tmp; 7517 } 7518 name_cache_unlock(); 7519 7520#if CONFIG_FSE 7521 if (fpath != NULL && spath != NULL) { 7522 add_fsevent(FSE_EXCHANGE, ctx, 7523 FSE_ARG_STRING, flen, fpath, 7524 FSE_ARG_FINFO, &f_finfo, 7525 FSE_ARG_STRING, slen, spath, 7526 FSE_ARG_FINFO, &s_finfo, 7527 FSE_ARG_DONE); 7528 } 7529#endif 7530 } 7531 7532out: 7533 if (fpath != NULL) 7534 RELEASE_PATH(fpath); 7535 if (spath != NULL) 7536 RELEASE_PATH(spath); 7537 vnode_put(svp); 7538 vnode_put(fvp); 7539out2: 7540 return (error); 7541} 7542 7543/* 7544 * Return (in MB) the amount of freespace on the given vnode's volume. 7545 */ 7546uint32_t freespace_mb(vnode_t vp); 7547 7548uint32_t 7549freespace_mb(vnode_t vp) 7550{ 7551 vfs_update_vfsstat(vp->v_mount, vfs_context_current(), VFS_USER_EVENT); 7552 return (((uint64_t)vp->v_mount->mnt_vfsstat.f_bavail * 7553 vp->v_mount->mnt_vfsstat.f_bsize) >> 20); 7554} 7555 7556#if CONFIG_SEARCHFS 7557 7558/* ARGSUSED */ 7559 7560int 7561searchfs(proc_t p, struct searchfs_args *uap, __unused int32_t *retval) 7562{ 7563 vnode_t vp, tvp; 7564 int i, error=0; 7565 int fserror = 0; 7566 struct nameidata nd; 7567 struct user64_fssearchblock searchblock; 7568 struct searchstate *state; 7569 struct attrlist *returnattrs; 7570 struct timeval timelimit; 7571 void *searchparams1,*searchparams2; 7572 uio_t auio = NULL; 7573 int spacetype = proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; 7574 uint32_t nummatches; 7575 int mallocsize; 7576 uint32_t nameiflags; 7577 vfs_context_t ctx = vfs_context_current(); 7578 char uio_buf[ UIO_SIZEOF(1) ]; 7579 7580 /* Start by copying in fsearchblock parameter list */ 7581 if (IS_64BIT_PROCESS(p)) { 7582 error = copyin(uap->searchblock, (caddr_t) &searchblock, sizeof(searchblock)); 7583 timelimit.tv_sec = searchblock.timelimit.tv_sec; 7584 timelimit.tv_usec = searchblock.timelimit.tv_usec; 7585 } 7586 else { 7587 struct user32_fssearchblock tmp_searchblock; 7588 7589 error = copyin(uap->searchblock, (caddr_t) &tmp_searchblock, sizeof(tmp_searchblock)); 7590 // munge into 64-bit version 7591 searchblock.returnattrs = CAST_USER_ADDR_T(tmp_searchblock.returnattrs); 7592 searchblock.returnbuffer = CAST_USER_ADDR_T(tmp_searchblock.returnbuffer); 7593 searchblock.returnbuffersize = tmp_searchblock.returnbuffersize; 7594 searchblock.maxmatches = tmp_searchblock.maxmatches; 7595 /* 7596 * These casts are safe. We will promote the tv_sec into a 64 bit long if necessary 7597 * from a 32 bit long, and tv_usec is already a signed 32 bit int. 7598 */ 7599 timelimit.tv_sec = (__darwin_time_t) tmp_searchblock.timelimit.tv_sec; 7600 timelimit.tv_usec = (__darwin_useconds_t) tmp_searchblock.timelimit.tv_usec; 7601 searchblock.searchparams1 = CAST_USER_ADDR_T(tmp_searchblock.searchparams1); 7602 searchblock.sizeofsearchparams1 = tmp_searchblock.sizeofsearchparams1; 7603 searchblock.searchparams2 = CAST_USER_ADDR_T(tmp_searchblock.searchparams2); 7604 searchblock.sizeofsearchparams2 = tmp_searchblock.sizeofsearchparams2; 7605 searchblock.searchattrs = tmp_searchblock.searchattrs; 7606 } 7607 if (error) 7608 return(error); 7609 7610 /* Do a sanity check on sizeofsearchparams1 and sizeofsearchparams2. 
7611 */ 7612 if (searchblock.sizeofsearchparams1 > SEARCHFS_MAX_SEARCHPARMS || 7613 searchblock.sizeofsearchparams2 > SEARCHFS_MAX_SEARCHPARMS) 7614 return(EINVAL); 7615 7616 /* Now malloc a big bunch of space to hold the search parameters, the attrlists and the search state. */ 7617 /* It all has to do into local memory and it's not that big so we might as well put it all together. */ 7618 /* Searchparams1 shall be first so we might as well use that to hold the base address of the allocated*/ 7619 /* block. */ 7620 7621 mallocsize = searchblock.sizeofsearchparams1 + searchblock.sizeofsearchparams2 + 7622 sizeof(struct attrlist) + sizeof(struct searchstate); 7623 7624 MALLOC(searchparams1, void *, mallocsize, M_TEMP, M_WAITOK); 7625 7626 /* Now set up the various pointers to the correct place in our newly allocated memory */ 7627 7628 searchparams2 = (void *) (((caddr_t) searchparams1) + searchblock.sizeofsearchparams1); 7629 returnattrs = (struct attrlist *) (((caddr_t) searchparams2) + searchblock.sizeofsearchparams2); 7630 state = (struct searchstate *) (((caddr_t) returnattrs) + sizeof (struct attrlist)); 7631 7632 /* Now copy in the stuff given our local variables. */ 7633 7634 if ((error = copyin(searchblock.searchparams1, searchparams1, searchblock.sizeofsearchparams1))) 7635 goto freeandexit; 7636 7637 if ((error = copyin(searchblock.searchparams2, searchparams2, searchblock.sizeofsearchparams2))) 7638 goto freeandexit; 7639 7640 if ((error = copyin(searchblock.returnattrs, (caddr_t) returnattrs, sizeof(struct attrlist)))) 7641 goto freeandexit; 7642 7643 if ((error = copyin(uap->state, (caddr_t) state, sizeof(struct searchstate)))) 7644 goto freeandexit; 7645 7646 /* 7647 * When searching a union mount, need to set the 7648 * start flag at the first call on each layer to 7649 * reset state for the new volume. 
7650 */ 7651 if (uap->options & SRCHFS_START) 7652 state->ss_union_layer = 0; 7653 else 7654 uap->options |= state->ss_union_flags; 7655 state->ss_union_flags = 0; 7656 7657 /* 7658 * Because searchparams1 and searchparams2 may contain an ATTR_CMN_NAME search parameter, 7659 * which is passed in with an attrreference_t, we need to inspect the buffer manually here. 7660 * The KPI does not provide us the ability to pass in the length of the buffers searchparams1 7661 * and searchparams2. To obviate the need for all searchfs-supporting filesystems to 7662 * validate the user-supplied data offset of the attrreference_t, we'll do it here. 7663 */ 7664 7665 if (searchblock.searchattrs.commonattr & ATTR_CMN_NAME) { 7666 attrreference_t* string_ref; 7667 u_int32_t* start_length; 7668 user64_size_t param_length; 7669 7670 /* validate searchparams1 */ 7671 param_length = searchblock.sizeofsearchparams1; 7672 /* skip the word that specifies length of the buffer */ 7673 start_length= (u_int32_t*) searchparams1; 7674 start_length= start_length+1; 7675 string_ref= (attrreference_t*) start_length; 7676 7677 /* ensure no negative offsets or too big offsets */ 7678 if (string_ref->attr_dataoffset < 0 ) { 7679 error = EINVAL; 7680 goto freeandexit; 7681 } 7682 if (string_ref->attr_length > MAXPATHLEN) { 7683 error = EINVAL; 7684 goto freeandexit; 7685 } 7686 7687 /* Check for pointer overflow in the string ref */ 7688 if (((char*) string_ref + string_ref->attr_dataoffset) < (char*) string_ref) { 7689 error = EINVAL; 7690 goto freeandexit; 7691 } 7692 7693 if (((char*) string_ref + string_ref->attr_dataoffset) > ((char*)searchparams1 + param_length)) { 7694 error = EINVAL; 7695 goto freeandexit; 7696 } 7697 if (((char*)string_ref + string_ref->attr_dataoffset + string_ref->attr_length) > ((char*)searchparams1 + param_length)) { 7698 error = EINVAL; 7699 goto freeandexit; 7700 } 7701 } 7702 7703 /* set up the uio structure which will contain the users return buffer */ 7704 auio = 
uio_createwithbuffer(1, 0, spacetype, UIO_READ, &uio_buf[0], sizeof(uio_buf)); 7705 uio_addiov(auio, searchblock.returnbuffer, searchblock.returnbuffersize); 7706 7707 nameiflags = 0; 7708 if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW; 7709 NDINIT(&nd, LOOKUP, OP_SEARCHFS, nameiflags | AUDITVNPATH1, 7710 UIO_USERSPACE, uap->path, ctx); 7711 7712 error = namei(&nd); 7713 if (error) 7714 goto freeandexit; 7715 vp = nd.ni_vp; 7716 nameidone(&nd); 7717 7718 /* 7719 * Switch to the root vnode for the volume 7720 */ 7721 error = VFS_ROOT(vnode_mount(vp), &tvp, ctx); 7722 if (error) 7723 goto freeandexit; 7724 vnode_put(vp); 7725 vp = tvp; 7726 7727 /* 7728 * If it's a union mount, the path lookup takes 7729 * us to the top layer. But we may need to descend 7730 * to a lower layer. For non-union mounts the layer 7731 * is always zero. 7732 */ 7733 for (i = 0; i < (int) state->ss_union_layer; i++) { 7734 if ((vp->v_mount->mnt_flag & MNT_UNION) == 0) 7735 break; 7736 tvp = vp; 7737 vp = vp->v_mount->mnt_vnodecovered; 7738 if (vp == NULL) { 7739 vp = tvp; 7740 error = ENOENT; 7741 goto freeandexit; 7742 } 7743 vnode_getwithref(vp); 7744 vnode_put(tvp); 7745 } 7746 7747#if CONFIG_MACF 7748 error = mac_vnode_check_searchfs(ctx, vp, &searchblock.searchattrs); 7749 if (error) { 7750 vnode_put(vp); 7751 goto freeandexit; 7752 } 7753#endif 7754 7755 7756 /* 7757 * If searchblock.maxmatches == 0, then skip the search. This has happened 7758 * before and sometimes the underlying code doesnt deal with it well. 7759 */ 7760 if (searchblock.maxmatches == 0) { 7761 nummatches = 0; 7762 goto saveandexit; 7763 } 7764 7765 /* 7766 * Allright, we have everything we need, so lets make that call. 7767 * 7768 * We keep special track of the return value from the file system: 7769 * EAGAIN is an acceptable error condition that shouldn't keep us 7770 * from copying out any results... 
#else /* CONFIG_SEARCHFS */

/* searchfs(2) stub: kernel built without CONFIG_SEARCHFS */
int
searchfs(__unused proc_t p, __unused struct searchfs_args *uap, __unused int32_t *retval)
{
	return (ENOTSUP);
}

#endif /* CONFIG_SEARCHFS */


/*
 * Namespace-handler ("nspace") machinery: lock groups and mutexes
 * protecting the shared nspace_items table and handler registration
 * below.  Initialized in nspace_handler_init().
 */
lck_grp_attr_t *  nspace_group_attr;
lck_attr_t *      nspace_lock_attr;
lck_grp_t *       nspace_mutex_group;

lck_mtx_t         nspace_handler_lock;
lck_mtx_t         nspace_handler_exclusion_lock;

/* zeroed when the snapshot handler process exits (see nspace_proc_exit) */
time_t snapshot_timestamp=0;
/* when non-zero, snapshot events are delivered even for virtual devices
 * (disk images) -- used by testing infrastructure */
int nspace_allow_virtual_devs=0;

void nspace_handler_init(void);

/*
 * One pending namespace event.  A slot is "free" when flags == 0;
 * waiters sleep on &nspace_items[i].vp and handlers rendezvous on
 * &nspace_item_idx.
 */
typedef struct nspace_item_info {
	struct vnode *vp;	/* vnode the event refers to (NULL when slot idle) */
	void         *arg;	/* optional event argument passed through to the handler */
	uint64_t      op;	/* NAMESPACE_HANDLER_* operation bits */
	uint32_t      vid;	/* vnode id captured at enqueue time, for revalidation */
	uint32_t      flags;	/* NSPACE_ITEM_* state bits */
	uint32_t      token;	/* id handed to the userspace handler */
	uint32_t      refcount;	/* number of threads waiting on this slot */
} nspace_item_info;
#define MAX_NSPACE_ITEMS   128

/* shared table of pending namespace events, protected by nspace_handler_lock */
nspace_item_info nspace_items[MAX_NSPACE_ITEMS];
uint32_t      nspace_item_idx=0;              // also used as the sleep/wakeup rendezvous address
uint32_t      nspace_token_id=0;              /* monotonically increasing token generator */
uint32_t      nspace_handler_timeout = 15;    // seconds

/* nspace_item_info.flags state bits */
#define NSPACE_ITEM_NEW          0x0001
#define NSPACE_ITEM_PROCESSING   0x0002
#define NSPACE_ITEM_DEAD         0x0004
#define NSPACE_ITEM_CANCELLED    0x0008
#define NSPACE_ITEM_DONE         0x0010
#define NSPACE_ITEM_RESET_TIMER  0x0020

/* which kind of event a slot carries (exactly one of these is set) */
#define NSPACE_ITEM_NSPACE_EVENT   0x0040
#define NSPACE_ITEM_SNAPSHOT_EVENT 0x0080
#define NSPACE_ITEM_TRACK_EVENT    0x0100

#define NSPACE_ITEM_ALL_EVENT_TYPES (NSPACE_ITEM_NSPACE_EVENT | NSPACE_ITEM_SNAPSHOT_EVENT | NSPACE_ITEM_TRACK_EVENT)

//#pragma optimization_level 0

/* one handler process may register per nspace_type_t */
typedef enum {
	NSPACE_HANDLER_NSPACE = 0,
	NSPACE_HANDLER_SNAPSHOT = 1,
	NSPACE_HANDLER_TRACK = 2,

	NSPACE_HANDLER_COUNT,
} nspace_type_t;

typedef struct {
	uint64_t      handler_tid;	/* thread id of the registered handler */
	struct proc  *handler_proc;	/* registered handler process (NULL = none) */
	int           handler_busy;	/* exclusion flag: one handler thread at a time */
} nspace_handler_t;

nspace_handler_t nspace_handlers[NSPACE_HANDLER_COUNT];

/* namespace fsctl functions */
static int nspace_flags_matches_handler(uint32_t event_flags, nspace_type_t nspace_type);
static int nspace_item_flags_for_type(nspace_type_t nspace_type);
static int nspace_open_flags_for_type(nspace_type_t nspace_type);
static nspace_type_t nspace_type_for_op(uint64_t op);
static int nspace_is_special_process(struct proc *proc);
static int vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx);
static int wait_for_namespace_event(namespace_handler_data *nhd, nspace_type_t nspace_type);
static int validate_namespace_args (int is64bit, int size);
static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, caddr_t data);
event_flags, nspace_type_t nspace_type) 7904{ 7905 switch(nspace_type) { 7906 case NSPACE_HANDLER_NSPACE: 7907 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_NSPACE_EVENT; 7908 case NSPACE_HANDLER_SNAPSHOT: 7909 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_SNAPSHOT_EVENT; 7910 case NSPACE_HANDLER_TRACK: 7911 return (event_flags & NSPACE_ITEM_ALL_EVENT_TYPES) == NSPACE_ITEM_TRACK_EVENT; 7912 default: 7913 printf("nspace_flags_matches_handler: invalid type %u\n", (int)nspace_type); 7914 return 0; 7915 } 7916} 7917 7918static inline int nspace_item_flags_for_type(nspace_type_t nspace_type) 7919{ 7920 switch(nspace_type) { 7921 case NSPACE_HANDLER_NSPACE: 7922 return NSPACE_ITEM_NSPACE_EVENT; 7923 case NSPACE_HANDLER_SNAPSHOT: 7924 return NSPACE_ITEM_SNAPSHOT_EVENT; 7925 case NSPACE_HANDLER_TRACK: 7926 return NSPACE_ITEM_TRACK_EVENT; 7927 default: 7928 printf("nspace_item_flags_for_type: invalid type %u\n", (int)nspace_type); 7929 return 0; 7930 } 7931} 7932 7933static inline int nspace_open_flags_for_type(nspace_type_t nspace_type) 7934{ 7935 switch(nspace_type) { 7936 case NSPACE_HANDLER_NSPACE: 7937 return FREAD | FWRITE | O_EVTONLY; 7938 case NSPACE_HANDLER_SNAPSHOT: 7939 case NSPACE_HANDLER_TRACK: 7940 return FREAD | O_EVTONLY; 7941 default: 7942 printf("nspace_open_flags_for_type: invalid type %u\n", (int)nspace_type); 7943 return 0; 7944 } 7945} 7946 7947static inline nspace_type_t nspace_type_for_op(uint64_t op) 7948{ 7949 switch(op & NAMESPACE_HANDLER_EVENT_TYPE_MASK) { 7950 case NAMESPACE_HANDLER_NSPACE_EVENT: 7951 return NSPACE_HANDLER_NSPACE; 7952 case NAMESPACE_HANDLER_SNAPSHOT_EVENT: 7953 return NSPACE_HANDLER_SNAPSHOT; 7954 case NAMESPACE_HANDLER_TRACK_EVENT: 7955 return NSPACE_HANDLER_TRACK; 7956 default: 7957 printf("nspace_type_for_op: invalid op mask %llx\n", op & NAMESPACE_HANDLER_EVENT_TYPE_MASK); 7958 return NSPACE_HANDLER_NSPACE; 7959 } 7960} 7961 7962static inline int nspace_is_special_process(struct proc 
/*
 * True if 'proc' is one of the registered namespace handler processes.
 * Used to prevent a handler from blocking on events it must service
 * itself (deadlock avoidance in resolve_nspace_item_ext).
 */
static inline int nspace_is_special_process(struct proc *proc)
{
	int i;
	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (proc == nspace_handlers[i].handler_proc)
			return 1;
	}
	return 0;
}

/*
 * One-time initialization of the nspace lock group/mutexes and the
 * shared pending-event table.  Called during VFS startup.
 */
void
nspace_handler_init(void)
{
	nspace_lock_attr    = lck_attr_alloc_init();
	nspace_group_attr   = lck_grp_attr_alloc_init();
	nspace_mutex_group  = lck_grp_alloc_init("nspace-mutex", nspace_group_attr);
	lck_mtx_init(&nspace_handler_lock, nspace_mutex_group, nspace_lock_attr);
	lck_mtx_init(&nspace_handler_exclusion_lock, nspace_mutex_group, nspace_lock_attr);
	memset(&nspace_items[0], 0, sizeof(nspace_items));
}

/*
 * Called when a process exits.  If it was a registered namespace handler,
 * deregister it and complete (as DONE) every pending item of the event
 * types it serviced, waking any threads blocked on those items so they
 * don't wait forever on a dead handler.
 */
void
nspace_proc_exit(struct proc *p)
{
	int i, event_mask = 0;

	for (i = 0; i < NSPACE_HANDLER_COUNT; i++) {
		if (p == nspace_handlers[i].handler_proc) {
			event_mask |= nspace_item_flags_for_type(i);
			nspace_handlers[i].handler_tid = 0;
			nspace_handlers[i].handler_proc = NULL;
		}
	}

	if (event_mask == 0) {
		return;
	}

	if (event_mask & NSPACE_ITEM_SNAPSHOT_EVENT) {
		// if this process was the snapshot handler, zero snapshot_timeout
		snapshot_timestamp = 0;
	}

	//
	// unblock anyone that's waiting for the handler that died
	//
	lck_mtx_lock(&nspace_handler_lock);
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].flags & (NSPACE_ITEM_NEW | NSPACE_ITEM_PROCESSING)) {

			if ( nspace_items[i].flags & event_mask ) {

				/* drop any pending snapshot marking on the vnode */
				if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) {
					vnode_lock_spin(nspace_items[i].vp);
					nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
					vnode_unlock(nspace_items[i].vp);
				}
				nspace_items[i].vp = NULL;
				nspace_items[i].vid = 0;
				nspace_items[i].flags = NSPACE_ITEM_DONE;
				nspace_items[i].token = 0;

				/* wake the thread(s) sleeping on this slot */
				wakeup((caddr_t)&(nspace_items[i].vp));
			}
		}
	}

	/* wake any handler thread parked waiting for new items */
	wakeup((caddr_t)&nspace_item_idx);
	lck_mtx_unlock(&nspace_handler_lock);
}
/*
 * Deliver a namespace event for 'vp' to the registered handler and block
 * until it is resolved.  Convenience wrapper with no event argument.
 */
int
resolve_nspace_item(struct vnode *vp, uint64_t op)
{
	return resolve_nspace_item_ext(vp, op, NULL);
}

/*
 * Queue a namespace event (op) for 'vp' into the shared nspace_items
 * table, wake the userspace handler, and sleep until the handler marks
 * the item DONE/CANCELLED or the per-wait timeout expires.
 *
 * Returns 0 on success or when no handler applies; EDEADLK when called
 * from a handler process itself; the handler-supplied token on
 * cancellation; ETIMEDOUT on timeout; or an msleep error.
 */
int
resolve_nspace_item_ext(struct vnode *vp, uint64_t op, void *arg)
{
	int i, error, keep_waiting;
	struct timespec ts;
	nspace_type_t nspace_type = nspace_type_for_op(op);

	// only allow namespace events on regular files, directories and symlinks.
	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
		return 0;
	}

	//
	// if this is a snapshot event and the vnode is on a
	// disk image just pretend nothing happened since any
	// change to the disk image will cause the disk image
	// itself to get backed up and this avoids multi-way
	// deadlocks between the snapshot handler and the ever
	// popular diskimages-helper process.  the variable
	// nspace_allow_virtual_devs allows this behavior to
	// be overridden (for use by the Mobile TimeMachine
	// testing infrastructure which uses disk images)
	//
	if (   (op & NAMESPACE_HANDLER_SNAPSHOT_EVENT)
	    && (vp->v_mount != NULL)
	    && (vp->v_mount->mnt_kern_flag & MNTK_VIRTUALDEV)
	    && !nspace_allow_virtual_devs) {

		return 0;
	}

	// if (thread_tid(current_thread()) == namespace_handler_tid) {
	if (nspace_handlers[nspace_type].handler_proc == NULL) {
		/* no handler registered for this event type: nothing to do */
		return 0;
	}

	if (nspace_is_special_process(current_proc())) {
		/* a handler must never block on its own event queue */
		return EDEADLK;
	}

	lck_mtx_lock(&nspace_handler_lock);

retry:
	/* first look for an existing slot for this (vp, op) pair */
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (vp == nspace_items[i].vp && op == nspace_items[i].op) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* none found: look for a free slot (flags == 0) */
		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].flags == 0) {
				break;
			}
		}
	} else {
		/* join the existing slot as an additional waiter */
		nspace_items[i].refcount++;
	}

	if (i >= MAX_NSPACE_ITEMS) {
		/* table full: wait (bounded) for a slot to free up */
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;

		error = msleep((caddr_t)&nspace_token_id, &nspace_handler_lock, PVFS|PCATCH, "nspace-no-space", &ts);
		if (error == 0) {
			// an entry got free'd up, go see if we can get a slot
			goto retry;
		} else {
			lck_mtx_unlock(&nspace_handler_lock);
			return error;
		}
	}

	//
	// if it didn't already exist, add it.  if it did exist
	// we'll get woken up when someone does a wakeup() on
	// the slot in the nspace_items table.
	//
	if (vp != nspace_items[i].vp) {
		nspace_items[i].vp = vp;
		nspace_items[i].arg = (arg == NSPACE_REARM_NO_ARG) ? NULL : arg;  // arg is {NULL, true, uio *} - only pass uio thru to the user
		nspace_items[i].op = op;
		nspace_items[i].vid = vnode_vid(vp);
		nspace_items[i].flags = NSPACE_ITEM_NEW;
		nspace_items[i].flags |= nspace_item_flags_for_type(nspace_type);
		if (nspace_items[i].flags & NSPACE_ITEM_SNAPSHOT_EVENT) {
			if (arg) {
				vnode_lock_spin(vp);
				vp->v_flag |= VNEEDSSNAPSHOT;
				vnode_unlock(vp);
			}
		}

		nspace_items[i].token = 0;
		nspace_items[i].refcount = 1;

		/* wake the handler thread parked in wait_for_namespace_event */
		wakeup((caddr_t)&nspace_item_idx);
	}

	//
	// Now go to sleep until the handler does a wakeup on this
	// slot in the nspace_items table (or we timeout).
	//
	keep_waiting = 1;
	while(keep_waiting) {
		ts.tv_sec = nspace_handler_timeout;
		ts.tv_nsec = 0;
		error = msleep((caddr_t)&(nspace_items[i].vp), &nspace_handler_lock, PVFS|PCATCH, "namespace-done", &ts);

		if (nspace_items[i].flags & NSPACE_ITEM_DONE) {
			error = 0;
		} else if (nspace_items[i].flags & NSPACE_ITEM_CANCELLED) {
			error = nspace_items[i].token;
		} else if (error == EWOULDBLOCK || error == ETIMEDOUT) {
			if (nspace_items[i].flags & NSPACE_ITEM_RESET_TIMER) {
				/* handler asked for more time: re-arm and keep waiting */
				nspace_items[i].flags &= ~NSPACE_ITEM_RESET_TIMER;
				continue;
			} else {
				error = ETIMEDOUT;
			}
		} else if (error == 0) {
			// hmmm, why did we get woken up?
			printf("woken up for token %d but it's not done, cancelled or timedout and error == 0.\n",
			       nspace_items[i].token);
		}

		/* last waiter out recycles the slot */
		if (--nspace_items[i].refcount == 0) {
			nspace_items[i].vp = NULL;     // clear this so that no one will match on it again
			nspace_items[i].arg = NULL;
			nspace_items[i].token = 0;     // clear this so that the handler will not find it anymore
			nspace_items[i].flags = 0;     // this clears it for re-use
		}
		/* let anyone blocked on a full table retry for a slot */
		wakeup(&nspace_token_id);
		keep_waiting = 0;
	}

	lck_mtx_unlock(&nspace_handler_lock);

	return error;
}
/*
 * Look up the pending-event flags for 'vp' in the nspace_items table.
 * On success, stores the slot's NSPACE_ITEM_* flags in *status and
 * returns 0; returns ENOENT when no slot references the vnode.
 */
int
get_nspace_item_status(struct vnode *vp, int32_t *status)
{
	int i;

	lck_mtx_lock(&nspace_handler_lock);
	for(i=0; i < MAX_NSPACE_ITEMS; i++) {
		if (nspace_items[i].vp == vp) {
			break;
		}
	}

	if (i >= MAX_NSPACE_ITEMS) {
		lck_mtx_unlock(&nspace_handler_lock);
		return ENOENT;
	}

	*status = nspace_items[i].flags;
	lck_mtx_unlock(&nspace_handler_lock);
	return 0;
}


#if 0
/*
 * DISABLED: build a /.vol/<fsid>/<fileid> style path for a vnode from its
 * va_fsid/va_fileid attributes.  Kept for reference; not compiled.
 */
static int
build_volfs_path(struct vnode *vp, char *path, int *len)
{
	struct vnode_attr va;
	int ret;

	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_fsid);
	VATTR_WANTED(&va, va_fileid);

	if (vnode_getattr(vp, &va, vfs_context_kernel()) != 0) {
		*len = snprintf(path, *len, "/non/existent/path/because/vnode_getattr/failed") + 1;
		ret = -1;
	} else {
		*len = snprintf(path, *len, "/.vol/%d/%lld", (dev_t)va.va_fsid, va.va_fileid) + 1;
		ret = 0;
	}

	return ret;
}
#endif
//
// Note: this function does NOT check permissions on all of the
// parent directories leading to this vnode.  It should only be
// called on behalf of a root process.  Otherwise a process may
// get access to a file because the file itself is readable even
// though its parent directories would prevent access.
//
/*
 * Open an already-held vnode with mode 'fmode' on behalf of the current
 * (root) process: superuser check, MAC check, vnode_authorize for the
 * computed action, then VNOP_OPEN + vnode_ref_ext, followed by open
 * notifications.  On any failure after VNOP_OPEN, the open is undone
 * with VNOP_CLOSE.  Returns 0 or an errno.
 */
static int
vn_open_with_vp(vnode_t vp, int fmode, vfs_context_t ctx)
{
	int error, action;

	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

#if CONFIG_MACF
	error = mac_vnode_check_open(ctx, vp, fmode);
	if (error)
		return error;
#endif

	/* compute action to be authorized */
	action = 0;
	if (fmode & FREAD) {
		action |= KAUTH_VNODE_READ_DATA;
	}
	if (fmode & (FWRITE | O_TRUNC)) {
		/*
		 * If we are writing, appending, and not truncating,
		 * indicate that we are appending so that if the
		 * UF_APPEND or SF_APPEND bits are set, we do not deny
		 * the open.
		 */
		if ((fmode & O_APPEND) && !(fmode & O_TRUNC)) {
			action |= KAUTH_VNODE_APPEND_DATA;
		} else {
			action |= KAUTH_VNODE_WRITE_DATA;
		}
	}

	if ((error = vnode_authorize(vp, NULL, action, ctx)) != 0)
		return error;


	//
	// if the vnode is tagged VOPENEVT and the current process
	// has the P_CHECKOPENEVT flag set, then we or in the O_EVTONLY
	// flag to the open mode so that this open won't count against
	// the vnode when carbon delete() does a vnode_isinuse() to see
	// if a file is currently in use.  this allows spotlight
	// importers to not interfere with carbon apps that depend on
	// the no-delete-if-busy semantics of carbon delete().
	//
	if ((vp->v_flag & VOPENEVT) && (current_proc()->p_flag & P_CHECKOPENEVT)) {
		fmode |= O_EVTONLY;
	}

	if ( (error = VNOP_OPEN(vp, fmode, ctx)) ) {
		return error;
	}
	if ( (error = vnode_ref_ext(vp, fmode, 0)) ) {
		/* couldn't take a usecount ref: undo the open */
		VNOP_CLOSE(vp, fmode, ctx);
		return error;
	}

	/* Call out to allow 3rd party notification of open.
	 * Ignore result of kauth_authorize_fileop call.
	 */
#if CONFIG_MACF
	mac_vnode_notify_open(ctx, vp, fmode);
#endif
	kauth_authorize_fileop(vfs_context_ucred(ctx), KAUTH_FILEOP_OPEN,
			       (uintptr_t)vp, 0);


	return 0;
}
8327 */ 8328 curtask = current_task(); 8329 bsd_set_dependency_capable (curtask); 8330 8331 lck_mtx_lock(&nspace_handler_lock); 8332 if (nspace_handlers[nspace_type].handler_proc == NULL) { 8333 nspace_handlers[nspace_type].handler_tid = thread_tid(current_thread()); 8334 nspace_handlers[nspace_type].handler_proc = current_proc(); 8335 } 8336 8337 while (error == 0) { 8338 8339 for(i=0; i < MAX_NSPACE_ITEMS; i++) { 8340 if (nspace_items[i].flags & NSPACE_ITEM_NEW) { 8341 if (!nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) { 8342 continue; 8343 } 8344 break; 8345 } 8346 } 8347 8348 if (i < MAX_NSPACE_ITEMS) { 8349 nspace_items[i].flags &= ~NSPACE_ITEM_NEW; 8350 nspace_items[i].flags |= NSPACE_ITEM_PROCESSING; 8351 nspace_items[i].token = ++nspace_token_id; 8352 8353 if (nspace_items[i].vp) { 8354 struct fileproc *fp; 8355 int32_t indx, fmode; 8356 struct proc *p = current_proc(); 8357 vfs_context_t ctx = vfs_context_current(); 8358 struct vnode_attr va; 8359 8360 8361 /* 8362 * Use vnode pointer to acquire a file descriptor for 8363 * hand-off to userland 8364 */ 8365 fmode = nspace_open_flags_for_type(nspace_type); 8366 error = vnode_getwithvid(nspace_items[i].vp, nspace_items[i].vid); 8367 if (error) { 8368 unblock = 1; 8369 break; 8370 } 8371 error = vn_open_with_vp(nspace_items[i].vp, fmode, ctx); 8372 if (error) { 8373 unblock = 1; 8374 vnode_put(nspace_items[i].vp); 8375 break; 8376 } 8377 8378 if ((error = falloc(p, &fp, &indx, ctx))) { 8379 vn_close(nspace_items[i].vp, fmode, ctx); 8380 vnode_put(nspace_items[i].vp); 8381 unblock = 1; 8382 break; 8383 } 8384 8385 fp->f_fglob->fg_flag = fmode; 8386 fp->f_fglob->fg_ops = &vnops; 8387 fp->f_fglob->fg_data = (caddr_t)nspace_items[i].vp; 8388 8389 proc_fdlock(p); 8390 procfdtbl_releasefd(p, indx, NULL); 8391 fp_drop(p, indx, fp, 1); 8392 proc_fdunlock(p); 8393 8394 /* 8395 * All variants of the namespace handler struct support these three fields: 8396 * token, flags, and the FD pointer 8397 */ 
8398 error = copyout(&nspace_items[i].token, nhd->token, sizeof(uint32_t)); 8399 error = copyout(&nspace_items[i].op, nhd->flags, sizeof(uint64_t)); 8400 error = copyout(&indx, nhd->fdptr, sizeof(uint32_t)); 8401 8402 /* 8403 * Handle optional fields: 8404 * extended version support an info ptr (offset, length), and the 8405 * 8406 * namedata version supports a unique per-link object ID 8407 * 8408 */ 8409 if (nhd->infoptr) { 8410 uio_t uio = (uio_t)nspace_items[i].arg; 8411 uint64_t u_offset, u_length; 8412 8413 if (uio) { 8414 u_offset = uio_offset(uio); 8415 u_length = uio_resid(uio); 8416 } else { 8417 u_offset = 0; 8418 u_length = 0; 8419 } 8420 error = copyout(&u_offset, nhd->infoptr, sizeof(uint64_t)); 8421 error = copyout(&u_length, nhd->infoptr+sizeof(uint64_t), sizeof(uint64_t)); 8422 } 8423 8424 if (nhd->objid) { 8425 VATTR_INIT(&va); 8426 VATTR_WANTED(&va, va_linkid); 8427 error = vnode_getattr(nspace_items[i].vp, &va, ctx); 8428 if (error == 0 ) { 8429 uint64_t linkid = 0; 8430 if (VATTR_IS_SUPPORTED (&va, va_linkid)) { 8431 linkid = (uint64_t)va.va_linkid; 8432 } 8433 error = copyout (&linkid, nhd->objid, sizeof(uint64_t)); 8434 } 8435 } 8436 8437 if (error) { 8438 vn_close(nspace_items[i].vp, fmode, ctx); 8439 fp_free(p, indx, fp); 8440 unblock = 1; 8441 } 8442 8443 vnode_put(nspace_items[i].vp); 8444 8445 break; 8446 } else { 8447 printf("wait_for_nspace_event: failed (nspace_items[%d] == %p error %d, name %s)\n", 8448 i, nspace_items[i].vp, error, nspace_items[i].vp->v_name); 8449 } 8450 8451 } else { 8452 error = msleep((caddr_t)&nspace_item_idx, &nspace_handler_lock, PVFS|PCATCH, "namespace-items", 0); 8453 if ((nspace_type == NSPACE_HANDLER_SNAPSHOT) && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { 8454 error = EINVAL; 8455 break; 8456 } 8457 8458 } 8459 } 8460 8461 if (unblock) { 8462 if (nspace_items[i].vp && (nspace_items[i].vp->v_flag & VNEEDSSNAPSHOT)) { 8463 vnode_lock_spin(nspace_items[i].vp); 8464 nspace_items[i].vp->v_flag &= 
~VNEEDSSNAPSHOT; 8465 vnode_unlock(nspace_items[i].vp); 8466 } 8467 nspace_items[i].vp = NULL; 8468 nspace_items[i].vid = 0; 8469 nspace_items[i].flags = NSPACE_ITEM_DONE; 8470 nspace_items[i].token = 0; 8471 8472 wakeup((caddr_t)&(nspace_items[i].vp)); 8473 } 8474 8475 if (nspace_type == NSPACE_HANDLER_SNAPSHOT) { 8476 // just go through every snapshot event and unblock it immediately. 8477 if (error && (snapshot_timestamp == 0 || snapshot_timestamp == ~0)) { 8478 for(i=0; i < MAX_NSPACE_ITEMS; i++) { 8479 if (nspace_items[i].flags & NSPACE_ITEM_NEW) { 8480 if (nspace_flags_matches_handler(nspace_items[i].flags, nspace_type)) { 8481 nspace_items[i].vp = NULL; 8482 nspace_items[i].vid = 0; 8483 nspace_items[i].flags = NSPACE_ITEM_DONE; 8484 nspace_items[i].token = 0; 8485 8486 wakeup((caddr_t)&(nspace_items[i].vp)); 8487 } 8488 } 8489 } 8490 } 8491 } 8492 8493 lck_mtx_unlock(&nspace_handler_lock); 8494 8495 lck_mtx_lock(&nspace_handler_exclusion_lock); 8496 nspace_handlers[nspace_type].handler_busy = 0; 8497 lck_mtx_unlock(&nspace_handler_exclusion_lock); 8498 8499 return error; 8500} 8501 8502static inline int validate_namespace_args (int is64bit, int size) { 8503 8504 if (is64bit) { 8505 /* Must be one of these */ 8506 if (size == sizeof(user64_namespace_handler_info)) { 8507 goto sizeok; 8508 } 8509 if (size == sizeof(user64_namespace_handler_info_ext)) { 8510 goto sizeok; 8511 } 8512 if (size == sizeof(user64_namespace_handler_data)) { 8513 goto sizeok; 8514 } 8515 return EINVAL; 8516 } 8517 else { 8518 /* 32 bit -- must be one of these */ 8519 if (size == sizeof(user32_namespace_handler_info)) { 8520 goto sizeok; 8521 } 8522 if (size == sizeof(user32_namespace_handler_info_ext)) { 8523 goto sizeok; 8524 } 8525 if (size == sizeof(user32_namespace_handler_data)) { 8526 goto sizeok; 8527 } 8528 return EINVAL; 8529 } 8530 8531sizeok: 8532 8533 return 0; 8534 8535} 8536 8537static int process_namespace_fsctl(nspace_type_t nspace_type, int is64bit, u_int size, 
    caddr_t data)
{
	int error = 0;
	namespace_handler_data nhd;

	bzero (&nhd, sizeof(namespace_handler_data));

	/* snapshot handlers are rejected unless a snapshot window is armed */
	if (nspace_type == NSPACE_HANDLER_SNAPSHOT &&
			(snapshot_timestamp == 0 || snapshot_timestamp == ~0)) {
		return EINVAL;
	}

	/* root only */
	if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
		return error;
	}

	/* 'size' must match one of the known userland struct variants */
	error = validate_namespace_args (is64bit, size);
	if (error) {
		return error;
	}

	/* Copy in the userland pointers into our kernel-only struct */

	if (is64bit) {
		/* 64 bit userland structures */
		nhd.token = (user_addr_t)((user64_namespace_handler_info *)data)->token;
		nhd.flags = (user_addr_t)((user64_namespace_handler_info *)data)->flags;
		nhd.fdptr = (user_addr_t)((user64_namespace_handler_info *)data)->fdptr;

		/* If the size is greater than the standard info struct, add in extra fields */
		if (size > (sizeof(user64_namespace_handler_info))) {
			if (size >= (sizeof(user64_namespace_handler_info_ext))) {
				nhd.infoptr = (user_addr_t)((user64_namespace_handler_info_ext *)data)->infoptr;
			}
			if (size == (sizeof(user64_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user64_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}
	else {
		/* 32 bit userland structures */
		nhd.token = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->token);
		nhd.flags = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->flags);
		nhd.fdptr = CAST_USER_ADDR_T(((user32_namespace_handler_info *)data)->fdptr);

		if (size > (sizeof(user32_namespace_handler_info))) {
			if (size >= (sizeof(user32_namespace_handler_info_ext))) {
				nhd.infoptr = CAST_USER_ADDR_T(((user32_namespace_handler_info_ext *)data)->infoptr);
			}
			if (size == (sizeof(user32_namespace_handler_data))) {
				nhd.objid = (user_addr_t)((user32_namespace_handler_data*)data)->objid;
			}
			/* Otherwise the fields were pre-zeroed when we did the bzero above. */
		}
	}

	/* block until an event arrives, then hand it off to userland */
	return wait_for_namespace_event(&nhd, nspace_type);
}

/*
 * Make a filesystem-specific control call:
 *
 * Dispatches generic FSCTL_* commands (volume sync, package extensions,
 * the namespace/snapshot/tracked-file handler family, fstypename
 * override) and forwards anything else to the filesystem via VNOP_IOCTL.
 * The argument buffer lives on the stack for small requests and is
 * kalloc'd otherwise.  NOTE: FSCTL_SYNC_VOLUME drops the caller's iocount
 * on *arg_vp and reports that by setting *arg_vp to NULL.
 */
/* ARGSUSED */
static int
fsctl_internal(proc_t p, vnode_t *arg_vp, u_long cmd, user_addr_t udata, u_long options, vfs_context_t ctx)
{
	int error=0;
	boolean_t is64bit;
	u_int size;
#define STK_PARAMS 128
	char stkbuf[STK_PARAMS];
	caddr_t data, memp;
	vnode_t vp = *arg_vp;

	size = IOCPARM_LEN(cmd);
	if (size > IOCPARM_MAX) return (EINVAL);

	is64bit = proc_is64bit(p);

	/* use the stack buffer when it is big enough, else heap-allocate */
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		if ((memp = (caddr_t)kalloc(size)) == 0) return ENOMEM;
		data = memp;
	} else {
		data = &stkbuf[0];
	};

	if (cmd & IOC_IN) {
		if (size) {
			error = copyin(udata, data, size);
			if (error) goto FSCtl_Exit;
		} else {
			/* no in-buffer: stash the raw argument word itself */
			if (is64bit) {
				*(user_addr_t *)data = udata;
			}
			else {
				*(uint32_t *)data = (uint32_t)udata;
			}
		};
	} else if ((cmd & IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (cmd & IOC_VOID) {
		if (is64bit) {
			*(user_addr_t *)data = udata;
		}
		else {
			*(uint32_t *)data = (uint32_t)udata;
		}
	}

	/* Check to see if it's a generic command */
	if (IOCBASECMD(cmd) == FSCTL_SYNC_VOLUME) {
		mount_t mp = vp->v_mount;
		int arg = *(uint32_t*)data;

		/* record vid of vp so we can drop it below. */
		uint32_t vvid = vp->v_id;

		/*
		 * Then grab mount_iterref so that we can release the vnode.
		 * Without this, a thread may call vnode_iterate_prepare then
		 * get into a deadlock because we've never released the root vp
		 */
		error = mount_iterref (mp, 0);
		if (error)  {
			goto FSCtl_Exit;
		}
		vnode_put(vp);

		/* issue the sync for this volume */
		(void)sync_callback(mp, (arg & FSCTL_SYNC_WAIT) ? &arg : NULL);

		/*
		 * Then release the mount_iterref once we're done syncing; it's not
		 * needed for the VNOP_IOCTL below
		 */
		mount_iterdrop(mp);

		if (arg & FSCTL_SYNC_FULLSYNC) {
			/* re-obtain vnode iocount on the root vp, if possible */
			error = vnode_getwithvid (vp, vvid);
			if (error == 0) {
				error = VNOP_IOCTL(vp, F_FULLFSYNC, (caddr_t)NULL, 0, ctx);
				vnode_put (vp);
			}
		}
		/* mark the argument VP as having been released */
		*arg_vp = NULL;

	} else if (IOCBASECMD(cmd) == FSCTL_SET_PACKAGE_EXTS) {
		user_addr_t ext_strings;
		uint32_t    num_entries;
		uint32_t    max_width;

		if (   (is64bit && size != sizeof(user64_package_ext_info))
		    || (is64bit == 0 && size != sizeof(user32_package_ext_info))) {

			// either you're 64-bit and passed a 64-bit struct or
			// you're 32-bit and passed a 32-bit struct.  otherwise
			// it's not ok.
			error = EINVAL;
			goto FSCtl_Exit;
		}

		if (is64bit) {
			ext_strings = ((user64_package_ext_info *)data)->strings;
			num_entries = ((user64_package_ext_info *)data)->num_entries;
			max_width   = ((user64_package_ext_info *)data)->max_width;
		} else {
			ext_strings = CAST_USER_ADDR_T(((user32_package_ext_info *)data)->strings);
			num_entries = ((user32_package_ext_info *)data)->num_entries;
			max_width   = ((user32_package_ext_info *)data)->max_width;
		}

		error = set_package_extensions_table(ext_strings, num_entries, max_width);


	}

	/* namespace handlers */
	else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GET) {
		error = process_namespace_fsctl(NSPACE_HANDLER_NSPACE, is64bit, size, data);
	}

	/* Snapshot handlers */
	else if (IOCBASECMD(cmd) == FSCTL_OLD_SNAPSHOT_HANDLER_GET) {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	} else if (IOCBASECMD(cmd) == FSCTL_SNAPSHOT_HANDLER_GET_EXT) {
		error = process_namespace_fsctl(NSPACE_HANDLER_SNAPSHOT, is64bit, size, data);
	}

	/* Tracked File Handlers */
	else if (IOCBASECMD(cmd) == FSCTL_TRACKED_HANDLER_GET) {
		error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
	}
	else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_GETDATA) {
		error = process_namespace_fsctl(NSPACE_HANDLER_TRACK, is64bit, size, data);
	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UPDATE) {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			goto FSCtl_Exit;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			goto FSCtl_Exit;
		}

		/* data layout: [0] = token, [1] = value (unused here) */
		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			error = ENOENT;
		} else {
			//
			// if this bit is set, when resolve_nspace_item() times out
			// it will loop and go back to sleep.
			//
			nspace_items[i].flags |= NSPACE_ITEM_RESET_TIMER;
		}

		lck_mtx_unlock(&nspace_handler_lock);

		if (error) {
			printf("nspace-handler-update: did not find token %u\n", token);
		}

	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_UNBLOCK) {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			goto FSCtl_Exit;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			goto FSCtl_Exit;
		}

		/* data layout: [0] = token, [1] = value (0 clears VNEEDSSNAPSHOT) */
		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-unblock: did not find token %u\n", token);
			error = ENOENT;
		} else {
			if (val == 0 && nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			/* mark the item done and wake the originator blocked on it */
			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].op = 0;
			nspace_items[i].vid = 0;
			nspace_items[i].flags = NSPACE_ITEM_DONE;
			nspace_items[i].token = 0;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);

	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_CANCEL) {
		uint32_t token, val;
		int i;

		if ((error = suser(kauth_cred_get(), &(p->p_acflag)))) {
			goto FSCtl_Exit;
		}

		if (!nspace_is_special_process(p)) {
			error = EINVAL;
			goto FSCtl_Exit;
		}

		/* data layout: [0] = token, [1] = value stored back as the token */
		token = ((uint32_t *)data)[0];
		val   = ((uint32_t *)data)[1];

		lck_mtx_lock(&nspace_handler_lock);

		for(i=0; i < MAX_NSPACE_ITEMS; i++) {
			if (nspace_items[i].token == token) {
				break;
			}
		}

		if (i >= MAX_NSPACE_ITEMS) {
			printf("nspace-handler-cancel: did not find token %u\n", token);
			error = ENOENT;
		} else {
			if (nspace_items[i].vp) {
				vnode_lock_spin(nspace_items[i].vp);
				nspace_items[i].vp->v_flag &= ~VNEEDSSNAPSHOT;
				vnode_unlock(nspace_items[i].vp);
			}

			/* cancel: clear processing, flag cancelled, wake originator */
			nspace_items[i].vp = NULL;
			nspace_items[i].arg = NULL;
			nspace_items[i].vid = 0;
			nspace_items[i].token = val;
			nspace_items[i].flags &= ~NSPACE_ITEM_PROCESSING;
			nspace_items[i].flags |= NSPACE_ITEM_CANCELLED;

			wakeup((caddr_t)&(nspace_items[i].vp));
		}

		lck_mtx_unlock(&nspace_handler_lock);
	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_HANDLER_SET_SNAPSHOT_TIME) {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			goto FSCtl_Exit;
		}

		// we explicitly do not do the namespace_handler_proc check here

		lck_mtx_lock(&nspace_handler_lock);
		snapshot_timestamp = ((uint32_t *)data)[0];
		wakeup(&nspace_item_idx);
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-handler-set-snapshot-time: %d\n", (int)snapshot_timestamp);

	} else if (IOCBASECMD(cmd) == FSCTL_NAMESPACE_ALLOW_DMG_SNAPSHOT_EVENTS) {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			goto FSCtl_Exit;
		}

		lck_mtx_lock(&nspace_handler_lock);
		nspace_allow_virtual_devs = ((uint32_t *)data)[0];
		lck_mtx_unlock(&nspace_handler_lock);
		printf("nspace-snapshot-handler will%s allow events on disk-images\n",
		       nspace_allow_virtual_devs ? "" : " NOT");
		error = 0;

	} else if (IOCBASECMD(cmd) == FSCTL_SET_FSTYPENAME_OVERRIDE) {
		if ((error = suser(kauth_cred_get(), &(current_proc()->p_acflag)))) {
			goto FSCtl_Exit;
		}
		if (vp->v_mount) {
			mount_lock(vp->v_mount);
			if (data[0] != 0) {
				/* non-empty string installs the override name */
				strlcpy(&vp->v_mount->fstypename_override[0], data, MFSTYPENAMELEN);
				vp->v_mount->mnt_kern_flag |= MNTK_TYPENAME_OVERRIDE;
				if (vfs_isrdonly(vp->v_mount) && strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag |= MNTK_EXTENDED_SECURITY;
					vp->v_mount->mnt_kern_flag &= ~MNTK_AUTH_OPAQUE;
				}
			} else {
				/* empty string clears the override */
				if (strcmp(vp->v_mount->fstypename_override, "mtmfs") == 0) {
					vp->v_mount->mnt_kern_flag &= ~MNTK_EXTENDED_SECURITY;
				}
				vp->v_mount->mnt_kern_flag &= ~MNTK_TYPENAME_OVERRIDE;
				vp->v_mount->fstypename_override[0] = '\0';
			}
			mount_unlock(vp->v_mount);
		}
	} else {
		/* Invoke the filesystem-specific code */
		error = VNOP_IOCTL(vp, IOCBASECMD(cmd), data, options, ctx);
	}


	/*
	 * Copy any data to user, size was
	 * already set and checked above.
	 */
	if (error == 0 && (cmd & IOC_OUT) && size)
		error = copyout(data, udata, size);

FSCtl_Exit:
	if (memp) kfree(memp, size);

	return error;
}

/*
 * fsctl: path-based entry point for the fsctl(2) system call.
 * Looks up the target, runs the MAC check, then calls fsctl_internal(),
 * which may consume the vnode reference (it NULLs vp in that case).
 */
/* ARGSUSED */
int
fsctl (proc_t p, struct fsctl_args *uap, __unused int32_t *retval)
{
	int error;
	struct nameidata nd;
	u_long nameiflags;
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);
	/* Get the vnode for the file we are getting info on:  */
	nameiflags = 0;
	if ((uap->options & FSOPT_NOFOLLOW) == 0) nameiflags |= FOLLOW;
	NDINIT(&nd, LOOKUP, OP_FSCTL, nameiflags | AUDITVNPATH1,
	       UIO_USERSPACE, uap->path, ctx);
	if ((error = namei(&nd))) goto done;
	vp = nd.ni_vp;
	nameidone(&nd);

#if CONFIG_MACF
	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
	if (error) {
		goto done;
	}
#endif

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

done:
	if (vp)
		vnode_put(vp);
	return error;
}
/*
 * ffsctl: fd-based entry point for the ffsctl(2) system call.
 * Same as fsctl() but the target is identified by file descriptor.
 */
/* ARGSUSED */
int
ffsctl (proc_t p, struct ffsctl_args *uap, __unused int32_t *retval)
{
	int error;
	vnode_t vp = NULL;
	vfs_context_t ctx = vfs_context_current();
	int fd = -1;

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(cmd, uap->cmd);
	AUDIT_ARG(value32, uap->options);

	/* Get the vnode for the file we are getting info on:  */
	if ((error = file_vnode(uap->fd, &vp)))
		goto done;
	fd = uap->fd;
	if ((error = vnode_getwithref(vp))) {
		goto done;
	}

#if CONFIG_MACF
	error = mac_mount_check_fsctl(ctx, vnode_mount(vp), uap->cmd);
	if (error) {
		goto done;
	}
#endif

	error = fsctl_internal(p, &vp, uap->cmd, (user_addr_t)uap->data, uap->options, ctx);

done:
	if (fd != -1)
		file_drop(fd);

	if (vp)
vnode_put(vp); 9007 return error; 9008} 9009/* end of fsctl system call */ 9010 9011/* 9012 * An in-kernel sync for power management to call. 9013 */ 9014__private_extern__ int 9015sync_internal(void) 9016{ 9017 int error; 9018 9019 struct sync_args data; 9020 9021 int retval[2]; 9022 9023 9024 error = sync(current_proc(), &data, &retval[0]); 9025 9026 9027 return (error); 9028} /* end of sync_internal call */ 9029 9030 9031/* 9032 * Retrieve the data of an extended attribute. 9033 */ 9034int 9035getxattr(proc_t p, struct getxattr_args *uap, user_ssize_t *retval) 9036{ 9037 vnode_t vp; 9038 struct nameidata nd; 9039 char attrname[XATTR_MAXNAMELEN+1]; 9040 vfs_context_t ctx = vfs_context_current(); 9041 uio_t auio = NULL; 9042 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; 9043 size_t attrsize = 0; 9044 size_t namelen; 9045 u_int32_t nameiflags; 9046 int error; 9047 char uio_buf[ UIO_SIZEOF(1) ]; 9048 9049 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) 9050 return (EINVAL); 9051 9052 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; 9053 NDINIT(&nd, LOOKUP, OP_GETXATTR, nameiflags, spacetype, uap->path, ctx); 9054 if ((error = namei(&nd))) { 9055 return (error); 9056 } 9057 vp = nd.ni_vp; 9058 nameidone(&nd); 9059 9060 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { 9061 goto out; 9062 } 9063 if (xattr_protected(attrname)) { 9064 if (!vfs_context_issuser(ctx) || strcmp(attrname, "com.apple.system.Security") != 0) { 9065 error = EPERM; 9066 goto out; 9067 } 9068 } 9069 /* 9070 * the specific check for 0xffffffff is a hack to preserve 9071 * binaray compatibilty in K64 with applications that discovered 9072 * that passing in a buf pointer and a size of -1 resulted in 9073 * just the size of the indicated extended attribute being returned. 
9074 * this isn't part of the documented behavior, but because of the 9075 * original implemtation's check for "uap->size > 0", this behavior 9076 * was allowed. In K32 that check turned into a signed comparison 9077 * even though uap->size is unsigned... in K64, we blow by that 9078 * check because uap->size is unsigned and doesn't get sign smeared 9079 * in the munger for a 32 bit user app. we also need to add a 9080 * check to limit the maximum size of the buffer being passed in... 9081 * unfortunately, the underlying fileystems seem to just malloc 9082 * the requested size even if the actual extended attribute is tiny. 9083 * because that malloc is for kernel wired memory, we have to put a 9084 * sane limit on it. 9085 * 9086 * U32 running on K64 will yield 0x00000000ffffffff for uap->size 9087 * U64 running on K64 will yield -1 (64 bits wide) 9088 * U32/U64 running on K32 will yield -1 (32 bits wide) 9089 */ 9090 if (uap->size == 0xffffffff || uap->size == (size_t)-1) 9091 goto no_uio; 9092 9093 if (uap->value) { 9094 if (uap->size > (size_t)XATTR_MAXSIZE) 9095 uap->size = XATTR_MAXSIZE; 9096 9097 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, 9098 &uio_buf[0], sizeof(uio_buf)); 9099 uio_addiov(auio, uap->value, uap->size); 9100 } 9101no_uio: 9102 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, ctx); 9103out: 9104 vnode_put(vp); 9105 9106 if (auio) { 9107 *retval = uap->size - uio_resid(auio); 9108 } else { 9109 *retval = (user_ssize_t)attrsize; 9110 } 9111 9112 return (error); 9113} 9114 9115/* 9116 * Retrieve the data of an extended attribute. 9117 */ 9118int 9119fgetxattr(proc_t p, struct fgetxattr_args *uap, user_ssize_t *retval) 9120{ 9121 vnode_t vp; 9122 char attrname[XATTR_MAXNAMELEN+1]; 9123 uio_t auio = NULL; 9124 int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; 9125 size_t attrsize = 0; 9126 size_t namelen; 9127 int error; 9128 char uio_buf[ UIO_SIZEOF(1) ]; 9129 9130 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) 9131 return (EINVAL); 9132 9133 if ( (error = file_vnode(uap->fd, &vp)) ) { 9134 return (error); 9135 } 9136 if ( (error = vnode_getwithref(vp)) ) { 9137 file_drop(uap->fd); 9138 return(error); 9139 } 9140 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { 9141 goto out; 9142 } 9143 if (xattr_protected(attrname)) { 9144 error = EPERM; 9145 goto out; 9146 } 9147 if (uap->value && uap->size > 0) { 9148 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_READ, 9149 &uio_buf[0], sizeof(uio_buf)); 9150 uio_addiov(auio, uap->value, uap->size); 9151 } 9152 9153 error = vn_getxattr(vp, attrname, auio, &attrsize, uap->options, vfs_context_current()); 9154out: 9155 (void)vnode_put(vp); 9156 file_drop(uap->fd); 9157 9158 if (auio) { 9159 *retval = uap->size - uio_resid(auio); 9160 } else { 9161 *retval = (user_ssize_t)attrsize; 9162 } 9163 return (error); 9164} 9165 9166/* 9167 * Set the data of an extended attribute. 9168 */ 9169int 9170setxattr(proc_t p, struct setxattr_args *uap, int *retval) 9171{ 9172 vnode_t vp; 9173 struct nameidata nd; 9174 char attrname[XATTR_MAXNAMELEN+1]; 9175 vfs_context_t ctx = vfs_context_current(); 9176 uio_t auio = NULL; 9177 int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; 9178 size_t namelen; 9179 u_int32_t nameiflags; 9180 int error; 9181 char uio_buf[ UIO_SIZEOF(1) ]; 9182 9183 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) 9184 return (EINVAL); 9185 9186 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { 9187 if (error == EPERM) { 9188 /* if the string won't fit in attrname, copyinstr emits EPERM */ 9189 return (ENAMETOOLONG); 9190 } 9191 /* Otherwise return the default error from copyinstr to detect ERANGE, etc */ 9192 return error; 9193 } 9194 if (xattr_protected(attrname)) 9195 return(EPERM); 9196 if (uap->size != 0 && uap->value == 0) { 9197 return (EINVAL); 9198 } 9199 9200 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW; 9201 NDINIT(&nd, LOOKUP, OP_SETXATTR, nameiflags, spacetype, uap->path, ctx); 9202 if ((error = namei(&nd))) { 9203 return (error); 9204 } 9205 vp = nd.ni_vp; 9206 nameidone(&nd); 9207 9208 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE, 9209 &uio_buf[0], sizeof(uio_buf)); 9210 uio_addiov(auio, uap->value, uap->size); 9211 9212 error = vn_setxattr(vp, attrname, auio, uap->options, ctx); 9213#if CONFIG_FSE 9214 if (error == 0) { 9215 add_fsevent(FSE_XATTR_MODIFIED, ctx, 9216 FSE_ARG_VNODE, vp, 9217 FSE_ARG_DONE); 9218 } 9219#endif 9220 vnode_put(vp); 9221 *retval = 0; 9222 return (error); 9223} 9224 9225/* 9226 * Set the data of an extended attribute. 9227 */ 9228int 9229fsetxattr(proc_t p, struct fsetxattr_args *uap, int *retval) 9230{ 9231 vnode_t vp; 9232 char attrname[XATTR_MAXNAMELEN+1]; 9233 uio_t auio = NULL; 9234 int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; 9235 size_t namelen; 9236 int error; 9237 char uio_buf[ UIO_SIZEOF(1) ]; 9238#if CONFIG_FSE 9239 vfs_context_t ctx = vfs_context_current(); 9240#endif 9241 9242 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) 9243 return (EINVAL); 9244 9245 if ((error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen) != 0)) { 9246 return (error); 9247 } 9248 if (xattr_protected(attrname)) 9249 return(EPERM); 9250 if (uap->size != 0 && uap->value == 0) { 9251 return (EINVAL); 9252 } 9253 if ( (error = file_vnode(uap->fd, &vp)) ) { 9254 return (error); 9255 } 9256 if ( (error = vnode_getwithref(vp)) ) { 9257 file_drop(uap->fd); 9258 return(error); 9259 } 9260 auio = uio_createwithbuffer(1, uap->position, spacetype, UIO_WRITE, 9261 &uio_buf[0], sizeof(uio_buf)); 9262 uio_addiov(auio, uap->value, uap->size); 9263 9264 error = vn_setxattr(vp, attrname, auio, uap->options, vfs_context_current()); 9265#if CONFIG_FSE 9266 if (error == 0) { 9267 add_fsevent(FSE_XATTR_MODIFIED, ctx, 9268 FSE_ARG_VNODE, vp, 9269 FSE_ARG_DONE); 9270 } 9271#endif 9272 vnode_put(vp); 9273 file_drop(uap->fd); 9274 *retval = 0; 9275 return (error); 9276} 9277 9278/* 9279 * Remove an extended attribute. 9280 * XXX Code duplication here. 9281 */ 9282int 9283removexattr(proc_t p, struct removexattr_args *uap, int *retval) 9284{ 9285 vnode_t vp; 9286 struct nameidata nd; 9287 char attrname[XATTR_MAXNAMELEN+1]; 9288 int spacetype = IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32; 9289 vfs_context_t ctx = vfs_context_current(); 9290 size_t namelen; 9291 u_int32_t nameiflags; 9292 int error; 9293 9294 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) 9295 return (EINVAL); 9296 9297 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen); 9298 if (error != 0) { 9299 return (error); 9300 } 9301 if (xattr_protected(attrname)) 9302 return(EPERM); 9303 nameiflags = (uap->options & XATTR_NOFOLLOW) ? 
0 : FOLLOW; 9304 NDINIT(&nd, LOOKUP, OP_REMOVEXATTR, nameiflags, spacetype, uap->path, ctx); 9305 if ((error = namei(&nd))) { 9306 return (error); 9307 } 9308 vp = nd.ni_vp; 9309 nameidone(&nd); 9310 9311 error = vn_removexattr(vp, attrname, uap->options, ctx); 9312#if CONFIG_FSE 9313 if (error == 0) { 9314 add_fsevent(FSE_XATTR_REMOVED, ctx, 9315 FSE_ARG_VNODE, vp, 9316 FSE_ARG_DONE); 9317 } 9318#endif 9319 vnode_put(vp); 9320 *retval = 0; 9321 return (error); 9322} 9323 9324/* 9325 * Remove an extended attribute. 9326 * XXX Code duplication here. 9327 */ 9328int 9329fremovexattr(__unused proc_t p, struct fremovexattr_args *uap, int *retval) 9330{ 9331 vnode_t vp; 9332 char attrname[XATTR_MAXNAMELEN+1]; 9333 size_t namelen; 9334 int error; 9335#if CONFIG_FSE 9336 vfs_context_t ctx = vfs_context_current(); 9337#endif 9338 9339 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) 9340 return (EINVAL); 9341 9342 error = copyinstr(uap->attrname, attrname, sizeof(attrname), &namelen); 9343 if (error != 0) { 9344 return (error); 9345 } 9346 if (xattr_protected(attrname)) 9347 return(EPERM); 9348 if ( (error = file_vnode(uap->fd, &vp)) ) { 9349 return (error); 9350 } 9351 if ( (error = vnode_getwithref(vp)) ) { 9352 file_drop(uap->fd); 9353 return(error); 9354 } 9355 9356 error = vn_removexattr(vp, attrname, uap->options, vfs_context_current()); 9357#if CONFIG_FSE 9358 if (error == 0) { 9359 add_fsevent(FSE_XATTR_REMOVED, ctx, 9360 FSE_ARG_VNODE, vp, 9361 FSE_ARG_DONE); 9362 } 9363#endif 9364 vnode_put(vp); 9365 file_drop(uap->fd); 9366 *retval = 0; 9367 return (error); 9368} 9369 9370/* 9371 * Retrieve the list of extended attribute names. 9372 * XXX Code duplication here. 9373 */ 9374int 9375listxattr(proc_t p, struct listxattr_args *uap, user_ssize_t *retval) 9376{ 9377 vnode_t vp; 9378 struct nameidata nd; 9379 vfs_context_t ctx = vfs_context_current(); 9380 uio_t auio = NULL; 9381 int spacetype = IS_64BIT_PROCESS(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; 9382 size_t attrsize = 0; 9383 u_int32_t nameiflags; 9384 int error; 9385 char uio_buf[ UIO_SIZEOF(1) ]; 9386 9387 if (uap->options & (XATTR_NOSECURITY | XATTR_NODEFAULT)) 9388 return (EINVAL); 9389 9390 nameiflags = ((uap->options & XATTR_NOFOLLOW) ? 0 : FOLLOW) | NOTRIGGER; 9391 NDINIT(&nd, LOOKUP, OP_LISTXATTR, nameiflags, spacetype, uap->path, ctx); 9392 if ((error = namei(&nd))) { 9393 return (error); 9394 } 9395 vp = nd.ni_vp; 9396 nameidone(&nd); 9397 if (uap->namebuf != 0 && uap->bufsize > 0) { 9398 auio = uio_createwithbuffer(1, 0, spacetype, UIO_READ, 9399 &uio_buf[0], sizeof(uio_buf)); 9400 uio_addiov(auio, uap->namebuf, uap->bufsize); 9401 } 9402 9403 error = vn_listxattr(vp, auio, &attrsize, uap->options, ctx); 9404 9405 vnode_put(vp); 9406 if (auio) { 9407 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio); 9408 } else { 9409 *retval = (user_ssize_t)attrsize; 9410 } 9411 return (error); 9412} 9413 9414/* 9415 * Retrieve the list of extended attribute names. 9416 * XXX Code duplication here. 9417 */ 9418int 9419flistxattr(proc_t p, struct flistxattr_args *uap, user_ssize_t *retval) 9420{ 9421 vnode_t vp; 9422 uio_t auio = NULL; 9423 int spacetype = proc_is64bit(p) ? 
UIO_USERSPACE64 : UIO_USERSPACE32; 9424 size_t attrsize = 0; 9425 int error; 9426 char uio_buf[ UIO_SIZEOF(1) ]; 9427 9428 if (uap->options & (XATTR_NOFOLLOW | XATTR_NOSECURITY | XATTR_NODEFAULT)) 9429 return (EINVAL); 9430 9431 if ( (error = file_vnode(uap->fd, &vp)) ) { 9432 return (error); 9433 } 9434 if ( (error = vnode_getwithref(vp)) ) { 9435 file_drop(uap->fd); 9436 return(error); 9437 } 9438 if (uap->namebuf != 0 && uap->bufsize > 0) { 9439 auio = uio_createwithbuffer(1, 0, spacetype, 9440 UIO_READ, &uio_buf[0], sizeof(uio_buf)); 9441 uio_addiov(auio, uap->namebuf, uap->bufsize); 9442 } 9443 9444 error = vn_listxattr(vp, auio, &attrsize, uap->options, vfs_context_current()); 9445 9446 vnode_put(vp); 9447 file_drop(uap->fd); 9448 if (auio) { 9449 *retval = (user_ssize_t)uap->bufsize - uio_resid(auio); 9450 } else { 9451 *retval = (user_ssize_t)attrsize; 9452 } 9453 return (error); 9454} 9455 9456/* 9457 * Obtain the full pathname of a file system object by id. 9458 * 9459 * This is a private SPI used by the File Manager. 9460 */ 9461__private_extern__ 9462int 9463fsgetpath(__unused proc_t p, struct fsgetpath_args *uap, user_ssize_t *retval) 9464{ 9465 vnode_t vp; 9466 struct mount *mp = NULL; 9467 vfs_context_t ctx = vfs_context_current(); 9468 fsid_t fsid; 9469 char *realpath; 9470 int bpflags; 9471 int length; 9472 int error; 9473 9474 if ((error = copyin(uap->fsid, (caddr_t)&fsid, sizeof(fsid)))) { 9475 return (error); 9476 } 9477 AUDIT_ARG(value32, fsid.val[0]); 9478 AUDIT_ARG(value64, uap->objid); 9479 /* Restrict output buffer size for now. */ 9480 if (uap->bufsize > PAGE_SIZE) { 9481 return (EINVAL); 9482 } 9483 MALLOC(realpath, char *, uap->bufsize, M_TEMP, M_WAITOK); 9484 if (realpath == NULL) { 9485 return (ENOMEM); 9486 } 9487 /* Find the target mountpoint. */ 9488 if ((mp = mount_lookupby_volfsid(fsid.val[0], 1)) == NULL) { 9489 error = ENOTSUP; /* unexpected failure */ 9490 goto out; 9491 } 9492unionget: 9493 /* Find the target vnode. 
 */
	/* objid 2 conventionally means the volume root here. */
	if (uap->objid == 2) {
		error = VFS_ROOT(mp, &vp, ctx);
	} else {
		error = VFS_VGET(mp, (ino64_t)uap->objid, &vp, ctx);
	}

	if (error == ENOENT && (mp->mnt_flag & MNT_UNION)) {
		/*
		 * If the fileid isn't found and we're in a union
		 * mount volume, then see if the fileid is in the
		 * mounted-on volume.
		 */
		struct mount *tmp = mp;
		mp = vnode_mount(tmp->mnt_vnodecovered);
		vfs_unbusy(tmp);
		/* Retry the VGET against the covered volume if we can busy it. */
		if (vfs_busy(mp, LK_NOWAIT) == 0)
			goto unionget;
	} else
		vfs_unbusy(mp);

	if (error) {
		goto out;
	}
#if CONFIG_MACF
	error = mac_vnode_check_fsgetpath(ctx, vp);
	if (error) {
		vnode_put(vp);
		goto out;
	}
#endif
	/* Obtain the absolute path to this vnode. */
	bpflags = vfs_context_suser(ctx) ? BUILDPATH_CHECKACCESS : 0;
	bpflags |= BUILDPATH_CHECK_MOVED;
	error = build_path(vp, realpath, uap->bufsize, &length, bpflags, ctx);
	vnode_put(vp);
	if (error) {
		goto out;
	}
	AUDIT_ARG(text, realpath);

	if (kdebug_enable) {
		long dbg_parms[NUMPARMS];
		int dbg_namelen;

		dbg_namelen = (int)sizeof(dbg_parms);

		/* Record the path tail, zero-padded if shorter than the slot. */
		if (length < dbg_namelen) {
			memcpy((char *)dbg_parms, realpath, length);
			memset((char *)dbg_parms + length, 0, dbg_namelen - length);

			dbg_namelen = length;
		} else
			memcpy((char *)dbg_parms, realpath + (length - dbg_namelen), dbg_namelen);

		/*
		 * NOTE(review): vp has already been vnode_put() above; it is
		 * passed here only as an opaque identifier for tracing —
		 * confirm kdebug_lookup_gen_events() never dereferences it.
		 */
		kdebug_lookup_gen_events(dbg_parms, dbg_namelen, (void *)vp, TRUE);
	}
	/* length (set by build_path) includes the NUL and fits bufsize. */
	error = copyout((caddr_t)realpath, uap->buf, length);

	*retval = (user_ssize_t)length; /* may be superseded by error */
out:
	if (realpath) {
		FREE(realpath, M_TEMP);
	}
	return (error);
}

/*
 * Common routine to handle various flavors of statfs data heading out
 * to user space.
 *
 * Returns:	0	Success
 *	EFAULT
 */
static int
munge_statfs(struct mount *mp, struct vfsstatfs *sfsp,
    user_addr_t bufp, int *sizep, boolean_t is_64_bit,
    boolean_t partial_copy)
{
	int error;
	int my_size, copy_size;

	if (is_64_bit) {
		/* 64-bit process: fields copy through without narrowing. */
		struct user64_statfs sfs;
		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);
		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;
		sfs.f_bsize = (user64_long_t)sfsp->f_bsize;
		sfs.f_iosize = (user64_long_t)sfsp->f_iosize;
		sfs.f_blocks = (user64_long_t)sfsp->f_blocks;
		sfs.f_bfree = (user64_long_t)sfsp->f_bfree;
		sfs.f_bavail = (user64_long_t)sfsp->f_bavail;
		sfs.f_files = (user64_long_t)sfsp->f_files;
		sfs.f_ffree = (user64_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/* A filesystem may override its reported type name. */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		/* Partial copy omits the trailing reserved fields. */
		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}
	else {
		struct user32_statfs sfs;

		my_size = copy_size = sizeof(sfs);
		bzero(&sfs, my_size);

		sfs.f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
		sfs.f_type = mp->mnt_vtable->vfc_typenum;
		sfs.f_reserved1 = (short)sfsp->f_fssubtype;

		/*
		 * It's possible for there to be more than 2^31 blocks in the
		 * filesystem, so we have to fudge the numbers here in that
		 * case.  We inflate the blocksize in order to reflect the
		 * filesystem size as best we can.
		 */
		if ((sfsp->f_blocks > INT_MAX)
			/* Hack for 4061702 . I think the real fix is for Carbon to
			 * look for some volume capability and not depend on hidden
			 * semantics agreed between a FS and carbon.
			 * f_blocks, f_bfree, and f_bavail set to -1 is the trigger
			 * for Carbon to set bNoVolumeSizes volume attribute.
			 * Without this the webdavfs files cannot be copied onto
			 * disk as they look huge. This change should not affect
			 * XSAN as they should not be setting these to -1.
			 */
			&& (sfsp->f_blocks != 0xffffffffffffffffULL)
			&& (sfsp->f_bfree != 0xffffffffffffffffULL)
			&& (sfsp->f_bavail != 0xffffffffffffffffULL)) {
			int shift;

			/*
			 * Work out how far we have to shift the block count down to make it fit.
			 * Note that it's possible to have to shift so far that the resulting
			 * blocksize would be unreportably large.  At that point, we will clip
			 * any values that don't fit.
			 *
			 * For safety's sake, we also ensure that f_iosize is never reported as
			 * being smaller than f_bsize.
			 */
			for (shift = 0; shift < 32; shift++) {
				/* Stop once the block count fits in an int... */
				if ((sfsp->f_blocks >> shift) <= INT_MAX)
					break;
				/* ...or the inflated blocksize would overflow. */
				if ((sfsp->f_bsize << (shift + 1)) > INT_MAX)
					break;
			}
/* Clip to INT_MAX when even the shifted value does not fit. */
#define __SHIFT_OR_CLIP(x, s) ((((x) >> (s)) > INT_MAX) ? \
 INT_MAX : ((x) >> (s)))
			sfs.f_blocks = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_blocks, shift);
			sfs.f_bfree = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bfree, shift);
			sfs.f_bavail = (user32_long_t)__SHIFT_OR_CLIP(sfsp->f_bavail, shift);
#undef __SHIFT_OR_CLIP
			/* Inflate the blocksize to compensate for the shift. */
			sfs.f_bsize = (user32_long_t)(sfsp->f_bsize << shift);
			sfs.f_iosize = lmax(sfsp->f_iosize, sfsp->f_bsize);
		} else {
			/* filesystem is small enough to be reported honestly */
			sfs.f_bsize = (user32_long_t)sfsp->f_bsize;
			sfs.f_iosize = (user32_long_t)sfsp->f_iosize;
			sfs.f_blocks = (user32_long_t)sfsp->f_blocks;
			sfs.f_bfree = (user32_long_t)sfsp->f_bfree;
			sfs.f_bavail = (user32_long_t)sfsp->f_bavail;
		}
		sfs.f_files = (user32_long_t)sfsp->f_files;
		sfs.f_ffree = (user32_long_t)sfsp->f_ffree;
		sfs.f_fsid = sfsp->f_fsid;
		sfs.f_owner = sfsp->f_owner;
		/*
		 * NOTE(review): the override path bounds strlcpy by
		 * MFSTYPENAMELEN while the normal path uses MFSNAMELEN —
		 * confirm f_fstypename is large enough for the bigger bound.
		 */
		if (mp->mnt_kern_flag & MNTK_TYPENAME_OVERRIDE) {
			strlcpy(&sfs.f_fstypename[0], &mp->fstypename_override[0], MFSTYPENAMELEN);
		} else {
			strlcpy(&sfs.f_fstypename[0], &sfsp->f_fstypename[0], MFSNAMELEN);
		}
		strlcpy(&sfs.f_mntonname[0], &sfsp->f_mntonname[0], MNAMELEN);
		strlcpy(&sfs.f_mntfromname[0], &sfsp->f_mntfromname[0], MNAMELEN);

		/* Partial copy omits the trailing reserved fields. */
		if (partial_copy) {
			copy_size -= (sizeof(sfs.f_reserved3) + sizeof(sfs.f_reserved4));
		}
		error = copyout((caddr_t)&sfs, bufp, copy_size);
	}

	/* Report the full (non-partial) structure size to the caller. */
	if (sizep != NULL) {
		*sizep = my_size;
	}
	return(error);
}

/*
 * copy stat structure into user_stat structure.
 */
/* Field-by-field copy of the kernel stat into the 64-bit user layout. */
void munge_user64_stat(struct stat *sbp, struct user64_stat *usbp)
{
	/* Zero first so padding and unset fields never leak kernel data. */
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	/* Strict-POSIX layout exposes the times as flat fields. */
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}

/* Same copy, narrowing into the 32-bit user layout. */
void munge_user32_stat(struct stat *sbp, struct user32_stat *usbp)
{
	/* Zero first so padding and unset fields never leak kernel data. */
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec =
	    sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
#else
	/* Strict-POSIX layout exposes the times as flat fields. */
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}

/*
 * copy stat64 structure into user_stat64 structure.
 */
/* As munge_user64_stat(), but the stat64 variant also carries birthtime. */
void munge_user64_stat64(struct stat64 *sbp, struct user64_stat64 *usbp)
{
	/* Zero first so padding and unset fields never leak kernel data. */
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	/* Strict-POSIX layout exposes the times as flat fields. */
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}

/* As munge_user64_stat64(), narrowing into the 32-bit user layout. */
void munge_user32_stat64(struct stat64 *sbp, struct user32_stat64 *usbp)
{
	/* Zero first so padding and unset fields never leak kernel data. */
	bzero(usbp, sizeof(*usbp));

	usbp->st_dev = sbp->st_dev;
	usbp->st_ino = sbp->st_ino;
	usbp->st_mode = sbp->st_mode;
	usbp->st_nlink = sbp->st_nlink;
	usbp->st_uid = sbp->st_uid;
	usbp->st_gid = sbp->st_gid;
	usbp->st_rdev = sbp->st_rdev;
#ifndef _POSIX_C_SOURCE
	usbp->st_atimespec.tv_sec = sbp->st_atimespec.tv_sec;
	usbp->st_atimespec.tv_nsec = sbp->st_atimespec.tv_nsec;
	usbp->st_mtimespec.tv_sec = sbp->st_mtimespec.tv_sec;
	usbp->st_mtimespec.tv_nsec = sbp->st_mtimespec.tv_nsec;
	usbp->st_ctimespec.tv_sec = sbp->st_ctimespec.tv_sec;
	usbp->st_ctimespec.tv_nsec = sbp->st_ctimespec.tv_nsec;
	usbp->st_birthtimespec.tv_sec = sbp->st_birthtimespec.tv_sec;
	usbp->st_birthtimespec.tv_nsec = sbp->st_birthtimespec.tv_nsec;
#else
	/* Strict-POSIX layout exposes the times as flat fields. */
	usbp->st_atime = sbp->st_atime;
	usbp->st_atimensec = sbp->st_atimensec;
	usbp->st_mtime = sbp->st_mtime;
	usbp->st_mtimensec = sbp->st_mtimensec;
	usbp->st_ctime = sbp->st_ctime;
	usbp->st_ctimensec = sbp->st_ctimensec;
	usbp->st_birthtime = sbp->st_birthtime;
	usbp->st_birthtimensec = sbp->st_birthtimensec;
#endif
	usbp->st_size = sbp->st_size;
	usbp->st_blocks = sbp->st_blocks;
	usbp->st_blksize = sbp->st_blksize;
	usbp->st_flags = sbp->st_flags;
	usbp->st_gen = sbp->st_gen;
	usbp->st_lspare = sbp->st_lspare;
	usbp->st_qspare[0] = sbp->st_qspare[0];
	usbp->st_qspare[1] = sbp->st_qspare[1];
}

/*
 * Purge buffer cache for simulating cold starts
 */
static int
vnode_purge_callback(struct vnode *vp, __unused void *cargs)
{
	/* Push any dirty pages and invalidate this vnode's cached data. */
	ubc_msync(vp, (off_t)0, ubc_getsize(vp), NULL /* off_t *resid_off */, UBC_PUSHALL | UBC_INVALIDATE);

	return VNODE_RETURNED;
}

/* Per-mount callback: purge every vnode on the mount. */
static int vfs_purge_callback(mount_t mp, __unused void * arg)
{
	vnode_iterate(mp, VNODE_WAIT | VNODE_ITERATE_ALL, vnode_purge_callback, NULL);

	return VFS_RETURNED;
}

/*
 * vfs_purge system call: drop cached file data across all mounts so a
 * subsequent run behaves like a cold start.  Superuser only.
 */
int
vfs_purge(__unused struct proc *p, __unused struct vfs_purge_args *uap, __unused int32_t *retval)
{
	if (!kauth_cred_issuser(kauth_cred_get()))
		return EPERM;

	vfs_iterate(0/* flags */, vfs_purge_callback, NULL);

	return 0;
}