1/* $NetBSD: vfs_syscalls.c,v 1.449.2.2 2012/05/19 15:01:35 riz Exp $ */ 2 3/*- 4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * Copyright (c) 1989, 1993 34 * The Regents of the University of California. All rights reserved. 35 * (c) UNIX System Laboratories, Inc. 36 * All or some portions of this file are derived from material licensed 37 * to the University of California by American Telephone and Telegraph 38 * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with 39 * the permission of UNIX System Laboratories, Inc. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)vfs_syscalls.c 8.42 (Berkeley) 7/31/95 66 */ 67 68/* 69 * Virtual File System System Calls 70 */ 71 72#include <sys/cdefs.h> 73__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.449.2.2 2012/05/19 15:01:35 riz Exp $"); 74 75#ifdef _KERNEL_OPT 76#include "opt_fileassoc.h" 77#include "veriexec.h" 78#endif 79 80#include <sys/param.h> 81#include <sys/systm.h> 82#include <sys/namei.h> 83#include <sys/filedesc.h> 84#include <sys/kernel.h> 85#include <sys/file.h> 86#include <sys/fcntl.h> 87#include <sys/stat.h> 88#include <sys/vnode.h> 89#include <sys/mount.h> 90#include <sys/proc.h> 91#include <sys/uio.h> 92#include <sys/kmem.h> 93#include <sys/dirent.h> 94#include <sys/sysctl.h> 95#include <sys/syscallargs.h> 96#include <sys/vfs_syscalls.h> 97#include <sys/quota.h> 98#include <sys/quotactl.h> 99#include <sys/ktrace.h> 100#ifdef FILEASSOC 101#include <sys/fileassoc.h> 102#endif /* FILEASSOC */ 103#include <sys/extattr.h> 104#include <sys/verified_exec.h> 105#include <sys/kauth.h> 106#include <sys/atomic.h> 107#include <sys/module.h> 108#include <sys/buf.h> 109 110#include <miscfs/genfs/genfs.h> 111#include <miscfs/syncfs/syncfs.h> 112#include <miscfs/specfs/specdev.h> 113 114#include <nfs/rpcv2.h> 115#include <nfs/nfsproto.h> 116#include <nfs/nfs.h> 117#include <nfs/nfs_var.h> 118 119static int change_flags(struct vnode *, u_long, struct lwp *); 120static int change_mode(struct vnode *, int, struct lwp *l); 121static int change_owner(struct vnode *, uid_t, gid_t, struct lwp *, int); 122static int do_open(lwp_t *, struct pathbuf *, int, int, int *); 123 124/* 125 * This table is used to maintain compatibility with 4.3BSD 126 * and NetBSD 0.9 mount syscalls - and possibly other systems. 127 * Note, the order is important! 128 * 129 * Do not modify this table. It should only contain filesystems 130 * supported by NetBSD 0.9 and 4.3BSD. 
131 */ 132const char * const mountcompatnames[] = { 133 NULL, /* 0 = MOUNT_NONE */ 134 MOUNT_FFS, /* 1 = MOUNT_UFS */ 135 MOUNT_NFS, /* 2 */ 136 MOUNT_MFS, /* 3 */ 137 MOUNT_MSDOS, /* 4 */ 138 MOUNT_CD9660, /* 5 = MOUNT_ISOFS */ 139 MOUNT_FDESC, /* 6 */ 140 MOUNT_KERNFS, /* 7 */ 141 NULL, /* 8 = MOUNT_DEVFS */ 142 MOUNT_AFS, /* 9 */ 143}; 144 145const int nmountcompatnames = __arraycount(mountcompatnames); 146 147static int 148open_setfp(struct lwp *l, file_t *fp, struct vnode *vp, int indx, int flags) 149{ 150 int error; 151 152 fp->f_flag = flags & FMASK; 153 fp->f_type = DTYPE_VNODE; 154 fp->f_ops = &vnops; 155 fp->f_data = vp; 156 157 if (flags & (O_EXLOCK | O_SHLOCK)) { 158 struct flock lf; 159 int type; 160 161 lf.l_whence = SEEK_SET; 162 lf.l_start = 0; 163 lf.l_len = 0; 164 if (flags & O_EXLOCK) 165 lf.l_type = F_WRLCK; 166 else 167 lf.l_type = F_RDLCK; 168 type = F_FLOCK; 169 if ((flags & FNONBLOCK) == 0) 170 type |= F_WAIT; 171 VOP_UNLOCK(vp); 172 error = VOP_ADVLOCK(vp, fp, F_SETLK, &lf, type); 173 if (error) { 174 (void) vn_close(vp, fp->f_flag, fp->f_cred); 175 fd_abort(l->l_proc, fp, indx); 176 return error; 177 } 178 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 179 atomic_or_uint(&fp->f_flag, FHASLOCK); 180 } 181 if (flags & O_CLOEXEC) 182 fd_set_exclose(l, indx, true); 183 return 0; 184} 185 186static int 187mount_update(struct lwp *l, struct vnode *vp, const char *path, int flags, 188 void *data, size_t *data_len) 189{ 190 struct mount *mp; 191 int error = 0, saved_flags; 192 193 mp = vp->v_mount; 194 saved_flags = mp->mnt_flag; 195 196 /* We can operate only on VV_ROOT nodes. */ 197 if ((vp->v_vflag & VV_ROOT) == 0) { 198 error = EINVAL; 199 goto out; 200 } 201 202 /* 203 * We only allow the filesystem to be reloaded if it 204 * is currently mounted read-only. Additionally, we 205 * prevent read-write to read-only downgrades. 
206 */ 207 if ((flags & (MNT_RELOAD | MNT_RDONLY)) != 0 && 208 (mp->mnt_flag & MNT_RDONLY) == 0 && 209 (mp->mnt_iflag & IMNT_CAN_RWTORO) == 0) { 210 error = EOPNOTSUPP; /* Needs translation */ 211 goto out; 212 } 213 214 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 215 KAUTH_REQ_SYSTEM_MOUNT_UPDATE, mp, KAUTH_ARG(flags), data); 216 if (error) 217 goto out; 218 219 if (vfs_busy(mp, NULL)) { 220 error = EPERM; 221 goto out; 222 } 223 224 mutex_enter(&mp->mnt_updating); 225 226 mp->mnt_flag &= ~MNT_OP_FLAGS; 227 mp->mnt_flag |= flags & MNT_OP_FLAGS; 228 229 /* 230 * Set the mount level flags. 231 */ 232 if (flags & MNT_RDONLY) 233 mp->mnt_flag |= MNT_RDONLY; 234 else if (mp->mnt_flag & MNT_RDONLY) 235 mp->mnt_iflag |= IMNT_WANTRDWR; 236 mp->mnt_flag &= ~MNT_BASIC_FLAGS; 237 mp->mnt_flag |= flags & MNT_BASIC_FLAGS; 238 error = VFS_MOUNT(mp, path, data, data_len); 239 240 if (error && data != NULL) { 241 int error2; 242 243 /* 244 * Update failed; let's try and see if it was an 245 * export request. For compat with 3.0 and earlier. 246 */ 247 error2 = vfs_hooks_reexport(mp, path, data); 248 249 /* 250 * Only update error code if the export request was 251 * understood but some problem occurred while 252 * processing it. 
253 */ 254 if (error2 != EJUSTRETURN) 255 error = error2; 256 } 257 258 if (mp->mnt_iflag & IMNT_WANTRDWR) 259 mp->mnt_flag &= ~MNT_RDONLY; 260 if (error) 261 mp->mnt_flag = saved_flags; 262 mp->mnt_flag &= ~MNT_OP_FLAGS; 263 mp->mnt_iflag &= ~IMNT_WANTRDWR; 264 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) { 265 if (mp->mnt_syncer == NULL) 266 error = vfs_allocate_syncvnode(mp); 267 } else { 268 if (mp->mnt_syncer != NULL) 269 vfs_deallocate_syncvnode(mp); 270 } 271 mutex_exit(&mp->mnt_updating); 272 vfs_unbusy(mp, false, NULL); 273 274 if ((error == 0) && !(saved_flags & MNT_EXTATTR) && 275 (flags & MNT_EXTATTR)) { 276 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, 277 NULL, 0, NULL) != 0) { 278 printf("%s: failed to start extattr, error = %d", 279 mp->mnt_stat.f_mntonname, error); 280 mp->mnt_flag &= ~MNT_EXTATTR; 281 } 282 } 283 284 if ((error == 0) && (saved_flags & MNT_EXTATTR) && 285 !(flags & MNT_EXTATTR)) { 286 if (VFS_EXTATTRCTL(mp, EXTATTR_CMD_STOP, 287 NULL, 0, NULL) != 0) { 288 printf("%s: failed to stop extattr, error = %d", 289 mp->mnt_stat.f_mntonname, error); 290 mp->mnt_flag |= MNT_RDONLY; 291 } 292 } 293 out: 294 return (error); 295} 296 297static int 298mount_get_vfsops(const char *fstype, struct vfsops **vfsops) 299{ 300 char fstypename[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 301 int error; 302 303 /* Copy file-system type from userspace. */ 304 error = copyinstr(fstype, fstypename, sizeof(fstypename), NULL); 305 if (error) { 306 /* 307 * Historically, filesystem types were identified by numbers. 308 * If we get an integer for the filesystem type instead of a 309 * string, we check to see if it matches one of the historic 310 * filesystem types. 311 */ 312 u_long fsindex = (u_long)fstype; 313 if (fsindex >= nmountcompatnames || 314 mountcompatnames[fsindex] == NULL) 315 return ENODEV; 316 strlcpy(fstypename, mountcompatnames[fsindex], 317 sizeof(fstypename)); 318 } 319 320 /* Accept `ufs' as an alias for `ffs', for compatibility. 
*/ 321 if (strcmp(fstypename, "ufs") == 0) 322 fstypename[0] = 'f'; 323 324 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 325 return 0; 326 327 /* If we can autoload a vfs module, try again */ 328 (void)module_autoload(fstypename, MODULE_CLASS_VFS); 329 330 if ((*vfsops = vfs_getopsbyname(fstypename)) != NULL) 331 return 0; 332 333 return ENODEV; 334} 335 336static int 337mount_getargs(struct lwp *l, struct vnode *vp, const char *path, int flags, 338 void *data, size_t *data_len) 339{ 340 struct mount *mp; 341 int error; 342 343 /* If MNT_GETARGS is specified, it should be the only flag. */ 344 if (flags & ~MNT_GETARGS) 345 return EINVAL; 346 347 mp = vp->v_mount; 348 349 /* XXX: probably some notion of "can see" here if we want isolation. */ 350 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 351 KAUTH_REQ_SYSTEM_MOUNT_GET, mp, data, NULL); 352 if (error) 353 return error; 354 355 if ((vp->v_vflag & VV_ROOT) == 0) 356 return EINVAL; 357 358 if (vfs_busy(mp, NULL)) 359 return EPERM; 360 361 mutex_enter(&mp->mnt_updating); 362 mp->mnt_flag &= ~MNT_OP_FLAGS; 363 mp->mnt_flag |= MNT_GETARGS; 364 error = VFS_MOUNT(mp, path, data, data_len); 365 mp->mnt_flag &= ~MNT_OP_FLAGS; 366 mutex_exit(&mp->mnt_updating); 367 368 vfs_unbusy(mp, false, NULL); 369 return (error); 370} 371 372int 373sys___mount50(struct lwp *l, const struct sys___mount50_args *uap, register_t *retval) 374{ 375 /* { 376 syscallarg(const char *) type; 377 syscallarg(const char *) path; 378 syscallarg(int) flags; 379 syscallarg(void *) data; 380 syscallarg(size_t) data_len; 381 } */ 382 383 return do_sys_mount(l, NULL, SCARG(uap, type), SCARG(uap, path), 384 SCARG(uap, flags), SCARG(uap, data), UIO_USERSPACE, 385 SCARG(uap, data_len), retval); 386} 387 388int 389do_sys_mount(struct lwp *l, struct vfsops *vfsops, const char *type, 390 const char *path, int flags, void *data, enum uio_seg data_seg, 391 size_t data_len, register_t *retval) 392{ 393 struct vnode *vp; 394 void *data_buf 
= data; 395 bool vfsopsrele = false; 396 size_t alloc_sz = 0; 397 int error; 398 399 /* XXX: The calling convention of this routine is totally bizarre */ 400 if (vfsops) 401 vfsopsrele = true; 402 403 /* 404 * Get vnode to be covered 405 */ 406 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 407 if (error != 0) { 408 vp = NULL; 409 goto done; 410 } 411 412 if (vfsops == NULL) { 413 if (flags & (MNT_GETARGS | MNT_UPDATE)) { 414 vfsops = vp->v_mount->mnt_op; 415 } else { 416 /* 'type' is userspace */ 417 error = mount_get_vfsops(type, &vfsops); 418 if (error != 0) 419 goto done; 420 vfsopsrele = true; 421 } 422 } 423 424 /* 425 * We allow data to be NULL, even for userspace. Some fs's don't need 426 * it. The others will handle NULL. 427 */ 428 if (data != NULL && data_seg == UIO_USERSPACE) { 429 if (data_len == 0) { 430 /* No length supplied, use default for filesystem */ 431 data_len = vfsops->vfs_min_mount_data; 432 433 /* 434 * Hopefully a longer buffer won't make copyin() fail. 435 * For compatibility with 3.0 and earlier. 436 */ 437 if (flags & MNT_UPDATE 438 && data_len < sizeof (struct mnt_export_args30)) 439 data_len = sizeof (struct mnt_export_args30); 440 } 441 if ((data_len == 0) || (data_len > VFS_MAX_MOUNT_DATA)) { 442 error = EINVAL; 443 goto done; 444 } 445 alloc_sz = data_len; 446 data_buf = kmem_alloc(alloc_sz, KM_SLEEP); 447 448 /* NFS needs the buffer even for mnt_getargs .... 
*/ 449 error = copyin(data, data_buf, data_len); 450 if (error != 0) 451 goto done; 452 } 453 454 if (flags & MNT_GETARGS) { 455 if (data_len == 0) { 456 error = EINVAL; 457 goto done; 458 } 459 error = mount_getargs(l, vp, path, flags, data_buf, &data_len); 460 if (error != 0) 461 goto done; 462 if (data_seg == UIO_USERSPACE) 463 error = copyout(data_buf, data, data_len); 464 *retval = data_len; 465 } else if (flags & MNT_UPDATE) { 466 error = mount_update(l, vp, path, flags, data_buf, &data_len); 467 } else { 468 /* Locking is handled internally in mount_domount(). */ 469 KASSERT(vfsopsrele == true); 470 error = mount_domount(l, &vp, vfsops, path, flags, data_buf, 471 &data_len); 472 vfsopsrele = false; 473 } 474 475 done: 476 if (vfsopsrele) 477 vfs_delref(vfsops); 478 if (vp != NULL) { 479 vrele(vp); 480 } 481 if (data_buf != data) 482 kmem_free(data_buf, alloc_sz); 483 return (error); 484} 485 486/* 487 * Unmount a file system. 488 * 489 * Note: unmount takes a path to the vnode mounted on as argument, 490 * not special file (as before). 
491 */ 492/* ARGSUSED */ 493int 494sys_unmount(struct lwp *l, const struct sys_unmount_args *uap, register_t *retval) 495{ 496 /* { 497 syscallarg(const char *) path; 498 syscallarg(int) flags; 499 } */ 500 struct vnode *vp; 501 struct mount *mp; 502 int error; 503 struct pathbuf *pb; 504 struct nameidata nd; 505 506 error = pathbuf_copyin(SCARG(uap, path), &pb); 507 if (error) { 508 return error; 509 } 510 511 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 512 if ((error = namei(&nd)) != 0) { 513 pathbuf_destroy(pb); 514 return error; 515 } 516 vp = nd.ni_vp; 517 pathbuf_destroy(pb); 518 519 mp = vp->v_mount; 520 atomic_inc_uint(&mp->mnt_refcnt); 521 VOP_UNLOCK(vp); 522 523 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 524 KAUTH_REQ_SYSTEM_MOUNT_UNMOUNT, mp, NULL, NULL); 525 if (error) { 526 vrele(vp); 527 vfs_destroy(mp); 528 return (error); 529 } 530 531 /* 532 * Don't allow unmounting the root file system. 533 */ 534 if (mp->mnt_flag & MNT_ROOTFS) { 535 vrele(vp); 536 vfs_destroy(mp); 537 return (EINVAL); 538 } 539 540 /* 541 * Must be the root of the filesystem 542 */ 543 if ((vp->v_vflag & VV_ROOT) == 0) { 544 vrele(vp); 545 vfs_destroy(mp); 546 return (EINVAL); 547 } 548 549 vrele(vp); 550 error = dounmount(mp, SCARG(uap, flags), l); 551 vfs_destroy(mp); 552 return error; 553} 554 555/* 556 * Sync each mounted filesystem. 
557 */ 558#ifdef DEBUG 559int syncprt = 0; 560struct ctldebug debug0 = { "syncprt", &syncprt }; 561#endif 562 563void 564do_sys_sync(struct lwp *l) 565{ 566 struct mount *mp, *nmp; 567 int asyncflag; 568 569 mutex_enter(&mountlist_lock); 570 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 571 mp = nmp) { 572 if (vfs_busy(mp, &nmp)) { 573 continue; 574 } 575 mutex_enter(&mp->mnt_updating); 576 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 577 asyncflag = mp->mnt_flag & MNT_ASYNC; 578 mp->mnt_flag &= ~MNT_ASYNC; 579 VFS_SYNC(mp, MNT_NOWAIT, l->l_cred); 580 if (asyncflag) 581 mp->mnt_flag |= MNT_ASYNC; 582 } 583 mutex_exit(&mp->mnt_updating); 584 vfs_unbusy(mp, false, &nmp); 585 } 586 mutex_exit(&mountlist_lock); 587#ifdef DEBUG 588 if (syncprt) 589 vfs_bufstats(); 590#endif /* DEBUG */ 591} 592 593/* ARGSUSED */ 594int 595sys_sync(struct lwp *l, const void *v, register_t *retval) 596{ 597 do_sys_sync(l); 598 return (0); 599} 600 601 602/* 603 * Access or change filesystem quotas. 
604 * 605 * (this is really 14 different calls bundled into one) 606 */ 607 608static int 609do_sys_quotactl_stat(struct mount *mp, struct quotastat *info_u) 610{ 611 struct quotastat info_k; 612 int error; 613 614 /* ensure any padding bytes are cleared */ 615 memset(&info_k, 0, sizeof(info_k)); 616 617 error = vfs_quotactl_stat(mp, &info_k); 618 if (error) { 619 return error; 620 } 621 622 return copyout(&info_k, info_u, sizeof(info_k)); 623} 624 625static int 626do_sys_quotactl_idtypestat(struct mount *mp, int idtype, 627 struct quotaidtypestat *info_u) 628{ 629 struct quotaidtypestat info_k; 630 int error; 631 632 /* ensure any padding bytes are cleared */ 633 memset(&info_k, 0, sizeof(info_k)); 634 635 error = vfs_quotactl_idtypestat(mp, idtype, &info_k); 636 if (error) { 637 return error; 638 } 639 640 return copyout(&info_k, info_u, sizeof(info_k)); 641} 642 643static int 644do_sys_quotactl_objtypestat(struct mount *mp, int objtype, 645 struct quotaobjtypestat *info_u) 646{ 647 struct quotaobjtypestat info_k; 648 int error; 649 650 /* ensure any padding bytes are cleared */ 651 memset(&info_k, 0, sizeof(info_k)); 652 653 error = vfs_quotactl_objtypestat(mp, objtype, &info_k); 654 if (error) { 655 return error; 656 } 657 658 return copyout(&info_k, info_u, sizeof(info_k)); 659} 660 661static int 662do_sys_quotactl_get(struct mount *mp, const struct quotakey *key_u, 663 struct quotaval *val_u) 664{ 665 struct quotakey key_k; 666 struct quotaval val_k; 667 int error; 668 669 /* ensure any padding bytes are cleared */ 670 memset(&val_k, 0, sizeof(val_k)); 671 672 error = copyin(key_u, &key_k, sizeof(key_k)); 673 if (error) { 674 return error; 675 } 676 677 error = vfs_quotactl_get(mp, &key_k, &val_k); 678 if (error) { 679 return error; 680 } 681 682 return copyout(&val_k, val_u, sizeof(val_k)); 683} 684 685static int 686do_sys_quotactl_put(struct mount *mp, const struct quotakey *key_u, 687 const struct quotaval *val_u) 688{ 689 struct quotakey key_k; 690 struct 
quotaval val_k; 691 int error; 692 693 error = copyin(key_u, &key_k, sizeof(key_k)); 694 if (error) { 695 return error; 696 } 697 698 error = copyin(val_u, &val_k, sizeof(val_k)); 699 if (error) { 700 return error; 701 } 702 703 return vfs_quotactl_put(mp, &key_k, &val_k); 704} 705 706static int 707do_sys_quotactl_delete(struct mount *mp, const struct quotakey *key_u) 708{ 709 struct quotakey key_k; 710 int error; 711 712 error = copyin(key_u, &key_k, sizeof(key_k)); 713 if (error) { 714 return error; 715 } 716 717 return vfs_quotactl_delete(mp, &key_k); 718} 719 720static int 721do_sys_quotactl_cursoropen(struct mount *mp, struct quotakcursor *cursor_u) 722{ 723 struct quotakcursor cursor_k; 724 int error; 725 726 /* ensure any padding bytes are cleared */ 727 memset(&cursor_k, 0, sizeof(cursor_k)); 728 729 error = vfs_quotactl_cursoropen(mp, &cursor_k); 730 if (error) { 731 return error; 732 } 733 734 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 735} 736 737static int 738do_sys_quotactl_cursorclose(struct mount *mp, struct quotakcursor *cursor_u) 739{ 740 struct quotakcursor cursor_k; 741 int error; 742 743 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 744 if (error) { 745 return error; 746 } 747 748 return vfs_quotactl_cursorclose(mp, &cursor_k); 749} 750 751static int 752do_sys_quotactl_cursorskipidtype(struct mount *mp, 753 struct quotakcursor *cursor_u, int idtype) 754{ 755 struct quotakcursor cursor_k; 756 int error; 757 758 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 759 if (error) { 760 return error; 761 } 762 763 error = vfs_quotactl_cursorskipidtype(mp, &cursor_k, idtype); 764 if (error) { 765 return error; 766 } 767 768 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 769} 770 771static int 772do_sys_quotactl_cursorget(struct mount *mp, struct quotakcursor *cursor_u, 773 struct quotakey *keys_u, struct quotaval *vals_u, unsigned maxnum, 774 unsigned *ret_u) 775{ 776#define CGET_STACK_MAX 8 777 struct quotakcursor 
cursor_k; 778 struct quotakey stackkeys[CGET_STACK_MAX]; 779 struct quotaval stackvals[CGET_STACK_MAX]; 780 struct quotakey *keys_k; 781 struct quotaval *vals_k; 782 unsigned ret_k; 783 int error; 784 785 if (maxnum > 128) { 786 maxnum = 128; 787 } 788 789 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 790 if (error) { 791 return error; 792 } 793 794 if (maxnum <= CGET_STACK_MAX) { 795 keys_k = stackkeys; 796 vals_k = stackvals; 797 /* ensure any padding bytes are cleared */ 798 memset(keys_k, 0, maxnum * sizeof(keys_k[0])); 799 memset(vals_k, 0, maxnum * sizeof(vals_k[0])); 800 } else { 801 keys_k = kmem_zalloc(maxnum * sizeof(keys_k[0]), KM_SLEEP); 802 vals_k = kmem_zalloc(maxnum * sizeof(vals_k[0]), KM_SLEEP); 803 } 804 805 error = vfs_quotactl_cursorget(mp, &cursor_k, keys_k, vals_k, maxnum, 806 &ret_k); 807 if (error) { 808 goto fail; 809 } 810 811 error = copyout(keys_k, keys_u, ret_k * sizeof(keys_k[0])); 812 if (error) { 813 goto fail; 814 } 815 816 error = copyout(vals_k, vals_u, ret_k * sizeof(vals_k[0])); 817 if (error) { 818 goto fail; 819 } 820 821 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 822 if (error) { 823 goto fail; 824 } 825 826 /* do last to maximize the chance of being able to recover a failure */ 827 error = copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 828 829fail: 830 if (keys_k != stackkeys) { 831 kmem_free(keys_k, maxnum * sizeof(keys_k[0])); 832 } 833 if (vals_k != stackvals) { 834 kmem_free(vals_k, maxnum * sizeof(vals_k[0])); 835 } 836 return error; 837} 838 839static int 840do_sys_quotactl_cursoratend(struct mount *mp, struct quotakcursor *cursor_u, 841 int *ret_u) 842{ 843 struct quotakcursor cursor_k; 844 int ret_k; 845 int error; 846 847 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 848 if (error) { 849 return error; 850 } 851 852 error = vfs_quotactl_cursoratend(mp, &cursor_k, &ret_k); 853 if (error) { 854 return error; 855 } 856 857 error = copyout(&ret_k, ret_u, sizeof(ret_k)); 858 if (error) { 859 return 
error; 860 } 861 862 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 863} 864 865static int 866do_sys_quotactl_cursorrewind(struct mount *mp, struct quotakcursor *cursor_u) 867{ 868 struct quotakcursor cursor_k; 869 int error; 870 871 error = copyin(cursor_u, &cursor_k, sizeof(cursor_k)); 872 if (error) { 873 return error; 874 } 875 876 error = vfs_quotactl_cursorrewind(mp, &cursor_k); 877 if (error) { 878 return error; 879 } 880 881 return copyout(&cursor_k, cursor_u, sizeof(cursor_k)); 882} 883 884static int 885do_sys_quotactl_quotaon(struct mount *mp, int idtype, const char *path_u) 886{ 887 char *path_k; 888 int error; 889 890 /* XXX this should probably be a struct pathbuf */ 891 path_k = PNBUF_GET(); 892 error = copyin(path_u, path_k, PATH_MAX); 893 if (error) { 894 PNBUF_PUT(path_k); 895 return error; 896 } 897 898 error = vfs_quotactl_quotaon(mp, idtype, path_k); 899 900 PNBUF_PUT(path_k); 901 return error; 902} 903 904static int 905do_sys_quotactl_quotaoff(struct mount *mp, int idtype) 906{ 907 return vfs_quotactl_quotaoff(mp, idtype); 908} 909 910int 911do_sys_quotactl(const char *path_u, const struct quotactl_args *args) 912{ 913 struct mount *mp; 914 struct vnode *vp; 915 int error; 916 917 error = namei_simple_user(path_u, NSM_FOLLOW_TRYEMULROOT, &vp); 918 if (error != 0) 919 return (error); 920 mp = vp->v_mount; 921 922 switch (args->qc_op) { 923 case QUOTACTL_STAT: 924 error = do_sys_quotactl_stat(mp, args->u.stat.qc_info); 925 break; 926 case QUOTACTL_IDTYPESTAT: 927 error = do_sys_quotactl_idtypestat(mp, 928 args->u.idtypestat.qc_idtype, 929 args->u.idtypestat.qc_info); 930 break; 931 case QUOTACTL_OBJTYPESTAT: 932 error = do_sys_quotactl_objtypestat(mp, 933 args->u.objtypestat.qc_objtype, 934 args->u.objtypestat.qc_info); 935 break; 936 case QUOTACTL_GET: 937 error = do_sys_quotactl_get(mp, 938 args->u.get.qc_key, 939 args->u.get.qc_val); 940 break; 941 case QUOTACTL_PUT: 942 error = do_sys_quotactl_put(mp, 943 args->u.put.qc_key, 944 
args->u.put.qc_val); 945 break; 946 case QUOTACTL_DELETE: 947 error = do_sys_quotactl_delete(mp, args->u.delete.qc_key); 948 break; 949 case QUOTACTL_CURSOROPEN: 950 error = do_sys_quotactl_cursoropen(mp, 951 args->u.cursoropen.qc_cursor); 952 break; 953 case QUOTACTL_CURSORCLOSE: 954 error = do_sys_quotactl_cursorclose(mp, 955 args->u.cursorclose.qc_cursor); 956 break; 957 case QUOTACTL_CURSORSKIPIDTYPE: 958 error = do_sys_quotactl_cursorskipidtype(mp, 959 args->u.cursorskipidtype.qc_cursor, 960 args->u.cursorskipidtype.qc_idtype); 961 break; 962 case QUOTACTL_CURSORGET: 963 error = do_sys_quotactl_cursorget(mp, 964 args->u.cursorget.qc_cursor, 965 args->u.cursorget.qc_keys, 966 args->u.cursorget.qc_vals, 967 args->u.cursorget.qc_maxnum, 968 args->u.cursorget.qc_ret); 969 break; 970 case QUOTACTL_CURSORATEND: 971 error = do_sys_quotactl_cursoratend(mp, 972 args->u.cursoratend.qc_cursor, 973 args->u.cursoratend.qc_ret); 974 break; 975 case QUOTACTL_CURSORREWIND: 976 error = do_sys_quotactl_cursorrewind(mp, 977 args->u.cursorrewind.qc_cursor); 978 break; 979 case QUOTACTL_QUOTAON: 980 error = do_sys_quotactl_quotaon(mp, 981 args->u.quotaon.qc_idtype, 982 args->u.quotaon.qc_quotafile); 983 break; 984 case QUOTACTL_QUOTAOFF: 985 error = do_sys_quotactl_quotaoff(mp, 986 args->u.quotaoff.qc_idtype); 987 break; 988 default: 989 error = EINVAL; 990 break; 991 } 992 993 vrele(vp); 994 return error; 995} 996 997/* ARGSUSED */ 998int 999sys___quotactl(struct lwp *l, const struct sys___quotactl_args *uap, 1000 register_t *retval) 1001{ 1002 /* { 1003 syscallarg(const char *) path; 1004 syscallarg(struct quotactl_args *) args; 1005 } */ 1006 struct quotactl_args args; 1007 int error; 1008 1009 error = copyin(SCARG(uap, args), &args, sizeof(args)); 1010 if (error) { 1011 return error; 1012 } 1013 1014 return do_sys_quotactl(SCARG(uap, path), &args); 1015} 1016 1017int 1018dostatvfs(struct mount *mp, struct statvfs *sp, struct lwp *l, int flags, 1019 int root) 1020{ 1021 struct 
cwdinfo *cwdi = l->l_proc->p_cwdi; 1022 int error = 0; 1023 1024 /* 1025 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1026 * refresh the fsstat cache. MNT_WAIT or MNT_LAZY 1027 * overrides MNT_NOWAIT. 1028 */ 1029 if (flags == MNT_NOWAIT || flags == MNT_LAZY || 1030 (flags != MNT_WAIT && flags != 0)) { 1031 memcpy(sp, &mp->mnt_stat, sizeof(*sp)); 1032 goto done; 1033 } 1034 1035 /* Get the filesystem stats now */ 1036 memset(sp, 0, sizeof(*sp)); 1037 if ((error = VFS_STATVFS(mp, sp)) != 0) { 1038 return error; 1039 } 1040 1041 if (cwdi->cwdi_rdir == NULL) 1042 (void)memcpy(&mp->mnt_stat, sp, sizeof(mp->mnt_stat)); 1043done: 1044 if (cwdi->cwdi_rdir != NULL) { 1045 size_t len; 1046 char *bp; 1047 char c; 1048 char *path = PNBUF_GET(); 1049 1050 bp = path + MAXPATHLEN; 1051 *--bp = '\0'; 1052 rw_enter(&cwdi->cwdi_lock, RW_READER); 1053 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, path, 1054 MAXPATHLEN / 2, 0, l); 1055 rw_exit(&cwdi->cwdi_lock); 1056 if (error) { 1057 PNBUF_PUT(path); 1058 return error; 1059 } 1060 len = strlen(bp); 1061 if (len != 1) { 1062 /* 1063 * for mount points that are below our root, we can see 1064 * them, so we fix up the pathname and return them. The 1065 * rest we cannot see, so we don't allow viewing the 1066 * data. 1067 */ 1068 if (strncmp(bp, sp->f_mntonname, len) == 0 && 1069 ((c = sp->f_mntonname[len]) == '/' || c == '\0')) { 1070 (void)strlcpy(sp->f_mntonname, 1071 c == '\0' ? "/" : &sp->f_mntonname[len], 1072 sizeof(sp->f_mntonname)); 1073 } else { 1074 if (root) 1075 (void)strlcpy(sp->f_mntonname, "/", 1076 sizeof(sp->f_mntonname)); 1077 else 1078 error = EPERM; 1079 } 1080 } 1081 PNBUF_PUT(path); 1082 } 1083 sp->f_flag = mp->mnt_flag & MNT_VISFLAGMASK; 1084 return error; 1085} 1086 1087/* 1088 * Get filesystem statistics by path. 
1089 */ 1090int 1091do_sys_pstatvfs(struct lwp *l, const char *path, int flags, struct statvfs *sb) 1092{ 1093 struct mount *mp; 1094 int error; 1095 struct vnode *vp; 1096 1097 error = namei_simple_user(path, NSM_FOLLOW_TRYEMULROOT, &vp); 1098 if (error != 0) 1099 return error; 1100 mp = vp->v_mount; 1101 error = dostatvfs(mp, sb, l, flags, 1); 1102 vrele(vp); 1103 return error; 1104} 1105 1106/* ARGSUSED */ 1107int 1108sys_statvfs1(struct lwp *l, const struct sys_statvfs1_args *uap, register_t *retval) 1109{ 1110 /* { 1111 syscallarg(const char *) path; 1112 syscallarg(struct statvfs *) buf; 1113 syscallarg(int) flags; 1114 } */ 1115 struct statvfs *sb; 1116 int error; 1117 1118 sb = STATVFSBUF_GET(); 1119 error = do_sys_pstatvfs(l, SCARG(uap, path), SCARG(uap, flags), sb); 1120 if (error == 0) 1121 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1122 STATVFSBUF_PUT(sb); 1123 return error; 1124} 1125 1126/* 1127 * Get filesystem statistics by fd. 1128 */ 1129int 1130do_sys_fstatvfs(struct lwp *l, int fd, int flags, struct statvfs *sb) 1131{ 1132 file_t *fp; 1133 struct mount *mp; 1134 int error; 1135 1136 /* fd_getvnode() will use the descriptor for us */ 1137 if ((error = fd_getvnode(fd, &fp)) != 0) 1138 return (error); 1139 mp = ((struct vnode *)fp->f_data)->v_mount; 1140 error = dostatvfs(mp, sb, curlwp, flags, 1); 1141 fd_putfile(fd); 1142 return error; 1143} 1144 1145/* ARGSUSED */ 1146int 1147sys_fstatvfs1(struct lwp *l, const struct sys_fstatvfs1_args *uap, register_t *retval) 1148{ 1149 /* { 1150 syscallarg(int) fd; 1151 syscallarg(struct statvfs *) buf; 1152 syscallarg(int) flags; 1153 } */ 1154 struct statvfs *sb; 1155 int error; 1156 1157 sb = STATVFSBUF_GET(); 1158 error = do_sys_fstatvfs(l, SCARG(uap, fd), SCARG(uap, flags), sb); 1159 if (error == 0) 1160 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 1161 STATVFSBUF_PUT(sb); 1162 return error; 1163} 1164 1165 1166/* 1167 * Get statistics on all filesystems. 
1168 */ 1169int 1170do_sys_getvfsstat(struct lwp *l, void *sfsp, size_t bufsize, int flags, 1171 int (*copyfn)(const void *, void *, size_t), size_t entry_sz, 1172 register_t *retval) 1173{ 1174 int root = 0; 1175 struct proc *p = l->l_proc; 1176 struct mount *mp, *nmp; 1177 struct statvfs *sb; 1178 size_t count, maxcount; 1179 int error = 0; 1180 1181 sb = STATVFSBUF_GET(); 1182 maxcount = bufsize / entry_sz; 1183 mutex_enter(&mountlist_lock); 1184 count = 0; 1185 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1186 mp = nmp) { 1187 if (vfs_busy(mp, &nmp)) { 1188 continue; 1189 } 1190 if (sfsp && count < maxcount) { 1191 error = dostatvfs(mp, sb, l, flags, 0); 1192 if (error) { 1193 vfs_unbusy(mp, false, &nmp); 1194 error = 0; 1195 continue; 1196 } 1197 error = copyfn(sb, sfsp, entry_sz); 1198 if (error) { 1199 vfs_unbusy(mp, false, NULL); 1200 goto out; 1201 } 1202 sfsp = (char *)sfsp + entry_sz; 1203 root |= strcmp(sb->f_mntonname, "/") == 0; 1204 } 1205 count++; 1206 vfs_unbusy(mp, false, &nmp); 1207 } 1208 mutex_exit(&mountlist_lock); 1209 1210 if (root == 0 && p->p_cwdi->cwdi_rdir) { 1211 /* 1212 * fake a root entry 1213 */ 1214 error = dostatvfs(p->p_cwdi->cwdi_rdir->v_mount, 1215 sb, l, flags, 1); 1216 if (error != 0) 1217 goto out; 1218 if (sfsp) { 1219 error = copyfn(sb, sfsp, entry_sz); 1220 if (error != 0) 1221 goto out; 1222 } 1223 count++; 1224 } 1225 if (sfsp && count > maxcount) 1226 *retval = maxcount; 1227 else 1228 *retval = count; 1229out: 1230 STATVFSBUF_PUT(sb); 1231 return error; 1232} 1233 1234int 1235sys_getvfsstat(struct lwp *l, const struct sys_getvfsstat_args *uap, register_t *retval) 1236{ 1237 /* { 1238 syscallarg(struct statvfs *) buf; 1239 syscallarg(size_t) bufsize; 1240 syscallarg(int) flags; 1241 } */ 1242 1243 return do_sys_getvfsstat(l, SCARG(uap, buf), SCARG(uap, bufsize), 1244 SCARG(uap, flags), copyout, sizeof (struct statvfs), retval); 1245} 1246 1247/* 1248 * Change current working directory to a given file 
descriptor. 1249 */ 1250/* ARGSUSED */ 1251int 1252sys_fchdir(struct lwp *l, const struct sys_fchdir_args *uap, register_t *retval) 1253{ 1254 /* { 1255 syscallarg(int) fd; 1256 } */ 1257 struct proc *p = l->l_proc; 1258 struct cwdinfo *cwdi; 1259 struct vnode *vp, *tdp; 1260 struct mount *mp; 1261 file_t *fp; 1262 int error, fd; 1263 1264 /* fd_getvnode() will use the descriptor for us */ 1265 fd = SCARG(uap, fd); 1266 if ((error = fd_getvnode(fd, &fp)) != 0) 1267 return (error); 1268 vp = fp->f_data; 1269 1270 vref(vp); 1271 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1272 if (vp->v_type != VDIR) 1273 error = ENOTDIR; 1274 else 1275 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1276 if (error) { 1277 vput(vp); 1278 goto out; 1279 } 1280 while ((mp = vp->v_mountedhere) != NULL) { 1281 error = vfs_busy(mp, NULL); 1282 vput(vp); 1283 if (error != 0) 1284 goto out; 1285 error = VFS_ROOT(mp, &tdp); 1286 vfs_unbusy(mp, false, NULL); 1287 if (error) 1288 goto out; 1289 vp = tdp; 1290 } 1291 VOP_UNLOCK(vp); 1292 1293 /* 1294 * Disallow changing to a directory not under the process's 1295 * current root directory (if there is one). 1296 */ 1297 cwdi = p->p_cwdi; 1298 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1299 if (cwdi->cwdi_rdir && !vn_isunder(vp, NULL, l)) { 1300 vrele(vp); 1301 error = EPERM; /* operation not permitted */ 1302 } else { 1303 vrele(cwdi->cwdi_cdir); 1304 cwdi->cwdi_cdir = vp; 1305 } 1306 rw_exit(&cwdi->cwdi_lock); 1307 1308 out: 1309 fd_putfile(fd); 1310 return (error); 1311} 1312 1313/* 1314 * Change this process's notion of the root directory to a given file 1315 * descriptor. 
1316 */ 1317int 1318sys_fchroot(struct lwp *l, const struct sys_fchroot_args *uap, register_t *retval) 1319{ 1320 struct proc *p = l->l_proc; 1321 struct vnode *vp; 1322 file_t *fp; 1323 int error, fd = SCARG(uap, fd); 1324 1325 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1326 KAUTH_REQ_SYSTEM_CHROOT_FCHROOT, NULL, NULL, NULL)) != 0) 1327 return error; 1328 /* fd_getvnode() will use the descriptor for us */ 1329 if ((error = fd_getvnode(fd, &fp)) != 0) 1330 return error; 1331 vp = fp->f_data; 1332 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1333 if (vp->v_type != VDIR) 1334 error = ENOTDIR; 1335 else 1336 error = VOP_ACCESS(vp, VEXEC, l->l_cred); 1337 VOP_UNLOCK(vp); 1338 if (error) 1339 goto out; 1340 vref(vp); 1341 1342 change_root(p->p_cwdi, vp, l); 1343 1344 out: 1345 fd_putfile(fd); 1346 return (error); 1347} 1348 1349/* 1350 * Change current working directory (``.''). 1351 */ 1352/* ARGSUSED */ 1353int 1354sys_chdir(struct lwp *l, const struct sys_chdir_args *uap, register_t *retval) 1355{ 1356 /* { 1357 syscallarg(const char *) path; 1358 } */ 1359 struct proc *p = l->l_proc; 1360 struct cwdinfo *cwdi; 1361 int error; 1362 struct vnode *vp; 1363 1364 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1365 &vp, l)) != 0) 1366 return (error); 1367 cwdi = p->p_cwdi; 1368 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1369 vrele(cwdi->cwdi_cdir); 1370 cwdi->cwdi_cdir = vp; 1371 rw_exit(&cwdi->cwdi_lock); 1372 return (0); 1373} 1374 1375/* 1376 * Change notion of root (``/'') directory. 
1377 */ 1378/* ARGSUSED */ 1379int 1380sys_chroot(struct lwp *l, const struct sys_chroot_args *uap, register_t *retval) 1381{ 1382 /* { 1383 syscallarg(const char *) path; 1384 } */ 1385 struct proc *p = l->l_proc; 1386 int error; 1387 struct vnode *vp; 1388 1389 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_CHROOT, 1390 KAUTH_REQ_SYSTEM_CHROOT_CHROOT, NULL, NULL, NULL)) != 0) 1391 return (error); 1392 if ((error = chdir_lookup(SCARG(uap, path), UIO_USERSPACE, 1393 &vp, l)) != 0) 1394 return (error); 1395 1396 change_root(p->p_cwdi, vp, l); 1397 1398 return (0); 1399} 1400 1401/* 1402 * Common routine for chroot and fchroot. 1403 * NB: callers need to properly authorize the change root operation. 1404 */ 1405void 1406change_root(struct cwdinfo *cwdi, struct vnode *vp, struct lwp *l) 1407{ 1408 1409 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 1410 if (cwdi->cwdi_rdir != NULL) 1411 vrele(cwdi->cwdi_rdir); 1412 cwdi->cwdi_rdir = vp; 1413 1414 /* 1415 * Prevent escaping from chroot by putting the root under 1416 * the working directory. Silently chdir to / if we aren't 1417 * already there. 1418 */ 1419 if (!vn_isunder(cwdi->cwdi_cdir, vp, l)) { 1420 /* 1421 * XXX would be more failsafe to change directory to a 1422 * deadfs node here instead 1423 */ 1424 vrele(cwdi->cwdi_cdir); 1425 vref(vp); 1426 cwdi->cwdi_cdir = vp; 1427 } 1428 rw_exit(&cwdi->cwdi_lock); 1429} 1430 1431/* 1432 * Common routine for chroot and chdir. 
1433 * XXX "where" should be enum uio_seg 1434 */ 1435int 1436chdir_lookup(const char *path, int where, struct vnode **vpp, struct lwp *l) 1437{ 1438 struct pathbuf *pb; 1439 struct nameidata nd; 1440 int error; 1441 1442 error = pathbuf_maybe_copyin(path, where, &pb); 1443 if (error) { 1444 return error; 1445 } 1446 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1447 if ((error = namei(&nd)) != 0) { 1448 pathbuf_destroy(pb); 1449 return error; 1450 } 1451 *vpp = nd.ni_vp; 1452 pathbuf_destroy(pb); 1453 1454 if ((*vpp)->v_type != VDIR) 1455 error = ENOTDIR; 1456 else 1457 error = VOP_ACCESS(*vpp, VEXEC, l->l_cred); 1458 1459 if (error) 1460 vput(*vpp); 1461 else 1462 VOP_UNLOCK(*vpp); 1463 return (error); 1464} 1465 1466/* 1467 * Internals of sys_open - path has already been converted into a pathbuf 1468 * (so we can easily reuse this function from other parts of the kernel, 1469 * like posix_spawn post-processing). 1470 */ 1471static int 1472do_open(lwp_t *l, struct pathbuf *pb, int open_flags, int open_mode, int *fd) 1473{ 1474 struct proc *p = l->l_proc; 1475 struct cwdinfo *cwdi = p->p_cwdi; 1476 file_t *fp; 1477 struct vnode *vp; 1478 int flags, cmode; 1479 int indx, error; 1480 struct nameidata nd; 1481 1482 flags = FFLAGS(open_flags); 1483 if ((flags & (FREAD | FWRITE)) == 0) 1484 return EINVAL; 1485 1486 if ((error = fd_allocfile(&fp, &indx)) != 0) { 1487 return error; 1488 } 1489 1490 /* We're going to read cwdi->cwdi_cmask unlocked here. 
*/ 1491 cmode = ((open_mode &~ cwdi->cwdi_cmask) & ALLPERMS) &~ S_ISTXT; 1492 NDINIT(&nd, LOOKUP, FOLLOW | TRYEMULROOT, pb); 1493 l->l_dupfd = -indx - 1; /* XXX check for fdopen */ 1494 if ((error = vn_open(&nd, flags, cmode)) != 0) { 1495 fd_abort(p, fp, indx); 1496 if ((error == EDUPFD || error == EMOVEFD) && 1497 l->l_dupfd >= 0 && /* XXX from fdopen */ 1498 (error = 1499 fd_dupopen(l->l_dupfd, &indx, flags, error)) == 0) { 1500 *fd = indx; 1501 return 0; 1502 } 1503 if (error == ERESTART) 1504 error = EINTR; 1505 return error; 1506 } 1507 1508 l->l_dupfd = 0; 1509 vp = nd.ni_vp; 1510 1511 if ((error = open_setfp(l, fp, vp, indx, flags))) 1512 return error; 1513 1514 VOP_UNLOCK(vp); 1515 *fd = indx; 1516 fd_affix(p, fp, indx); 1517 return 0; 1518} 1519 1520int 1521fd_open(const char *path, int open_flags, int open_mode, int *fd) 1522{ 1523 struct pathbuf *pb; 1524 int error, oflags; 1525 1526 oflags = FFLAGS(open_flags); 1527 if ((oflags & (FREAD | FWRITE)) == 0) 1528 return EINVAL; 1529 1530 pb = pathbuf_create(path); 1531 if (pb == NULL) 1532 return ENOMEM; 1533 1534 error = do_open(curlwp, pb, open_flags, open_mode, fd); 1535 pathbuf_destroy(pb); 1536 1537 return error; 1538} 1539 1540/* 1541 * Check permissions, allocate an open file structure, 1542 * and call the device open routine if any. 
1543 */ 1544int 1545sys_open(struct lwp *l, const struct sys_open_args *uap, register_t *retval) 1546{ 1547 /* { 1548 syscallarg(const char *) path; 1549 syscallarg(int) flags; 1550 syscallarg(int) mode; 1551 } */ 1552 struct pathbuf *pb; 1553 int result, flags, error; 1554 1555 flags = FFLAGS(SCARG(uap, flags)); 1556 if ((flags & (FREAD | FWRITE)) == 0) 1557 return EINVAL; 1558 1559 error = pathbuf_copyin(SCARG(uap, path), &pb); 1560 if (error) 1561 return error; 1562 1563 error = do_open(l, pb, SCARG(uap, flags), SCARG(uap, mode), &result); 1564 pathbuf_destroy(pb); 1565 1566 *retval = result; 1567 return error; 1568} 1569 1570int 1571sys_openat(struct lwp *l, const struct sys_openat_args *uap, register_t *retval) 1572{ 1573 /* { 1574 syscallarg(int) fd; 1575 syscallarg(const char *) path; 1576 syscallarg(int) flags; 1577 syscallarg(int) mode; 1578 } */ 1579 1580 return ENOSYS; 1581} 1582 1583static void 1584vfs__fhfree(fhandle_t *fhp) 1585{ 1586 size_t fhsize; 1587 1588 if (fhp == NULL) { 1589 return; 1590 } 1591 fhsize = FHANDLE_SIZE(fhp); 1592 kmem_free(fhp, fhsize); 1593} 1594 1595/* 1596 * vfs_composefh: compose a filehandle. 
1597 */ 1598 1599int 1600vfs_composefh(struct vnode *vp, fhandle_t *fhp, size_t *fh_size) 1601{ 1602 struct mount *mp; 1603 struct fid *fidp; 1604 int error; 1605 size_t needfhsize; 1606 size_t fidsize; 1607 1608 mp = vp->v_mount; 1609 fidp = NULL; 1610 if (*fh_size < FHANDLE_SIZE_MIN) { 1611 fidsize = 0; 1612 } else { 1613 fidsize = *fh_size - offsetof(fhandle_t, fh_fid); 1614 if (fhp != NULL) { 1615 memset(fhp, 0, *fh_size); 1616 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1617 fidp = &fhp->fh_fid; 1618 } 1619 } 1620 error = VFS_VPTOFH(vp, fidp, &fidsize); 1621 needfhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1622 if (error == 0 && *fh_size < needfhsize) { 1623 error = E2BIG; 1624 } 1625 *fh_size = needfhsize; 1626 return error; 1627} 1628 1629int 1630vfs_composefh_alloc(struct vnode *vp, fhandle_t **fhpp) 1631{ 1632 struct mount *mp; 1633 fhandle_t *fhp; 1634 size_t fhsize; 1635 size_t fidsize; 1636 int error; 1637 1638 *fhpp = NULL; 1639 mp = vp->v_mount; 1640 fidsize = 0; 1641 error = VFS_VPTOFH(vp, NULL, &fidsize); 1642 KASSERT(error != 0); 1643 if (error != E2BIG) { 1644 goto out; 1645 } 1646 fhsize = FHANDLE_SIZE_FROM_FILEID_SIZE(fidsize); 1647 fhp = kmem_zalloc(fhsize, KM_SLEEP); 1648 if (fhp == NULL) { 1649 error = ENOMEM; 1650 goto out; 1651 } 1652 fhp->fh_fsid = mp->mnt_stat.f_fsidx; 1653 error = VFS_VPTOFH(vp, &fhp->fh_fid, &fidsize); 1654 if (error == 0) { 1655 KASSERT((FHANDLE_SIZE(fhp) == fhsize && 1656 FHANDLE_FILEID(fhp)->fid_len == fidsize)); 1657 *fhpp = fhp; 1658 } else { 1659 kmem_free(fhp, fhsize); 1660 } 1661out: 1662 return error; 1663} 1664 1665void 1666vfs_composefh_free(fhandle_t *fhp) 1667{ 1668 1669 vfs__fhfree(fhp); 1670} 1671 1672/* 1673 * vfs_fhtovp: lookup a vnode by a filehandle. 
1674 */ 1675 1676int 1677vfs_fhtovp(fhandle_t *fhp, struct vnode **vpp) 1678{ 1679 struct mount *mp; 1680 int error; 1681 1682 *vpp = NULL; 1683 mp = vfs_getvfs(FHANDLE_FSID(fhp)); 1684 if (mp == NULL) { 1685 error = ESTALE; 1686 goto out; 1687 } 1688 if (mp->mnt_op->vfs_fhtovp == NULL) { 1689 error = EOPNOTSUPP; 1690 goto out; 1691 } 1692 error = VFS_FHTOVP(mp, FHANDLE_FILEID(fhp), vpp); 1693out: 1694 return error; 1695} 1696 1697/* 1698 * vfs_copyinfh_alloc: allocate and copyin a filehandle, given 1699 * the needed size. 1700 */ 1701 1702int 1703vfs_copyinfh_alloc(const void *ufhp, size_t fhsize, fhandle_t **fhpp) 1704{ 1705 fhandle_t *fhp; 1706 int error; 1707 1708 *fhpp = NULL; 1709 if (fhsize > FHANDLE_SIZE_MAX) { 1710 return EINVAL; 1711 } 1712 if (fhsize < FHANDLE_SIZE_MIN) { 1713 return EINVAL; 1714 } 1715again: 1716 fhp = kmem_alloc(fhsize, KM_SLEEP); 1717 if (fhp == NULL) { 1718 return ENOMEM; 1719 } 1720 error = copyin(ufhp, fhp, fhsize); 1721 if (error == 0) { 1722 /* XXX this check shouldn't be here */ 1723 if (FHANDLE_SIZE(fhp) == fhsize) { 1724 *fhpp = fhp; 1725 return 0; 1726 } else if (fhsize == NFSX_V2FH && FHANDLE_SIZE(fhp) < fhsize) { 1727 /* 1728 * a kludge for nfsv2 padded handles. 1729 */ 1730 size_t sz; 1731 1732 sz = FHANDLE_SIZE(fhp); 1733 kmem_free(fhp, fhsize); 1734 fhsize = sz; 1735 goto again; 1736 } else { 1737 /* 1738 * userland told us wrong size. 
1739 */ 1740 error = EINVAL; 1741 } 1742 } 1743 kmem_free(fhp, fhsize); 1744 return error; 1745} 1746 1747void 1748vfs_copyinfh_free(fhandle_t *fhp) 1749{ 1750 1751 vfs__fhfree(fhp); 1752} 1753 1754/* 1755 * Get file handle system call 1756 */ 1757int 1758sys___getfh30(struct lwp *l, const struct sys___getfh30_args *uap, register_t *retval) 1759{ 1760 /* { 1761 syscallarg(char *) fname; 1762 syscallarg(fhandle_t *) fhp; 1763 syscallarg(size_t *) fh_size; 1764 } */ 1765 struct vnode *vp; 1766 fhandle_t *fh; 1767 int error; 1768 struct pathbuf *pb; 1769 struct nameidata nd; 1770 size_t sz; 1771 size_t usz; 1772 1773 /* 1774 * Must be super user 1775 */ 1776 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1777 0, NULL, NULL, NULL); 1778 if (error) 1779 return (error); 1780 1781 error = pathbuf_copyin(SCARG(uap, fname), &pb); 1782 if (error) { 1783 return error; 1784 } 1785 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 1786 error = namei(&nd); 1787 if (error) { 1788 pathbuf_destroy(pb); 1789 return error; 1790 } 1791 vp = nd.ni_vp; 1792 pathbuf_destroy(pb); 1793 1794 error = vfs_composefh_alloc(vp, &fh); 1795 vput(vp); 1796 if (error != 0) { 1797 goto out; 1798 } 1799 error = copyin(SCARG(uap, fh_size), &usz, sizeof(size_t)); 1800 if (error != 0) { 1801 goto out; 1802 } 1803 sz = FHANDLE_SIZE(fh); 1804 error = copyout(&sz, SCARG(uap, fh_size), sizeof(size_t)); 1805 if (error != 0) { 1806 goto out; 1807 } 1808 if (usz >= sz) { 1809 error = copyout(fh, SCARG(uap, fhp), sz); 1810 } else { 1811 error = E2BIG; 1812 } 1813out: 1814 vfs_composefh_free(fh); 1815 return (error); 1816} 1817 1818/* 1819 * Open a file given a file handle. 1820 * 1821 * Check permissions, allocate an open file structure, 1822 * and call the device open routine if any. 
1823 */ 1824 1825int 1826dofhopen(struct lwp *l, const void *ufhp, size_t fhsize, int oflags, 1827 register_t *retval) 1828{ 1829 file_t *fp; 1830 struct vnode *vp = NULL; 1831 kauth_cred_t cred = l->l_cred; 1832 file_t *nfp; 1833 int indx, error = 0; 1834 struct vattr va; 1835 fhandle_t *fh; 1836 int flags; 1837 proc_t *p; 1838 1839 p = curproc; 1840 1841 /* 1842 * Must be super user 1843 */ 1844 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1845 0, NULL, NULL, NULL))) 1846 return (error); 1847 1848 flags = FFLAGS(oflags); 1849 if ((flags & (FREAD | FWRITE)) == 0) 1850 return (EINVAL); 1851 if ((flags & O_CREAT)) 1852 return (EINVAL); 1853 if ((error = fd_allocfile(&nfp, &indx)) != 0) 1854 return (error); 1855 fp = nfp; 1856 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1857 if (error != 0) { 1858 goto bad; 1859 } 1860 error = vfs_fhtovp(fh, &vp); 1861 if (error != 0) { 1862 goto bad; 1863 } 1864 1865 /* Now do an effective vn_open */ 1866 1867 if (vp->v_type == VSOCK) { 1868 error = EOPNOTSUPP; 1869 goto bad; 1870 } 1871 error = vn_openchk(vp, cred, flags); 1872 if (error != 0) 1873 goto bad; 1874 if (flags & O_TRUNC) { 1875 VOP_UNLOCK(vp); /* XXX */ 1876 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 1877 vattr_null(&va); 1878 va.va_size = 0; 1879 error = VOP_SETATTR(vp, &va, cred); 1880 if (error) 1881 goto bad; 1882 } 1883 if ((error = VOP_OPEN(vp, flags, cred)) != 0) 1884 goto bad; 1885 if (flags & FWRITE) { 1886 mutex_enter(vp->v_interlock); 1887 vp->v_writecount++; 1888 mutex_exit(vp->v_interlock); 1889 } 1890 1891 /* done with modified vn_open, now finish what sys_open does. 
*/ 1892 if ((error = open_setfp(l, fp, vp, indx, flags))) 1893 return error; 1894 1895 VOP_UNLOCK(vp); 1896 *retval = indx; 1897 fd_affix(p, fp, indx); 1898 vfs_copyinfh_free(fh); 1899 return (0); 1900 1901bad: 1902 fd_abort(p, fp, indx); 1903 if (vp != NULL) 1904 vput(vp); 1905 vfs_copyinfh_free(fh); 1906 return (error); 1907} 1908 1909int 1910sys___fhopen40(struct lwp *l, const struct sys___fhopen40_args *uap, register_t *retval) 1911{ 1912 /* { 1913 syscallarg(const void *) fhp; 1914 syscallarg(size_t) fh_size; 1915 syscallarg(int) flags; 1916 } */ 1917 1918 return dofhopen(l, SCARG(uap, fhp), SCARG(uap, fh_size), 1919 SCARG(uap, flags), retval); 1920} 1921 1922int 1923do_fhstat(struct lwp *l, const void *ufhp, size_t fhsize, struct stat *sb) 1924{ 1925 int error; 1926 fhandle_t *fh; 1927 struct vnode *vp; 1928 1929 /* 1930 * Must be super user 1931 */ 1932 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1933 0, NULL, NULL, NULL))) 1934 return (error); 1935 1936 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1937 if (error != 0) 1938 return error; 1939 1940 error = vfs_fhtovp(fh, &vp); 1941 vfs_copyinfh_free(fh); 1942 if (error != 0) 1943 return error; 1944 1945 error = vn_stat(vp, sb); 1946 vput(vp); 1947 return error; 1948} 1949 1950 1951/* ARGSUSED */ 1952int 1953sys___fhstat50(struct lwp *l, const struct sys___fhstat50_args *uap, register_t *retval) 1954{ 1955 /* { 1956 syscallarg(const void *) fhp; 1957 syscallarg(size_t) fh_size; 1958 syscallarg(struct stat *) sb; 1959 } */ 1960 struct stat sb; 1961 int error; 1962 1963 error = do_fhstat(l, SCARG(uap, fhp), SCARG(uap, fh_size), &sb); 1964 if (error) 1965 return error; 1966 return copyout(&sb, SCARG(uap, sb), sizeof(sb)); 1967} 1968 1969int 1970do_fhstatvfs(struct lwp *l, const void *ufhp, size_t fhsize, struct statvfs *sb, 1971 int flags) 1972{ 1973 fhandle_t *fh; 1974 struct mount *mp; 1975 struct vnode *vp; 1976 int error; 1977 1978 /* 1979 * Must be super user 1980 */ 1981 if 
((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FILEHANDLE, 1982 0, NULL, NULL, NULL))) 1983 return error; 1984 1985 error = vfs_copyinfh_alloc(ufhp, fhsize, &fh); 1986 if (error != 0) 1987 return error; 1988 1989 error = vfs_fhtovp(fh, &vp); 1990 vfs_copyinfh_free(fh); 1991 if (error != 0) 1992 return error; 1993 1994 mp = vp->v_mount; 1995 error = dostatvfs(mp, sb, l, flags, 1); 1996 vput(vp); 1997 return error; 1998} 1999 2000/* ARGSUSED */ 2001int 2002sys___fhstatvfs140(struct lwp *l, const struct sys___fhstatvfs140_args *uap, register_t *retval) 2003{ 2004 /* { 2005 syscallarg(const void *) fhp; 2006 syscallarg(size_t) fh_size; 2007 syscallarg(struct statvfs *) buf; 2008 syscallarg(int) flags; 2009 } */ 2010 struct statvfs *sb = STATVFSBUF_GET(); 2011 int error; 2012 2013 error = do_fhstatvfs(l, SCARG(uap, fhp), SCARG(uap, fh_size), sb, 2014 SCARG(uap, flags)); 2015 if (error == 0) 2016 error = copyout(sb, SCARG(uap, buf), sizeof(*sb)); 2017 STATVFSBUF_PUT(sb); 2018 return error; 2019} 2020 2021/* 2022 * Create a special file. 
2023 */ 2024/* ARGSUSED */ 2025int 2026sys___mknod50(struct lwp *l, const struct sys___mknod50_args *uap, 2027 register_t *retval) 2028{ 2029 /* { 2030 syscallarg(const char *) path; 2031 syscallarg(mode_t) mode; 2032 syscallarg(dev_t) dev; 2033 } */ 2034 return do_sys_mknod(l, SCARG(uap, path), SCARG(uap, mode), 2035 SCARG(uap, dev), retval, UIO_USERSPACE); 2036} 2037 2038int 2039sys_mknodat(struct lwp *l, const struct sys_mknodat_args *uap, 2040 register_t *retval) 2041{ 2042 /* { 2043 syscallarg(int) fd; 2044 syscallarg(const char *) path; 2045 syscallarg(mode_t) mode; 2046 syscallarg(uint32_t) dev; 2047 } */ 2048 2049 return ENOSYS; 2050} 2051 2052int 2053do_sys_mknod(struct lwp *l, const char *pathname, mode_t mode, dev_t dev, 2054 register_t *retval, enum uio_seg seg) 2055{ 2056 struct proc *p = l->l_proc; 2057 struct vnode *vp; 2058 struct vattr vattr; 2059 int error, optype; 2060 struct pathbuf *pb; 2061 struct nameidata nd; 2062 const char *pathstring; 2063 2064 if ((error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MKNOD, 2065 0, NULL, NULL, NULL)) != 0) 2066 return (error); 2067 2068 optype = VOP_MKNOD_DESCOFFSET; 2069 2070 error = pathbuf_maybe_copyin(pathname, seg, &pb); 2071 if (error) { 2072 return error; 2073 } 2074 pathstring = pathbuf_stringcopy_get(pb); 2075 if (pathstring == NULL) { 2076 pathbuf_destroy(pb); 2077 return ENOMEM; 2078 } 2079 2080 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2081 if ((error = namei(&nd)) != 0) 2082 goto out; 2083 vp = nd.ni_vp; 2084 2085 if (vp != NULL) 2086 error = EEXIST; 2087 else { 2088 vattr_null(&vattr); 2089 /* We will read cwdi->cwdi_cmask unlocked. 
*/ 2090 vattr.va_mode = (mode & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2091 vattr.va_rdev = dev; 2092 2093 switch (mode & S_IFMT) { 2094 case S_IFMT: /* used by badsect to flag bad sectors */ 2095 vattr.va_type = VBAD; 2096 break; 2097 case S_IFCHR: 2098 vattr.va_type = VCHR; 2099 break; 2100 case S_IFBLK: 2101 vattr.va_type = VBLK; 2102 break; 2103 case S_IFWHT: 2104 optype = VOP_WHITEOUT_DESCOFFSET; 2105 break; 2106 case S_IFREG: 2107#if NVERIEXEC > 0 2108 error = veriexec_openchk(l, nd.ni_vp, pathstring, 2109 O_CREAT); 2110#endif /* NVERIEXEC > 0 */ 2111 vattr.va_type = VREG; 2112 vattr.va_rdev = VNOVAL; 2113 optype = VOP_CREATE_DESCOFFSET; 2114 break; 2115 default: 2116 error = EINVAL; 2117 break; 2118 } 2119 } 2120 if (error == 0 && optype == VOP_MKNOD_DESCOFFSET 2121 && vattr.va_rdev == VNOVAL) 2122 error = EINVAL; 2123 if (!error) { 2124 switch (optype) { 2125 case VOP_WHITEOUT_DESCOFFSET: 2126 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 2127 if (error) 2128 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2129 vput(nd.ni_dvp); 2130 break; 2131 2132 case VOP_MKNOD_DESCOFFSET: 2133 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 2134 &nd.ni_cnd, &vattr); 2135 if (error == 0) 2136 vput(nd.ni_vp); 2137 break; 2138 2139 case VOP_CREATE_DESCOFFSET: 2140 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, 2141 &nd.ni_cnd, &vattr); 2142 if (error == 0) 2143 vput(nd.ni_vp); 2144 break; 2145 } 2146 } else { 2147 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2148 if (nd.ni_dvp == vp) 2149 vrele(nd.ni_dvp); 2150 else 2151 vput(nd.ni_dvp); 2152 if (vp) 2153 vrele(vp); 2154 } 2155out: 2156 pathbuf_stringcopy_put(pb, pathstring); 2157 pathbuf_destroy(pb); 2158 return (error); 2159} 2160 2161/* 2162 * Create a named pipe. 
2163 */ 2164/* ARGSUSED */ 2165int 2166sys_mkfifo(struct lwp *l, const struct sys_mkfifo_args *uap, register_t *retval) 2167{ 2168 /* { 2169 syscallarg(const char *) path; 2170 syscallarg(int) mode; 2171 } */ 2172 struct proc *p = l->l_proc; 2173 struct vattr vattr; 2174 int error; 2175 struct pathbuf *pb; 2176 struct nameidata nd; 2177 2178 error = pathbuf_copyin(SCARG(uap, path), &pb); 2179 if (error) { 2180 return error; 2181 } 2182 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, pb); 2183 if ((error = namei(&nd)) != 0) { 2184 pathbuf_destroy(pb); 2185 return error; 2186 } 2187 if (nd.ni_vp != NULL) { 2188 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2189 if (nd.ni_dvp == nd.ni_vp) 2190 vrele(nd.ni_dvp); 2191 else 2192 vput(nd.ni_dvp); 2193 vrele(nd.ni_vp); 2194 pathbuf_destroy(pb); 2195 return (EEXIST); 2196 } 2197 vattr_null(&vattr); 2198 vattr.va_type = VFIFO; 2199 /* We will read cwdi->cwdi_cmask unlocked. */ 2200 vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_cwdi->cwdi_cmask; 2201 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 2202 if (error == 0) 2203 vput(nd.ni_vp); 2204 pathbuf_destroy(pb); 2205 return (error); 2206} 2207 2208int 2209sys_mkfifoat(struct lwp *l, const struct sys_mkfifoat_args *uap, 2210 register_t *retval) 2211{ 2212 /* { 2213 syscallarg(int) fd; 2214 syscallarg(const char *) path; 2215 syscallarg(int) mode; 2216 } */ 2217 2218 return ENOSYS; 2219} 2220/* 2221 * Make a hard file link. 
2222 */ 2223/* ARGSUSED */ 2224static int 2225do_sys_link(struct lwp *l, const char *path, const char *link, 2226 int follow, register_t *retval) 2227{ 2228 struct vnode *vp; 2229 struct pathbuf *linkpb; 2230 struct nameidata nd; 2231 namei_simple_flags_t namei_simple_flags; 2232 int error; 2233 2234 if (follow) 2235 namei_simple_flags = NSM_FOLLOW_TRYEMULROOT; 2236 else 2237 namei_simple_flags = NSM_NOFOLLOW_TRYEMULROOT; 2238 2239 error = namei_simple_user(path, namei_simple_flags, &vp); 2240 if (error != 0) 2241 return (error); 2242 error = pathbuf_copyin(link, &linkpb); 2243 if (error) { 2244 goto out1; 2245 } 2246 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2247 if ((error = namei(&nd)) != 0) 2248 goto out2; 2249 if (nd.ni_vp) { 2250 error = EEXIST; 2251 goto abortop; 2252 } 2253 /* Prevent hard links on directories. */ 2254 if (vp->v_type == VDIR) { 2255 error = EPERM; 2256 goto abortop; 2257 } 2258 /* Prevent cross-mount operation. */ 2259 if (nd.ni_dvp->v_mount != vp->v_mount) { 2260 error = EXDEV; 2261 goto abortop; 2262 } 2263 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 2264out2: 2265 pathbuf_destroy(linkpb); 2266out1: 2267 vrele(vp); 2268 return (error); 2269abortop: 2270 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2271 if (nd.ni_dvp == nd.ni_vp) 2272 vrele(nd.ni_dvp); 2273 else 2274 vput(nd.ni_dvp); 2275 if (nd.ni_vp != NULL) 2276 vrele(nd.ni_vp); 2277 goto out2; 2278} 2279 2280int 2281sys_link(struct lwp *l, const struct sys_link_args *uap, register_t *retval) 2282{ 2283 /* { 2284 syscallarg(const char *) path; 2285 syscallarg(const char *) link; 2286 } */ 2287 const char *path = SCARG(uap, path); 2288 const char *link = SCARG(uap, link); 2289 2290 return do_sys_link(l, path, link, 1, retval); 2291} 2292 2293int 2294sys_linkat(struct lwp *l, const struct sys_linkat_args *uap, 2295 register_t *retval) 2296{ 2297 /* { 2298 syscallarg(int) fd1; 2299 syscallarg(const char *) name1; 2300 syscallarg(int) fd2; 2301 syscallarg(const char *) name2; 2302 
syscallarg(int) flags; 2303 } */ 2304 const char *name1 = SCARG(uap, name1); 2305 const char *name2 = SCARG(uap, name2); 2306 int follow; 2307 2308 /* 2309 * Specified fd1 and fd2 are not yet implemented 2310 */ 2311 if ((SCARG(uap, fd1) != AT_FDCWD) || (SCARG(uap, fd2) != AT_FDCWD)) 2312 return ENOSYS; 2313 2314 follow = SCARG(uap, flags) & AT_SYMLINK_FOLLOW; 2315 2316 return do_sys_link(l, name1, name2, follow, retval); 2317} 2318 2319 2320int 2321do_sys_symlink(const char *patharg, const char *link, enum uio_seg seg) 2322{ 2323 struct proc *p = curproc; 2324 struct vattr vattr; 2325 char *path; 2326 int error; 2327 struct pathbuf *linkpb; 2328 struct nameidata nd; 2329 2330 path = PNBUF_GET(); 2331 if (seg == UIO_USERSPACE) { 2332 if ((error = copyinstr(patharg, path, MAXPATHLEN, NULL)) != 0) 2333 goto out1; 2334 if ((error = pathbuf_copyin(link, &linkpb)) != 0) 2335 goto out1; 2336 } else { 2337 KASSERT(strlen(patharg) < MAXPATHLEN); 2338 strcpy(path, patharg); 2339 linkpb = pathbuf_create(link); 2340 if (linkpb == NULL) { 2341 error = ENOMEM; 2342 goto out1; 2343 } 2344 } 2345 ktrkuser("symlink-target", path, strlen(path)); 2346 2347 NDINIT(&nd, CREATE, LOCKPARENT | TRYEMULROOT, linkpb); 2348 if ((error = namei(&nd)) != 0) 2349 goto out2; 2350 if (nd.ni_vp) { 2351 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2352 if (nd.ni_dvp == nd.ni_vp) 2353 vrele(nd.ni_dvp); 2354 else 2355 vput(nd.ni_dvp); 2356 vrele(nd.ni_vp); 2357 error = EEXIST; 2358 goto out2; 2359 } 2360 vattr_null(&vattr); 2361 vattr.va_type = VLNK; 2362 /* We will read cwdi->cwdi_cmask unlocked. */ 2363 vattr.va_mode = ACCESSPERMS &~ p->p_cwdi->cwdi_cmask; 2364 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); 2365 if (error == 0) 2366 vput(nd.ni_vp); 2367out2: 2368 pathbuf_destroy(linkpb); 2369out1: 2370 PNBUF_PUT(path); 2371 return (error); 2372} 2373 2374/* 2375 * Make a symbolic link. 
2376 */ 2377/* ARGSUSED */ 2378int 2379sys_symlink(struct lwp *l, const struct sys_symlink_args *uap, register_t *retval) 2380{ 2381 /* { 2382 syscallarg(const char *) path; 2383 syscallarg(const char *) link; 2384 } */ 2385 2386 return do_sys_symlink(SCARG(uap, path), SCARG(uap, link), 2387 UIO_USERSPACE); 2388} 2389 2390int 2391sys_symlinkat(struct lwp *l, const struct sys_symlinkat_args *uap, 2392 register_t *retval) 2393{ 2394 /* { 2395 syscallarg(int) fd; 2396 syscallarg(const char *) path; 2397 syscallarg(const char *) link; 2398 } */ 2399 2400 return ENOSYS; 2401} 2402 2403/* 2404 * Delete a whiteout from the filesystem. 2405 */ 2406/* ARGSUSED */ 2407int 2408sys_undelete(struct lwp *l, const struct sys_undelete_args *uap, register_t *retval) 2409{ 2410 /* { 2411 syscallarg(const char *) path; 2412 } */ 2413 int error; 2414 struct pathbuf *pb; 2415 struct nameidata nd; 2416 2417 error = pathbuf_copyin(SCARG(uap, path), &pb); 2418 if (error) { 2419 return error; 2420 } 2421 2422 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | TRYEMULROOT, pb); 2423 error = namei(&nd); 2424 if (error) { 2425 pathbuf_destroy(pb); 2426 return (error); 2427 } 2428 2429 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 2430 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2431 if (nd.ni_dvp == nd.ni_vp) 2432 vrele(nd.ni_dvp); 2433 else 2434 vput(nd.ni_dvp); 2435 if (nd.ni_vp) 2436 vrele(nd.ni_vp); 2437 pathbuf_destroy(pb); 2438 return (EEXIST); 2439 } 2440 if ((error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) != 0) 2441 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2442 vput(nd.ni_dvp); 2443 pathbuf_destroy(pb); 2444 return (error); 2445} 2446 2447/* 2448 * Delete a name from the filesystem. 
2449 */ 2450/* ARGSUSED */ 2451int 2452sys_unlink(struct lwp *l, const struct sys_unlink_args *uap, register_t *retval) 2453{ 2454 /* { 2455 syscallarg(const char *) path; 2456 } */ 2457 2458 return do_sys_unlink(SCARG(uap, path), UIO_USERSPACE); 2459} 2460 2461int 2462sys_unlinkat(struct lwp *l, const struct sys_unlinkat_args *uap, 2463 register_t *retval) 2464{ 2465 /* { 2466 syscallarg(int) fd; 2467 syscallarg(const char *) path; 2468 } */ 2469 2470 return ENOSYS; 2471} 2472 2473int 2474do_sys_unlink(const char *arg, enum uio_seg seg) 2475{ 2476 struct vnode *vp; 2477 int error; 2478 struct pathbuf *pb; 2479 struct nameidata nd; 2480 const char *pathstring; 2481 2482 error = pathbuf_maybe_copyin(arg, seg, &pb); 2483 if (error) { 2484 return error; 2485 } 2486 pathstring = pathbuf_stringcopy_get(pb); 2487 if (pathstring == NULL) { 2488 pathbuf_destroy(pb); 2489 return ENOMEM; 2490 } 2491 2492 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 2493 if ((error = namei(&nd)) != 0) 2494 goto out; 2495 vp = nd.ni_vp; 2496 2497 /* 2498 * The root of a mounted filesystem cannot be deleted. 2499 */ 2500 if (vp->v_vflag & VV_ROOT) { 2501 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2502 if (nd.ni_dvp == vp) 2503 vrele(nd.ni_dvp); 2504 else 2505 vput(nd.ni_dvp); 2506 vput(vp); 2507 error = EBUSY; 2508 goto out; 2509 } 2510 2511#if NVERIEXEC > 0 2512 /* Handle remove requests for veriexec entries. */ 2513 if ((error = veriexec_removechk(curlwp, nd.ni_vp, pathstring)) != 0) { 2514 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 2515 if (nd.ni_dvp == vp) 2516 vrele(nd.ni_dvp); 2517 else 2518 vput(nd.ni_dvp); 2519 vput(vp); 2520 goto out; 2521 } 2522#endif /* NVERIEXEC > 0 */ 2523 2524#ifdef FILEASSOC 2525 (void)fileassoc_file_delete(vp); 2526#endif /* FILEASSOC */ 2527 error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 2528out: 2529 pathbuf_stringcopy_put(pb, pathstring); 2530 pathbuf_destroy(pb); 2531 return (error); 2532} 2533 2534/* 2535 * Reposition read/write file offset. 
2536 */ 2537int 2538sys_lseek(struct lwp *l, const struct sys_lseek_args *uap, register_t *retval) 2539{ 2540 /* { 2541 syscallarg(int) fd; 2542 syscallarg(int) pad; 2543 syscallarg(off_t) offset; 2544 syscallarg(int) whence; 2545 } */ 2546 kauth_cred_t cred = l->l_cred; 2547 file_t *fp; 2548 struct vnode *vp; 2549 struct vattr vattr; 2550 off_t newoff; 2551 int error, fd; 2552 2553 fd = SCARG(uap, fd); 2554 2555 if ((fp = fd_getfile(fd)) == NULL) 2556 return (EBADF); 2557 2558 vp = fp->f_data; 2559 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2560 error = ESPIPE; 2561 goto out; 2562 } 2563 2564 switch (SCARG(uap, whence)) { 2565 case SEEK_CUR: 2566 newoff = fp->f_offset + SCARG(uap, offset); 2567 break; 2568 case SEEK_END: 2569 vn_lock(vp, LK_SHARED | LK_RETRY); 2570 error = VOP_GETATTR(vp, &vattr, cred); 2571 VOP_UNLOCK(vp); 2572 if (error) { 2573 goto out; 2574 } 2575 newoff = SCARG(uap, offset) + vattr.va_size; 2576 break; 2577 case SEEK_SET: 2578 newoff = SCARG(uap, offset); 2579 break; 2580 default: 2581 error = EINVAL; 2582 goto out; 2583 } 2584 if ((error = VOP_SEEK(vp, fp->f_offset, newoff, cred)) == 0) { 2585 *(off_t *)retval = fp->f_offset = newoff; 2586 } 2587 out: 2588 fd_putfile(fd); 2589 return (error); 2590} 2591 2592/* 2593 * Positional read system call. 
2594 */ 2595int 2596sys_pread(struct lwp *l, const struct sys_pread_args *uap, register_t *retval) 2597{ 2598 /* { 2599 syscallarg(int) fd; 2600 syscallarg(void *) buf; 2601 syscallarg(size_t) nbyte; 2602 syscallarg(off_t) offset; 2603 } */ 2604 file_t *fp; 2605 struct vnode *vp; 2606 off_t offset; 2607 int error, fd = SCARG(uap, fd); 2608 2609 if ((fp = fd_getfile(fd)) == NULL) 2610 return (EBADF); 2611 2612 if ((fp->f_flag & FREAD) == 0) { 2613 fd_putfile(fd); 2614 return (EBADF); 2615 } 2616 2617 vp = fp->f_data; 2618 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2619 error = ESPIPE; 2620 goto out; 2621 } 2622 2623 offset = SCARG(uap, offset); 2624 2625 /* 2626 * XXX This works because no file systems actually 2627 * XXX take any action on the seek operation. 2628 */ 2629 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2630 goto out; 2631 2632 /* dofileread() will unuse the descriptor for us */ 2633 return (dofileread(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2634 &offset, 0, retval)); 2635 2636 out: 2637 fd_putfile(fd); 2638 return (error); 2639} 2640 2641/* 2642 * Positional scatter read system call. 2643 */ 2644int 2645sys_preadv(struct lwp *l, const struct sys_preadv_args *uap, register_t *retval) 2646{ 2647 /* { 2648 syscallarg(int) fd; 2649 syscallarg(const struct iovec *) iovp; 2650 syscallarg(int) iovcnt; 2651 syscallarg(off_t) offset; 2652 } */ 2653 off_t offset = SCARG(uap, offset); 2654 2655 return do_filereadv(SCARG(uap, fd), SCARG(uap, iovp), 2656 SCARG(uap, iovcnt), &offset, 0, retval); 2657} 2658 2659/* 2660 * Positional write system call. 
2661 */ 2662int 2663sys_pwrite(struct lwp *l, const struct sys_pwrite_args *uap, register_t *retval) 2664{ 2665 /* { 2666 syscallarg(int) fd; 2667 syscallarg(const void *) buf; 2668 syscallarg(size_t) nbyte; 2669 syscallarg(off_t) offset; 2670 } */ 2671 file_t *fp; 2672 struct vnode *vp; 2673 off_t offset; 2674 int error, fd = SCARG(uap, fd); 2675 2676 if ((fp = fd_getfile(fd)) == NULL) 2677 return (EBADF); 2678 2679 if ((fp->f_flag & FWRITE) == 0) { 2680 fd_putfile(fd); 2681 return (EBADF); 2682 } 2683 2684 vp = fp->f_data; 2685 if (fp->f_type != DTYPE_VNODE || vp->v_type == VFIFO) { 2686 error = ESPIPE; 2687 goto out; 2688 } 2689 2690 offset = SCARG(uap, offset); 2691 2692 /* 2693 * XXX This works because no file systems actually 2694 * XXX take any action on the seek operation. 2695 */ 2696 if ((error = VOP_SEEK(vp, fp->f_offset, offset, fp->f_cred)) != 0) 2697 goto out; 2698 2699 /* dofilewrite() will unuse the descriptor for us */ 2700 return (dofilewrite(fd, fp, SCARG(uap, buf), SCARG(uap, nbyte), 2701 &offset, 0, retval)); 2702 2703 out: 2704 fd_putfile(fd); 2705 return (error); 2706} 2707 2708/* 2709 * Positional gather write system call. 2710 */ 2711int 2712sys_pwritev(struct lwp *l, const struct sys_pwritev_args *uap, register_t *retval) 2713{ 2714 /* { 2715 syscallarg(int) fd; 2716 syscallarg(const struct iovec *) iovp; 2717 syscallarg(int) iovcnt; 2718 syscallarg(off_t) offset; 2719 } */ 2720 off_t offset = SCARG(uap, offset); 2721 2722 return do_filewritev(SCARG(uap, fd), SCARG(uap, iovp), 2723 SCARG(uap, iovcnt), &offset, 0, retval); 2724} 2725 2726/* 2727 * Check access permissions. 
2728 */ 2729int 2730sys_access(struct lwp *l, const struct sys_access_args *uap, register_t *retval) 2731{ 2732 /* { 2733 syscallarg(const char *) path; 2734 syscallarg(int) flags; 2735 } */ 2736 kauth_cred_t cred; 2737 struct vnode *vp; 2738 int error, flags; 2739 struct pathbuf *pb; 2740 struct nameidata nd; 2741 2742 CTASSERT(F_OK == 0); 2743 if ((SCARG(uap, flags) & ~(R_OK | W_OK | X_OK)) != 0) { 2744 /* nonsense flags */ 2745 return EINVAL; 2746 } 2747 2748 error = pathbuf_copyin(SCARG(uap, path), &pb); 2749 if (error) { 2750 return error; 2751 } 2752 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2753 2754 /* Override default credentials */ 2755 cred = kauth_cred_dup(l->l_cred); 2756 kauth_cred_seteuid(cred, kauth_cred_getuid(l->l_cred)); 2757 kauth_cred_setegid(cred, kauth_cred_getgid(l->l_cred)); 2758 nd.ni_cnd.cn_cred = cred; 2759 2760 if ((error = namei(&nd)) != 0) { 2761 pathbuf_destroy(pb); 2762 goto out; 2763 } 2764 vp = nd.ni_vp; 2765 pathbuf_destroy(pb); 2766 2767 /* Flags == 0 means only check for existence. */ 2768 if (SCARG(uap, flags)) { 2769 flags = 0; 2770 if (SCARG(uap, flags) & R_OK) 2771 flags |= VREAD; 2772 if (SCARG(uap, flags) & W_OK) 2773 flags |= VWRITE; 2774 if (SCARG(uap, flags) & X_OK) 2775 flags |= VEXEC; 2776 2777 error = VOP_ACCESS(vp, flags, cred); 2778 if (!error && (flags & VWRITE)) 2779 error = vn_writechk(vp); 2780 } 2781 vput(vp); 2782out: 2783 kauth_cred_free(cred); 2784 return (error); 2785} 2786 2787int 2788sys_faccessat(struct lwp *l, const struct sys_faccessat_args *uap, 2789 register_t *retval) 2790{ 2791 /* { 2792 syscallarg(int) fd; 2793 syscallarg(const char *) path; 2794 syscallarg(int) amode; 2795 syscallarg(int) flag; 2796 } */ 2797 2798 return ENOSYS; 2799} 2800 2801/* 2802 * Common code for all sys_stat functions, including compat versions. 
2803 */ 2804int 2805do_sys_stat(const char *userpath, unsigned int nd_flags, struct stat *sb) 2806{ 2807 int error; 2808 struct pathbuf *pb; 2809 struct nameidata nd; 2810 2811 error = pathbuf_copyin(userpath, &pb); 2812 if (error) { 2813 return error; 2814 } 2815 NDINIT(&nd, LOOKUP, nd_flags | LOCKLEAF | TRYEMULROOT, pb); 2816 error = namei(&nd); 2817 if (error != 0) { 2818 pathbuf_destroy(pb); 2819 return error; 2820 } 2821 error = vn_stat(nd.ni_vp, sb); 2822 vput(nd.ni_vp); 2823 pathbuf_destroy(pb); 2824 return error; 2825} 2826 2827/* 2828 * Get file status; this version follows links. 2829 */ 2830/* ARGSUSED */ 2831int 2832sys___stat50(struct lwp *l, const struct sys___stat50_args *uap, register_t *retval) 2833{ 2834 /* { 2835 syscallarg(const char *) path; 2836 syscallarg(struct stat *) ub; 2837 } */ 2838 struct stat sb; 2839 int error; 2840 2841 error = do_sys_stat(SCARG(uap, path), FOLLOW, &sb); 2842 if (error) 2843 return error; 2844 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2845} 2846 2847/* 2848 * Get file status; this version does not follow links. 2849 */ 2850/* ARGSUSED */ 2851int 2852sys___lstat50(struct lwp *l, const struct sys___lstat50_args *uap, register_t *retval) 2853{ 2854 /* { 2855 syscallarg(const char *) path; 2856 syscallarg(struct stat *) ub; 2857 } */ 2858 struct stat sb; 2859 int error; 2860 2861 error = do_sys_stat(SCARG(uap, path), NOFOLLOW, &sb); 2862 if (error) 2863 return error; 2864 return copyout(&sb, SCARG(uap, ub), sizeof(sb)); 2865} 2866 2867int 2868sys_fstatat(struct lwp *l, const struct sys_fstatat_args *uap, 2869 register_t *retval) 2870{ 2871 /* { 2872 syscallarg(int) fd; 2873 syscallarg(const char *) path; 2874 syscallarg(struct stat *) ub; 2875 syscallarg(int) flag; 2876 } */ 2877 2878 return ENOSYS; 2879} 2880/* 2881 * Get configurable pathname variables. 
2882 */ 2883/* ARGSUSED */ 2884int 2885sys_pathconf(struct lwp *l, const struct sys_pathconf_args *uap, register_t *retval) 2886{ 2887 /* { 2888 syscallarg(const char *) path; 2889 syscallarg(int) name; 2890 } */ 2891 int error; 2892 struct pathbuf *pb; 2893 struct nameidata nd; 2894 2895 error = pathbuf_copyin(SCARG(uap, path), &pb); 2896 if (error) { 2897 return error; 2898 } 2899 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2900 if ((error = namei(&nd)) != 0) { 2901 pathbuf_destroy(pb); 2902 return (error); 2903 } 2904 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), retval); 2905 vput(nd.ni_vp); 2906 pathbuf_destroy(pb); 2907 return (error); 2908} 2909 2910/* 2911 * Return target name of a symbolic link. 2912 */ 2913/* ARGSUSED */ 2914int 2915sys_readlink(struct lwp *l, const struct sys_readlink_args *uap, register_t *retval) 2916{ 2917 /* { 2918 syscallarg(const char *) path; 2919 syscallarg(char *) buf; 2920 syscallarg(size_t) count; 2921 } */ 2922 struct vnode *vp; 2923 struct iovec aiov; 2924 struct uio auio; 2925 int error; 2926 struct pathbuf *pb; 2927 struct nameidata nd; 2928 2929 error = pathbuf_copyin(SCARG(uap, path), &pb); 2930 if (error) { 2931 return error; 2932 } 2933 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | TRYEMULROOT, pb); 2934 if ((error = namei(&nd)) != 0) { 2935 pathbuf_destroy(pb); 2936 return error; 2937 } 2938 vp = nd.ni_vp; 2939 pathbuf_destroy(pb); 2940 if (vp->v_type != VLNK) 2941 error = EINVAL; 2942 else if (!(vp->v_mount->mnt_flag & MNT_SYMPERM) || 2943 (error = VOP_ACCESS(vp, VREAD, l->l_cred)) == 0) { 2944 aiov.iov_base = SCARG(uap, buf); 2945 aiov.iov_len = SCARG(uap, count); 2946 auio.uio_iov = &aiov; 2947 auio.uio_iovcnt = 1; 2948 auio.uio_offset = 0; 2949 auio.uio_rw = UIO_READ; 2950 KASSERT(l == curlwp); 2951 auio.uio_vmspace = l->l_proc->p_vmspace; 2952 auio.uio_resid = SCARG(uap, count); 2953 error = VOP_READLINK(vp, &auio, l->l_cred); 2954 } 2955 vput(vp); 2956 *retval = SCARG(uap, count) - auio.uio_resid; 
2957 return (error); 2958} 2959 2960int 2961sys_readlinkat(struct lwp *l, const struct sys_readlinkat_args *uap, 2962 register_t *retval) 2963{ 2964 /* { 2965 syscallarg(int) fd; 2966 syscallarg(const char *) path; 2967 syscallarg(char *) buf; 2968 syscallarg(size_t) count; 2969 } */ 2970 2971 return ENOSYS; 2972} 2973 2974/* 2975 * Change flags of a file given a path name. 2976 */ 2977/* ARGSUSED */ 2978int 2979sys_chflags(struct lwp *l, const struct sys_chflags_args *uap, register_t *retval) 2980{ 2981 /* { 2982 syscallarg(const char *) path; 2983 syscallarg(u_long) flags; 2984 } */ 2985 struct vnode *vp; 2986 int error; 2987 2988 error = namei_simple_user(SCARG(uap, path), 2989 NSM_FOLLOW_TRYEMULROOT, &vp); 2990 if (error != 0) 2991 return (error); 2992 error = change_flags(vp, SCARG(uap, flags), l); 2993 vput(vp); 2994 return (error); 2995} 2996 2997/* 2998 * Change flags of a file given a file descriptor. 2999 */ 3000/* ARGSUSED */ 3001int 3002sys_fchflags(struct lwp *l, const struct sys_fchflags_args *uap, register_t *retval) 3003{ 3004 /* { 3005 syscallarg(int) fd; 3006 syscallarg(u_long) flags; 3007 } */ 3008 struct vnode *vp; 3009 file_t *fp; 3010 int error; 3011 3012 /* fd_getvnode() will use the descriptor for us */ 3013 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3014 return (error); 3015 vp = fp->f_data; 3016 error = change_flags(vp, SCARG(uap, flags), l); 3017 VOP_UNLOCK(vp); 3018 fd_putfile(SCARG(uap, fd)); 3019 return (error); 3020} 3021 3022/* 3023 * Change flags of a file given a path name; this version does 3024 * not follow links. 
3025 */ 3026int 3027sys_lchflags(struct lwp *l, const struct sys_lchflags_args *uap, register_t *retval) 3028{ 3029 /* { 3030 syscallarg(const char *) path; 3031 syscallarg(u_long) flags; 3032 } */ 3033 struct vnode *vp; 3034 int error; 3035 3036 error = namei_simple_user(SCARG(uap, path), 3037 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3038 if (error != 0) 3039 return (error); 3040 error = change_flags(vp, SCARG(uap, flags), l); 3041 vput(vp); 3042 return (error); 3043} 3044 3045/* 3046 * Common routine to change flags of a file. 3047 */ 3048int 3049change_flags(struct vnode *vp, u_long flags, struct lwp *l) 3050{ 3051 struct vattr vattr; 3052 int error; 3053 3054 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3055 /* 3056 * Non-superusers cannot change the flags on devices, even if they 3057 * own them. 3058 */ 3059 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, NULL)) { 3060 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3061 goto out; 3062 if (vattr.va_type == VCHR || vattr.va_type == VBLK) { 3063 error = EINVAL; 3064 goto out; 3065 } 3066 } 3067 vattr_null(&vattr); 3068 vattr.va_flags = flags; 3069 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3070out: 3071 return (error); 3072} 3073 3074/* 3075 * Change mode of a file given path name; this version follows links. 3076 */ 3077/* ARGSUSED */ 3078int 3079sys_chmod(struct lwp *l, const struct sys_chmod_args *uap, register_t *retval) 3080{ 3081 /* { 3082 syscallarg(const char *) path; 3083 syscallarg(int) mode; 3084 } */ 3085 int error; 3086 struct vnode *vp; 3087 3088 error = namei_simple_user(SCARG(uap, path), 3089 NSM_FOLLOW_TRYEMULROOT, &vp); 3090 if (error != 0) 3091 return (error); 3092 3093 error = change_mode(vp, SCARG(uap, mode), l); 3094 3095 vrele(vp); 3096 return (error); 3097} 3098 3099/* 3100 * Change mode of a file given a file descriptor. 
3101 */ 3102/* ARGSUSED */ 3103int 3104sys_fchmod(struct lwp *l, const struct sys_fchmod_args *uap, register_t *retval) 3105{ 3106 /* { 3107 syscallarg(int) fd; 3108 syscallarg(int) mode; 3109 } */ 3110 file_t *fp; 3111 int error; 3112 3113 /* fd_getvnode() will use the descriptor for us */ 3114 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3115 return (error); 3116 error = change_mode(fp->f_data, SCARG(uap, mode), l); 3117 fd_putfile(SCARG(uap, fd)); 3118 return (error); 3119} 3120 3121int 3122sys_fchmodat(struct lwp *l, const struct sys_fchmodat_args *uap, 3123 register_t *retval) 3124{ 3125 /* { 3126 syscallarg(int) fd; 3127 syscallarg(const char *) path; 3128 syscallarg(int) mode; 3129 syscallarg(int) flag; 3130 } */ 3131 3132 return ENOSYS; 3133} 3134 3135/* 3136 * Change mode of a file given path name; this version does not follow links. 3137 */ 3138/* ARGSUSED */ 3139int 3140sys_lchmod(struct lwp *l, const struct sys_lchmod_args *uap, register_t *retval) 3141{ 3142 /* { 3143 syscallarg(const char *) path; 3144 syscallarg(int) mode; 3145 } */ 3146 int error; 3147 struct vnode *vp; 3148 3149 error = namei_simple_user(SCARG(uap, path), 3150 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3151 if (error != 0) 3152 return (error); 3153 3154 error = change_mode(vp, SCARG(uap, mode), l); 3155 3156 vrele(vp); 3157 return (error); 3158} 3159 3160/* 3161 * Common routine to set mode given a vnode. 3162 */ 3163static int 3164change_mode(struct vnode *vp, int mode, struct lwp *l) 3165{ 3166 struct vattr vattr; 3167 int error; 3168 3169 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3170 vattr_null(&vattr); 3171 vattr.va_mode = mode & ALLPERMS; 3172 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3173 VOP_UNLOCK(vp); 3174 return (error); 3175} 3176 3177/* 3178 * Set ownership given a path name; this version follows links. 
3179 */ 3180/* ARGSUSED */ 3181int 3182sys_chown(struct lwp *l, const struct sys_chown_args *uap, register_t *retval) 3183{ 3184 /* { 3185 syscallarg(const char *) path; 3186 syscallarg(uid_t) uid; 3187 syscallarg(gid_t) gid; 3188 } */ 3189 int error; 3190 struct vnode *vp; 3191 3192 error = namei_simple_user(SCARG(uap, path), 3193 NSM_FOLLOW_TRYEMULROOT, &vp); 3194 if (error != 0) 3195 return (error); 3196 3197 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3198 3199 vrele(vp); 3200 return (error); 3201} 3202 3203/* 3204 * Set ownership given a path name; this version follows links. 3205 * Provides POSIX semantics. 3206 */ 3207/* ARGSUSED */ 3208int 3209sys___posix_chown(struct lwp *l, const struct sys___posix_chown_args *uap, register_t *retval) 3210{ 3211 /* { 3212 syscallarg(const char *) path; 3213 syscallarg(uid_t) uid; 3214 syscallarg(gid_t) gid; 3215 } */ 3216 int error; 3217 struct vnode *vp; 3218 3219 error = namei_simple_user(SCARG(uap, path), 3220 NSM_FOLLOW_TRYEMULROOT, &vp); 3221 if (error != 0) 3222 return (error); 3223 3224 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3225 3226 vrele(vp); 3227 return (error); 3228} 3229 3230/* 3231 * Set ownership given a file descriptor. 
3232 */ 3233/* ARGSUSED */ 3234int 3235sys_fchown(struct lwp *l, const struct sys_fchown_args *uap, register_t *retval) 3236{ 3237 /* { 3238 syscallarg(int) fd; 3239 syscallarg(uid_t) uid; 3240 syscallarg(gid_t) gid; 3241 } */ 3242 int error; 3243 file_t *fp; 3244 3245 /* fd_getvnode() will use the descriptor for us */ 3246 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3247 return (error); 3248 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3249 l, 0); 3250 fd_putfile(SCARG(uap, fd)); 3251 return (error); 3252} 3253 3254int 3255sys_fchownat(struct lwp *l, const struct sys_fchownat_args *uap, 3256 register_t *retval) 3257{ 3258 /* { 3259 syscallarg(int) fd; 3260 syscallarg(const char *) path; 3261 syscallarg(uid_t) uid; 3262 syscallarg(gid_t) gid; 3263 syscallarg(int) flag; 3264 } */ 3265 3266 return ENOSYS; 3267} 3268 3269/* 3270 * Set ownership given a file descriptor, providing POSIX/XPG semantics. 3271 */ 3272/* ARGSUSED */ 3273int 3274sys___posix_fchown(struct lwp *l, const struct sys___posix_fchown_args *uap, register_t *retval) 3275{ 3276 /* { 3277 syscallarg(int) fd; 3278 syscallarg(uid_t) uid; 3279 syscallarg(gid_t) gid; 3280 } */ 3281 int error; 3282 file_t *fp; 3283 3284 /* fd_getvnode() will use the descriptor for us */ 3285 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3286 return (error); 3287 error = change_owner(fp->f_data, SCARG(uap, uid), SCARG(uap, gid), 3288 l, 1); 3289 fd_putfile(SCARG(uap, fd)); 3290 return (error); 3291} 3292 3293/* 3294 * Set ownership given a path name; this version does not follow links. 
3295 */ 3296/* ARGSUSED */ 3297int 3298sys_lchown(struct lwp *l, const struct sys_lchown_args *uap, register_t *retval) 3299{ 3300 /* { 3301 syscallarg(const char *) path; 3302 syscallarg(uid_t) uid; 3303 syscallarg(gid_t) gid; 3304 } */ 3305 int error; 3306 struct vnode *vp; 3307 3308 error = namei_simple_user(SCARG(uap, path), 3309 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3310 if (error != 0) 3311 return (error); 3312 3313 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 0); 3314 3315 vrele(vp); 3316 return (error); 3317} 3318 3319/* 3320 * Set ownership given a path name; this version does not follow links. 3321 * Provides POSIX/XPG semantics. 3322 */ 3323/* ARGSUSED */ 3324int 3325sys___posix_lchown(struct lwp *l, const struct sys___posix_lchown_args *uap, register_t *retval) 3326{ 3327 /* { 3328 syscallarg(const char *) path; 3329 syscallarg(uid_t) uid; 3330 syscallarg(gid_t) gid; 3331 } */ 3332 int error; 3333 struct vnode *vp; 3334 3335 error = namei_simple_user(SCARG(uap, path), 3336 NSM_NOFOLLOW_TRYEMULROOT, &vp); 3337 if (error != 0) 3338 return (error); 3339 3340 error = change_owner(vp, SCARG(uap, uid), SCARG(uap, gid), l, 1); 3341 3342 vrele(vp); 3343 return (error); 3344} 3345 3346/* 3347 * Common routine to set ownership given a vnode. 3348 */ 3349static int 3350change_owner(struct vnode *vp, uid_t uid, gid_t gid, struct lwp *l, 3351 int posix_semantics) 3352{ 3353 struct vattr vattr; 3354 mode_t newmode; 3355 int error; 3356 3357 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3358 if ((error = VOP_GETATTR(vp, &vattr, l->l_cred)) != 0) 3359 goto out; 3360 3361#define CHANGED(x) ((int)(x) != -1) 3362 newmode = vattr.va_mode; 3363 if (posix_semantics) { 3364 /* 3365 * POSIX/XPG semantics: if the caller is not the super-user, 3366 * clear set-user-id and set-group-id bits. 
Both POSIX and 3367 * the XPG consider the behaviour for calls by the super-user 3368 * implementation-defined; we leave the set-user-id and set- 3369 * group-id settings intact in that case. 3370 */ 3371 if (kauth_authorize_generic(l->l_cred, KAUTH_GENERIC_ISSUSER, 3372 NULL) != 0) 3373 newmode &= ~(S_ISUID | S_ISGID); 3374 } else { 3375 /* 3376 * NetBSD semantics: when changing owner and/or group, 3377 * clear the respective bit(s). 3378 */ 3379 if (CHANGED(uid)) 3380 newmode &= ~S_ISUID; 3381 if (CHANGED(gid)) 3382 newmode &= ~S_ISGID; 3383 } 3384 /* Update va_mode iff altered. */ 3385 if (vattr.va_mode == newmode) 3386 newmode = VNOVAL; 3387 3388 vattr_null(&vattr); 3389 vattr.va_uid = CHANGED(uid) ? uid : (uid_t)VNOVAL; 3390 vattr.va_gid = CHANGED(gid) ? gid : (gid_t)VNOVAL; 3391 vattr.va_mode = newmode; 3392 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3393#undef CHANGED 3394 3395out: 3396 VOP_UNLOCK(vp); 3397 return (error); 3398} 3399 3400/* 3401 * Set the access and modification times given a path name; this 3402 * version follows links. 3403 */ 3404/* ARGSUSED */ 3405int 3406sys___utimes50(struct lwp *l, const struct sys___utimes50_args *uap, 3407 register_t *retval) 3408{ 3409 /* { 3410 syscallarg(const char *) path; 3411 syscallarg(const struct timeval *) tptr; 3412 } */ 3413 3414 return do_sys_utimes(l, NULL, SCARG(uap, path), FOLLOW, 3415 SCARG(uap, tptr), UIO_USERSPACE); 3416} 3417 3418/* 3419 * Set the access and modification times given a file descriptor. 
3420 */ 3421/* ARGSUSED */ 3422int 3423sys___futimes50(struct lwp *l, const struct sys___futimes50_args *uap, 3424 register_t *retval) 3425{ 3426 /* { 3427 syscallarg(int) fd; 3428 syscallarg(const struct timeval *) tptr; 3429 } */ 3430 int error; 3431 file_t *fp; 3432 3433 /* fd_getvnode() will use the descriptor for us */ 3434 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3435 return (error); 3436 error = do_sys_utimes(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3437 UIO_USERSPACE); 3438 fd_putfile(SCARG(uap, fd)); 3439 return (error); 3440} 3441 3442int 3443sys_futimens(struct lwp *l, const struct sys_futimens_args *uap, 3444 register_t *retval) 3445{ 3446 /* { 3447 syscallarg(int) fd; 3448 syscallarg(const struct timespec *) tptr; 3449 } */ 3450 int error; 3451 file_t *fp; 3452 3453 /* fd_getvnode() will use the descriptor for us */ 3454 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3455 return (error); 3456 error = do_sys_utimens(l, fp->f_data, NULL, 0, SCARG(uap, tptr), 3457 UIO_USERSPACE); 3458 fd_putfile(SCARG(uap, fd)); 3459 return (error); 3460} 3461 3462/* 3463 * Set the access and modification times given a path name; this 3464 * version does not follow links. 
3465 */ 3466int 3467sys___lutimes50(struct lwp *l, const struct sys___lutimes50_args *uap, 3468 register_t *retval) 3469{ 3470 /* { 3471 syscallarg(const char *) path; 3472 syscallarg(const struct timeval *) tptr; 3473 } */ 3474 3475 return do_sys_utimes(l, NULL, SCARG(uap, path), NOFOLLOW, 3476 SCARG(uap, tptr), UIO_USERSPACE); 3477} 3478 3479int 3480sys_utimensat(struct lwp *l, const struct sys_utimensat_args *uap, 3481 register_t *retval) 3482{ 3483 /* { 3484 syscallarg(int) fd; 3485 syscallarg(const char *) path; 3486 syscallarg(const struct timespec *) tptr; 3487 syscallarg(int) flag; 3488 } */ 3489 int follow; 3490 const struct timespec *tptr; 3491 3492 /* 3493 * Specified fd is not yet implemented 3494 */ 3495 if (SCARG(uap, fd) != AT_FDCWD) 3496 return ENOSYS; 3497 3498 tptr = SCARG(uap, tptr); 3499 follow = (SCARG(uap, flag) & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3500 3501 return do_sys_utimens(l, NULL, SCARG(uap, path), follow, 3502 tptr, UIO_USERSPACE); 3503} 3504 3505/* 3506 * Common routine to set access and modification times given a vnode. 3507 */ 3508int 3509do_sys_utimens(struct lwp *l, struct vnode *vp, const char *path, int flag, 3510 const struct timespec *tptr, enum uio_seg seg) 3511{ 3512 struct vattr vattr; 3513 int error, dorele = 0; 3514 namei_simple_flags_t sflags; 3515 3516 bool vanull, setbirthtime; 3517 struct timespec ts[2]; 3518 3519 /* 3520 * I have checked all callers and they pass either FOLLOW, 3521 * NOFOLLOW, or 0 (when they don't pass a path), and NOFOLLOW 3522 * is 0. More to the point, they don't pass anything else. 3523 * Let's keep it that way at least until the namei interfaces 3524 * are fully sanitized. 3525 */ 3526 KASSERT(flag == NOFOLLOW || flag == FOLLOW); 3527 sflags = (flag == FOLLOW) ? 
3528 NSM_FOLLOW_TRYEMULROOT : NSM_NOFOLLOW_TRYEMULROOT; 3529 3530 if (tptr == NULL) { 3531 vanull = true; 3532 nanotime(&ts[0]); 3533 ts[1] = ts[0]; 3534 } else { 3535 vanull = false; 3536 if (seg != UIO_SYSSPACE) { 3537 error = copyin(tptr, ts, sizeof (ts)); 3538 if (error != 0) 3539 return error; 3540 } else { 3541 ts[0] = tptr[0]; 3542 ts[1] = tptr[1]; 3543 } 3544 } 3545 3546 if (ts[0].tv_nsec == UTIME_NOW) { 3547 nanotime(&ts[0]); 3548 if (ts[1].tv_nsec == UTIME_NOW) { 3549 vanull = true; 3550 ts[1] = ts[0]; 3551 } 3552 } else if (ts[1].tv_nsec == UTIME_NOW) 3553 nanotime(&ts[1]); 3554 3555 if (vp == NULL) { 3556 /* note: SEG describes TPTR, not PATH; PATH is always user */ 3557 error = namei_simple_user(path, sflags, &vp); 3558 if (error != 0) 3559 return error; 3560 dorele = 1; 3561 } 3562 3563 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3564 setbirthtime = (VOP_GETATTR(vp, &vattr, l->l_cred) == 0 && 3565 timespeccmp(&ts[1], &vattr.va_birthtime, <)); 3566 vattr_null(&vattr); 3567 3568 if (ts[0].tv_nsec != UTIME_OMIT) 3569 vattr.va_atime = ts[0]; 3570 3571 if (ts[1].tv_nsec != UTIME_OMIT) { 3572 vattr.va_mtime = ts[1]; 3573 if (setbirthtime) 3574 vattr.va_birthtime = ts[1]; 3575 } 3576 3577 if (vanull) 3578 vattr.va_vaflags |= VA_UTIMES_NULL; 3579 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3580 VOP_UNLOCK(vp); 3581 3582 if (dorele != 0) 3583 vrele(vp); 3584 3585 return error; 3586} 3587 3588int 3589do_sys_utimes(struct lwp *l, struct vnode *vp, const char *path, int flag, 3590 const struct timeval *tptr, enum uio_seg seg) 3591{ 3592 struct timespec ts[2]; 3593 struct timespec *tsptr = NULL; 3594 int error; 3595 3596 if (tptr != NULL) { 3597 struct timeval tv[2]; 3598 3599 if (seg != UIO_SYSSPACE) { 3600 error = copyin(tptr, tv, sizeof (tv)); 3601 if (error != 0) 3602 return error; 3603 tptr = tv; 3604 } 3605 3606 if ((tv[0].tv_usec == UTIME_NOW) || 3607 (tv[0].tv_usec == UTIME_OMIT)) 3608 ts[0].tv_nsec = tv[0].tv_usec; 3609 else 3610 TIMEVAL_TO_TIMESPEC(&tptr[0], 
&ts[0]); 3611 3612 if ((tv[1].tv_usec == UTIME_NOW) || 3613 (tv[1].tv_usec == UTIME_OMIT)) 3614 ts[1].tv_nsec = tv[1].tv_usec; 3615 else 3616 TIMEVAL_TO_TIMESPEC(&tptr[1], &ts[1]); 3617 3618 tsptr = &ts[0]; 3619 } 3620 3621 return do_sys_utimens(l, vp, path, flag, tsptr, UIO_SYSSPACE); 3622} 3623 3624/* 3625 * Truncate a file given its path name. 3626 */ 3627/* ARGSUSED */ 3628int 3629sys_truncate(struct lwp *l, const struct sys_truncate_args *uap, register_t *retval) 3630{ 3631 /* { 3632 syscallarg(const char *) path; 3633 syscallarg(int) pad; 3634 syscallarg(off_t) length; 3635 } */ 3636 struct vnode *vp; 3637 struct vattr vattr; 3638 int error; 3639 3640 error = namei_simple_user(SCARG(uap, path), 3641 NSM_FOLLOW_TRYEMULROOT, &vp); 3642 if (error != 0) 3643 return (error); 3644 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3645 if (vp->v_type == VDIR) 3646 error = EISDIR; 3647 else if ((error = vn_writechk(vp)) == 0 && 3648 (error = VOP_ACCESS(vp, VWRITE, l->l_cred)) == 0) { 3649 vattr_null(&vattr); 3650 vattr.va_size = SCARG(uap, length); 3651 error = VOP_SETATTR(vp, &vattr, l->l_cred); 3652 } 3653 vput(vp); 3654 return (error); 3655} 3656 3657/* 3658 * Truncate a file given a file descriptor. 
3659 */ 3660/* ARGSUSED */ 3661int 3662sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *retval) 3663{ 3664 /* { 3665 syscallarg(int) fd; 3666 syscallarg(int) pad; 3667 syscallarg(off_t) length; 3668 } */ 3669 struct vattr vattr; 3670 struct vnode *vp; 3671 file_t *fp; 3672 int error; 3673 3674 /* fd_getvnode() will use the descriptor for us */ 3675 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3676 return (error); 3677 if ((fp->f_flag & FWRITE) == 0) { 3678 error = EINVAL; 3679 goto out; 3680 } 3681 vp = fp->f_data; 3682 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3683 if (vp->v_type == VDIR) 3684 error = EISDIR; 3685 else if ((error = vn_writechk(vp)) == 0) { 3686 vattr_null(&vattr); 3687 vattr.va_size = SCARG(uap, length); 3688 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3689 } 3690 VOP_UNLOCK(vp); 3691 out: 3692 fd_putfile(SCARG(uap, fd)); 3693 return (error); 3694} 3695 3696/* 3697 * Sync an open file. 3698 */ 3699/* ARGSUSED */ 3700int 3701sys_fsync(struct lwp *l, const struct sys_fsync_args *uap, register_t *retval) 3702{ 3703 /* { 3704 syscallarg(int) fd; 3705 } */ 3706 struct vnode *vp; 3707 file_t *fp; 3708 int error; 3709 3710 /* fd_getvnode() will use the descriptor for us */ 3711 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3712 return (error); 3713 vp = fp->f_data; 3714 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3715 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT, 0, 0); 3716 VOP_UNLOCK(vp); 3717 fd_putfile(SCARG(uap, fd)); 3718 return (error); 3719} 3720 3721/* 3722 * Sync a range of file data. API modeled after that found in AIX. 3723 * 3724 * FDATASYNC indicates that we need only save enough metadata to be able 3725 * to re-read the written data. Note we duplicate AIX's requirement that 3726 * the file be open for writing. 
3727 */ 3728/* ARGSUSED */ 3729int 3730sys_fsync_range(struct lwp *l, const struct sys_fsync_range_args *uap, register_t *retval) 3731{ 3732 /* { 3733 syscallarg(int) fd; 3734 syscallarg(int) flags; 3735 syscallarg(off_t) start; 3736 syscallarg(off_t) length; 3737 } */ 3738 struct vnode *vp; 3739 file_t *fp; 3740 int flags, nflags; 3741 off_t s, e, len; 3742 int error; 3743 3744 /* fd_getvnode() will use the descriptor for us */ 3745 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3746 return (error); 3747 3748 if ((fp->f_flag & FWRITE) == 0) { 3749 error = EBADF; 3750 goto out; 3751 } 3752 3753 flags = SCARG(uap, flags); 3754 if (((flags & (FDATASYNC | FFILESYNC)) == 0) || 3755 ((~flags & (FDATASYNC | FFILESYNC)) == 0)) { 3756 error = EINVAL; 3757 goto out; 3758 } 3759 /* Now set up the flags for value(s) to pass to VOP_FSYNC() */ 3760 if (flags & FDATASYNC) 3761 nflags = FSYNC_DATAONLY | FSYNC_WAIT; 3762 else 3763 nflags = FSYNC_WAIT; 3764 if (flags & FDISKSYNC) 3765 nflags |= FSYNC_CACHE; 3766 3767 len = SCARG(uap, length); 3768 /* If length == 0, we do the whole file, and s = e = 0 will do that */ 3769 if (len) { 3770 s = SCARG(uap, start); 3771 e = s + len; 3772 if (e < s) { 3773 error = EINVAL; 3774 goto out; 3775 } 3776 } else { 3777 e = 0; 3778 s = 0; 3779 } 3780 3781 vp = fp->f_data; 3782 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3783 error = VOP_FSYNC(vp, fp->f_cred, nflags, s, e); 3784 VOP_UNLOCK(vp); 3785out: 3786 fd_putfile(SCARG(uap, fd)); 3787 return (error); 3788} 3789 3790/* 3791 * Sync the data of an open file. 
3792 */ 3793/* ARGSUSED */ 3794int 3795sys_fdatasync(struct lwp *l, const struct sys_fdatasync_args *uap, register_t *retval) 3796{ 3797 /* { 3798 syscallarg(int) fd; 3799 } */ 3800 struct vnode *vp; 3801 file_t *fp; 3802 int error; 3803 3804 /* fd_getvnode() will use the descriptor for us */ 3805 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 3806 return (error); 3807 if ((fp->f_flag & FWRITE) == 0) { 3808 fd_putfile(SCARG(uap, fd)); 3809 return (EBADF); 3810 } 3811 vp = fp->f_data; 3812 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3813 error = VOP_FSYNC(vp, fp->f_cred, FSYNC_WAIT|FSYNC_DATAONLY, 0, 0); 3814 VOP_UNLOCK(vp); 3815 fd_putfile(SCARG(uap, fd)); 3816 return (error); 3817} 3818 3819/* 3820 * Rename files, (standard) BSD semantics frontend. 3821 */ 3822/* ARGSUSED */ 3823int 3824sys_rename(struct lwp *l, const struct sys_rename_args *uap, register_t *retval) 3825{ 3826 /* { 3827 syscallarg(const char *) from; 3828 syscallarg(const char *) to; 3829 } */ 3830 3831 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 0)); 3832} 3833 3834int 3835sys_renameat(struct lwp *l, const struct sys_renameat_args *uap, 3836 register_t *retval) 3837{ 3838 /* { 3839 syscallarg(int) fromfd; 3840 syscallarg(const char *) from; 3841 syscallarg(int) tofd; 3842 syscallarg(const char *) to; 3843 } */ 3844 3845 return ENOSYS; 3846} 3847 3848/* 3849 * Rename files, POSIX semantics frontend. 3850 */ 3851/* ARGSUSED */ 3852int 3853sys___posix_rename(struct lwp *l, const struct sys___posix_rename_args *uap, register_t *retval) 3854{ 3855 /* { 3856 syscallarg(const char *) from; 3857 syscallarg(const char *) to; 3858 } */ 3859 3860 return (do_sys_rename(SCARG(uap, from), SCARG(uap, to), UIO_USERSPACE, 1)); 3861} 3862 3863/* 3864 * Rename files. Source and destination must either both be directories, 3865 * or both not be directories. If target is a directory, it must be empty. 
* If `from' and `to' refer to the same object, the value of the `retain'
 * argument is used to determine whether `from' will be
 *
 * (retain == 0)	deleted unless `from' and `to' refer to the same
 *		object in the file system's name space (BSD).
 * (retain == 1)	always retained (POSIX).
 *
 * Arguments:
 *	from, to	source and destination paths; both are turned into
 *			pathbufs with pathbuf_maybe_copyin().
 *	seg		address space of the two path strings, passed
 *			straight to pathbuf_maybe_copyin().
 *	retain		see above.
 *
 * Returns 0 on success or an errno value.  Internally `error' is set to
 * -1 to mean "do not call VOP_RENAME, but report success"; that value is
 * converted to 0 on the way out and never reaches the caller.
 */
int
do_sys_rename(const char *from, const char *to, enum uio_seg seg, int retain)
{
	struct vnode *tvp, *fvp, *tdvp;
	struct pathbuf *frompb, *topb;
	struct nameidata fromnd, tond;
	struct mount *fs;
	int error;

	/* Build both pathbufs up front; the second failure frees the first. */
	error = pathbuf_maybe_copyin(from, seg, &frompb);
	if (error) {
		return error;
	}
	error = pathbuf_maybe_copyin(to, seg, &topb);
	if (error) {
		pathbuf_destroy(frompb);
		return error;
	}

	/* First lookup of the source (DELETE op, parent requested locked). */
	NDINIT(&fromnd, DELETE, LOCKPARENT | TRYEMULROOT | INRENAME,
	    frompb);
	if ((error = namei(&fromnd)) != 0) {
		pathbuf_destroy(frompb);
		pathbuf_destroy(topb);
		return (error);
	}
	/* Unlock the parent unless parent and leaf are the same vnode. */
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp);
	fvp = fromnd.ni_vp;

	/* Take the rename lock of the mount the source vnode lives on. */
	fs = fvp->v_mount;
	error = VFS_RENAMELOCK_ENTER(fs);
	if (error) {
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}

	/*
	 * close, partially, yet another race - ideally we should only
	 * go as far as getting fromnd.ni_dvp before getting the per-fs
	 * lock, and then continue to get fromnd.ni_vp, but we can't do
	 * that with namei as it stands.
	 *
	 * This still won't prevent rmdir from nuking fromnd.ni_vp
	 * under us.  The real fix is to get the locks in the right
	 * order and do the lookups in the right places, but that's a
	 * major rototill.
	 *
	 * Note: this logic (as well as this whole function) is cloned
	 * in nfs_serv.c. Proceed accordingly.
	 */
	/*
	 * Drop the leaf found by the first lookup; it is looked up again
	 * below now that the rename lock is held.
	 */
	vrele(fvp);
	/* A final source component of "." or ".." is rejected with EINVAL. */
	if ((fromnd.ni_cnd.cn_namelen == 1 &&
	     fromnd.ni_cnd.cn_nameptr[0] == '.') ||
	    (fromnd.ni_cnd.cn_namelen == 2 &&
	     fromnd.ni_cnd.cn_nameptr[0] == '.' &&
	     fromnd.ni_cnd.cn_nameptr[1] == '.')) {
		error = EINVAL;
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		goto out1;
	}
	/* Re-lock the parent and repeat the last-component lookup. */
	vn_lock(fromnd.ni_dvp, LK_EXCLUSIVE | LK_RETRY);
	error = relookup(fromnd.ni_dvp, &fromnd.ni_vp, &fromnd.ni_cnd, 0);
	if (error) {
		VOP_UNLOCK(fromnd.ni_dvp);
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		goto out1;
	}
	/*
	 * Unlock the leaf, and the parent too when it is a distinct vnode.
	 * Only vrele() is used on them from here on.
	 */
	VOP_UNLOCK(fromnd.ni_vp);
	if (fromnd.ni_dvp != fromnd.ni_vp)
		VOP_UNLOCK(fromnd.ni_dvp);
	fvp = fromnd.ni_vp;

	/*
	 * Look up the destination.  CREATEDIR is added to the flags only
	 * when the source is a directory.
	 */
	NDINIT(&tond, RENAME,
	    LOCKPARENT | LOCKLEAF | NOCACHE | TRYEMULROOT
	      | INRENAME | (fvp->v_type == VDIR ? CREATEDIR : 0),
	    topb);
	if ((error = namei(&tond)) != 0) {
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;

	/*
	 * An existing target must agree with the source on being a
	 * directory or not.
	 */
	if (tvp != NULL) {
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}

	/* The source vnode may not itself be the destination directory. */
	if (fvp == tdvp)
		error = EINVAL;

	/*
	 * Source and destination refer to the same object.
	 *
	 * -1 is the internal "nothing to do, report success" marker.  With
	 * retain == 0 it is only set when parent directory, name length and
	 * name bytes all match as well.
	 *
	 * NOTE(review): a -1 stored here replaces an EINVAL stored just
	 * above when fvp == tdvp holds too -- confirm that is intended.
	 */
	if (fvp == tvp) {
		if (retain)
			error = -1;
		else if (fromnd.ni_dvp == tdvp &&
		    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
		    !memcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
		          fromnd.ni_cnd.cn_namelen))
			error = -1;
	}
	/*
	 * Prevent cross-mount operation.
	 */
	if (error == 0) {
		if (tond.ni_dvp->v_mount != fromnd.ni_dvp->v_mount) {
			error = EXDEV;
		}
	}
#if NVERIEXEC > 0
	if (!error) {
		char *f1, *f2;
		size_t f1_len;
		size_t f2_len;

		/*
		 * Hand veriexec private NUL-terminated copies of the two
		 * final path components; each copy is cn_namelen + 1 bytes.
		 */
		f1_len = fromnd.ni_cnd.cn_namelen + 1;
		f1 = kmem_alloc(f1_len, KM_SLEEP);
		strlcpy(f1, fromnd.ni_cnd.cn_nameptr, f1_len);

		f2_len = tond.ni_cnd.cn_namelen + 1;
		f2 = kmem_alloc(f2_len, KM_SLEEP);
		strlcpy(f2, tond.ni_cnd.cn_nameptr, f2_len);

		error = veriexec_renamechk(curlwp, fvp, f1, tvp, f2);

		kmem_free(f1, f1_len);
		kmem_free(f2, f2_len);
	}
#endif /* NVERIEXEC > 0 */

out:
	if (!error) {
		/*
		 * No vnode is unlocked or released here after VOP_RENAME;
		 * presumably the VOP disposes of all of them itself --
		 * confirm against vnodeops(9).
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		VFS_RENAMELOCK_EXIT(fs);
	} else {
		/*
		 * Abort: undo the destination lookup first, then drop the
		 * rename lock, then undo the source lookup.
		 */
		VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		VFS_RENAMELOCK_EXIT(fs);
		VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
out1:
	/* Every path that got past both copyins frees both pathbufs here. */
	pathbuf_destroy(frompb);
	pathbuf_destroy(topb);
	/* Convert the internal -1 marker into plain success. */
	return (error == -1 ? 0 : error);
}

/*
 * Make a directory file.
4046 */ 4047/* ARGSUSED */ 4048int 4049sys_mkdir(struct lwp *l, const struct sys_mkdir_args *uap, register_t *retval) 4050{ 4051 /* { 4052 syscallarg(const char *) path; 4053 syscallarg(int) mode; 4054 } */ 4055 4056 return do_sys_mkdir(SCARG(uap, path), SCARG(uap, mode), UIO_USERSPACE); 4057} 4058 4059int 4060sys_mkdirat(struct lwp *l, const struct sys_mkdirat_args *uap, 4061 register_t *retval) 4062{ 4063 /* { 4064 syscallarg(int) fd; 4065 syscallarg(const char *) path; 4066 syscallarg(int) mode; 4067 } */ 4068 4069 return ENOSYS; 4070} 4071 4072 4073int 4074do_sys_mkdir(const char *path, mode_t mode, enum uio_seg seg) 4075{ 4076 struct proc *p = curlwp->l_proc; 4077 struct vnode *vp; 4078 struct vattr vattr; 4079 int error; 4080 struct pathbuf *pb; 4081 struct nameidata nd; 4082 4083 /* XXX bollocks, should pass in a pathbuf */ 4084 error = pathbuf_maybe_copyin(path, seg, &pb); 4085 if (error) { 4086 return error; 4087 } 4088 4089 NDINIT(&nd, CREATE, LOCKPARENT | CREATEDIR | TRYEMULROOT, pb); 4090 if ((error = namei(&nd)) != 0) { 4091 pathbuf_destroy(pb); 4092 return (error); 4093 } 4094 vp = nd.ni_vp; 4095 if (vp != NULL) { 4096 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4097 if (nd.ni_dvp == vp) 4098 vrele(nd.ni_dvp); 4099 else 4100 vput(nd.ni_dvp); 4101 vrele(vp); 4102 pathbuf_destroy(pb); 4103 return (EEXIST); 4104 } 4105 vattr_null(&vattr); 4106 vattr.va_type = VDIR; 4107 /* We will read cwdi->cwdi_cmask unlocked. */ 4108 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_cwdi->cwdi_cmask; 4109 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4110 if (!error) 4111 vput(nd.ni_vp); 4112 pathbuf_destroy(pb); 4113 return (error); 4114} 4115 4116/* 4117 * Remove a directory file. 
4118 */ 4119/* ARGSUSED */ 4120int 4121sys_rmdir(struct lwp *l, const struct sys_rmdir_args *uap, register_t *retval) 4122{ 4123 /* { 4124 syscallarg(const char *) path; 4125 } */ 4126 struct vnode *vp; 4127 int error; 4128 struct pathbuf *pb; 4129 struct nameidata nd; 4130 4131 error = pathbuf_copyin(SCARG(uap, path), &pb); 4132 if (error) { 4133 return error; 4134 } 4135 NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | TRYEMULROOT, pb); 4136 if ((error = namei(&nd)) != 0) { 4137 pathbuf_destroy(pb); 4138 return error; 4139 } 4140 vp = nd.ni_vp; 4141 if (vp->v_type != VDIR) { 4142 error = ENOTDIR; 4143 goto out; 4144 } 4145 /* 4146 * No rmdir "." please. 4147 */ 4148 if (nd.ni_dvp == vp) { 4149 error = EINVAL; 4150 goto out; 4151 } 4152 /* 4153 * The root of a mounted filesystem cannot be deleted. 4154 */ 4155 if ((vp->v_vflag & VV_ROOT) != 0 || vp->v_mountedhere != NULL) { 4156 error = EBUSY; 4157 goto out; 4158 } 4159 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4160 pathbuf_destroy(pb); 4161 return (error); 4162 4163out: 4164 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); 4165 if (nd.ni_dvp == vp) 4166 vrele(nd.ni_dvp); 4167 else 4168 vput(nd.ni_dvp); 4169 vput(vp); 4170 pathbuf_destroy(pb); 4171 return (error); 4172} 4173 4174/* 4175 * Read a block of directory entries in a file system independent format. 
4176 */ 4177int 4178sys___getdents30(struct lwp *l, const struct sys___getdents30_args *uap, register_t *retval) 4179{ 4180 /* { 4181 syscallarg(int) fd; 4182 syscallarg(char *) buf; 4183 syscallarg(size_t) count; 4184 } */ 4185 file_t *fp; 4186 int error, done; 4187 4188 /* fd_getvnode() will use the descriptor for us */ 4189 if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0) 4190 return (error); 4191 if ((fp->f_flag & FREAD) == 0) { 4192 error = EBADF; 4193 goto out; 4194 } 4195 error = vn_readdir(fp, SCARG(uap, buf), UIO_USERSPACE, 4196 SCARG(uap, count), &done, l, 0, 0); 4197 ktrgenio(SCARG(uap, fd), UIO_READ, SCARG(uap, buf), done, error); 4198 *retval = done; 4199 out: 4200 fd_putfile(SCARG(uap, fd)); 4201 return (error); 4202} 4203 4204/* 4205 * Set the mode mask for creation of filesystem nodes. 4206 */ 4207int 4208sys_umask(struct lwp *l, const struct sys_umask_args *uap, register_t *retval) 4209{ 4210 /* { 4211 syscallarg(mode_t) newmask; 4212 } */ 4213 struct proc *p = l->l_proc; 4214 struct cwdinfo *cwdi; 4215 4216 /* 4217 * cwdi->cwdi_cmask will be read unlocked elsewhere. What's 4218 * important is that we serialize changes to the mask. The 4219 * rw_exit() will issue a write memory barrier on our behalf, 4220 * and force the changes out to other CPUs (as it must use an 4221 * atomic operation, draining the local CPU's store buffers). 
4222 */ 4223 cwdi = p->p_cwdi; 4224 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 4225 *retval = cwdi->cwdi_cmask; 4226 cwdi->cwdi_cmask = SCARG(uap, newmask) & ALLPERMS; 4227 rw_exit(&cwdi->cwdi_lock); 4228 4229 return (0); 4230} 4231 4232int 4233dorevoke(struct vnode *vp, kauth_cred_t cred) 4234{ 4235 struct vattr vattr; 4236 int error; 4237 4238 vn_lock(vp, LK_SHARED | LK_RETRY); 4239 error = VOP_GETATTR(vp, &vattr, cred); 4240 VOP_UNLOCK(vp); 4241 if (error != 0) 4242 return error; 4243 if (kauth_cred_geteuid(cred) == vattr.va_uid || 4244 (error = kauth_authorize_generic(cred, 4245 KAUTH_GENERIC_ISSUSER, NULL)) == 0) 4246 VOP_REVOKE(vp, REVOKEALL); 4247 return (error); 4248} 4249 4250/* 4251 * Void all references to file by ripping underlying filesystem 4252 * away from vnode. 4253 */ 4254/* ARGSUSED */ 4255int 4256sys_revoke(struct lwp *l, const struct sys_revoke_args *uap, register_t *retval) 4257{ 4258 /* { 4259 syscallarg(const char *) path; 4260 } */ 4261 struct vnode *vp; 4262 int error; 4263 4264 error = namei_simple_user(SCARG(uap, path), 4265 NSM_FOLLOW_TRYEMULROOT, &vp); 4266 if (error != 0) 4267 return (error); 4268 error = dorevoke(vp, l->l_cred); 4269 vrele(vp); 4270 return (error); 4271} 4272