vfs_syscalls.c revision 338943
1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: stable/11/sys/kern/vfs_syscalls.c 338943 2018-09-26 14:26:29Z kib $"); 39 40#include "opt_capsicum.h" 41#include "opt_compat.h" 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/bio.h> 47#include <sys/buf.h> 48#include <sys/capsicum.h> 49#include <sys/disk.h> 50#include <sys/sysent.h> 51#include <sys/malloc.h> 52#include <sys/mount.h> 53#include <sys/mutex.h> 54#include <sys/sysproto.h> 55#include <sys/namei.h> 56#include <sys/filedesc.h> 57#include <sys/kernel.h> 58#include <sys/fcntl.h> 59#include <sys/file.h> 60#include <sys/filio.h> 61#include <sys/limits.h> 62#include <sys/linker.h> 63#include <sys/rwlock.h> 64#include <sys/sdt.h> 65#include <sys/stat.h> 66#include <sys/sx.h> 67#include <sys/unistd.h> 68#include <sys/vnode.h> 69#include <sys/priv.h> 70#include <sys/proc.h> 71#include <sys/dirent.h> 72#include <sys/jail.h> 73#include <sys/syscallsubr.h> 74#include <sys/sysctl.h> 75#ifdef KTRACE 76#include <sys/ktrace.h> 77#endif 78 79#include <machine/stdarg.h> 80 81#include <security/audit/audit.h> 82#include <security/mac/mac_framework.h> 83 84#include <vm/vm.h> 85#include <vm/vm_object.h> 86#include <vm/vm_page.h> 87#include <vm/uma.h> 88 89#include <ufs/ufs/quota.h> 90 91MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93SDT_PROVIDER_DEFINE(vfs); 94SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99static int setfflags(struct thread *td, struct vnode *, u_long); 100static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108/* 109 * Sync each mounted filesystem. 110 */ 111#ifndef _SYS_SYSPROTO_H_ 112struct sync_args { 113 int dummy; 114}; 115#endif 116/* ARGSUSED */ 117int 118sys_sync(struct thread *td, struct sync_args *uap) 119{ 120 struct mount *mp, *nmp; 121 int save; 122 123 mtx_lock(&mountlist_mtx); 124 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 125 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 126 nmp = TAILQ_NEXT(mp, mnt_list); 127 continue; 128 } 129 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 130 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 131 save = curthread_pflags_set(TDP_SYNCIO); 132 vfs_msync(mp, MNT_NOWAIT); 133 VFS_SYNC(mp, MNT_NOWAIT); 134 curthread_pflags_restore(save); 135 vn_finished_write(mp); 136 } 137 mtx_lock(&mountlist_mtx); 138 nmp = TAILQ_NEXT(mp, mnt_list); 139 vfs_unbusy(mp); 140 } 141 mtx_unlock(&mountlist_mtx); 142 return (0); 143} 144 145/* 146 * Change filesystem quotas. 147 */ 148#ifndef _SYS_SYSPROTO_H_ 149struct quotactl_args { 150 char *path; 151 int cmd; 152 int uid; 153 caddr_t arg; 154}; 155#endif 156int 157sys_quotactl(struct thread *td, struct quotactl_args *uap) 158{ 159 struct mount *mp; 160 struct nameidata nd; 161 int error; 162 163 AUDIT_ARG_CMD(uap->cmd); 164 AUDIT_ARG_UID(uap->uid); 165 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 166 return (EPERM); 167 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 168 uap->path, td); 169 if ((error = namei(&nd)) != 0) 170 return (error); 171 NDFREE(&nd, NDF_ONLY_PNBUF); 172 mp = nd.ni_vp->v_mount; 173 vfs_ref(mp); 174 vput(nd.ni_vp); 175 error = vfs_busy(mp, 0); 176 vfs_rel(mp); 177 if (error != 0) 178 return (error); 179 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 180 181 /* 182 * Since quota on operation typically needs to open quota 183 * file, the Q_QUOTAON handler needs to unbusy the mount point 184 * before calling into namei. Otherwise, unmount might be 185 * started between two vfs_busy() invocations (first is our, 186 * second is from mount point cross-walk code in lookup()), 187 * causing deadlock. 188 * 189 * Require that Q_QUOTAON handles the vfs_busy() reference on 190 * its own, always returning with ubusied mount point. 191 */ 192 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 193 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 194 vfs_unbusy(mp); 195 return (error); 196} 197 198/* 199 * Used by statfs conversion routines to scale the block size up if 200 * necessary so that all of the block counts are <= 'max_size'. Note 201 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 202 * value of 'n'. 203 */ 204void 205statfs_scale_blocks(struct statfs *sf, long max_size) 206{ 207 uint64_t count; 208 int shift; 209 210 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 211 212 /* 213 * Attempt to scale the block counts to give a more accurate 214 * overview to userland of the ratio of free space to used 215 * space. To do this, find the largest block count and compute 216 * a divisor that lets it fit into a signed integer <= max_size. 217 */ 218 if (sf->f_bavail < 0) 219 count = -sf->f_bavail; 220 else 221 count = sf->f_bavail; 222 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 223 if (count <= max_size) 224 return; 225 226 count >>= flsl(max_size); 227 shift = 0; 228 while (count > 0) { 229 shift++; 230 count >>=1; 231 } 232 233 sf->f_bsize <<= shift; 234 sf->f_blocks >>= shift; 235 sf->f_bfree >>= shift; 236 sf->f_bavail >>= shift; 237} 238 239static int 240kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 241{ 242 struct statfs *sp; 243 int error; 244 245 if (mp == NULL) 246 return (EBADF); 247 error = vfs_busy(mp, 0); 248 vfs_rel(mp); 249 if (error != 0) 250 return (error); 251#ifdef MAC 252 error = mac_mount_check_stat(td->td_ucred, mp); 253 if (error != 0) 254 goto out; 255#endif 256 /* 257 * Set these in case the underlying filesystem fails to do so. 258 */ 259 sp = &mp->mnt_stat; 260 sp->f_version = STATFS_VERSION; 261 sp->f_namemax = NAME_MAX; 262 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 263 error = VFS_STATFS(mp, sp); 264 if (error != 0) 265 goto out; 266 *buf = *sp; 267 if (priv_check(td, PRIV_VFS_GENERATION)) { 268 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 269 prison_enforce_statfs(td->td_ucred, mp, buf); 270 } 271out: 272 vfs_unbusy(mp); 273 return (error); 274} 275 276/* 277 * Get filesystem statistics. 278 */ 279#ifndef _SYS_SYSPROTO_H_ 280struct statfs_args { 281 char *path; 282 struct statfs *buf; 283}; 284#endif 285int 286sys_statfs(struct thread *td, struct statfs_args *uap) 287{ 288 struct statfs *sfp; 289 int error; 290 291 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 292 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 293 if (error == 0) 294 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 295 free(sfp, M_STATFS); 296 return (error); 297} 298 299int 300kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 301 struct statfs *buf) 302{ 303 struct mount *mp; 304 struct nameidata nd; 305 int error; 306 307 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 308 pathseg, path, td); 309 error = namei(&nd); 310 if (error != 0) 311 return (error); 312 mp = nd.ni_vp->v_mount; 313 vfs_ref(mp); 314 NDFREE(&nd, NDF_ONLY_PNBUF); 315 vput(nd.ni_vp); 316 return (kern_do_statfs(td, mp, buf)); 317} 318 319/* 320 * Get filesystem statistics. 321 */ 322#ifndef _SYS_SYSPROTO_H_ 323struct fstatfs_args { 324 int fd; 325 struct statfs *buf; 326}; 327#endif 328int 329sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 330{ 331 struct statfs *sfp; 332 int error; 333 334 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 335 error = kern_fstatfs(td, uap->fd, sfp); 336 if (error == 0) 337 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 338 free(sfp, M_STATFS); 339 return (error); 340} 341 342int 343kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 344{ 345 struct file *fp; 346 struct mount *mp; 347 struct vnode *vp; 348 cap_rights_t rights; 349 int error; 350 351 AUDIT_ARG_FD(fd); 352 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 353 if (error != 0) 354 return (error); 355 vp = fp->f_vnode; 356 vn_lock(vp, LK_SHARED | LK_RETRY); 357#ifdef AUDIT 358 AUDIT_ARG_VNODE1(vp); 359#endif 360 mp = vp->v_mount; 361 if (mp != NULL) 362 vfs_ref(mp); 363 VOP_UNLOCK(vp, 0); 364 fdrop(fp, td); 365 return (kern_do_statfs(td, mp, buf)); 366} 367 368/* 369 * Get statistics on all filesystems. 370 */ 371#ifndef _SYS_SYSPROTO_H_ 372struct getfsstat_args { 373 struct statfs *buf; 374 long bufsize; 375 int mode; 376}; 377#endif 378int 379sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 380{ 381 size_t count; 382 int error; 383 384 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 385 return (EINVAL); 386 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 387 UIO_USERSPACE, uap->mode); 388 if (error == 0) 389 td->td_retval[0] = count; 390 return (error); 391} 392 393/* 394 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 395 * The caller is responsible for freeing memory which will be allocated 396 * in '*buf'. 397 */ 398int 399kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 400 size_t *countp, enum uio_seg bufseg, int mode) 401{ 402 struct mount *mp, *nmp; 403 struct statfs *sfsp, *sp, *sptmp, *tofree; 404 size_t count, maxcount; 405 int error; 406 407 switch (mode) { 408 case MNT_WAIT: 409 case MNT_NOWAIT: 410 break; 411 default: 412 return (EINVAL); 413 } 414restart: 415 maxcount = bufsize / sizeof(struct statfs); 416 if (bufsize == 0) { 417 sfsp = NULL; 418 tofree = NULL; 419 } else if (bufseg == UIO_USERSPACE) { 420 sfsp = *buf; 421 tofree = NULL; 422 } else /* if (bufseg == UIO_SYSSPACE) */ { 423 count = 0; 424 mtx_lock(&mountlist_mtx); 425 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 426 count++; 427 } 428 mtx_unlock(&mountlist_mtx); 429 if (maxcount > count) 430 maxcount = count; 431 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 432 M_STATFS, M_WAITOK); 433 } 434 count = 0; 435 mtx_lock(&mountlist_mtx); 436 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 437 if (prison_canseemount(td->td_ucred, mp) != 0) { 438 nmp = TAILQ_NEXT(mp, mnt_list); 439 continue; 440 } 441#ifdef MAC 442 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 443 nmp = TAILQ_NEXT(mp, mnt_list); 444 continue; 445 } 446#endif 447 if (mode == MNT_WAIT) { 448 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 449 /* 450 * If vfs_busy() failed, and MBF_NOWAIT 451 * wasn't passed, then the mp is gone. 452 * Furthermore, because of MBF_MNTLSTLOCK, 453 * the mountlist_mtx was dropped. We have 454 * no other choice than to start over. 455 */ 456 mtx_unlock(&mountlist_mtx); 457 free(tofree, M_STATFS); 458 goto restart; 459 } 460 } else { 461 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 462 nmp = TAILQ_NEXT(mp, mnt_list); 463 continue; 464 } 465 } 466 if (sfsp != NULL && count < maxcount) { 467 sp = &mp->mnt_stat; 468 /* 469 * Set these in case the underlying filesystem 470 * fails to do so. 471 */ 472 sp->f_version = STATFS_VERSION; 473 sp->f_namemax = NAME_MAX; 474 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 475 /* 476 * If MNT_NOWAIT is specified, do not refresh 477 * the fsstat cache. 478 */ 479 if (mode != MNT_NOWAIT) { 480 error = VFS_STATFS(mp, sp); 481 if (error != 0) { 482 mtx_lock(&mountlist_mtx); 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 vfs_unbusy(mp); 485 continue; 486 } 487 } 488 if (priv_check(td, PRIV_VFS_GENERATION)) { 489 sptmp = malloc(sizeof(struct statfs), M_STATFS, 490 M_WAITOK); 491 *sptmp = *sp; 492 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 493 prison_enforce_statfs(td->td_ucred, mp, sptmp); 494 sp = sptmp; 495 } else 496 sptmp = NULL; 497 if (bufseg == UIO_SYSSPACE) { 498 bcopy(sp, sfsp, sizeof(*sp)); 499 free(sptmp, M_STATFS); 500 } else /* if (bufseg == UIO_USERSPACE) */ { 501 error = copyout(sp, sfsp, sizeof(*sp)); 502 free(sptmp, M_STATFS); 503 if (error != 0) { 504 vfs_unbusy(mp); 505 return (error); 506 } 507 } 508 sfsp++; 509 } 510 count++; 511 mtx_lock(&mountlist_mtx); 512 nmp = TAILQ_NEXT(mp, mnt_list); 513 vfs_unbusy(mp); 514 } 515 mtx_unlock(&mountlist_mtx); 516 if (sfsp != NULL && count > maxcount) 517 *countp = maxcount; 518 else 519 *countp = count; 520 return (0); 521} 522 523#ifdef COMPAT_FREEBSD4 524/* 525 * Get old format filesystem statistics. 526 */ 527static void cvtstatfs(struct statfs *, struct ostatfs *); 528 529#ifndef _SYS_SYSPROTO_H_ 530struct freebsd4_statfs_args { 531 char *path; 532 struct ostatfs *buf; 533}; 534#endif 535int 536freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 537{ 538 struct ostatfs osb; 539 struct statfs *sfp; 540 int error; 541 542 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 543 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 544 if (error == 0) { 545 cvtstatfs(sfp, &osb); 546 error = copyout(&osb, uap->buf, sizeof(osb)); 547 } 548 free(sfp, M_STATFS); 549 return (error); 550} 551 552/* 553 * Get filesystem statistics. 554 */ 555#ifndef _SYS_SYSPROTO_H_ 556struct freebsd4_fstatfs_args { 557 int fd; 558 struct ostatfs *buf; 559}; 560#endif 561int 562freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 563{ 564 struct ostatfs osb; 565 struct statfs *sfp; 566 int error; 567 568 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 569 error = kern_fstatfs(td, uap->fd, sfp); 570 if (error == 0) { 571 cvtstatfs(sfp, &osb); 572 error = copyout(&osb, uap->buf, sizeof(osb)); 573 } 574 free(sfp, M_STATFS); 575 return (error); 576} 577 578/* 579 * Get statistics on all filesystems. 580 */ 581#ifndef _SYS_SYSPROTO_H_ 582struct freebsd4_getfsstat_args { 583 struct ostatfs *buf; 584 long bufsize; 585 int mode; 586}; 587#endif 588int 589freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 590{ 591 struct statfs *buf, *sp; 592 struct ostatfs osb; 593 size_t count, size; 594 int error; 595 596 if (uap->bufsize < 0) 597 return (EINVAL); 598 count = uap->bufsize / sizeof(struct ostatfs); 599 if (count > SIZE_MAX / sizeof(struct statfs)) 600 return (EINVAL); 601 size = count * sizeof(struct statfs); 602 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 603 uap->mode); 604 td->td_retval[0] = count; 605 if (size != 0) { 606 sp = buf; 607 while (count != 0 && error == 0) { 608 cvtstatfs(sp, &osb); 609 error = copyout(&osb, uap->buf, sizeof(osb)); 610 sp++; 611 uap->buf++; 612 count--; 613 } 614 free(buf, M_STATFS); 615 } 616 return (error); 617} 618 619/* 620 * Implement fstatfs() for (NFS) file handles. 621 */ 622#ifndef _SYS_SYSPROTO_H_ 623struct freebsd4_fhstatfs_args { 624 struct fhandle *u_fhp; 625 struct ostatfs *buf; 626}; 627#endif 628int 629freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 630{ 631 struct ostatfs osb; 632 struct statfs *sfp; 633 fhandle_t fh; 634 int error; 635 636 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 637 if (error != 0) 638 return (error); 639 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 640 error = kern_fhstatfs(td, fh, sfp); 641 if (error == 0) { 642 cvtstatfs(sfp, &osb); 643 error = copyout(&osb, uap->buf, sizeof(osb)); 644 } 645 free(sfp, M_STATFS); 646 return (error); 647} 648 649/* 650 * Convert a new format statfs structure to an old format statfs structure. 651 */ 652static void 653cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 654{ 655 656 statfs_scale_blocks(nsp, LONG_MAX); 657 bzero(osp, sizeof(*osp)); 658 osp->f_bsize = nsp->f_bsize; 659 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 660 osp->f_blocks = nsp->f_blocks; 661 osp->f_bfree = nsp->f_bfree; 662 osp->f_bavail = nsp->f_bavail; 663 osp->f_files = MIN(nsp->f_files, LONG_MAX); 664 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 665 osp->f_owner = nsp->f_owner; 666 osp->f_type = nsp->f_type; 667 osp->f_flags = nsp->f_flags; 668 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 669 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 670 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 671 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 672 strlcpy(osp->f_fstypename, nsp->f_fstypename, 673 MIN(MFSNAMELEN, OMFSNAMELEN)); 674 strlcpy(osp->f_mntonname, nsp->f_mntonname, 675 MIN(MNAMELEN, OMNAMELEN)); 676 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 677 MIN(MNAMELEN, OMNAMELEN)); 678 osp->f_fsid = nsp->f_fsid; 679} 680#endif /* COMPAT_FREEBSD4 */ 681 682/* 683 * Change current working directory to a given file descriptor. 684 */ 685#ifndef _SYS_SYSPROTO_H_ 686struct fchdir_args { 687 int fd; 688}; 689#endif 690int 691sys_fchdir(struct thread *td, struct fchdir_args *uap) 692{ 693 struct vnode *vp, *tdp; 694 struct mount *mp; 695 struct file *fp; 696 cap_rights_t rights; 697 int error; 698 699 AUDIT_ARG_FD(uap->fd); 700 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 701 &fp); 702 if (error != 0) 703 return (error); 704 vp = fp->f_vnode; 705 vrefact(vp); 706 fdrop(fp, td); 707 vn_lock(vp, LK_SHARED | LK_RETRY); 708 AUDIT_ARG_VNODE1(vp); 709 error = change_dir(vp, td); 710 while (!error && (mp = vp->v_mountedhere) != NULL) { 711 if (vfs_busy(mp, 0)) 712 continue; 713 error = VFS_ROOT(mp, LK_SHARED, &tdp); 714 vfs_unbusy(mp); 715 if (error != 0) 716 break; 717 vput(vp); 718 vp = tdp; 719 } 720 if (error != 0) { 721 vput(vp); 722 return (error); 723 } 724 VOP_UNLOCK(vp, 0); 725 pwd_chdir(td, vp); 726 return (0); 727} 728 729/* 730 * Change current working directory (``.''). 731 */ 732#ifndef _SYS_SYSPROTO_H_ 733struct chdir_args { 734 char *path; 735}; 736#endif 737int 738sys_chdir(struct thread *td, struct chdir_args *uap) 739{ 740 741 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 742} 743 744int 745kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 746{ 747 struct nameidata nd; 748 int error; 749 750 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 751 pathseg, path, td); 752 if ((error = namei(&nd)) != 0) 753 return (error); 754 if ((error = change_dir(nd.ni_vp, td)) != 0) { 755 vput(nd.ni_vp); 756 NDFREE(&nd, NDF_ONLY_PNBUF); 757 return (error); 758 } 759 VOP_UNLOCK(nd.ni_vp, 0); 760 NDFREE(&nd, NDF_ONLY_PNBUF); 761 pwd_chdir(td, nd.ni_vp); 762 return (0); 763} 764 765/* 766 * Change notion of root (``/'') directory. 767 */ 768#ifndef _SYS_SYSPROTO_H_ 769struct chroot_args { 770 char *path; 771}; 772#endif 773int 774sys_chroot(struct thread *td, struct chroot_args *uap) 775{ 776 struct nameidata nd; 777 int error; 778 779 error = priv_check(td, PRIV_VFS_CHROOT); 780 if (error != 0) 781 return (error); 782 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 783 UIO_USERSPACE, uap->path, td); 784 error = namei(&nd); 785 if (error != 0) 786 goto error; 787 error = change_dir(nd.ni_vp, td); 788 if (error != 0) 789 goto e_vunlock; 790#ifdef MAC 791 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 792 if (error != 0) 793 goto e_vunlock; 794#endif 795 VOP_UNLOCK(nd.ni_vp, 0); 796 error = pwd_chroot(td, nd.ni_vp); 797 vrele(nd.ni_vp); 798 NDFREE(&nd, NDF_ONLY_PNBUF); 799 return (error); 800e_vunlock: 801 vput(nd.ni_vp); 802error: 803 NDFREE(&nd, NDF_ONLY_PNBUF); 804 return (error); 805} 806 807/* 808 * Common routine for chroot and chdir. Callers must provide a locked vnode 809 * instance. 810 */ 811int 812change_dir(struct vnode *vp, struct thread *td) 813{ 814#ifdef MAC 815 int error; 816#endif 817 818 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 819 if (vp->v_type != VDIR) 820 return (ENOTDIR); 821#ifdef MAC 822 error = mac_vnode_check_chdir(td->td_ucred, vp); 823 if (error != 0) 824 return (error); 825#endif 826 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 827} 828 829static __inline void 830flags_to_rights(int flags, cap_rights_t *rightsp) 831{ 832 833 if (flags & O_EXEC) { 834 cap_rights_set(rightsp, CAP_FEXECVE); 835 } else { 836 switch ((flags & O_ACCMODE)) { 837 case O_RDONLY: 838 cap_rights_set(rightsp, CAP_READ); 839 break; 840 case O_RDWR: 841 cap_rights_set(rightsp, CAP_READ); 842 /* FALLTHROUGH */ 843 case O_WRONLY: 844 cap_rights_set(rightsp, CAP_WRITE); 845 if (!(flags & (O_APPEND | O_TRUNC))) 846 cap_rights_set(rightsp, CAP_SEEK); 847 break; 848 } 849 } 850 851 if (flags & O_CREAT) 852 cap_rights_set(rightsp, CAP_CREATE); 853 854 if (flags & O_TRUNC) 855 cap_rights_set(rightsp, CAP_FTRUNCATE); 856 857 if (flags & (O_SYNC | O_FSYNC)) 858 cap_rights_set(rightsp, CAP_FSYNC); 859 860 if (flags & (O_EXLOCK | O_SHLOCK)) 861 cap_rights_set(rightsp, CAP_FLOCK); 862} 863 864/* 865 * Check permissions, allocate an open file structure, and call the device 866 * open routine if any. 867 */ 868#ifndef _SYS_SYSPROTO_H_ 869struct open_args { 870 char *path; 871 int flags; 872 int mode; 873}; 874#endif 875int 876sys_open(struct thread *td, struct open_args *uap) 877{ 878 879 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 880 uap->flags, uap->mode)); 881} 882 883#ifndef _SYS_SYSPROTO_H_ 884struct openat_args { 885 int fd; 886 char *path; 887 int flag; 888 int mode; 889}; 890#endif 891int 892sys_openat(struct thread *td, struct openat_args *uap) 893{ 894 895 AUDIT_ARG_FD(uap->fd); 896 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 897 uap->mode)); 898} 899 900int 901kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 902 int flags, int mode) 903{ 904 struct proc *p = td->td_proc; 905 struct filedesc *fdp = p->p_fd; 906 struct file *fp; 907 struct vnode *vp; 908 struct nameidata nd; 909 cap_rights_t rights; 910 int cmode, error, indx; 911 912 indx = -1; 913 914 AUDIT_ARG_FFLAGS(flags); 915 AUDIT_ARG_MODE(mode); 916 cap_rights_init(&rights, CAP_LOOKUP); 917 flags_to_rights(flags, &rights); 918 /* 919 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 920 * may be specified. 921 */ 922 if (flags & O_EXEC) { 923 if (flags & O_ACCMODE) 924 return (EINVAL); 925 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 926 return (EINVAL); 927 } else { 928 flags = FFLAGS(flags); 929 } 930 931 /* 932 * Allocate a file structure. The descriptor to reference it 933 * is allocated and set by finstall() below. 934 */ 935 error = falloc_noinstall(td, &fp); 936 if (error != 0) 937 return (error); 938 /* 939 * An extra reference on `fp' has been held for us by 940 * falloc_noinstall(). 941 */ 942 /* Set the flags early so the finit in devfs can pick them up. */ 943 fp->f_flag = flags & FMASK; 944 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 945 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 946 &rights, td); 947 td->td_dupfd = -1; /* XXX check for fdopen */ 948 error = vn_open(&nd, &flags, cmode, fp); 949 if (error != 0) { 950 /* 951 * If the vn_open replaced the method vector, something 952 * wonderous happened deep below and we just pass it up 953 * pretending we know what we do. 954 */ 955 if (error == ENXIO && fp->f_ops != &badfileops) 956 goto success; 957 958 /* 959 * Handle special fdopen() case. bleh. 960 * 961 * Don't do this for relative (capability) lookups; we don't 962 * understand exactly what would happen, and we don't think 963 * that it ever should. 964 */ 965 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 966 (error == ENODEV || error == ENXIO) && 967 td->td_dupfd >= 0) { 968 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 969 &indx); 970 if (error == 0) 971 goto success; 972 } 973 974 goto bad; 975 } 976 td->td_dupfd = 0; 977 NDFREE(&nd, NDF_ONLY_PNBUF); 978 vp = nd.ni_vp; 979 980 /* 981 * Store the vnode, for any f_type. Typically, the vnode use 982 * count is decremented by direct call to vn_closefile() for 983 * files that switched type in the cdevsw fdopen() method. 984 */ 985 fp->f_vnode = vp; 986 /* 987 * If the file wasn't claimed by devfs bind it to the normal 988 * vnode operations here. 989 */ 990 if (fp->f_ops == &badfileops) { 991 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 992 fp->f_seqcount = 1; 993 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 994 DTYPE_VNODE, vp, &vnops); 995 } 996 997 VOP_UNLOCK(vp, 0); 998 if (flags & O_TRUNC) { 999 error = fo_truncate(fp, 0, td->td_ucred, td); 1000 if (error != 0) 1001 goto bad; 1002 } 1003success: 1004 /* 1005 * If we haven't already installed the FD (for dupfdopen), do so now. 1006 */ 1007 if (indx == -1) { 1008 struct filecaps *fcaps; 1009 1010#ifdef CAPABILITIES 1011 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1012 fcaps = &nd.ni_filecaps; 1013 else 1014#endif 1015 fcaps = NULL; 1016 error = finstall(td, fp, &indx, flags, fcaps); 1017 /* On success finstall() consumes fcaps. */ 1018 if (error != 0) { 1019 filecaps_free(&nd.ni_filecaps); 1020 goto bad; 1021 } 1022 } else { 1023 filecaps_free(&nd.ni_filecaps); 1024 } 1025 1026 /* 1027 * Release our private reference, leaving the one associated with 1028 * the descriptor table intact. 1029 */ 1030 fdrop(fp, td); 1031 td->td_retval[0] = indx; 1032 return (0); 1033bad: 1034 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1035 fdrop(fp, td); 1036 return (error); 1037} 1038 1039#ifdef COMPAT_43 1040/* 1041 * Create a file. 1042 */ 1043#ifndef _SYS_SYSPROTO_H_ 1044struct ocreat_args { 1045 char *path; 1046 int mode; 1047}; 1048#endif 1049int 1050ocreat(struct thread *td, struct ocreat_args *uap) 1051{ 1052 1053 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1054 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1055} 1056#endif /* COMPAT_43 */ 1057 1058/* 1059 * Create a special file. 1060 */ 1061#ifndef _SYS_SYSPROTO_H_ 1062struct mknod_args { 1063 char *path; 1064 int mode; 1065 int dev; 1066}; 1067#endif 1068int 1069sys_mknod(struct thread *td, struct mknod_args *uap) 1070{ 1071 1072 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1073 uap->mode, uap->dev)); 1074} 1075 1076#ifndef _SYS_SYSPROTO_H_ 1077struct mknodat_args { 1078 int fd; 1079 char *path; 1080 mode_t mode; 1081 dev_t dev; 1082}; 1083#endif 1084int 1085sys_mknodat(struct thread *td, struct mknodat_args *uap) 1086{ 1087 1088 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1089 uap->dev)); 1090} 1091 1092int 1093kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1094 int mode, int dev) 1095{ 1096 struct vnode *vp; 1097 struct mount *mp; 1098 struct vattr vattr; 1099 struct nameidata nd; 1100 cap_rights_t rights; 1101 int error, whiteout = 0; 1102 1103 AUDIT_ARG_MODE(mode); 1104 AUDIT_ARG_DEV(dev); 1105 switch (mode & S_IFMT) { 1106 case S_IFCHR: 1107 case S_IFBLK: 1108 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1109 if (error == 0 && dev == VNOVAL) 1110 error = EINVAL; 1111 break; 1112 case S_IFWHT: 1113 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1114 break; 1115 case S_IFIFO: 1116 if (dev == 0) 1117 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1118 /* FALLTHROUGH */ 1119 default: 1120 error = EINVAL; 1121 break; 1122 } 1123 if (error != 0) 1124 return (error); 1125restart: 1126 bwillwrite(); 1127 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1128 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1129 td); 1130 if ((error = namei(&nd)) != 0) 1131 return (error); 1132 vp = nd.ni_vp; 1133 if (vp != NULL) { 1134 NDFREE(&nd, NDF_ONLY_PNBUF); 1135 if (vp == nd.ni_dvp) 1136 vrele(nd.ni_dvp); 1137 else 1138 vput(nd.ni_dvp); 1139 vrele(vp); 1140 return (EEXIST); 1141 } else { 1142 VATTR_NULL(&vattr); 1143 vattr.va_mode = (mode & ALLPERMS) & 1144 ~td->td_proc->p_fd->fd_cmask; 1145 vattr.va_rdev = dev; 1146 whiteout = 0; 1147 1148 switch (mode & S_IFMT) { 1149 case S_IFCHR: 1150 vattr.va_type = VCHR; 1151 break; 1152 case S_IFBLK: 1153 vattr.va_type = VBLK; 1154 break; 1155 case S_IFWHT: 1156 whiteout = 1; 1157 break; 1158 default: 1159 panic("kern_mknod: invalid mode"); 1160 } 1161 } 1162 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1163 NDFREE(&nd, NDF_ONLY_PNBUF); 1164 vput(nd.ni_dvp); 1165 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1166 return (error); 1167 goto restart; 1168 } 1169#ifdef MAC 1170 if (error == 0 && !whiteout) 1171 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1172 &nd.ni_cnd, &vattr); 1173#endif 1174 if (error == 0) { 1175 if (whiteout) 1176 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1177 else { 1178 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1179 &nd.ni_cnd, &vattr); 1180 if (error == 0) 1181 vput(nd.ni_vp); 1182 } 1183 } 1184 NDFREE(&nd, NDF_ONLY_PNBUF); 1185 vput(nd.ni_dvp); 1186 vn_finished_write(mp); 1187 return (error); 1188} 1189 1190/* 1191 * Create a named pipe. 1192 */ 1193#ifndef _SYS_SYSPROTO_H_ 1194struct mkfifo_args { 1195 char *path; 1196 int mode; 1197}; 1198#endif 1199int 1200sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1201{ 1202 1203 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1204 uap->mode)); 1205} 1206 1207#ifndef _SYS_SYSPROTO_H_ 1208struct mkfifoat_args { 1209 int fd; 1210 char *path; 1211 mode_t mode; 1212}; 1213#endif 1214int 1215sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1216{ 1217 1218 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1219 uap->mode)); 1220} 1221 1222int 1223kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1224 int mode) 1225{ 1226 struct mount *mp; 1227 struct vattr vattr; 1228 struct nameidata nd; 1229 cap_rights_t rights; 1230 int error; 1231 1232 AUDIT_ARG_MODE(mode); 1233restart: 1234 bwillwrite(); 1235 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1236 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1237 td); 1238 if ((error = namei(&nd)) != 0) 1239 return (error); 1240 if (nd.ni_vp != NULL) { 1241 NDFREE(&nd, NDF_ONLY_PNBUF); 1242 if (nd.ni_vp == nd.ni_dvp) 1243 vrele(nd.ni_dvp); 1244 else 1245 vput(nd.ni_dvp); 1246 vrele(nd.ni_vp); 1247 return (EEXIST); 1248 } 1249 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1250 NDFREE(&nd, NDF_ONLY_PNBUF); 1251 vput(nd.ni_dvp); 1252 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1253 return (error); 1254 goto restart; 1255 } 1256 VATTR_NULL(&vattr); 1257 vattr.va_type = VFIFO; 1258 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1259#ifdef MAC 1260 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1261 &vattr); 1262 if (error != 0) 1263 goto out; 1264#endif 1265 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1266 if (error == 0) 1267 vput(nd.ni_vp); 1268#ifdef MAC 1269out: 1270#endif 1271 vput(nd.ni_dvp); 1272 vn_finished_write(mp); 1273 NDFREE(&nd, NDF_ONLY_PNBUF); 1274 return (error); 1275} 1276 1277/* 1278 * Make a hard file link. 1279 */ 1280#ifndef _SYS_SYSPROTO_H_ 1281struct link_args { 1282 char *path; 1283 char *link; 1284}; 1285#endif 1286int 1287sys_link(struct thread *td, struct link_args *uap) 1288{ 1289 1290 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1291 UIO_USERSPACE, FOLLOW)); 1292} 1293 1294#ifndef _SYS_SYSPROTO_H_ 1295struct linkat_args { 1296 int fd1; 1297 char *path1; 1298 int fd2; 1299 char *path2; 1300 int flag; 1301}; 1302#endif 1303int 1304sys_linkat(struct thread *td, struct linkat_args *uap) 1305{ 1306 int flag; 1307 1308 flag = uap->flag; 1309 if (flag & ~AT_SYMLINK_FOLLOW) 1310 return (EINVAL); 1311 1312 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1313 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1314} 1315 1316int hardlink_check_uid = 0; 1317SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1318 &hardlink_check_uid, 0, 1319 "Unprivileged processes cannot create hard links to files owned by other " 1320 "users"); 1321static int hardlink_check_gid = 0; 1322SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1323 &hardlink_check_gid, 0, 1324 "Unprivileged processes cannot create hard links to files owned by other " 1325 "groups"); 1326 1327static int 1328can_hardlink(struct vnode *vp, struct ucred *cred) 1329{ 1330 struct vattr va; 1331 int error; 1332 1333 if (!hardlink_check_uid && !hardlink_check_gid) 1334 return (0); 1335 1336 error = VOP_GETATTR(vp, &va, cred); 1337 if (error != 0) 1338 return (error); 1339 1340 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1341 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1342 if (error != 0) 1343 return (error); 1344 } 1345 1346 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1347 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1348 if (error != 0) 1349 return (error); 1350 } 1351 1352 return (0); 1353} 1354 1355int 1356kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1357 enum uio_seg segflg, int follow) 1358{ 1359 struct vnode *vp; 1360 struct mount *mp; 1361 struct nameidata nd; 1362 cap_rights_t rights; 1363 int error; 1364 1365again: 1366 bwillwrite(); 1367 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1368 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1369 1370 if ((error = namei(&nd)) != 0) 1371 return (error); 1372 NDFREE(&nd, NDF_ONLY_PNBUF); 1373 vp = nd.ni_vp; 1374 if (vp->v_type == VDIR) { 1375 vrele(vp); 1376 return (EPERM); /* POSIX */ 1377 } 1378 NDINIT_ATRIGHTS(&nd, CREATE, 1379 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1380 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1381 if ((error = namei(&nd)) == 0) { 1382 if (nd.ni_vp != NULL) { 1383 NDFREE(&nd, NDF_ONLY_PNBUF); 1384 if (nd.ni_dvp == nd.ni_vp) 1385 vrele(nd.ni_dvp); 1386 else 1387 vput(nd.ni_dvp); 1388 vrele(nd.ni_vp); 1389 vrele(vp); 1390 return (EEXIST); 1391 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1392 /* 1393 * Cross-device link. No need to recheck 1394 * vp->v_type, since it cannot change, except 1395 * to VBAD. 1396 */ 1397 NDFREE(&nd, NDF_ONLY_PNBUF); 1398 vput(nd.ni_dvp); 1399 vrele(vp); 1400 return (EXDEV); 1401 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1402 error = can_hardlink(vp, td->td_ucred); 1403#ifdef MAC 1404 if (error == 0) 1405 error = mac_vnode_check_link(td->td_ucred, 1406 nd.ni_dvp, vp, &nd.ni_cnd); 1407#endif 1408 if (error != 0) { 1409 vput(vp); 1410 vput(nd.ni_dvp); 1411 NDFREE(&nd, NDF_ONLY_PNBUF); 1412 return (error); 1413 } 1414 error = vn_start_write(vp, &mp, V_NOWAIT); 1415 if (error != 0) { 1416 vput(vp); 1417 vput(nd.ni_dvp); 1418 NDFREE(&nd, NDF_ONLY_PNBUF); 1419 error = vn_start_write(NULL, &mp, 1420 V_XSLEEP | PCATCH); 1421 if (error != 0) 1422 return (error); 1423 goto again; 1424 } 1425 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1426 VOP_UNLOCK(vp, 0); 1427 vput(nd.ni_dvp); 1428 vn_finished_write(mp); 1429 NDFREE(&nd, NDF_ONLY_PNBUF); 1430 } else { 1431 vput(nd.ni_dvp); 1432 NDFREE(&nd, NDF_ONLY_PNBUF); 1433 vrele(vp); 1434 goto again; 1435 } 1436 } 1437 vrele(vp); 1438 return (error); 1439} 1440 1441/* 1442 * Make a symbolic link. 1443 */ 1444#ifndef _SYS_SYSPROTO_H_ 1445struct symlink_args { 1446 char *path; 1447 char *link; 1448}; 1449#endif 1450int 1451sys_symlink(struct thread *td, struct symlink_args *uap) 1452{ 1453 1454 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1455 UIO_USERSPACE)); 1456} 1457 1458#ifndef _SYS_SYSPROTO_H_ 1459struct symlinkat_args { 1460 char *path; 1461 int fd; 1462 char *path2; 1463}; 1464#endif 1465int 1466sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1467{ 1468 1469 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1470 UIO_USERSPACE)); 1471} 1472 1473int 1474kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1475 enum uio_seg segflg) 1476{ 1477 struct mount *mp; 1478 struct vattr vattr; 1479 char *syspath; 1480 struct nameidata nd; 1481 int error; 1482 cap_rights_t rights; 1483 1484 if (segflg == UIO_SYSSPACE) { 1485 syspath = path1; 1486 } else { 1487 syspath = uma_zalloc(namei_zone, M_WAITOK); 1488 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1489 goto out; 1490 } 1491 AUDIT_ARG_TEXT(syspath); 1492restart: 1493 bwillwrite(); 1494 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1495 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1496 td); 1497 if ((error = namei(&nd)) != 0) 1498 goto out; 1499 if (nd.ni_vp) { 1500 NDFREE(&nd, NDF_ONLY_PNBUF); 1501 if (nd.ni_vp == nd.ni_dvp) 1502 vrele(nd.ni_dvp); 1503 else 1504 vput(nd.ni_dvp); 1505 vrele(nd.ni_vp); 1506 error = EEXIST; 1507 goto out; 1508 } 1509 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1510 NDFREE(&nd, NDF_ONLY_PNBUF); 1511 vput(nd.ni_dvp); 1512 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1513 goto out; 1514 goto restart; 1515 } 1516 VATTR_NULL(&vattr); 1517 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1518#ifdef MAC 1519 vattr.va_type = VLNK; 1520 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1521 &vattr); 1522 if (error != 0) 1523 goto out2; 1524#endif 1525 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1526 if (error == 0) 1527 vput(nd.ni_vp); 1528#ifdef MAC 1529out2: 1530#endif 1531 NDFREE(&nd, NDF_ONLY_PNBUF); 1532 vput(nd.ni_dvp); 1533 vn_finished_write(mp); 1534out: 1535 if (segflg != UIO_SYSSPACE) 1536 uma_zfree(namei_zone, syspath); 1537 return (error); 1538} 1539 1540/* 1541 * Delete a whiteout from the filesystem. 1542 */ 1543#ifndef _SYS_SYSPROTO_H_ 1544struct undelete_args { 1545 char *path; 1546}; 1547#endif 1548int 1549sys_undelete(struct thread *td, struct undelete_args *uap) 1550{ 1551 struct mount *mp; 1552 struct nameidata nd; 1553 int error; 1554 1555restart: 1556 bwillwrite(); 1557 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1558 UIO_USERSPACE, uap->path, td); 1559 error = namei(&nd); 1560 if (error != 0) 1561 return (error); 1562 1563 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1564 NDFREE(&nd, NDF_ONLY_PNBUF); 1565 if (nd.ni_vp == nd.ni_dvp) 1566 vrele(nd.ni_dvp); 1567 else 1568 vput(nd.ni_dvp); 1569 if (nd.ni_vp) 1570 vrele(nd.ni_vp); 1571 return (EEXIST); 1572 } 1573 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1574 NDFREE(&nd, NDF_ONLY_PNBUF); 1575 vput(nd.ni_dvp); 1576 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1577 return (error); 1578 goto restart; 1579 } 1580 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1581 NDFREE(&nd, NDF_ONLY_PNBUF); 1582 vput(nd.ni_dvp); 1583 vn_finished_write(mp); 1584 return (error); 1585} 1586 1587/* 1588 * Delete a name from the filesystem. 1589 */ 1590#ifndef _SYS_SYSPROTO_H_ 1591struct unlink_args { 1592 char *path; 1593}; 1594#endif 1595int 1596sys_unlink(struct thread *td, struct unlink_args *uap) 1597{ 1598 1599 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1600} 1601 1602#ifndef _SYS_SYSPROTO_H_ 1603struct unlinkat_args { 1604 int fd; 1605 char *path; 1606 int flag; 1607}; 1608#endif 1609int 1610sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1611{ 1612 int flag = uap->flag; 1613 int fd = uap->fd; 1614 char *path = uap->path; 1615 1616 if (flag & ~AT_REMOVEDIR) 1617 return (EINVAL); 1618 1619 if (flag & AT_REMOVEDIR) 1620 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1621 else 1622 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1623} 1624 1625int 1626kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1627 ino_t oldinum) 1628{ 1629 struct mount *mp; 1630 struct vnode *vp; 1631 struct nameidata nd; 1632 struct stat sb; 1633 cap_rights_t rights; 1634 int error; 1635 1636restart: 1637 bwillwrite(); 1638 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1639 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1640 if ((error = namei(&nd)) != 0) 1641 return (error == EINVAL ? EPERM : error); 1642 vp = nd.ni_vp; 1643 if (vp->v_type == VDIR && oldinum == 0) { 1644 error = EPERM; /* POSIX */ 1645 } else if (oldinum != 0 && 1646 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1647 sb.st_ino != oldinum) { 1648 error = EIDRM; /* Identifier removed */ 1649 } else { 1650 /* 1651 * The root of a mounted filesystem cannot be deleted. 1652 * 1653 * XXX: can this only be a VDIR case? 1654 */ 1655 if (vp->v_vflag & VV_ROOT) 1656 error = EBUSY; 1657 } 1658 if (error == 0) { 1659 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1660 NDFREE(&nd, NDF_ONLY_PNBUF); 1661 vput(nd.ni_dvp); 1662 if (vp == nd.ni_dvp) 1663 vrele(vp); 1664 else 1665 vput(vp); 1666 if ((error = vn_start_write(NULL, &mp, 1667 V_XSLEEP | PCATCH)) != 0) 1668 return (error); 1669 goto restart; 1670 } 1671#ifdef MAC 1672 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1673 &nd.ni_cnd); 1674 if (error != 0) 1675 goto out; 1676#endif 1677 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1678 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1679#ifdef MAC 1680out: 1681#endif 1682 vn_finished_write(mp); 1683 } 1684 NDFREE(&nd, NDF_ONLY_PNBUF); 1685 vput(nd.ni_dvp); 1686 if (vp == nd.ni_dvp) 1687 vrele(vp); 1688 else 1689 vput(vp); 1690 return (error); 1691} 1692 1693/* 1694 * Reposition read/write file offset. 1695 */ 1696#ifndef _SYS_SYSPROTO_H_ 1697struct lseek_args { 1698 int fd; 1699 int pad; 1700 off_t offset; 1701 int whence; 1702}; 1703#endif 1704int 1705sys_lseek(struct thread *td, struct lseek_args *uap) 1706{ 1707 1708 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1709} 1710 1711int 1712kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1713{ 1714 struct file *fp; 1715 cap_rights_t rights; 1716 int error; 1717 1718 AUDIT_ARG_FD(fd); 1719 error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1720 if (error != 0) 1721 return (error); 1722 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1723 fo_seek(fp, offset, whence, td) : ESPIPE; 1724 fdrop(fp, td); 1725 return (error); 1726} 1727 1728#if defined(COMPAT_43) 1729/* 1730 * Reposition read/write file offset. 1731 */ 1732#ifndef _SYS_SYSPROTO_H_ 1733struct olseek_args { 1734 int fd; 1735 long offset; 1736 int whence; 1737}; 1738#endif 1739int 1740olseek(struct thread *td, struct olseek_args *uap) 1741{ 1742 1743 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1744} 1745#endif /* COMPAT_43 */ 1746 1747#if defined(COMPAT_FREEBSD6) 1748/* Version with the 'pad' argument */ 1749int 1750freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1751{ 1752 1753 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1754} 1755#endif 1756 1757/* 1758 * Check access permissions using passed credentials. 1759 */ 1760static int 1761vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1762 struct thread *td) 1763{ 1764 accmode_t accmode; 1765 int error; 1766 1767 /* Flags == 0 means only check for existence. */ 1768 if (user_flags == 0) 1769 return (0); 1770 1771 accmode = 0; 1772 if (user_flags & R_OK) 1773 accmode |= VREAD; 1774 if (user_flags & W_OK) 1775 accmode |= VWRITE; 1776 if (user_flags & X_OK) 1777 accmode |= VEXEC; 1778#ifdef MAC 1779 error = mac_vnode_check_access(cred, vp, accmode); 1780 if (error != 0) 1781 return (error); 1782#endif 1783 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1784 error = VOP_ACCESS(vp, accmode, cred, td); 1785 return (error); 1786} 1787 1788/* 1789 * Check access permissions using "real" credentials. 1790 */ 1791#ifndef _SYS_SYSPROTO_H_ 1792struct access_args { 1793 char *path; 1794 int amode; 1795}; 1796#endif 1797int 1798sys_access(struct thread *td, struct access_args *uap) 1799{ 1800 1801 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1802 0, uap->amode)); 1803} 1804 1805#ifndef _SYS_SYSPROTO_H_ 1806struct faccessat_args { 1807 int dirfd; 1808 char *path; 1809 int amode; 1810 int flag; 1811} 1812#endif 1813int 1814sys_faccessat(struct thread *td, struct faccessat_args *uap) 1815{ 1816 1817 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1818 uap->amode)); 1819} 1820 1821int 1822kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1823 int flag, int amode) 1824{ 1825 struct ucred *cred, *usecred; 1826 struct vnode *vp; 1827 struct nameidata nd; 1828 cap_rights_t rights; 1829 int error; 1830 1831 if (flag & ~AT_EACCESS) 1832 return (EINVAL); 1833 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1834 return (EINVAL); 1835 1836 /* 1837 * Create and modify a temporary credential instead of one that 1838 * is potentially shared (if we need one). 1839 */ 1840 cred = td->td_ucred; 1841 if ((flag & AT_EACCESS) == 0 && 1842 ((cred->cr_uid != cred->cr_ruid || 1843 cred->cr_rgid != cred->cr_groups[0]))) { 1844 usecred = crdup(cred); 1845 usecred->cr_uid = cred->cr_ruid; 1846 usecred->cr_groups[0] = cred->cr_rgid; 1847 td->td_ucred = usecred; 1848 } else 1849 usecred = cred; 1850 AUDIT_ARG_VALUE(amode); 1851 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1852 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1853 td); 1854 if ((error = namei(&nd)) != 0) 1855 goto out; 1856 vp = nd.ni_vp; 1857 1858 error = vn_access(vp, amode, usecred, td); 1859 NDFREE(&nd, NDF_ONLY_PNBUF); 1860 vput(vp); 1861out: 1862 if (usecred != cred) { 1863 td->td_ucred = cred; 1864 crfree(usecred); 1865 } 1866 return (error); 1867} 1868 1869/* 1870 * Check access permissions using "effective" credentials. 1871 */ 1872#ifndef _SYS_SYSPROTO_H_ 1873struct eaccess_args { 1874 char *path; 1875 int amode; 1876}; 1877#endif 1878int 1879sys_eaccess(struct thread *td, struct eaccess_args *uap) 1880{ 1881 1882 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1883 AT_EACCESS, uap->amode)); 1884} 1885 1886#if defined(COMPAT_43) 1887/* 1888 * Get file status; this version follows links. 1889 */ 1890#ifndef _SYS_SYSPROTO_H_ 1891struct ostat_args { 1892 char *path; 1893 struct ostat *ub; 1894}; 1895#endif 1896int 1897ostat(struct thread *td, struct ostat_args *uap) 1898{ 1899 struct stat sb; 1900 struct ostat osb; 1901 int error; 1902 1903 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1904 &sb, NULL); 1905 if (error != 0) 1906 return (error); 1907 cvtstat(&sb, &osb); 1908 return (copyout(&osb, uap->ub, sizeof (osb))); 1909} 1910 1911/* 1912 * Get file status; this version does not follow links. 1913 */ 1914#ifndef _SYS_SYSPROTO_H_ 1915struct olstat_args { 1916 char *path; 1917 struct ostat *ub; 1918}; 1919#endif 1920int 1921olstat(struct thread *td, struct olstat_args *uap) 1922{ 1923 struct stat sb; 1924 struct ostat osb; 1925 int error; 1926 1927 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 1928 UIO_USERSPACE, &sb, NULL); 1929 if (error != 0) 1930 return (error); 1931 cvtstat(&sb, &osb); 1932 return (copyout(&osb, uap->ub, sizeof (osb))); 1933} 1934 1935/* 1936 * Convert from an old to a new stat structure. 1937 */ 1938void 1939cvtstat(struct stat *st, struct ostat *ost) 1940{ 1941 1942 bzero(ost, sizeof(*ost)); 1943 ost->st_dev = st->st_dev; 1944 ost->st_ino = st->st_ino; 1945 ost->st_mode = st->st_mode; 1946 ost->st_nlink = st->st_nlink; 1947 ost->st_uid = st->st_uid; 1948 ost->st_gid = st->st_gid; 1949 ost->st_rdev = st->st_rdev; 1950 if (st->st_size < (quad_t)1 << 32) 1951 ost->st_size = st->st_size; 1952 else 1953 ost->st_size = -2; 1954 ost->st_atim = st->st_atim; 1955 ost->st_mtim = st->st_mtim; 1956 ost->st_ctim = st->st_ctim; 1957 ost->st_blksize = st->st_blksize; 1958 ost->st_blocks = st->st_blocks; 1959 ost->st_flags = st->st_flags; 1960 ost->st_gen = st->st_gen; 1961} 1962#endif /* COMPAT_43 */ 1963 1964/* 1965 * Get file status; this version follows links. 1966 */ 1967#ifndef _SYS_SYSPROTO_H_ 1968struct stat_args { 1969 char *path; 1970 struct stat *ub; 1971}; 1972#endif 1973int 1974sys_stat(struct thread *td, struct stat_args *uap) 1975{ 1976 struct stat sb; 1977 int error; 1978 1979 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1980 &sb, NULL); 1981 if (error == 0) 1982 error = copyout(&sb, uap->ub, sizeof (sb)); 1983 return (error); 1984} 1985 1986#ifndef _SYS_SYSPROTO_H_ 1987struct fstatat_args { 1988 int fd; 1989 char *path; 1990 struct stat *buf; 1991 int flag; 1992} 1993#endif 1994int 1995sys_fstatat(struct thread *td, struct fstatat_args *uap) 1996{ 1997 struct stat sb; 1998 int error; 1999 2000 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2001 UIO_USERSPACE, &sb, NULL); 2002 if (error == 0) 2003 error = copyout(&sb, uap->buf, sizeof (sb)); 2004 return (error); 2005} 2006 2007int 2008kern_statat(struct thread *td, int flag, int fd, char *path, 2009 enum uio_seg pathseg, struct stat *sbp, 2010 void (*hook)(struct vnode *vp, struct stat *sbp)) 2011{ 2012 struct nameidata nd; 2013 struct stat sb; 2014 cap_rights_t rights; 2015 int error; 2016 2017 if (flag & ~AT_SYMLINK_NOFOLLOW) 2018 return (EINVAL); 2019 2020 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2021 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2022 cap_rights_init(&rights, CAP_FSTAT), td); 2023 2024 if ((error = namei(&nd)) != 0) 2025 return (error); 2026 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2027 if (error == 0) { 2028 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2029 if (S_ISREG(sb.st_mode)) 2030 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2031 if (__predict_false(hook != NULL)) 2032 hook(nd.ni_vp, &sb); 2033 } 2034 NDFREE(&nd, NDF_ONLY_PNBUF); 2035 vput(nd.ni_vp); 2036 if (error != 0) 2037 return (error); 2038 *sbp = sb; 2039#ifdef KTRACE 2040 if (KTRPOINT(td, KTR_STRUCT)) 2041 ktrstat(&sb); 2042#endif 2043 return (0); 2044} 2045 2046/* 2047 * Get file status; this version does not follow links. 2048 */ 2049#ifndef _SYS_SYSPROTO_H_ 2050struct lstat_args { 2051 char *path; 2052 struct stat *ub; 2053}; 2054#endif 2055int 2056sys_lstat(struct thread *td, struct lstat_args *uap) 2057{ 2058 struct stat sb; 2059 int error; 2060 2061 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2062 UIO_USERSPACE, &sb, NULL); 2063 if (error == 0) 2064 error = copyout(&sb, uap->ub, sizeof (sb)); 2065 return (error); 2066} 2067 2068/* 2069 * Implementation of the NetBSD [l]stat() functions. 2070 */ 2071void 2072cvtnstat( struct stat *sb, struct nstat *nsb) 2073{ 2074 2075 bzero(nsb, sizeof *nsb); 2076 nsb->st_dev = sb->st_dev; 2077 nsb->st_ino = sb->st_ino; 2078 nsb->st_mode = sb->st_mode; 2079 nsb->st_nlink = sb->st_nlink; 2080 nsb->st_uid = sb->st_uid; 2081 nsb->st_gid = sb->st_gid; 2082 nsb->st_rdev = sb->st_rdev; 2083 nsb->st_atim = sb->st_atim; 2084 nsb->st_mtim = sb->st_mtim; 2085 nsb->st_ctim = sb->st_ctim; 2086 nsb->st_size = sb->st_size; 2087 nsb->st_blocks = sb->st_blocks; 2088 nsb->st_blksize = sb->st_blksize; 2089 nsb->st_flags = sb->st_flags; 2090 nsb->st_gen = sb->st_gen; 2091 nsb->st_birthtim = sb->st_birthtim; 2092} 2093 2094#ifndef _SYS_SYSPROTO_H_ 2095struct nstat_args { 2096 char *path; 2097 struct nstat *ub; 2098}; 2099#endif 2100int 2101sys_nstat(struct thread *td, struct nstat_args *uap) 2102{ 2103 struct stat sb; 2104 struct nstat nsb; 2105 int error; 2106 2107 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2108 &sb, NULL); 2109 if (error != 0) 2110 return (error); 2111 cvtnstat(&sb, &nsb); 2112 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2113} 2114 2115/* 2116 * NetBSD lstat. Get file status; this version does not follow links. 2117 */ 2118#ifndef _SYS_SYSPROTO_H_ 2119struct lstat_args { 2120 char *path; 2121 struct stat *ub; 2122}; 2123#endif 2124int 2125sys_nlstat(struct thread *td, struct nlstat_args *uap) 2126{ 2127 struct stat sb; 2128 struct nstat nsb; 2129 int error; 2130 2131 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2132 UIO_USERSPACE, &sb, NULL); 2133 if (error != 0) 2134 return (error); 2135 cvtnstat(&sb, &nsb); 2136 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2137} 2138 2139/* 2140 * Get configurable pathname variables. 2141 */ 2142#ifndef _SYS_SYSPROTO_H_ 2143struct pathconf_args { 2144 char *path; 2145 int name; 2146}; 2147#endif 2148int 2149sys_pathconf(struct thread *td, struct pathconf_args *uap) 2150{ 2151 2152 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2153} 2154 2155#ifndef _SYS_SYSPROTO_H_ 2156struct lpathconf_args { 2157 char *path; 2158 int name; 2159}; 2160#endif 2161int 2162sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2163{ 2164 2165 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2166 NOFOLLOW)); 2167} 2168 2169int 2170kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2171 u_long flags) 2172{ 2173 struct nameidata nd; 2174 int error; 2175 2176 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2177 pathseg, path, td); 2178 if ((error = namei(&nd)) != 0) 2179 return (error); 2180 NDFREE(&nd, NDF_ONLY_PNBUF); 2181 2182 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2183 vput(nd.ni_vp); 2184 return (error); 2185} 2186 2187/* 2188 * Return target name of a symbolic link. 2189 */ 2190#ifndef _SYS_SYSPROTO_H_ 2191struct readlink_args { 2192 char *path; 2193 char *buf; 2194 size_t count; 2195}; 2196#endif 2197int 2198sys_readlink(struct thread *td, struct readlink_args *uap) 2199{ 2200 2201 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2202 uap->buf, UIO_USERSPACE, uap->count)); 2203} 2204#ifndef _SYS_SYSPROTO_H_ 2205struct readlinkat_args { 2206 int fd; 2207 char *path; 2208 char *buf; 2209 size_t bufsize; 2210}; 2211#endif 2212int 2213sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2214{ 2215 2216 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2217 uap->buf, UIO_USERSPACE, uap->bufsize)); 2218} 2219 2220int 2221kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2222 char *buf, enum uio_seg bufseg, size_t count) 2223{ 2224 struct vnode *vp; 2225 struct iovec aiov; 2226 struct uio auio; 2227 struct nameidata nd; 2228 int error; 2229 2230 if (count > IOSIZE_MAX) 2231 return (EINVAL); 2232 2233 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2234 pathseg, path, fd, td); 2235 2236 if ((error = namei(&nd)) != 0) 2237 return (error); 2238 NDFREE(&nd, NDF_ONLY_PNBUF); 2239 vp = nd.ni_vp; 2240#ifdef MAC 2241 error = mac_vnode_check_readlink(td->td_ucred, vp); 2242 if (error != 0) { 2243 vput(vp); 2244 return (error); 2245 } 2246#endif 2247 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2248 error = EINVAL; 2249 else { 2250 aiov.iov_base = buf; 2251 aiov.iov_len = count; 2252 auio.uio_iov = &aiov; 2253 auio.uio_iovcnt = 1; 2254 auio.uio_offset = 0; 2255 auio.uio_rw = UIO_READ; 2256 auio.uio_segflg = bufseg; 2257 auio.uio_td = td; 2258 auio.uio_resid = count; 2259 error = VOP_READLINK(vp, &auio, td->td_ucred); 2260 td->td_retval[0] = count - auio.uio_resid; 2261 } 2262 vput(vp); 2263 return (error); 2264} 2265 2266/* 2267 * Common implementation code for chflags() and fchflags(). 2268 */ 2269static int 2270setfflags(struct thread *td, struct vnode *vp, u_long flags) 2271{ 2272 struct mount *mp; 2273 struct vattr vattr; 2274 int error; 2275 2276 /* We can't support the value matching VNOVAL. */ 2277 if (flags == VNOVAL) 2278 return (EOPNOTSUPP); 2279 2280 /* 2281 * Prevent non-root users from setting flags on devices. When 2282 * a device is reused, users can retain ownership of the device 2283 * if they are allowed to set flags and programs assume that 2284 * chown can't fail when done as root. 2285 */ 2286 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2287 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2288 if (error != 0) 2289 return (error); 2290 } 2291 2292 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2293 return (error); 2294 VATTR_NULL(&vattr); 2295 vattr.va_flags = flags; 2296 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2297#ifdef MAC 2298 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2299 if (error == 0) 2300#endif 2301 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2302 VOP_UNLOCK(vp, 0); 2303 vn_finished_write(mp); 2304 return (error); 2305} 2306 2307/* 2308 * Change flags of a file given a path name. 2309 */ 2310#ifndef _SYS_SYSPROTO_H_ 2311struct chflags_args { 2312 const char *path; 2313 u_long flags; 2314}; 2315#endif 2316int 2317sys_chflags(struct thread *td, struct chflags_args *uap) 2318{ 2319 2320 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2321 uap->flags, 0)); 2322} 2323 2324#ifndef _SYS_SYSPROTO_H_ 2325struct chflagsat_args { 2326 int fd; 2327 const char *path; 2328 u_long flags; 2329 int atflag; 2330} 2331#endif 2332int 2333sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2334{ 2335 int fd = uap->fd; 2336 const char *path = uap->path; 2337 u_long flags = uap->flags; 2338 int atflag = uap->atflag; 2339 2340 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2341 return (EINVAL); 2342 2343 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2344} 2345 2346/* 2347 * Same as chflags() but doesn't follow symlinks. 2348 */ 2349#ifndef _SYS_SYSPROTO_H_ 2350struct lchflags_args { 2351 const char *path; 2352 u_long flags; 2353}; 2354#endif 2355int 2356sys_lchflags(struct thread *td, struct lchflags_args *uap) 2357{ 2358 2359 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2360 uap->flags, AT_SYMLINK_NOFOLLOW)); 2361} 2362 2363static int 2364kern_chflagsat(struct thread *td, int fd, const char *path, 2365 enum uio_seg pathseg, u_long flags, int atflag) 2366{ 2367 struct nameidata nd; 2368 cap_rights_t rights; 2369 int error, follow; 2370 2371 AUDIT_ARG_FFLAGS(flags); 2372 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2373 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2374 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2375 if ((error = namei(&nd)) != 0) 2376 return (error); 2377 NDFREE(&nd, NDF_ONLY_PNBUF); 2378 error = setfflags(td, nd.ni_vp, flags); 2379 vrele(nd.ni_vp); 2380 return (error); 2381} 2382 2383/* 2384 * Change flags of a file given a file descriptor. 2385 */ 2386#ifndef _SYS_SYSPROTO_H_ 2387struct fchflags_args { 2388 int fd; 2389 u_long flags; 2390}; 2391#endif 2392int 2393sys_fchflags(struct thread *td, struct fchflags_args *uap) 2394{ 2395 struct file *fp; 2396 cap_rights_t rights; 2397 int error; 2398 2399 AUDIT_ARG_FD(uap->fd); 2400 AUDIT_ARG_FFLAGS(uap->flags); 2401 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2402 &fp); 2403 if (error != 0) 2404 return (error); 2405#ifdef AUDIT 2406 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2407 AUDIT_ARG_VNODE1(fp->f_vnode); 2408 VOP_UNLOCK(fp->f_vnode, 0); 2409#endif 2410 error = setfflags(td, fp->f_vnode, uap->flags); 2411 fdrop(fp, td); 2412 return (error); 2413} 2414 2415/* 2416 * Common implementation code for chmod(), lchmod() and fchmod(). 2417 */ 2418int 2419setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2420{ 2421 struct mount *mp; 2422 struct vattr vattr; 2423 int error; 2424 2425 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2426 return (error); 2427 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2428 VATTR_NULL(&vattr); 2429 vattr.va_mode = mode & ALLPERMS; 2430#ifdef MAC 2431 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2432 if (error == 0) 2433#endif 2434 error = VOP_SETATTR(vp, &vattr, cred); 2435 VOP_UNLOCK(vp, 0); 2436 vn_finished_write(mp); 2437 return (error); 2438} 2439 2440/* 2441 * Change mode of a file given path name. 2442 */ 2443#ifndef _SYS_SYSPROTO_H_ 2444struct chmod_args { 2445 char *path; 2446 int mode; 2447}; 2448#endif 2449int 2450sys_chmod(struct thread *td, struct chmod_args *uap) 2451{ 2452 2453 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2454 uap->mode, 0)); 2455} 2456 2457#ifndef _SYS_SYSPROTO_H_ 2458struct fchmodat_args { 2459 int dirfd; 2460 char *path; 2461 mode_t mode; 2462 int flag; 2463} 2464#endif 2465int 2466sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2467{ 2468 int flag = uap->flag; 2469 int fd = uap->fd; 2470 char *path = uap->path; 2471 mode_t mode = uap->mode; 2472 2473 if (flag & ~AT_SYMLINK_NOFOLLOW) 2474 return (EINVAL); 2475 2476 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2477} 2478 2479/* 2480 * Change mode of a file given path name (don't follow links.) 2481 */ 2482#ifndef _SYS_SYSPROTO_H_ 2483struct lchmod_args { 2484 char *path; 2485 int mode; 2486}; 2487#endif 2488int 2489sys_lchmod(struct thread *td, struct lchmod_args *uap) 2490{ 2491 2492 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2493 uap->mode, AT_SYMLINK_NOFOLLOW)); 2494} 2495 2496int 2497kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2498 mode_t mode, int flag) 2499{ 2500 struct nameidata nd; 2501 cap_rights_t rights; 2502 int error, follow; 2503 2504 AUDIT_ARG_MODE(mode); 2505 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2506 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2507 cap_rights_init(&rights, CAP_FCHMOD), td); 2508 if ((error = namei(&nd)) != 0) 2509 return (error); 2510 NDFREE(&nd, NDF_ONLY_PNBUF); 2511 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2512 vrele(nd.ni_vp); 2513 return (error); 2514} 2515 2516/* 2517 * Change mode of a file given a file descriptor. 2518 */ 2519#ifndef _SYS_SYSPROTO_H_ 2520struct fchmod_args { 2521 int fd; 2522 int mode; 2523}; 2524#endif 2525int 2526sys_fchmod(struct thread *td, struct fchmod_args *uap) 2527{ 2528 struct file *fp; 2529 cap_rights_t rights; 2530 int error; 2531 2532 AUDIT_ARG_FD(uap->fd); 2533 AUDIT_ARG_MODE(uap->mode); 2534 2535 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2536 if (error != 0) 2537 return (error); 2538 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2539 fdrop(fp, td); 2540 return (error); 2541} 2542 2543/* 2544 * Common implementation for chown(), lchown(), and fchown() 2545 */ 2546int 2547setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2548 gid_t gid) 2549{ 2550 struct mount *mp; 2551 struct vattr vattr; 2552 int error; 2553 2554 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2555 return (error); 2556 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2557 VATTR_NULL(&vattr); 2558 vattr.va_uid = uid; 2559 vattr.va_gid = gid; 2560#ifdef MAC 2561 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2562 vattr.va_gid); 2563 if (error == 0) 2564#endif 2565 error = VOP_SETATTR(vp, &vattr, cred); 2566 VOP_UNLOCK(vp, 0); 2567 vn_finished_write(mp); 2568 return (error); 2569} 2570 2571/* 2572 * Set ownership given a path name. 2573 */ 2574#ifndef _SYS_SYSPROTO_H_ 2575struct chown_args { 2576 char *path; 2577 int uid; 2578 int gid; 2579}; 2580#endif 2581int 2582sys_chown(struct thread *td, struct chown_args *uap) 2583{ 2584 2585 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2586 uap->gid, 0)); 2587} 2588 2589#ifndef _SYS_SYSPROTO_H_ 2590struct fchownat_args { 2591 int fd; 2592 const char * path; 2593 uid_t uid; 2594 gid_t gid; 2595 int flag; 2596}; 2597#endif 2598int 2599sys_fchownat(struct thread *td, struct fchownat_args *uap) 2600{ 2601 int flag; 2602 2603 flag = uap->flag; 2604 if (flag & ~AT_SYMLINK_NOFOLLOW) 2605 return (EINVAL); 2606 2607 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2608 uap->gid, uap->flag)); 2609} 2610 2611int 2612kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2613 int uid, int gid, int flag) 2614{ 2615 struct nameidata nd; 2616 cap_rights_t rights; 2617 int error, follow; 2618 2619 AUDIT_ARG_OWNER(uid, gid); 2620 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2621 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2622 cap_rights_init(&rights, CAP_FCHOWN), td); 2623 2624 if ((error = namei(&nd)) != 0) 2625 return (error); 2626 NDFREE(&nd, NDF_ONLY_PNBUF); 2627 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2628 vrele(nd.ni_vp); 2629 return (error); 2630} 2631 2632/* 2633 * Set ownership given a path name, do not cross symlinks. 2634 */ 2635#ifndef _SYS_SYSPROTO_H_ 2636struct lchown_args { 2637 char *path; 2638 int uid; 2639 int gid; 2640}; 2641#endif 2642int 2643sys_lchown(struct thread *td, struct lchown_args *uap) 2644{ 2645 2646 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2647 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2648} 2649 2650/* 2651 * Set ownership given a file descriptor. 2652 */ 2653#ifndef _SYS_SYSPROTO_H_ 2654struct fchown_args { 2655 int fd; 2656 int uid; 2657 int gid; 2658}; 2659#endif 2660int 2661sys_fchown(struct thread *td, struct fchown_args *uap) 2662{ 2663 struct file *fp; 2664 cap_rights_t rights; 2665 int error; 2666 2667 AUDIT_ARG_FD(uap->fd); 2668 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2669 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2670 if (error != 0) 2671 return (error); 2672 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2673 fdrop(fp, td); 2674 return (error); 2675} 2676 2677/* 2678 * Common implementation code for utimes(), lutimes(), and futimes(). 2679 */ 2680static int 2681getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2682 struct timespec *tsp) 2683{ 2684 struct timeval tv[2]; 2685 const struct timeval *tvp; 2686 int error; 2687 2688 if (usrtvp == NULL) { 2689 vfs_timestamp(&tsp[0]); 2690 tsp[1] = tsp[0]; 2691 } else { 2692 if (tvpseg == UIO_SYSSPACE) { 2693 tvp = usrtvp; 2694 } else { 2695 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2696 return (error); 2697 tvp = tv; 2698 } 2699 2700 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2701 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2702 return (EINVAL); 2703 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2704 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2705 } 2706 return (0); 2707} 2708 2709/* 2710 * Common implementation code for futimens(), utimensat(). 2711 */ 2712#define UTIMENS_NULL 0x1 2713#define UTIMENS_EXIT 0x2 2714static int 2715getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2716 struct timespec *tsp, int *retflags) 2717{ 2718 struct timespec tsnow; 2719 int error; 2720 2721 vfs_timestamp(&tsnow); 2722 *retflags = 0; 2723 if (usrtsp == NULL) { 2724 tsp[0] = tsnow; 2725 tsp[1] = tsnow; 2726 *retflags |= UTIMENS_NULL; 2727 return (0); 2728 } 2729 if (tspseg == UIO_SYSSPACE) { 2730 tsp[0] = usrtsp[0]; 2731 tsp[1] = usrtsp[1]; 2732 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2733 return (error); 2734 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2735 *retflags |= UTIMENS_EXIT; 2736 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2737 *retflags |= UTIMENS_NULL; 2738 if (tsp[0].tv_nsec == UTIME_OMIT) 2739 tsp[0].tv_sec = VNOVAL; 2740 else if (tsp[0].tv_nsec == UTIME_NOW) 2741 tsp[0] = tsnow; 2742 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2743 return (EINVAL); 2744 if (tsp[1].tv_nsec == UTIME_OMIT) 2745 tsp[1].tv_sec = VNOVAL; 2746 else if (tsp[1].tv_nsec == UTIME_NOW) 2747 tsp[1] = tsnow; 2748 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2749 return (EINVAL); 2750 2751 return (0); 2752} 2753 2754/* 2755 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2756 * and utimensat(). 2757 */ 2758static int 2759setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 2760 int numtimes, int nullflag) 2761{ 2762 struct mount *mp; 2763 struct vattr vattr; 2764 int error, setbirthtime; 2765 2766 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2767 return (error); 2768 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2769 setbirthtime = 0; 2770 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2771 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2772 setbirthtime = 1; 2773 VATTR_NULL(&vattr); 2774 vattr.va_atime = ts[0]; 2775 vattr.va_mtime = ts[1]; 2776 if (setbirthtime) 2777 vattr.va_birthtime = ts[1]; 2778 if (numtimes > 2) 2779 vattr.va_birthtime = ts[2]; 2780 if (nullflag) 2781 vattr.va_vaflags |= VA_UTIMES_NULL; 2782#ifdef MAC 2783 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 2784 vattr.va_mtime); 2785#endif 2786 if (error == 0) 2787 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2788 VOP_UNLOCK(vp, 0); 2789 vn_finished_write(mp); 2790 return (error); 2791} 2792 2793/* 2794 * Set the access and modification times of a file. 2795 */ 2796#ifndef _SYS_SYSPROTO_H_ 2797struct utimes_args { 2798 char *path; 2799 struct timeval *tptr; 2800}; 2801#endif 2802int 2803sys_utimes(struct thread *td, struct utimes_args *uap) 2804{ 2805 2806 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2807 uap->tptr, UIO_USERSPACE)); 2808} 2809 2810#ifndef _SYS_SYSPROTO_H_ 2811struct futimesat_args { 2812 int fd; 2813 const char * path; 2814 const struct timeval * times; 2815}; 2816#endif 2817int 2818sys_futimesat(struct thread *td, struct futimesat_args *uap) 2819{ 2820 2821 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 2822 uap->times, UIO_USERSPACE)); 2823} 2824 2825int 2826kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2827 struct timeval *tptr, enum uio_seg tptrseg) 2828{ 2829 struct nameidata nd; 2830 struct timespec ts[2]; 2831 cap_rights_t rights; 2832 int error; 2833 2834 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2835 return (error); 2836 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 2837 cap_rights_init(&rights, CAP_FUTIMES), td); 2838 2839 if ((error = namei(&nd)) != 0) 2840 return (error); 2841 NDFREE(&nd, NDF_ONLY_PNBUF); 2842 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2843 vrele(nd.ni_vp); 2844 return (error); 2845} 2846 2847/* 2848 * Set the access and modification times of a file. 2849 */ 2850#ifndef _SYS_SYSPROTO_H_ 2851struct lutimes_args { 2852 char *path; 2853 struct timeval *tptr; 2854}; 2855#endif 2856int 2857sys_lutimes(struct thread *td, struct lutimes_args *uap) 2858{ 2859 2860 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 2861 UIO_USERSPACE)); 2862} 2863 2864int 2865kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 2866 struct timeval *tptr, enum uio_seg tptrseg) 2867{ 2868 struct timespec ts[2]; 2869 struct nameidata nd; 2870 int error; 2871 2872 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2873 return (error); 2874 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 2875 if ((error = namei(&nd)) != 0) 2876 return (error); 2877 NDFREE(&nd, NDF_ONLY_PNBUF); 2878 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2879 vrele(nd.ni_vp); 2880 return (error); 2881} 2882 2883/* 2884 * Set the access and modification times of a file. 2885 */ 2886#ifndef _SYS_SYSPROTO_H_ 2887struct futimes_args { 2888 int fd; 2889 struct timeval *tptr; 2890}; 2891#endif 2892int 2893sys_futimes(struct thread *td, struct futimes_args *uap) 2894{ 2895 2896 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 2897} 2898 2899int 2900kern_futimes(struct thread *td, int fd, struct timeval *tptr, 2901 enum uio_seg tptrseg) 2902{ 2903 struct timespec ts[2]; 2904 struct file *fp; 2905 cap_rights_t rights; 2906 int error; 2907 2908 AUDIT_ARG_FD(fd); 2909 error = getutimes(tptr, tptrseg, ts); 2910 if (error != 0) 2911 return (error); 2912 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2913 if (error != 0) 2914 return (error); 2915#ifdef AUDIT 2916 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2917 AUDIT_ARG_VNODE1(fp->f_vnode); 2918 VOP_UNLOCK(fp->f_vnode, 0); 2919#endif 2920 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 2921 fdrop(fp, td); 2922 return (error); 2923} 2924 2925int 2926sys_futimens(struct thread *td, struct futimens_args *uap) 2927{ 2928 2929 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 2930} 2931 2932int 2933kern_futimens(struct thread *td, int fd, struct timespec *tptr, 2934 enum uio_seg tptrseg) 2935{ 2936 struct timespec ts[2]; 2937 struct file *fp; 2938 cap_rights_t rights; 2939 int error, flags; 2940 2941 AUDIT_ARG_FD(fd); 2942 error = getutimens(tptr, tptrseg, ts, &flags); 2943 if (error != 0) 2944 return (error); 2945 if (flags & UTIMENS_EXIT) 2946 return (0); 2947 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2948 if (error != 0) 2949 return (error); 2950#ifdef AUDIT 2951 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2952 AUDIT_ARG_VNODE1(fp->f_vnode); 2953 VOP_UNLOCK(fp->f_vnode, 0); 2954#endif 2955 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 2956 fdrop(fp, td); 2957 return (error); 2958} 2959 2960int 2961sys_utimensat(struct thread *td, struct utimensat_args *uap) 2962{ 2963 2964 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 2965 uap->times, UIO_USERSPACE, uap->flag)); 2966} 2967 2968int 2969kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2970 struct timespec *tptr, enum uio_seg tptrseg, int flag) 2971{ 2972 struct nameidata nd; 2973 struct timespec ts[2]; 2974 cap_rights_t rights; 2975 int error, flags; 2976 2977 if (flag & ~AT_SYMLINK_NOFOLLOW) 2978 return (EINVAL); 2979 2980 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 2981 return (error); 2982 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2983 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 2984 cap_rights_init(&rights, CAP_FUTIMES), td); 2985 if ((error = namei(&nd)) != 0) 2986 return (error); 2987 /* 2988 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 2989 * POSIX states: 2990 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 2991 * "Search permission is denied by a component of the path prefix." 2992 */ 2993 NDFREE(&nd, NDF_ONLY_PNBUF); 2994 if ((flags & UTIMENS_EXIT) == 0) 2995 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 2996 vrele(nd.ni_vp); 2997 return (error); 2998} 2999 3000/* 3001 * Truncate a file given its path name. 3002 */ 3003#ifndef _SYS_SYSPROTO_H_ 3004struct truncate_args { 3005 char *path; 3006 int pad; 3007 off_t length; 3008}; 3009#endif 3010int 3011sys_truncate(struct thread *td, struct truncate_args *uap) 3012{ 3013 3014 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3015} 3016 3017int 3018kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3019{ 3020 struct mount *mp; 3021 struct vnode *vp; 3022 void *rl_cookie; 3023 struct vattr vattr; 3024 struct nameidata nd; 3025 int error; 3026 3027 if (length < 0) 3028 return(EINVAL); 3029 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3030 if ((error = namei(&nd)) != 0) 3031 return (error); 3032 vp = nd.ni_vp; 3033 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3034 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3035 vn_rangelock_unlock(vp, rl_cookie); 3036 vrele(vp); 3037 return (error); 3038 } 3039 NDFREE(&nd, NDF_ONLY_PNBUF); 3040 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3041 if (vp->v_type == VDIR) 3042 error = EISDIR; 3043#ifdef MAC 3044 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3045 } 3046#endif 3047 else if ((error = vn_writechk(vp)) == 0 && 3048 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3049 VATTR_NULL(&vattr); 3050 vattr.va_size = length; 3051 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3052 } 3053 VOP_UNLOCK(vp, 0); 3054 vn_finished_write(mp); 3055 vn_rangelock_unlock(vp, rl_cookie); 3056 vrele(vp); 3057 return (error); 3058} 3059 3060#if defined(COMPAT_43) 3061/* 3062 * Truncate a file given its path name. 3063 */ 3064#ifndef _SYS_SYSPROTO_H_ 3065struct otruncate_args { 3066 char *path; 3067 long length; 3068}; 3069#endif 3070int 3071otruncate(struct thread *td, struct otruncate_args *uap) 3072{ 3073 3074 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3075} 3076#endif /* COMPAT_43 */ 3077 3078#if defined(COMPAT_FREEBSD6) 3079/* Versions with the pad argument */ 3080int 3081freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3082{ 3083 3084 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3085} 3086 3087int 3088freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3089{ 3090 3091 return (kern_ftruncate(td, uap->fd, uap->length)); 3092} 3093#endif 3094 3095int 3096kern_fsync(struct thread *td, int fd, bool fullsync) 3097{ 3098 struct vnode *vp; 3099 struct mount *mp; 3100 struct file *fp; 3101 cap_rights_t rights; 3102 int error, lock_flags; 3103 3104 AUDIT_ARG_FD(fd); 3105 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3106 if (error != 0) 3107 return (error); 3108 vp = fp->f_vnode; 3109#if 0 3110 if (!fullsync) 3111 /* XXXKIB: compete outstanding aio writes */; 3112#endif 3113 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3114 if (error != 0) 3115 goto drop; 3116 if (MNT_SHARED_WRITES(mp) || 3117 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3118 lock_flags = LK_SHARED; 3119 } else { 3120 lock_flags = LK_EXCLUSIVE; 3121 } 3122 vn_lock(vp, lock_flags | LK_RETRY); 3123 AUDIT_ARG_VNODE1(vp); 3124 if (vp->v_object != NULL) { 3125 VM_OBJECT_WLOCK(vp->v_object); 3126 vm_object_page_clean(vp->v_object, 0, 0, 0); 3127 VM_OBJECT_WUNLOCK(vp->v_object); 3128 } 3129 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3130 VOP_UNLOCK(vp, 0); 3131 vn_finished_write(mp); 3132drop: 3133 fdrop(fp, td); 3134 return (error); 3135} 3136 3137/* 3138 * Sync an open file. 3139 */ 3140#ifndef _SYS_SYSPROTO_H_ 3141struct fsync_args { 3142 int fd; 3143}; 3144#endif 3145int 3146sys_fsync(struct thread *td, struct fsync_args *uap) 3147{ 3148 3149 return (kern_fsync(td, uap->fd, true)); 3150} 3151 3152int 3153sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3154{ 3155 3156 return (kern_fsync(td, uap->fd, false)); 3157} 3158 3159/* 3160 * Rename files. Source and destination must either both be directories, or 3161 * both not be directories. If target is a directory, it must be empty. 3162 */ 3163#ifndef _SYS_SYSPROTO_H_ 3164struct rename_args { 3165 char *from; 3166 char *to; 3167}; 3168#endif 3169int 3170sys_rename(struct thread *td, struct rename_args *uap) 3171{ 3172 3173 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3174 uap->to, UIO_USERSPACE)); 3175} 3176 3177#ifndef _SYS_SYSPROTO_H_ 3178struct renameat_args { 3179 int oldfd; 3180 char *old; 3181 int newfd; 3182 char *new; 3183}; 3184#endif 3185int 3186sys_renameat(struct thread *td, struct renameat_args *uap) 3187{ 3188 3189 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3190 UIO_USERSPACE)); 3191} 3192 3193int 3194kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3195 enum uio_seg pathseg) 3196{ 3197 struct mount *mp = NULL; 3198 struct vnode *tvp, *fvp, *tdvp; 3199 struct nameidata fromnd, tond; 3200 cap_rights_t rights; 3201 int error; 3202 3203again: 3204 bwillwrite(); 3205#ifdef MAC 3206 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3207 AUDITVNODE1, pathseg, old, oldfd, 3208 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3209#else 3210 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3211 pathseg, old, oldfd, 3212 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3213#endif 3214 3215 if ((error = namei(&fromnd)) != 0) 3216 return (error); 3217#ifdef MAC 3218 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3219 fromnd.ni_vp, &fromnd.ni_cnd); 3220 VOP_UNLOCK(fromnd.ni_dvp, 0); 3221 if (fromnd.ni_dvp != fromnd.ni_vp) 3222 VOP_UNLOCK(fromnd.ni_vp, 0); 3223#endif 3224 fvp = fromnd.ni_vp; 3225 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3226 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3227 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3228 if (fromnd.ni_vp->v_type == VDIR) 3229 tond.ni_cnd.cn_flags |= WILLBEDIR; 3230 if ((error = namei(&tond)) != 0) { 3231 /* Translate error code for rename("dir1", "dir2/."). */ 3232 if (error == EISDIR && fvp->v_type == VDIR) 3233 error = EINVAL; 3234 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3235 vrele(fromnd.ni_dvp); 3236 vrele(fvp); 3237 goto out1; 3238 } 3239 tdvp = tond.ni_dvp; 3240 tvp = tond.ni_vp; 3241 error = vn_start_write(fvp, &mp, V_NOWAIT); 3242 if (error != 0) { 3243 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3244 NDFREE(&tond, NDF_ONLY_PNBUF); 3245 if (tvp != NULL) 3246 vput(tvp); 3247 if (tdvp == tvp) 3248 vrele(tdvp); 3249 else 3250 vput(tdvp); 3251 vrele(fromnd.ni_dvp); 3252 vrele(fvp); 3253 vrele(tond.ni_startdir); 3254 if (fromnd.ni_startdir != NULL) 3255 vrele(fromnd.ni_startdir); 3256 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3257 if (error != 0) 3258 return (error); 3259 goto again; 3260 } 3261 if (tvp != NULL) { 3262 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3263 error = ENOTDIR; 3264 goto out; 3265 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3266 error = EISDIR; 3267 goto out; 3268 } 3269#ifdef CAPABILITIES 3270 if (newfd != AT_FDCWD) { 3271 /* 3272 * If the target already exists we require CAP_UNLINKAT 3273 * from 'newfd'. 3274 */ 3275 error = cap_check(&tond.ni_filecaps.fc_rights, 3276 cap_rights_init(&rights, CAP_UNLINKAT)); 3277 if (error != 0) 3278 goto out; 3279 } 3280#endif 3281 } 3282 if (fvp == tdvp) { 3283 error = EINVAL; 3284 goto out; 3285 } 3286 /* 3287 * If the source is the same as the destination (that is, if they 3288 * are links to the same vnode), then there is nothing to do. 3289 */ 3290 if (fvp == tvp) 3291 error = -1; 3292#ifdef MAC 3293 else 3294 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3295 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3296#endif 3297out: 3298 if (error == 0) { 3299 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3300 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3301 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3302 NDFREE(&tond, NDF_ONLY_PNBUF); 3303 } else { 3304 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3305 NDFREE(&tond, NDF_ONLY_PNBUF); 3306 if (tvp != NULL) 3307 vput(tvp); 3308 if (tdvp == tvp) 3309 vrele(tdvp); 3310 else 3311 vput(tdvp); 3312 vrele(fromnd.ni_dvp); 3313 vrele(fvp); 3314 } 3315 vrele(tond.ni_startdir); 3316 vn_finished_write(mp); 3317out1: 3318 if (fromnd.ni_startdir) 3319 vrele(fromnd.ni_startdir); 3320 if (error == -1) 3321 return (0); 3322 return (error); 3323} 3324 3325/* 3326 * Make a directory file. 3327 */ 3328#ifndef _SYS_SYSPROTO_H_ 3329struct mkdir_args { 3330 char *path; 3331 int mode; 3332}; 3333#endif 3334int 3335sys_mkdir(struct thread *td, struct mkdir_args *uap) 3336{ 3337 3338 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3339 uap->mode)); 3340} 3341 3342#ifndef _SYS_SYSPROTO_H_ 3343struct mkdirat_args { 3344 int fd; 3345 char *path; 3346 mode_t mode; 3347}; 3348#endif 3349int 3350sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3351{ 3352 3353 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3354} 3355 3356int 3357kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3358 int mode) 3359{ 3360 struct mount *mp; 3361 struct vnode *vp; 3362 struct vattr vattr; 3363 struct nameidata nd; 3364 cap_rights_t rights; 3365 int error; 3366 3367 AUDIT_ARG_MODE(mode); 3368restart: 3369 bwillwrite(); 3370 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3371 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3372 td); 3373 nd.ni_cnd.cn_flags |= WILLBEDIR; 3374 if ((error = namei(&nd)) != 0) 3375 return (error); 3376 vp = nd.ni_vp; 3377 if (vp != NULL) { 3378 NDFREE(&nd, NDF_ONLY_PNBUF); 3379 /* 3380 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3381 * the strange behaviour of leaving the vnode unlocked 3382 * if the target is the same vnode as the parent. 3383 */ 3384 if (vp == nd.ni_dvp) 3385 vrele(nd.ni_dvp); 3386 else 3387 vput(nd.ni_dvp); 3388 vrele(vp); 3389 return (EEXIST); 3390 } 3391 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3392 NDFREE(&nd, NDF_ONLY_PNBUF); 3393 vput(nd.ni_dvp); 3394 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3395 return (error); 3396 goto restart; 3397 } 3398 VATTR_NULL(&vattr); 3399 vattr.va_type = VDIR; 3400 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3401#ifdef MAC 3402 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3403 &vattr); 3404 if (error != 0) 3405 goto out; 3406#endif 3407 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3408#ifdef MAC 3409out: 3410#endif 3411 NDFREE(&nd, NDF_ONLY_PNBUF); 3412 vput(nd.ni_dvp); 3413 if (error == 0) 3414 vput(nd.ni_vp); 3415 vn_finished_write(mp); 3416 return (error); 3417} 3418 3419/* 3420 * Remove a directory file. 3421 */ 3422#ifndef _SYS_SYSPROTO_H_ 3423struct rmdir_args { 3424 char *path; 3425}; 3426#endif 3427int 3428sys_rmdir(struct thread *td, struct rmdir_args *uap) 3429{ 3430 3431 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3432} 3433 3434int 3435kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3436{ 3437 struct mount *mp; 3438 struct vnode *vp; 3439 struct nameidata nd; 3440 cap_rights_t rights; 3441 int error; 3442 3443restart: 3444 bwillwrite(); 3445 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3446 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3447 if ((error = namei(&nd)) != 0) 3448 return (error); 3449 vp = nd.ni_vp; 3450 if (vp->v_type != VDIR) { 3451 error = ENOTDIR; 3452 goto out; 3453 } 3454 /* 3455 * No rmdir "." please. 3456 */ 3457 if (nd.ni_dvp == vp) { 3458 error = EINVAL; 3459 goto out; 3460 } 3461 /* 3462 * The root of a mounted filesystem cannot be deleted. 3463 */ 3464 if (vp->v_vflag & VV_ROOT) { 3465 error = EBUSY; 3466 goto out; 3467 } 3468#ifdef MAC 3469 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3470 &nd.ni_cnd); 3471 if (error != 0) 3472 goto out; 3473#endif 3474 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3475 NDFREE(&nd, NDF_ONLY_PNBUF); 3476 vput(vp); 3477 if (nd.ni_dvp == vp) 3478 vrele(nd.ni_dvp); 3479 else 3480 vput(nd.ni_dvp); 3481 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3482 return (error); 3483 goto restart; 3484 } 3485 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3486 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3487 vn_finished_write(mp); 3488out: 3489 NDFREE(&nd, NDF_ONLY_PNBUF); 3490 vput(vp); 3491 if (nd.ni_dvp == vp) 3492 vrele(nd.ni_dvp); 3493 else 3494 vput(nd.ni_dvp); 3495 return (error); 3496} 3497 3498#ifdef COMPAT_43 3499/* 3500 * Read a block of directory entries in a filesystem independent format. 3501 */ 3502#ifndef _SYS_SYSPROTO_H_ 3503struct ogetdirentries_args { 3504 int fd; 3505 char *buf; 3506 u_int count; 3507 long *basep; 3508}; 3509#endif 3510int 3511ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3512{ 3513 long loff; 3514 int error; 3515 3516 error = kern_ogetdirentries(td, uap, &loff); 3517 if (error == 0) 3518 error = copyout(&loff, uap->basep, sizeof(long)); 3519 return (error); 3520} 3521 3522int 3523kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3524 long *ploff) 3525{ 3526 struct vnode *vp; 3527 struct file *fp; 3528 struct uio auio, kuio; 3529 struct iovec aiov, kiov; 3530 struct dirent *dp, *edp; 3531 cap_rights_t rights; 3532 caddr_t dirbuf; 3533 int error, eofflag, readcnt; 3534 long loff; 3535 off_t foffset; 3536 3537 /* XXX arbitrary sanity limit on `count'. */ 3538 if (uap->count > 64 * 1024) 3539 return (EINVAL); 3540 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3541 if (error != 0) 3542 return (error); 3543 if ((fp->f_flag & FREAD) == 0) { 3544 fdrop(fp, td); 3545 return (EBADF); 3546 } 3547 vp = fp->f_vnode; 3548 foffset = foffset_lock(fp, 0); 3549unionread: 3550 if (vp->v_type != VDIR) { 3551 foffset_unlock(fp, foffset, 0); 3552 fdrop(fp, td); 3553 return (EINVAL); 3554 } 3555 aiov.iov_base = uap->buf; 3556 aiov.iov_len = uap->count; 3557 auio.uio_iov = &aiov; 3558 auio.uio_iovcnt = 1; 3559 auio.uio_rw = UIO_READ; 3560 auio.uio_segflg = UIO_USERSPACE; 3561 auio.uio_td = td; 3562 auio.uio_resid = uap->count; 3563 vn_lock(vp, LK_SHARED | LK_RETRY); 3564 loff = auio.uio_offset = foffset; 3565#ifdef MAC 3566 error = mac_vnode_check_readdir(td->td_ucred, vp); 3567 if (error != 0) { 3568 VOP_UNLOCK(vp, 0); 3569 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3570 fdrop(fp, td); 3571 return (error); 3572 } 3573#endif 3574# if (BYTE_ORDER != LITTLE_ENDIAN) 3575 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3576 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3577 NULL, NULL); 3578 foffset = auio.uio_offset; 3579 } else 3580# endif 3581 { 3582 kuio = auio; 3583 kuio.uio_iov = &kiov; 3584 kuio.uio_segflg = UIO_SYSSPACE; 3585 kiov.iov_len = uap->count; 3586 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3587 kiov.iov_base = dirbuf; 3588 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3589 NULL, NULL); 3590 foffset = kuio.uio_offset; 3591 if (error == 0) { 3592 readcnt = uap->count - kuio.uio_resid; 3593 edp = (struct dirent *)&dirbuf[readcnt]; 3594 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3595# if (BYTE_ORDER == LITTLE_ENDIAN) 3596 /* 3597 * The expected low byte of 3598 * dp->d_namlen is our dp->d_type. 3599 * The high MBZ byte of dp->d_namlen 3600 * is our dp->d_namlen. 3601 */ 3602 dp->d_type = dp->d_namlen; 3603 dp->d_namlen = 0; 3604# else 3605 /* 3606 * The dp->d_type is the high byte 3607 * of the expected dp->d_namlen, 3608 * so must be zero'ed. 3609 */ 3610 dp->d_type = 0; 3611# endif 3612 if (dp->d_reclen > 0) { 3613 dp = (struct dirent *) 3614 ((char *)dp + dp->d_reclen); 3615 } else { 3616 error = EIO; 3617 break; 3618 } 3619 } 3620 if (dp >= edp) 3621 error = uiomove(dirbuf, readcnt, &auio); 3622 } 3623 free(dirbuf, M_TEMP); 3624 } 3625 if (error != 0) { 3626 VOP_UNLOCK(vp, 0); 3627 foffset_unlock(fp, foffset, 0); 3628 fdrop(fp, td); 3629 return (error); 3630 } 3631 if (uap->count == auio.uio_resid && 3632 (vp->v_vflag & VV_ROOT) && 3633 (vp->v_mount->mnt_flag & MNT_UNION)) { 3634 struct vnode *tvp = vp; 3635 vp = vp->v_mount->mnt_vnodecovered; 3636 VREF(vp); 3637 fp->f_vnode = vp; 3638 fp->f_data = vp; 3639 foffset = 0; 3640 vput(tvp); 3641 goto unionread; 3642 } 3643 VOP_UNLOCK(vp, 0); 3644 foffset_unlock(fp, foffset, 0); 3645 fdrop(fp, td); 3646 td->td_retval[0] = uap->count - auio.uio_resid; 3647 if (error == 0) 3648 *ploff = loff; 3649 return (error); 3650} 3651#endif /* COMPAT_43 */ 3652 3653/* 3654 * Read a block of directory entries in a filesystem independent format. 3655 */ 3656#ifndef _SYS_SYSPROTO_H_ 3657struct getdirentries_args { 3658 int fd; 3659 char *buf; 3660 u_int count; 3661 long *basep; 3662}; 3663#endif 3664int 3665sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 3666{ 3667 long base; 3668 int error; 3669 3670 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3671 NULL, UIO_USERSPACE); 3672 if (error != 0) 3673 return (error); 3674 if (uap->basep != NULL) 3675 error = copyout(&base, uap->basep, sizeof(long)); 3676 return (error); 3677} 3678 3679int 3680kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3681 long *basep, ssize_t *residp, enum uio_seg bufseg) 3682{ 3683 struct vnode *vp; 3684 struct file *fp; 3685 struct uio auio; 3686 struct iovec aiov; 3687 cap_rights_t rights; 3688 long loff; 3689 int error, eofflag; 3690 off_t foffset; 3691 3692 AUDIT_ARG_FD(fd); 3693 if (count > IOSIZE_MAX) 3694 return (EINVAL); 3695 auio.uio_resid = count; 3696 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3697 if (error != 0) 3698 return (error); 3699 if ((fp->f_flag & FREAD) == 0) { 3700 fdrop(fp, td); 3701 return (EBADF); 3702 } 3703 vp = fp->f_vnode; 3704 foffset = foffset_lock(fp, 0); 3705unionread: 3706 if (vp->v_type != VDIR) { 3707 error = EINVAL; 3708 goto fail; 3709 } 3710 aiov.iov_base = buf; 3711 aiov.iov_len = count; 3712 auio.uio_iov = &aiov; 3713 auio.uio_iovcnt = 1; 3714 auio.uio_rw = UIO_READ; 3715 auio.uio_segflg = bufseg; 3716 auio.uio_td = td; 3717 vn_lock(vp, LK_SHARED | LK_RETRY); 3718 AUDIT_ARG_VNODE1(vp); 3719 loff = auio.uio_offset = foffset; 3720#ifdef MAC 3721 error = mac_vnode_check_readdir(td->td_ucred, vp); 3722 if (error == 0) 3723#endif 3724 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3725 NULL); 3726 foffset = auio.uio_offset; 3727 if (error != 0) { 3728 VOP_UNLOCK(vp, 0); 3729 goto fail; 3730 } 3731 if (count == auio.uio_resid && 3732 (vp->v_vflag & VV_ROOT) && 3733 (vp->v_mount->mnt_flag & MNT_UNION)) { 3734 struct vnode *tvp = vp; 3735 3736 vp = vp->v_mount->mnt_vnodecovered; 3737 VREF(vp); 3738 fp->f_vnode = vp; 3739 fp->f_data = vp; 3740 foffset = 0; 3741 vput(tvp); 3742 goto unionread; 3743 } 3744 VOP_UNLOCK(vp, 0); 3745 *basep = loff; 3746 if (residp != NULL) 3747 *residp = auio.uio_resid; 3748 td->td_retval[0] = count - auio.uio_resid; 3749fail: 3750 foffset_unlock(fp, foffset, 0); 3751 fdrop(fp, td); 3752 return (error); 3753} 3754 3755#ifndef _SYS_SYSPROTO_H_ 3756struct getdents_args { 3757 int fd; 3758 char *buf; 3759 size_t count; 3760}; 3761#endif 3762int 3763sys_getdents(struct thread *td, struct getdents_args *uap) 3764{ 3765 struct getdirentries_args ap; 3766 3767 ap.fd = uap->fd; 3768 ap.buf = uap->buf; 3769 ap.count = uap->count; 3770 ap.basep = NULL; 3771 return (sys_getdirentries(td, &ap)); 3772} 3773 3774/* 3775 * Set the mode mask for creation of filesystem nodes. 3776 */ 3777#ifndef _SYS_SYSPROTO_H_ 3778struct umask_args { 3779 int newmask; 3780}; 3781#endif 3782int 3783sys_umask(struct thread *td, struct umask_args *uap) 3784{ 3785 struct filedesc *fdp; 3786 3787 fdp = td->td_proc->p_fd; 3788 FILEDESC_XLOCK(fdp); 3789 td->td_retval[0] = fdp->fd_cmask; 3790 fdp->fd_cmask = uap->newmask & ALLPERMS; 3791 FILEDESC_XUNLOCK(fdp); 3792 return (0); 3793} 3794 3795/* 3796 * Void all references to file by ripping underlying filesystem away from 3797 * vnode. 3798 */ 3799#ifndef _SYS_SYSPROTO_H_ 3800struct revoke_args { 3801 char *path; 3802}; 3803#endif 3804int 3805sys_revoke(struct thread *td, struct revoke_args *uap) 3806{ 3807 struct vnode *vp; 3808 struct vattr vattr; 3809 struct nameidata nd; 3810 int error; 3811 3812 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3813 uap->path, td); 3814 if ((error = namei(&nd)) != 0) 3815 return (error); 3816 vp = nd.ni_vp; 3817 NDFREE(&nd, NDF_ONLY_PNBUF); 3818 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 3819 error = EINVAL; 3820 goto out; 3821 } 3822#ifdef MAC 3823 error = mac_vnode_check_revoke(td->td_ucred, vp); 3824 if (error != 0) 3825 goto out; 3826#endif 3827 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 3828 if (error != 0) 3829 goto out; 3830 if (td->td_ucred->cr_uid != vattr.va_uid) { 3831 error = priv_check(td, PRIV_VFS_ADMIN); 3832 if (error != 0) 3833 goto out; 3834 } 3835 if (vcount(vp) > 1) 3836 VOP_REVOKE(vp, REVOKEALL); 3837out: 3838 vput(vp); 3839 return (error); 3840} 3841 3842/* 3843 * Convert a user file descriptor to a kernel file entry and check that, if it 3844 * is a capability, the correct rights are present. A reference on the file 3845 * entry is held upon returning. 3846 */ 3847int 3848getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 3849{ 3850 struct file *fp; 3851 int error; 3852 3853 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 3854 if (error != 0) 3855 return (error); 3856 3857 /* 3858 * The file could be not of the vnode type, or it may be not 3859 * yet fully initialized, in which case the f_vnode pointer 3860 * may be set, but f_ops is still badfileops. E.g., 3861 * devfs_open() transiently create such situation to 3862 * facilitate csw d_fdopen(). 3863 * 3864 * Dupfdopen() handling in kern_openat() installs the 3865 * half-baked file into the process descriptor table, allowing 3866 * other thread to dereference it. Guard against the race by 3867 * checking f_ops. 3868 */ 3869 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 3870 fdrop(fp, td); 3871 return (EINVAL); 3872 } 3873 *fpp = fp; 3874 return (0); 3875} 3876 3877 3878/* 3879 * Get an (NFS) file handle. 3880 */ 3881#ifndef _SYS_SYSPROTO_H_ 3882struct lgetfh_args { 3883 char *fname; 3884 fhandle_t *fhp; 3885}; 3886#endif 3887int 3888sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 3889{ 3890 struct nameidata nd; 3891 fhandle_t fh; 3892 struct vnode *vp; 3893 int error; 3894 3895 error = priv_check(td, PRIV_VFS_GETFH); 3896 if (error != 0) 3897 return (error); 3898 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3899 uap->fname, td); 3900 error = namei(&nd); 3901 if (error != 0) 3902 return (error); 3903 NDFREE(&nd, NDF_ONLY_PNBUF); 3904 vp = nd.ni_vp; 3905 bzero(&fh, sizeof(fh)); 3906 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3907 error = VOP_VPTOFH(vp, &fh.fh_fid); 3908 vput(vp); 3909 if (error == 0) 3910 error = copyout(&fh, uap->fhp, sizeof (fh)); 3911 return (error); 3912} 3913 3914#ifndef _SYS_SYSPROTO_H_ 3915struct getfh_args { 3916 char *fname; 3917 fhandle_t *fhp; 3918}; 3919#endif 3920int 3921sys_getfh(struct thread *td, struct getfh_args *uap) 3922{ 3923 struct nameidata nd; 3924 fhandle_t fh; 3925 struct vnode *vp; 3926 int error; 3927 3928 error = priv_check(td, PRIV_VFS_GETFH); 3929 if (error != 0) 3930 return (error); 3931 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3932 uap->fname, td); 3933 error = namei(&nd); 3934 if (error != 0) 3935 return (error); 3936 NDFREE(&nd, NDF_ONLY_PNBUF); 3937 vp = nd.ni_vp; 3938 bzero(&fh, sizeof(fh)); 3939 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3940 error = VOP_VPTOFH(vp, &fh.fh_fid); 3941 vput(vp); 3942 if (error == 0) 3943 error = copyout(&fh, uap->fhp, sizeof (fh)); 3944 return (error); 3945} 3946 3947/* 3948 * syscall for the rpc.lockd to use to translate a NFS file handle into an 3949 * open descriptor. 3950 * 3951 * warning: do not remove the priv_check() call or this becomes one giant 3952 * security hole. 3953 */ 3954#ifndef _SYS_SYSPROTO_H_ 3955struct fhopen_args { 3956 const struct fhandle *u_fhp; 3957 int flags; 3958}; 3959#endif 3960int 3961sys_fhopen(struct thread *td, struct fhopen_args *uap) 3962{ 3963 struct mount *mp; 3964 struct vnode *vp; 3965 struct fhandle fhp; 3966 struct file *fp; 3967 int fmode, error; 3968 int indx; 3969 3970 error = priv_check(td, PRIV_VFS_FHOPEN); 3971 if (error != 0) 3972 return (error); 3973 indx = -1; 3974 fmode = FFLAGS(uap->flags); 3975 /* why not allow a non-read/write open for our lockd? */ 3976 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3977 return (EINVAL); 3978 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3979 if (error != 0) 3980 return(error); 3981 /* find the mount point */ 3982 mp = vfs_busyfs(&fhp.fh_fsid); 3983 if (mp == NULL) 3984 return (ESTALE); 3985 /* now give me my vnode, it gets returned to me locked */ 3986 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 3987 vfs_unbusy(mp); 3988 if (error != 0) 3989 return (error); 3990 3991 error = falloc_noinstall(td, &fp); 3992 if (error != 0) { 3993 vput(vp); 3994 return (error); 3995 } 3996 /* 3997 * An extra reference on `fp' has been held for us by 3998 * falloc_noinstall(). 3999 */ 4000 4001#ifdef INVARIANTS 4002 td->td_dupfd = -1; 4003#endif 4004 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4005 if (error != 0) { 4006 KASSERT(fp->f_ops == &badfileops, 4007 ("VOP_OPEN in fhopen() set f_ops")); 4008 KASSERT(td->td_dupfd < 0, 4009 ("fhopen() encountered fdopen()")); 4010 4011 vput(vp); 4012 goto bad; 4013 } 4014#ifdef INVARIANTS 4015 td->td_dupfd = 0; 4016#endif 4017 fp->f_vnode = vp; 4018 fp->f_seqcount = 1; 4019 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4020 &vnops); 4021 VOP_UNLOCK(vp, 0); 4022 if ((fmode & O_TRUNC) != 0) { 4023 error = fo_truncate(fp, 0, td->td_ucred, td); 4024 if (error != 0) 4025 goto bad; 4026 } 4027 4028 error = finstall(td, fp, &indx, fmode, NULL); 4029bad: 4030 fdrop(fp, td); 4031 td->td_retval[0] = indx; 4032 return (error); 4033} 4034 4035/* 4036 * Stat an (NFS) file handle. 4037 */ 4038#ifndef _SYS_SYSPROTO_H_ 4039struct fhstat_args { 4040 struct fhandle *u_fhp; 4041 struct stat *sb; 4042}; 4043#endif 4044int 4045sys_fhstat(struct thread *td, struct fhstat_args *uap) 4046{ 4047 struct stat sb; 4048 struct fhandle fh; 4049 int error; 4050 4051 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4052 if (error != 0) 4053 return (error); 4054 error = kern_fhstat(td, fh, &sb); 4055 if (error == 0) 4056 error = copyout(&sb, uap->sb, sizeof(sb)); 4057 return (error); 4058} 4059 4060int 4061kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4062{ 4063 struct mount *mp; 4064 struct vnode *vp; 4065 int error; 4066 4067 error = priv_check(td, PRIV_VFS_FHSTAT); 4068 if (error != 0) 4069 return (error); 4070 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4071 return (ESTALE); 4072 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4073 vfs_unbusy(mp); 4074 if (error != 0) 4075 return (error); 4076 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4077 vput(vp); 4078 return (error); 4079} 4080 4081/* 4082 * Implement fstatfs() for (NFS) file handles. 4083 */ 4084#ifndef _SYS_SYSPROTO_H_ 4085struct fhstatfs_args { 4086 struct fhandle *u_fhp; 4087 struct statfs *buf; 4088}; 4089#endif 4090int 4091sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4092{ 4093 struct statfs *sfp; 4094 fhandle_t fh; 4095 int error; 4096 4097 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4098 if (error != 0) 4099 return (error); 4100 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4101 error = kern_fhstatfs(td, fh, sfp); 4102 if (error == 0) 4103 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4104 free(sfp, M_STATFS); 4105 return (error); 4106} 4107 4108int 4109kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4110{ 4111 struct statfs *sp; 4112 struct mount *mp; 4113 struct vnode *vp; 4114 int error; 4115 4116 error = priv_check(td, PRIV_VFS_FHSTATFS); 4117 if (error != 0) 4118 return (error); 4119 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4120 return (ESTALE); 4121 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4122 if (error != 0) { 4123 vfs_unbusy(mp); 4124 return (error); 4125 } 4126 vput(vp); 4127 error = prison_canseemount(td->td_ucred, mp); 4128 if (error != 0) 4129 goto out; 4130#ifdef MAC 4131 error = mac_mount_check_stat(td->td_ucred, mp); 4132 if (error != 0) 4133 goto out; 4134#endif 4135 /* 4136 * Set these in case the underlying filesystem fails to do so. 4137 */ 4138 sp = &mp->mnt_stat; 4139 sp->f_version = STATFS_VERSION; 4140 sp->f_namemax = NAME_MAX; 4141 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4142 error = VFS_STATFS(mp, sp); 4143 if (error == 0) 4144 *buf = *sp; 4145out: 4146 vfs_unbusy(mp); 4147 return (error); 4148} 4149 4150int 4151kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4152{ 4153 struct file *fp; 4154 struct mount *mp; 4155 struct vnode *vp; 4156 cap_rights_t rights; 4157 off_t olen, ooffset; 4158 int error; 4159 4160 if (offset < 0 || len <= 0) 4161 return (EINVAL); 4162 /* Check for wrap. */ 4163 if (offset > OFF_MAX - len) 4164 return (EFBIG); 4165 error = fget(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp); 4166 if (error != 0) 4167 return (error); 4168 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4169 error = ESPIPE; 4170 goto out; 4171 } 4172 if ((fp->f_flag & FWRITE) == 0) { 4173 error = EBADF; 4174 goto out; 4175 } 4176 if (fp->f_type != DTYPE_VNODE) { 4177 error = ENODEV; 4178 goto out; 4179 } 4180 vp = fp->f_vnode; 4181 if (vp->v_type != VREG) { 4182 error = ENODEV; 4183 goto out; 4184 } 4185 4186 /* Allocating blocks may take a long time, so iterate. */ 4187 for (;;) { 4188 olen = len; 4189 ooffset = offset; 4190 4191 bwillwrite(); 4192 mp = NULL; 4193 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4194 if (error != 0) 4195 break; 4196 error = vn_lock(vp, LK_EXCLUSIVE); 4197 if (error != 0) { 4198 vn_finished_write(mp); 4199 break; 4200 } 4201#ifdef MAC 4202 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4203 if (error == 0) 4204#endif 4205 error = VOP_ALLOCATE(vp, &offset, &len); 4206 VOP_UNLOCK(vp, 0); 4207 vn_finished_write(mp); 4208 4209 if (olen + ooffset != offset + len) { 4210 panic("offset + len changed from %jx/%jx to %jx/%jx", 4211 ooffset, olen, offset, len); 4212 } 4213 if (error != 0 || len == 0) 4214 break; 4215 KASSERT(olen > len, ("Iteration did not make progress?")); 4216 maybe_yield(); 4217 } 4218 out: 4219 fdrop(fp, td); 4220 return (error); 4221} 4222 4223int 4224sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4225{ 4226 int error; 4227 4228 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4229 return (kern_posix_error(td, error)); 4230} 4231 4232/* 4233 * Unlike madvise(2), we do not make a best effort to remember every 4234 * possible caching hint. Instead, we remember the last setting with 4235 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4236 * region of any current setting. 4237 */ 4238int 4239kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4240 int advice) 4241{ 4242 struct fadvise_info *fa, *new; 4243 struct file *fp; 4244 struct vnode *vp; 4245 cap_rights_t rights; 4246 off_t end; 4247 int error; 4248 4249 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4250 return (EINVAL); 4251 switch (advice) { 4252 case POSIX_FADV_SEQUENTIAL: 4253 case POSIX_FADV_RANDOM: 4254 case POSIX_FADV_NOREUSE: 4255 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4256 break; 4257 case POSIX_FADV_NORMAL: 4258 case POSIX_FADV_WILLNEED: 4259 case POSIX_FADV_DONTNEED: 4260 new = NULL; 4261 break; 4262 default: 4263 return (EINVAL); 4264 } 4265 /* XXX: CAP_POSIX_FADVISE? */ 4266 error = fget(td, fd, cap_rights_init(&rights), &fp); 4267 if (error != 0) 4268 goto out; 4269 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4270 error = ESPIPE; 4271 goto out; 4272 } 4273 if (fp->f_type != DTYPE_VNODE) { 4274 error = ENODEV; 4275 goto out; 4276 } 4277 vp = fp->f_vnode; 4278 if (vp->v_type != VREG) { 4279 error = ENODEV; 4280 goto out; 4281 } 4282 if (len == 0) 4283 end = OFF_MAX; 4284 else 4285 end = offset + len - 1; 4286 switch (advice) { 4287 case POSIX_FADV_SEQUENTIAL: 4288 case POSIX_FADV_RANDOM: 4289 case POSIX_FADV_NOREUSE: 4290 /* 4291 * Try to merge any existing non-standard region with 4292 * this new region if possible, otherwise create a new 4293 * non-standard region for this request. 4294 */ 4295 mtx_pool_lock(mtxpool_sleep, fp); 4296 fa = fp->f_advice; 4297 if (fa != NULL && fa->fa_advice == advice && 4298 ((fa->fa_start <= end && fa->fa_end >= offset) || 4299 (end != OFF_MAX && fa->fa_start == end + 1) || 4300 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4301 if (offset < fa->fa_start) 4302 fa->fa_start = offset; 4303 if (end > fa->fa_end) 4304 fa->fa_end = end; 4305 } else { 4306 new->fa_advice = advice; 4307 new->fa_start = offset; 4308 new->fa_end = end; 4309 fp->f_advice = new; 4310 new = fa; 4311 } 4312 mtx_pool_unlock(mtxpool_sleep, fp); 4313 break; 4314 case POSIX_FADV_NORMAL: 4315 /* 4316 * If a the "normal" region overlaps with an existing 4317 * non-standard region, trim or remove the 4318 * non-standard region. 4319 */ 4320 mtx_pool_lock(mtxpool_sleep, fp); 4321 fa = fp->f_advice; 4322 if (fa != NULL) { 4323 if (offset <= fa->fa_start && end >= fa->fa_end) { 4324 new = fa; 4325 fp->f_advice = NULL; 4326 } else if (offset <= fa->fa_start && 4327 end >= fa->fa_start) 4328 fa->fa_start = end + 1; 4329 else if (offset <= fa->fa_end && end >= fa->fa_end) 4330 fa->fa_end = offset - 1; 4331 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4332 /* 4333 * If the "normal" region is a middle 4334 * portion of the existing 4335 * non-standard region, just remove 4336 * the whole thing rather than picking 4337 * one side or the other to 4338 * preserve. 4339 */ 4340 new = fa; 4341 fp->f_advice = NULL; 4342 } 4343 } 4344 mtx_pool_unlock(mtxpool_sleep, fp); 4345 break; 4346 case POSIX_FADV_WILLNEED: 4347 case POSIX_FADV_DONTNEED: 4348 error = VOP_ADVISE(vp, offset, end, advice); 4349 break; 4350 } 4351out: 4352 if (fp != NULL) 4353 fdrop(fp, td); 4354 free(new, M_FADVISE); 4355 return (error); 4356} 4357 4358int 4359sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4360{ 4361 int error; 4362 4363 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4364 uap->advice); 4365 return (kern_posix_error(td, error)); 4366} 4367