1/*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: stable/11/sys/kern/vfs_syscalls.c 362426 2020-06-20 04:39:52Z kib $"); 39 40#include "opt_capsicum.h" 41#include "opt_compat.h" 42#include "opt_ktrace.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/bio.h> 47#include <sys/buf.h> 48#include <sys/capsicum.h> 49#include <sys/disk.h> 50#include <sys/sysent.h> 51#include <sys/malloc.h> 52#include <sys/mount.h> 53#include <sys/mutex.h> 54#include <sys/sysproto.h> 55#include <sys/namei.h> 56#include <sys/filedesc.h> 57#include <sys/kernel.h> 58#include <sys/fcntl.h> 59#include <sys/file.h> 60#include <sys/filio.h> 61#include <sys/limits.h> 62#include <sys/linker.h> 63#include <sys/rwlock.h> 64#include <sys/sdt.h> 65#include <sys/stat.h> 66#include <sys/sx.h> 67#include <sys/unistd.h> 68#include <sys/vnode.h> 69#include <sys/priv.h> 70#include <sys/proc.h> 71#include <sys/dirent.h> 72#include <sys/jail.h> 73#include <sys/syscallsubr.h> 74#include <sys/sysctl.h> 75#ifdef KTRACE 76#include <sys/ktrace.h> 77#endif 78 79#include <machine/stdarg.h> 80 81#include <security/audit/audit.h> 82#include <security/mac/mac_framework.h> 83 84#include <vm/vm.h> 85#include <vm/vm_object.h> 86#include <vm/vm_page.h> 87#include <vm/uma.h> 88 89#include <ufs/ufs/quota.h> 90 91MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93SDT_PROVIDER_DEFINE(vfs); 94SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99static int setfflags(struct thread *td, struct vnode *, u_long); 100static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103static int setutimes(struct thread *td, struct vnode *, 104 
const struct timespec *, int, int); 105static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108/* 109 * Sync each mounted filesystem. 110 */ 111#ifndef _SYS_SYSPROTO_H_ 112struct sync_args { 113 int dummy; 114}; 115#endif 116/* ARGSUSED */ 117int 118sys_sync(struct thread *td, struct sync_args *uap) 119{ 120 struct mount *mp, *nmp; 121 int save; 122 123 mtx_lock(&mountlist_mtx); 124 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 125 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 126 nmp = TAILQ_NEXT(mp, mnt_list); 127 continue; 128 } 129 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 130 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 131 save = curthread_pflags_set(TDP_SYNCIO); 132 vfs_msync(mp, MNT_NOWAIT); 133 VFS_SYNC(mp, MNT_NOWAIT); 134 curthread_pflags_restore(save); 135 vn_finished_write(mp); 136 } 137 mtx_lock(&mountlist_mtx); 138 nmp = TAILQ_NEXT(mp, mnt_list); 139 vfs_unbusy(mp); 140 } 141 mtx_unlock(&mountlist_mtx); 142 return (0); 143} 144 145/* 146 * Change filesystem quotas. 
147 */ 148#ifndef _SYS_SYSPROTO_H_ 149struct quotactl_args { 150 char *path; 151 int cmd; 152 int uid; 153 caddr_t arg; 154}; 155#endif 156int 157sys_quotactl(struct thread *td, struct quotactl_args *uap) 158{ 159 struct mount *mp; 160 struct nameidata nd; 161 int error; 162 163 AUDIT_ARG_CMD(uap->cmd); 164 AUDIT_ARG_UID(uap->uid); 165 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 166 return (EPERM); 167 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 168 uap->path, td); 169 if ((error = namei(&nd)) != 0) 170 return (error); 171 NDFREE(&nd, NDF_ONLY_PNBUF); 172 mp = nd.ni_vp->v_mount; 173 vfs_ref(mp); 174 vput(nd.ni_vp); 175 error = vfs_busy(mp, 0); 176 vfs_rel(mp); 177 if (error != 0) 178 return (error); 179 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 180 181 /* 182 * Since quota on operation typically needs to open quota 183 * file, the Q_QUOTAON handler needs to unbusy the mount point 184 * before calling into namei. Otherwise, unmount might be 185 * started between two vfs_busy() invocations (first is our, 186 * second is from mount point cross-walk code in lookup()), 187 * causing deadlock. 188 * 189 * Require that Q_QUOTAON handles the vfs_busy() reference on 190 * its own, always returning with ubusied mount point. 191 */ 192 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 193 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 194 vfs_unbusy(mp); 195 return (error); 196} 197 198/* 199 * Used by statfs conversion routines to scale the block size up if 200 * necessary so that all of the block counts are <= 'max_size'. Note 201 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 202 * value of 'n'. 
203 */ 204void 205statfs_scale_blocks(struct statfs *sf, long max_size) 206{ 207 uint64_t count; 208 int shift; 209 210 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 211 212 /* 213 * Attempt to scale the block counts to give a more accurate 214 * overview to userland of the ratio of free space to used 215 * space. To do this, find the largest block count and compute 216 * a divisor that lets it fit into a signed integer <= max_size. 217 */ 218 if (sf->f_bavail < 0) 219 count = -sf->f_bavail; 220 else 221 count = sf->f_bavail; 222 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 223 if (count <= max_size) 224 return; 225 226 count >>= flsl(max_size); 227 shift = 0; 228 while (count > 0) { 229 shift++; 230 count >>=1; 231 } 232 233 sf->f_bsize <<= shift; 234 sf->f_blocks >>= shift; 235 sf->f_bfree >>= shift; 236 sf->f_bavail >>= shift; 237} 238 239static int 240kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 241{ 242 struct statfs *sp; 243 int error; 244 245 if (mp == NULL) 246 return (EBADF); 247 error = vfs_busy(mp, 0); 248 vfs_rel(mp); 249 if (error != 0) 250 return (error); 251#ifdef MAC 252 error = mac_mount_check_stat(td->td_ucred, mp); 253 if (error != 0) 254 goto out; 255#endif 256 /* 257 * Set these in case the underlying filesystem fails to do so. 258 */ 259 sp = &mp->mnt_stat; 260 sp->f_version = STATFS_VERSION; 261 sp->f_namemax = NAME_MAX; 262 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 263 error = VFS_STATFS(mp, sp); 264 if (error != 0) 265 goto out; 266 *buf = *sp; 267 if (priv_check(td, PRIV_VFS_GENERATION)) { 268 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 269 prison_enforce_statfs(td->td_ucred, mp, buf); 270 } 271out: 272 vfs_unbusy(mp); 273 return (error); 274} 275 276/* 277 * Get filesystem statistics. 
278 */ 279#ifndef _SYS_SYSPROTO_H_ 280struct statfs_args { 281 char *path; 282 struct statfs *buf; 283}; 284#endif 285int 286sys_statfs(struct thread *td, struct statfs_args *uap) 287{ 288 struct statfs *sfp; 289 int error; 290 291 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 292 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 293 if (error == 0) 294 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 295 free(sfp, M_STATFS); 296 return (error); 297} 298 299int 300kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 301 struct statfs *buf) 302{ 303 struct mount *mp; 304 struct nameidata nd; 305 int error; 306 307 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 308 pathseg, path, td); 309 error = namei(&nd); 310 if (error != 0) 311 return (error); 312 mp = nd.ni_vp->v_mount; 313 vfs_ref(mp); 314 NDFREE(&nd, NDF_ONLY_PNBUF); 315 vput(nd.ni_vp); 316 return (kern_do_statfs(td, mp, buf)); 317} 318 319/* 320 * Get filesystem statistics. 321 */ 322#ifndef _SYS_SYSPROTO_H_ 323struct fstatfs_args { 324 int fd; 325 struct statfs *buf; 326}; 327#endif 328int 329sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 330{ 331 struct statfs *sfp; 332 int error; 333 334 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 335 error = kern_fstatfs(td, uap->fd, sfp); 336 if (error == 0) 337 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 338 free(sfp, M_STATFS); 339 return (error); 340} 341 342int 343kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 344{ 345 struct file *fp; 346 struct mount *mp; 347 struct vnode *vp; 348 cap_rights_t rights; 349 int error; 350 351 AUDIT_ARG_FD(fd); 352 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 353 if (error != 0) 354 return (error); 355 vp = fp->f_vnode; 356 vn_lock(vp, LK_SHARED | LK_RETRY); 357#ifdef AUDIT 358 AUDIT_ARG_VNODE1(vp); 359#endif 360 mp = vp->v_mount; 361 if (mp != NULL) 362 vfs_ref(mp); 363 VOP_UNLOCK(vp, 0); 364 fdrop(fp, td); 
365 return (kern_do_statfs(td, mp, buf)); 366} 367 368/* 369 * Get statistics on all filesystems. 370 */ 371#ifndef _SYS_SYSPROTO_H_ 372struct getfsstat_args { 373 struct statfs *buf; 374 long bufsize; 375 int mode; 376}; 377#endif 378int 379sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 380{ 381 size_t count; 382 int error; 383 384 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 385 return (EINVAL); 386 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 387 UIO_USERSPACE, uap->mode); 388 if (error == 0) 389 td->td_retval[0] = count; 390 return (error); 391} 392 393/* 394 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 395 * The caller is responsible for freeing memory which will be allocated 396 * in '*buf'. 397 */ 398int 399kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 400 size_t *countp, enum uio_seg bufseg, int mode) 401{ 402 struct mount *mp, *nmp; 403 struct statfs *sfsp, *sp, *sptmp, *tofree; 404 size_t count, maxcount; 405 int error; 406 407 switch (mode) { 408 case MNT_WAIT: 409 case MNT_NOWAIT: 410 break; 411 default: 412 if (bufseg == UIO_SYSSPACE) 413 *buf = NULL; 414 return (EINVAL); 415 } 416restart: 417 maxcount = bufsize / sizeof(struct statfs); 418 if (bufsize == 0) { 419 sfsp = NULL; 420 tofree = NULL; 421 } else if (bufseg == UIO_USERSPACE) { 422 sfsp = *buf; 423 tofree = NULL; 424 } else /* if (bufseg == UIO_SYSSPACE) */ { 425 count = 0; 426 mtx_lock(&mountlist_mtx); 427 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 428 count++; 429 } 430 mtx_unlock(&mountlist_mtx); 431 if (maxcount > count) 432 maxcount = count; 433 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 434 M_STATFS, M_WAITOK); 435 } 436 count = 0; 437 mtx_lock(&mountlist_mtx); 438 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 439 if (prison_canseemount(td->td_ucred, mp) != 0) { 440 nmp = TAILQ_NEXT(mp, mnt_list); 441 continue; 442 } 443#ifdef MAC 444 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 445 nmp = 
TAILQ_NEXT(mp, mnt_list); 446 continue; 447 } 448#endif 449 if (mode == MNT_WAIT) { 450 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 451 /* 452 * If vfs_busy() failed, and MBF_NOWAIT 453 * wasn't passed, then the mp is gone. 454 * Furthermore, because of MBF_MNTLSTLOCK, 455 * the mountlist_mtx was dropped. We have 456 * no other choice than to start over. 457 */ 458 mtx_unlock(&mountlist_mtx); 459 free(tofree, M_STATFS); 460 goto restart; 461 } 462 } else { 463 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 464 nmp = TAILQ_NEXT(mp, mnt_list); 465 continue; 466 } 467 } 468 if (sfsp != NULL && count < maxcount) { 469 sp = &mp->mnt_stat; 470 /* 471 * Set these in case the underlying filesystem 472 * fails to do so. 473 */ 474 sp->f_version = STATFS_VERSION; 475 sp->f_namemax = NAME_MAX; 476 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 477 /* 478 * If MNT_NOWAIT is specified, do not refresh 479 * the fsstat cache. 480 */ 481 if (mode != MNT_NOWAIT) { 482 error = VFS_STATFS(mp, sp); 483 if (error != 0) { 484 mtx_lock(&mountlist_mtx); 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 vfs_unbusy(mp); 487 continue; 488 } 489 } 490 if (priv_check(td, PRIV_VFS_GENERATION)) { 491 sptmp = malloc(sizeof(struct statfs), M_STATFS, 492 M_WAITOK); 493 *sptmp = *sp; 494 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 495 prison_enforce_statfs(td->td_ucred, mp, sptmp); 496 sp = sptmp; 497 } else 498 sptmp = NULL; 499 if (bufseg == UIO_SYSSPACE) { 500 bcopy(sp, sfsp, sizeof(*sp)); 501 free(sptmp, M_STATFS); 502 } else /* if (bufseg == UIO_USERSPACE) */ { 503 error = copyout(sp, sfsp, sizeof(*sp)); 504 free(sptmp, M_STATFS); 505 if (error != 0) { 506 vfs_unbusy(mp); 507 return (error); 508 } 509 } 510 sfsp++; 511 } 512 count++; 513 mtx_lock(&mountlist_mtx); 514 nmp = TAILQ_NEXT(mp, mnt_list); 515 vfs_unbusy(mp); 516 } 517 mtx_unlock(&mountlist_mtx); 518 if (sfsp != NULL && count > maxcount) 519 *countp = maxcount; 520 else 521 *countp = count; 522 return (0); 523} 524 525#ifdef 
COMPAT_FREEBSD4 526/* 527 * Get old format filesystem statistics. 528 */ 529static void cvtstatfs(struct statfs *, struct ostatfs *); 530 531#ifndef _SYS_SYSPROTO_H_ 532struct freebsd4_statfs_args { 533 char *path; 534 struct ostatfs *buf; 535}; 536#endif 537int 538freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 539{ 540 struct ostatfs osb; 541 struct statfs *sfp; 542 int error; 543 544 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 545 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 546 if (error == 0) { 547 cvtstatfs(sfp, &osb); 548 error = copyout(&osb, uap->buf, sizeof(osb)); 549 } 550 free(sfp, M_STATFS); 551 return (error); 552} 553 554/* 555 * Get filesystem statistics. 556 */ 557#ifndef _SYS_SYSPROTO_H_ 558struct freebsd4_fstatfs_args { 559 int fd; 560 struct ostatfs *buf; 561}; 562#endif 563int 564freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 565{ 566 struct ostatfs osb; 567 struct statfs *sfp; 568 int error; 569 570 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 571 error = kern_fstatfs(td, uap->fd, sfp); 572 if (error == 0) { 573 cvtstatfs(sfp, &osb); 574 error = copyout(&osb, uap->buf, sizeof(osb)); 575 } 576 free(sfp, M_STATFS); 577 return (error); 578} 579 580/* 581 * Get statistics on all filesystems. 
582 */ 583#ifndef _SYS_SYSPROTO_H_ 584struct freebsd4_getfsstat_args { 585 struct ostatfs *buf; 586 long bufsize; 587 int mode; 588}; 589#endif 590int 591freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 592{ 593 struct statfs *buf, *sp; 594 struct ostatfs osb; 595 size_t count, size; 596 int error; 597 598 if (uap->bufsize < 0) 599 return (EINVAL); 600 count = uap->bufsize / sizeof(struct ostatfs); 601 if (count > SIZE_MAX / sizeof(struct statfs)) 602 return (EINVAL); 603 size = count * sizeof(struct statfs); 604 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 605 uap->mode); 606 if (buf == NULL) 607 return (EINVAL); 608 td->td_retval[0] = count; 609 if (size != 0) { 610 sp = buf; 611 while (count != 0 && error == 0) { 612 cvtstatfs(sp, &osb); 613 error = copyout(&osb, uap->buf, sizeof(osb)); 614 sp++; 615 uap->buf++; 616 count--; 617 } 618 free(buf, M_STATFS); 619 } 620 return (error); 621} 622 623/* 624 * Implement fstatfs() for (NFS) file handles. 625 */ 626#ifndef _SYS_SYSPROTO_H_ 627struct freebsd4_fhstatfs_args { 628 struct fhandle *u_fhp; 629 struct ostatfs *buf; 630}; 631#endif 632int 633freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 634{ 635 struct ostatfs osb; 636 struct statfs *sfp; 637 fhandle_t fh; 638 int error; 639 640 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 641 if (error != 0) 642 return (error); 643 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 644 error = kern_fhstatfs(td, fh, sfp); 645 if (error == 0) { 646 cvtstatfs(sfp, &osb); 647 error = copyout(&osb, uap->buf, sizeof(osb)); 648 } 649 free(sfp, M_STATFS); 650 return (error); 651} 652 653/* 654 * Convert a new format statfs structure to an old format statfs structure. 
655 */ 656static void 657cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 658{ 659 660 statfs_scale_blocks(nsp, LONG_MAX); 661 bzero(osp, sizeof(*osp)); 662 osp->f_bsize = nsp->f_bsize; 663 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 664 osp->f_blocks = nsp->f_blocks; 665 osp->f_bfree = nsp->f_bfree; 666 osp->f_bavail = nsp->f_bavail; 667 osp->f_files = MIN(nsp->f_files, LONG_MAX); 668 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 669 osp->f_owner = nsp->f_owner; 670 osp->f_type = nsp->f_type; 671 osp->f_flags = nsp->f_flags; 672 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 673 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 674 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 675 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 676 strlcpy(osp->f_fstypename, nsp->f_fstypename, 677 MIN(MFSNAMELEN, OMFSNAMELEN)); 678 strlcpy(osp->f_mntonname, nsp->f_mntonname, 679 MIN(MNAMELEN, OMNAMELEN)); 680 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 681 MIN(MNAMELEN, OMNAMELEN)); 682 osp->f_fsid = nsp->f_fsid; 683} 684#endif /* COMPAT_FREEBSD4 */ 685 686/* 687 * Change current working directory to a given file descriptor. 
688 */ 689#ifndef _SYS_SYSPROTO_H_ 690struct fchdir_args { 691 int fd; 692}; 693#endif 694int 695sys_fchdir(struct thread *td, struct fchdir_args *uap) 696{ 697 struct vnode *vp, *tdp; 698 struct mount *mp; 699 struct file *fp; 700 cap_rights_t rights; 701 int error; 702 703 AUDIT_ARG_FD(uap->fd); 704 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 705 &fp); 706 if (error != 0) 707 return (error); 708 vp = fp->f_vnode; 709 vrefact(vp); 710 fdrop(fp, td); 711 vn_lock(vp, LK_SHARED | LK_RETRY); 712 AUDIT_ARG_VNODE1(vp); 713 error = change_dir(vp, td); 714 while (!error && (mp = vp->v_mountedhere) != NULL) { 715 if (vfs_busy(mp, 0)) 716 continue; 717 error = VFS_ROOT(mp, LK_SHARED, &tdp); 718 vfs_unbusy(mp); 719 if (error != 0) 720 break; 721 vput(vp); 722 vp = tdp; 723 } 724 if (error != 0) { 725 vput(vp); 726 return (error); 727 } 728 VOP_UNLOCK(vp, 0); 729 pwd_chdir(td, vp); 730 return (0); 731} 732 733/* 734 * Change current working directory (``.''). 735 */ 736#ifndef _SYS_SYSPROTO_H_ 737struct chdir_args { 738 char *path; 739}; 740#endif 741int 742sys_chdir(struct thread *td, struct chdir_args *uap) 743{ 744 745 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 746} 747 748int 749kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 750{ 751 struct nameidata nd; 752 int error; 753 754 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 755 pathseg, path, td); 756 if ((error = namei(&nd)) != 0) 757 return (error); 758 if ((error = change_dir(nd.ni_vp, td)) != 0) { 759 vput(nd.ni_vp); 760 NDFREE(&nd, NDF_ONLY_PNBUF); 761 return (error); 762 } 763 VOP_UNLOCK(nd.ni_vp, 0); 764 NDFREE(&nd, NDF_ONLY_PNBUF); 765 pwd_chdir(td, nd.ni_vp); 766 return (0); 767} 768 769/* 770 * Change notion of root (``/'') directory. 
771 */ 772#ifndef _SYS_SYSPROTO_H_ 773struct chroot_args { 774 char *path; 775}; 776#endif 777int 778sys_chroot(struct thread *td, struct chroot_args *uap) 779{ 780 struct nameidata nd; 781 int error; 782 783 error = priv_check(td, PRIV_VFS_CHROOT); 784 if (error != 0) 785 return (error); 786 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 787 UIO_USERSPACE, uap->path, td); 788 error = namei(&nd); 789 if (error != 0) 790 goto error; 791 error = change_dir(nd.ni_vp, td); 792 if (error != 0) 793 goto e_vunlock; 794#ifdef MAC 795 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 796 if (error != 0) 797 goto e_vunlock; 798#endif 799 VOP_UNLOCK(nd.ni_vp, 0); 800 error = pwd_chroot(td, nd.ni_vp); 801 vrele(nd.ni_vp); 802 NDFREE(&nd, NDF_ONLY_PNBUF); 803 return (error); 804e_vunlock: 805 vput(nd.ni_vp); 806error: 807 NDFREE(&nd, NDF_ONLY_PNBUF); 808 return (error); 809} 810 811/* 812 * Common routine for chroot and chdir. Callers must provide a locked vnode 813 * instance. 
814 */ 815int 816change_dir(struct vnode *vp, struct thread *td) 817{ 818#ifdef MAC 819 int error; 820#endif 821 822 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 823 if (vp->v_type != VDIR) 824 return (ENOTDIR); 825#ifdef MAC 826 error = mac_vnode_check_chdir(td->td_ucred, vp); 827 if (error != 0) 828 return (error); 829#endif 830 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 831} 832 833static __inline void 834flags_to_rights(int flags, cap_rights_t *rightsp) 835{ 836 837 if (flags & O_EXEC) { 838 cap_rights_set(rightsp, CAP_FEXECVE); 839 } else { 840 switch ((flags & O_ACCMODE)) { 841 case O_RDONLY: 842 cap_rights_set(rightsp, CAP_READ); 843 break; 844 case O_RDWR: 845 cap_rights_set(rightsp, CAP_READ); 846 /* FALLTHROUGH */ 847 case O_WRONLY: 848 cap_rights_set(rightsp, CAP_WRITE); 849 if (!(flags & (O_APPEND | O_TRUNC))) 850 cap_rights_set(rightsp, CAP_SEEK); 851 break; 852 } 853 } 854 855 if (flags & O_CREAT) 856 cap_rights_set(rightsp, CAP_CREATE); 857 858 if (flags & O_TRUNC) 859 cap_rights_set(rightsp, CAP_FTRUNCATE); 860 861 if (flags & (O_SYNC | O_FSYNC)) 862 cap_rights_set(rightsp, CAP_FSYNC); 863 864 if (flags & (O_EXLOCK | O_SHLOCK)) 865 cap_rights_set(rightsp, CAP_FLOCK); 866} 867 868/* 869 * Check permissions, allocate an open file structure, and call the device 870 * open routine if any. 
871 */ 872#ifndef _SYS_SYSPROTO_H_ 873struct open_args { 874 char *path; 875 int flags; 876 int mode; 877}; 878#endif 879int 880sys_open(struct thread *td, struct open_args *uap) 881{ 882 883 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 884 uap->flags, uap->mode)); 885} 886 887#ifndef _SYS_SYSPROTO_H_ 888struct openat_args { 889 int fd; 890 char *path; 891 int flag; 892 int mode; 893}; 894#endif 895int 896sys_openat(struct thread *td, struct openat_args *uap) 897{ 898 899 AUDIT_ARG_FD(uap->fd); 900 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 901 uap->mode)); 902} 903 904int 905kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 906 int flags, int mode) 907{ 908 struct proc *p = td->td_proc; 909 struct filedesc *fdp = p->p_fd; 910 struct file *fp; 911 struct vnode *vp; 912 struct nameidata nd; 913 cap_rights_t rights; 914 int cmode, error, indx; 915 916 indx = -1; 917 918 AUDIT_ARG_FFLAGS(flags); 919 AUDIT_ARG_MODE(mode); 920 cap_rights_init(&rights, CAP_LOOKUP); 921 flags_to_rights(flags, &rights); 922 /* 923 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 924 * may be specified. 925 */ 926 if (flags & O_EXEC) { 927 if (flags & O_ACCMODE) 928 return (EINVAL); 929 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 930 return (EINVAL); 931 } else { 932 flags = FFLAGS(flags); 933 } 934 935 /* 936 * Allocate a file structure. The descriptor to reference it 937 * is allocated and set by finstall() below. 938 */ 939 error = falloc_noinstall(td, &fp); 940 if (error != 0) 941 return (error); 942 /* 943 * An extra reference on `fp' has been held for us by 944 * falloc_noinstall(). 945 */ 946 /* Set the flags early so the finit in devfs can pick them up. 
*/ 947 fp->f_flag = flags & FMASK; 948 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 949 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 950 &rights, td); 951 td->td_dupfd = -1; /* XXX check for fdopen */ 952 error = vn_open(&nd, &flags, cmode, fp); 953 if (error != 0) { 954 /* 955 * If the vn_open replaced the method vector, something 956 * wonderous happened deep below and we just pass it up 957 * pretending we know what we do. 958 */ 959 if (error == ENXIO && fp->f_ops != &badfileops) 960 goto success; 961 962 /* 963 * Handle special fdopen() case. bleh. 964 * 965 * Don't do this for relative (capability) lookups; we don't 966 * understand exactly what would happen, and we don't think 967 * that it ever should. 968 */ 969 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 970 (error == ENODEV || error == ENXIO) && 971 td->td_dupfd >= 0) { 972 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 973 &indx); 974 if (error == 0) 975 goto success; 976 } 977 978 goto bad; 979 } 980 td->td_dupfd = 0; 981 NDFREE(&nd, NDF_ONLY_PNBUF); 982 vp = nd.ni_vp; 983 984 /* 985 * Store the vnode, for any f_type. Typically, the vnode use 986 * count is decremented by direct call to vn_closefile() for 987 * files that switched type in the cdevsw fdopen() method. 988 */ 989 fp->f_vnode = vp; 990 /* 991 * If the file wasn't claimed by devfs bind it to the normal 992 * vnode operations here. 993 */ 994 if (fp->f_ops == &badfileops) { 995 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 996 fp->f_seqcount = 1; 997 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 998 DTYPE_VNODE, vp, &vnops); 999 } 1000 1001 VOP_UNLOCK(vp, 0); 1002 if (flags & O_TRUNC) { 1003 error = fo_truncate(fp, 0, td->td_ucred, td); 1004 if (error != 0) 1005 goto bad; 1006 } 1007success: 1008 /* 1009 * If we haven't already installed the FD (for dupfdopen), do so now. 
1010 */ 1011 if (indx == -1) { 1012 struct filecaps *fcaps; 1013 1014#ifdef CAPABILITIES 1015 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1016 fcaps = &nd.ni_filecaps; 1017 else 1018#endif 1019 fcaps = NULL; 1020 error = finstall(td, fp, &indx, flags, fcaps); 1021 /* On success finstall() consumes fcaps. */ 1022 if (error != 0) { 1023 filecaps_free(&nd.ni_filecaps); 1024 goto bad; 1025 } 1026 } else { 1027 filecaps_free(&nd.ni_filecaps); 1028 } 1029 1030 /* 1031 * Release our private reference, leaving the one associated with 1032 * the descriptor table intact. 1033 */ 1034 fdrop(fp, td); 1035 td->td_retval[0] = indx; 1036 return (0); 1037bad: 1038 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1039 fdrop(fp, td); 1040 return (error); 1041} 1042 1043#ifdef COMPAT_43 1044/* 1045 * Create a file. 1046 */ 1047#ifndef _SYS_SYSPROTO_H_ 1048struct ocreat_args { 1049 char *path; 1050 int mode; 1051}; 1052#endif 1053int 1054ocreat(struct thread *td, struct ocreat_args *uap) 1055{ 1056 1057 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1058 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1059} 1060#endif /* COMPAT_43 */ 1061 1062/* 1063 * Create a special file. 
1064 */ 1065#ifndef _SYS_SYSPROTO_H_ 1066struct mknod_args { 1067 char *path; 1068 int mode; 1069 int dev; 1070}; 1071#endif 1072int 1073sys_mknod(struct thread *td, struct mknod_args *uap) 1074{ 1075 1076 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1077 uap->mode, uap->dev)); 1078} 1079 1080#ifndef _SYS_SYSPROTO_H_ 1081struct mknodat_args { 1082 int fd; 1083 char *path; 1084 mode_t mode; 1085 dev_t dev; 1086}; 1087#endif 1088int 1089sys_mknodat(struct thread *td, struct mknodat_args *uap) 1090{ 1091 1092 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1093 uap->dev)); 1094} 1095 1096int 1097kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1098 int mode, int dev) 1099{ 1100 struct vnode *vp; 1101 struct mount *mp; 1102 struct vattr vattr; 1103 struct nameidata nd; 1104 cap_rights_t rights; 1105 int error, whiteout = 0; 1106 1107 AUDIT_ARG_MODE(mode); 1108 AUDIT_ARG_DEV(dev); 1109 switch (mode & S_IFMT) { 1110 case S_IFCHR: 1111 case S_IFBLK: 1112 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1113 if (error == 0 && dev == VNOVAL) 1114 error = EINVAL; 1115 break; 1116 case S_IFWHT: 1117 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1118 break; 1119 case S_IFIFO: 1120 if (dev == 0) 1121 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1122 /* FALLTHROUGH */ 1123 default: 1124 error = EINVAL; 1125 break; 1126 } 1127 if (error != 0) 1128 return (error); 1129restart: 1130 bwillwrite(); 1131 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1132 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1133 td); 1134 if ((error = namei(&nd)) != 0) 1135 return (error); 1136 vp = nd.ni_vp; 1137 if (vp != NULL) { 1138 NDFREE(&nd, NDF_ONLY_PNBUF); 1139 if (vp == nd.ni_dvp) 1140 vrele(nd.ni_dvp); 1141 else 1142 vput(nd.ni_dvp); 1143 vrele(vp); 1144 return (EEXIST); 1145 } else { 1146 VATTR_NULL(&vattr); 1147 vattr.va_mode = (mode & ALLPERMS) & 1148 ~td->td_proc->p_fd->fd_cmask; 1149 
vattr.va_rdev = dev; 1150 whiteout = 0; 1151 1152 switch (mode & S_IFMT) { 1153 case S_IFCHR: 1154 vattr.va_type = VCHR; 1155 break; 1156 case S_IFBLK: 1157 vattr.va_type = VBLK; 1158 break; 1159 case S_IFWHT: 1160 whiteout = 1; 1161 break; 1162 default: 1163 panic("kern_mknod: invalid mode"); 1164 } 1165 } 1166 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1167 NDFREE(&nd, NDF_ONLY_PNBUF); 1168 vput(nd.ni_dvp); 1169 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1170 return (error); 1171 goto restart; 1172 } 1173#ifdef MAC 1174 if (error == 0 && !whiteout) 1175 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1176 &nd.ni_cnd, &vattr); 1177#endif 1178 if (error == 0) { 1179 if (whiteout) 1180 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1181 else { 1182 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1183 &nd.ni_cnd, &vattr); 1184 if (error == 0) 1185 vput(nd.ni_vp); 1186 } 1187 } 1188 NDFREE(&nd, NDF_ONLY_PNBUF); 1189 vput(nd.ni_dvp); 1190 vn_finished_write(mp); 1191 return (error); 1192} 1193 1194/* 1195 * Create a named pipe. 
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point: forwards to kern_mkfifoat() with AT_FDCWD in the
 * directory-descriptor slot and a user-space path.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point: same as sys_mkfifo() but the caller supplies
 * the directory descriptor.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO named by 'path', looked up relative to 'fd'.
 *
 * Returns 0 on success, otherwise an errno value: the namei() error when
 * the lookup fails, EEXIST when the name already resolves to a vnode, the
 * error from the sleeping vn_start_write() call, or whatever the MAC check
 * or VOP_MKNOD() reports.
 */
int
kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: drop the lookup state and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/* Parent and target are the same vnode: vrele(), not vput(). */
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-blocking attempt failed.  Release the lookup
		 * state, wait with V_XSLEEP | PCATCH, then redo the whole
		 * lookup from 'restart'.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	/* Keep only ALLPERMS bits and clear those set in the file mode mask. */
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* The new vnode is not needed by the caller; release it right away. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point: both paths are resolved with AT_FDCWD and the
 * source lookup uses FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.  Any flag bit other than AT_SYMLINK_FOLLOW is
 * rejected with EINVAL; AT_SYMLINK_FOLLOW selects FOLLOW for the source
 * lookup, otherwise NOFOLLOW is used.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if (flag & ~AT_SYMLINK_FOLLOW)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
}

/* Policy knobs consulted by can_hardlink(); both default to off. */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Apply the hardlink_check_uid / hardlink_check_gid policies to a
 * prospective link to 'vp' made with credential 'cred'.
 *
 * Returns 0 when both knobs are off or every enabled check passes;
 * otherwise the error from VOP_GETATTR() or priv_check_cred().
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	/* Nothing to enforce: skip the attribute fetch entirely. */
	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	/* File owned by someone else: PRIV_VFS_LINK is required. */
	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	/* Caller is not in the file's group: PRIV_VFS_LINK is required. */
	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create the name 'path2' (relative to 'fd2') as a hard link to the file
 * named by 'path1' (relative to 'fd1').  'follow' is OR'ed into the flags
 * of the source lookup.
 *
 * Returns 0 on success, otherwise an errno value; in addition to errors
 * propagated from the calls below: EPERM when the source is a directory,
 * EEXIST when the target name already exists, and EXDEV when source and
 * target directory are on different mounts.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
    enum uio_seg segflg, int follow)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

again:
	bwillwrite();
	/* First lookup: the existing file that is being linked to. */
	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
	    cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	/* Second lookup: the new name, with its parent directory locked. */
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
	    cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* Target name already exists. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
			/* Policy checks run with the source vnode locked. */
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Could not start the write without
				 * waiting: drop everything, wait, and
				 * restart both lookups.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH);
				if (error != 0)
					return (error);
				goto again;
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			/* Only unlock vp here; it is vrele()'d below. */
			VOP_UNLOCK(vp, 0);
			vput(nd.ni_dvp);
			vn_finished_write(mp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
		} else {
			/* vn_lock() on the source failed: retry from scratch. */
			vput(nd.ni_dvp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vrele(vp);
			goto again;
		}
	}
	/* Reached on target lookup failure and after the VOP_LINK() path. */
	vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1447 */ 1448#ifndef _SYS_SYSPROTO_H_ 1449struct symlink_args { 1450 char *path; 1451 char *link; 1452}; 1453#endif 1454int 1455sys_symlink(struct thread *td, struct symlink_args *uap) 1456{ 1457 1458 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1459 UIO_USERSPACE)); 1460} 1461 1462#ifndef _SYS_SYSPROTO_H_ 1463struct symlinkat_args { 1464 char *path; 1465 int fd; 1466 char *path2; 1467}; 1468#endif 1469int 1470sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1471{ 1472 1473 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1474 UIO_USERSPACE)); 1475} 1476 1477int 1478kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1479 enum uio_seg segflg) 1480{ 1481 struct mount *mp; 1482 struct vattr vattr; 1483 char *syspath; 1484 struct nameidata nd; 1485 int error; 1486 cap_rights_t rights; 1487 1488 if (segflg == UIO_SYSSPACE) { 1489 syspath = path1; 1490 } else { 1491 syspath = uma_zalloc(namei_zone, M_WAITOK); 1492 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1493 goto out; 1494 } 1495 AUDIT_ARG_TEXT(syspath); 1496restart: 1497 bwillwrite(); 1498 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1499 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1500 td); 1501 if ((error = namei(&nd)) != 0) 1502 goto out; 1503 if (nd.ni_vp) { 1504 NDFREE(&nd, NDF_ONLY_PNBUF); 1505 if (nd.ni_vp == nd.ni_dvp) 1506 vrele(nd.ni_dvp); 1507 else 1508 vput(nd.ni_dvp); 1509 vrele(nd.ni_vp); 1510 error = EEXIST; 1511 goto out; 1512 } 1513 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1514 NDFREE(&nd, NDF_ONLY_PNBUF); 1515 vput(nd.ni_dvp); 1516 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1517 goto out; 1518 goto restart; 1519 } 1520 VATTR_NULL(&vattr); 1521 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1522#ifdef MAC 1523 vattr.va_type = VLNK; 1524 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1525 &vattr); 1526 if (error != 
0) 1527 goto out2; 1528#endif 1529 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1530 if (error == 0) 1531 vput(nd.ni_vp); 1532#ifdef MAC 1533out2: 1534#endif 1535 NDFREE(&nd, NDF_ONLY_PNBUF); 1536 vput(nd.ni_dvp); 1537 vn_finished_write(mp); 1538out: 1539 if (segflg != UIO_SYSSPACE) 1540 uma_zfree(namei_zone, syspath); 1541 return (error); 1542} 1543 1544/* 1545 * Delete a whiteout from the filesystem. 1546 */ 1547#ifndef _SYS_SYSPROTO_H_ 1548struct undelete_args { 1549 char *path; 1550}; 1551#endif 1552int 1553sys_undelete(struct thread *td, struct undelete_args *uap) 1554{ 1555 struct mount *mp; 1556 struct nameidata nd; 1557 int error; 1558 1559restart: 1560 bwillwrite(); 1561 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1562 UIO_USERSPACE, uap->path, td); 1563 error = namei(&nd); 1564 if (error != 0) 1565 return (error); 1566 1567 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1568 NDFREE(&nd, NDF_ONLY_PNBUF); 1569 if (nd.ni_vp == nd.ni_dvp) 1570 vrele(nd.ni_dvp); 1571 else 1572 vput(nd.ni_dvp); 1573 if (nd.ni_vp) 1574 vrele(nd.ni_vp); 1575 return (EEXIST); 1576 } 1577 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1578 NDFREE(&nd, NDF_ONLY_PNBUF); 1579 vput(nd.ni_dvp); 1580 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1581 return (error); 1582 goto restart; 1583 } 1584 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1585 NDFREE(&nd, NDF_ONLY_PNBUF); 1586 vput(nd.ni_dvp); 1587 vn_finished_write(mp); 1588 return (error); 1589} 1590 1591/* 1592 * Delete a name from the filesystem. 
1593 */ 1594#ifndef _SYS_SYSPROTO_H_ 1595struct unlink_args { 1596 char *path; 1597}; 1598#endif 1599int 1600sys_unlink(struct thread *td, struct unlink_args *uap) 1601{ 1602 1603 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1604} 1605 1606#ifndef _SYS_SYSPROTO_H_ 1607struct unlinkat_args { 1608 int fd; 1609 char *path; 1610 int flag; 1611}; 1612#endif 1613int 1614sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1615{ 1616 int flag = uap->flag; 1617 int fd = uap->fd; 1618 char *path = uap->path; 1619 1620 if (flag & ~AT_REMOVEDIR) 1621 return (EINVAL); 1622 1623 if (flag & AT_REMOVEDIR) 1624 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1625 else 1626 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1627} 1628 1629int 1630kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1631 ino_t oldinum) 1632{ 1633 struct mount *mp; 1634 struct vnode *vp; 1635 struct nameidata nd; 1636 struct stat sb; 1637 cap_rights_t rights; 1638 int error; 1639 1640restart: 1641 bwillwrite(); 1642 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1643 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1644 if ((error = namei(&nd)) != 0) 1645 return (error == EINVAL ? EPERM : error); 1646 vp = nd.ni_vp; 1647 if (vp->v_type == VDIR && oldinum == 0) { 1648 error = EPERM; /* POSIX */ 1649 } else if (oldinum != 0 && 1650 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1651 sb.st_ino != oldinum) { 1652 error = EIDRM; /* Identifier removed */ 1653 } else { 1654 /* 1655 * The root of a mounted filesystem cannot be deleted. 1656 * 1657 * XXX: can this only be a VDIR case? 
1658 */ 1659 if (vp->v_vflag & VV_ROOT) 1660 error = EBUSY; 1661 } 1662 if (error == 0) { 1663 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1664 NDFREE(&nd, NDF_ONLY_PNBUF); 1665 vput(nd.ni_dvp); 1666 if (vp == nd.ni_dvp) 1667 vrele(vp); 1668 else 1669 vput(vp); 1670 if ((error = vn_start_write(NULL, &mp, 1671 V_XSLEEP | PCATCH)) != 0) 1672 return (error); 1673 goto restart; 1674 } 1675#ifdef MAC 1676 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1677 &nd.ni_cnd); 1678 if (error != 0) 1679 goto out; 1680#endif 1681 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1682 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1683#ifdef MAC 1684out: 1685#endif 1686 vn_finished_write(mp); 1687 } 1688 NDFREE(&nd, NDF_ONLY_PNBUF); 1689 vput(nd.ni_dvp); 1690 if (vp == nd.ni_dvp) 1691 vrele(vp); 1692 else 1693 vput(vp); 1694 return (error); 1695} 1696 1697/* 1698 * Reposition read/write file offset. 1699 */ 1700#ifndef _SYS_SYSPROTO_H_ 1701struct lseek_args { 1702 int fd; 1703 int pad; 1704 off_t offset; 1705 int whence; 1706}; 1707#endif 1708int 1709sys_lseek(struct thread *td, struct lseek_args *uap) 1710{ 1711 1712 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1713} 1714 1715int 1716kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1717{ 1718 struct file *fp; 1719 cap_rights_t rights; 1720 int error; 1721 1722 AUDIT_ARG_FD(fd); 1723 error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1724 if (error != 0) 1725 return (error); 1726 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1727 fo_seek(fp, offset, whence, td) : ESPIPE; 1728 fdrop(fp, td); 1729 return (error); 1730} 1731 1732#if defined(COMPAT_43) 1733/* 1734 * Reposition read/write file offset. 
1735 */ 1736#ifndef _SYS_SYSPROTO_H_ 1737struct olseek_args { 1738 int fd; 1739 long offset; 1740 int whence; 1741}; 1742#endif 1743int 1744olseek(struct thread *td, struct olseek_args *uap) 1745{ 1746 1747 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1748} 1749#endif /* COMPAT_43 */ 1750 1751#if defined(COMPAT_FREEBSD6) 1752/* Version with the 'pad' argument */ 1753int 1754freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1755{ 1756 1757 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1758} 1759#endif 1760 1761/* 1762 * Check access permissions using passed credentials. 1763 */ 1764static int 1765vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1766 struct thread *td) 1767{ 1768 accmode_t accmode; 1769 int error; 1770 1771 /* Flags == 0 means only check for existence. */ 1772 if (user_flags == 0) 1773 return (0); 1774 1775 accmode = 0; 1776 if (user_flags & R_OK) 1777 accmode |= VREAD; 1778 if (user_flags & W_OK) 1779 accmode |= VWRITE; 1780 if (user_flags & X_OK) 1781 accmode |= VEXEC; 1782#ifdef MAC 1783 error = mac_vnode_check_access(cred, vp, accmode); 1784 if (error != 0) 1785 return (error); 1786#endif 1787 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1788 error = VOP_ACCESS(vp, accmode, cred, td); 1789 return (error); 1790} 1791 1792/* 1793 * Check access permissions using "real" credentials. 
1794 */ 1795#ifndef _SYS_SYSPROTO_H_ 1796struct access_args { 1797 char *path; 1798 int amode; 1799}; 1800#endif 1801int 1802sys_access(struct thread *td, struct access_args *uap) 1803{ 1804 1805 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1806 0, uap->amode)); 1807} 1808 1809#ifndef _SYS_SYSPROTO_H_ 1810struct faccessat_args { 1811 int dirfd; 1812 char *path; 1813 int amode; 1814 int flag; 1815} 1816#endif 1817int 1818sys_faccessat(struct thread *td, struct faccessat_args *uap) 1819{ 1820 1821 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1822 uap->amode)); 1823} 1824 1825int 1826kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1827 int flag, int amode) 1828{ 1829 struct ucred *cred, *usecred; 1830 struct vnode *vp; 1831 struct nameidata nd; 1832 cap_rights_t rights; 1833 int error; 1834 1835 if (flag & ~AT_EACCESS) 1836 return (EINVAL); 1837 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1838 return (EINVAL); 1839 1840 /* 1841 * Create and modify a temporary credential instead of one that 1842 * is potentially shared (if we need one). 
1843 */ 1844 cred = td->td_ucred; 1845 if ((flag & AT_EACCESS) == 0 && 1846 ((cred->cr_uid != cred->cr_ruid || 1847 cred->cr_rgid != cred->cr_groups[0]))) { 1848 usecred = crdup(cred); 1849 usecred->cr_uid = cred->cr_ruid; 1850 usecred->cr_groups[0] = cred->cr_rgid; 1851 td->td_ucred = usecred; 1852 } else 1853 usecred = cred; 1854 AUDIT_ARG_VALUE(amode); 1855 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1856 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1857 td); 1858 if ((error = namei(&nd)) != 0) 1859 goto out; 1860 vp = nd.ni_vp; 1861 1862 error = vn_access(vp, amode, usecred, td); 1863 NDFREE(&nd, NDF_ONLY_PNBUF); 1864 vput(vp); 1865out: 1866 if (usecred != cred) { 1867 td->td_ucred = cred; 1868 crfree(usecred); 1869 } 1870 return (error); 1871} 1872 1873/* 1874 * Check access permissions using "effective" credentials. 1875 */ 1876#ifndef _SYS_SYSPROTO_H_ 1877struct eaccess_args { 1878 char *path; 1879 int amode; 1880}; 1881#endif 1882int 1883sys_eaccess(struct thread *td, struct eaccess_args *uap) 1884{ 1885 1886 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1887 AT_EACCESS, uap->amode)); 1888} 1889 1890#if defined(COMPAT_43) 1891/* 1892 * Get file status; this version follows links. 1893 */ 1894#ifndef _SYS_SYSPROTO_H_ 1895struct ostat_args { 1896 char *path; 1897 struct ostat *ub; 1898}; 1899#endif 1900int 1901ostat(struct thread *td, struct ostat_args *uap) 1902{ 1903 struct stat sb; 1904 struct ostat osb; 1905 int error; 1906 1907 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1908 &sb, NULL); 1909 if (error != 0) 1910 return (error); 1911 cvtstat(&sb, &osb); 1912 return (copyout(&osb, uap->ub, sizeof (osb))); 1913} 1914 1915/* 1916 * Get file status; this version does not follow links. 
1917 */ 1918#ifndef _SYS_SYSPROTO_H_ 1919struct olstat_args { 1920 char *path; 1921 struct ostat *ub; 1922}; 1923#endif 1924int 1925olstat(struct thread *td, struct olstat_args *uap) 1926{ 1927 struct stat sb; 1928 struct ostat osb; 1929 int error; 1930 1931 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 1932 UIO_USERSPACE, &sb, NULL); 1933 if (error != 0) 1934 return (error); 1935 cvtstat(&sb, &osb); 1936 return (copyout(&osb, uap->ub, sizeof (osb))); 1937} 1938 1939/* 1940 * Convert from an old to a new stat structure. 1941 */ 1942void 1943cvtstat(struct stat *st, struct ostat *ost) 1944{ 1945 1946 bzero(ost, sizeof(*ost)); 1947 ost->st_dev = st->st_dev; 1948 ost->st_ino = st->st_ino; 1949 ost->st_mode = st->st_mode; 1950 ost->st_nlink = st->st_nlink; 1951 ost->st_uid = st->st_uid; 1952 ost->st_gid = st->st_gid; 1953 ost->st_rdev = st->st_rdev; 1954 if (st->st_size < (quad_t)1 << 32) 1955 ost->st_size = st->st_size; 1956 else 1957 ost->st_size = -2; 1958 ost->st_atim = st->st_atim; 1959 ost->st_mtim = st->st_mtim; 1960 ost->st_ctim = st->st_ctim; 1961 ost->st_blksize = st->st_blksize; 1962 ost->st_blocks = st->st_blocks; 1963 ost->st_flags = st->st_flags; 1964 ost->st_gen = st->st_gen; 1965} 1966#endif /* COMPAT_43 */ 1967 1968/* 1969 * Get file status; this version follows links. 
1970 */ 1971#ifndef _SYS_SYSPROTO_H_ 1972struct stat_args { 1973 char *path; 1974 struct stat *ub; 1975}; 1976#endif 1977int 1978sys_stat(struct thread *td, struct stat_args *uap) 1979{ 1980 struct stat sb; 1981 int error; 1982 1983 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1984 &sb, NULL); 1985 if (error == 0) 1986 error = copyout(&sb, uap->ub, sizeof (sb)); 1987 return (error); 1988} 1989 1990#ifndef _SYS_SYSPROTO_H_ 1991struct fstatat_args { 1992 int fd; 1993 char *path; 1994 struct stat *buf; 1995 int flag; 1996} 1997#endif 1998int 1999sys_fstatat(struct thread *td, struct fstatat_args *uap) 2000{ 2001 struct stat sb; 2002 int error; 2003 2004 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2005 UIO_USERSPACE, &sb, NULL); 2006 if (error == 0) 2007 error = copyout(&sb, uap->buf, sizeof (sb)); 2008 return (error); 2009} 2010 2011int 2012kern_statat(struct thread *td, int flag, int fd, char *path, 2013 enum uio_seg pathseg, struct stat *sbp, 2014 void (*hook)(struct vnode *vp, struct stat *sbp)) 2015{ 2016 struct nameidata nd; 2017 struct stat sb; 2018 cap_rights_t rights; 2019 int error; 2020 2021 if (flag & ~AT_SYMLINK_NOFOLLOW) 2022 return (EINVAL); 2023 2024 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2025 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2026 cap_rights_init(&rights, CAP_FSTAT), td); 2027 2028 if ((error = namei(&nd)) != 0) 2029 return (error); 2030 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2031 if (error == 0) { 2032 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2033 if (S_ISREG(sb.st_mode)) 2034 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2035 if (__predict_false(hook != NULL)) 2036 hook(nd.ni_vp, &sb); 2037 } 2038 NDFREE(&nd, NDF_ONLY_PNBUF); 2039 vput(nd.ni_vp); 2040 if (error != 0) 2041 return (error); 2042 *sbp = sb; 2043#ifdef KTRACE 2044 if (KTRPOINT(td, KTR_STRUCT)) 2045 ktrstat(&sb); 2046#endif 2047 return (0); 2048} 2049 2050/* 2051 * Get file status; this version does not follow links. 2052 */ 2053#ifndef _SYS_SYSPROTO_H_ 2054struct lstat_args { 2055 char *path; 2056 struct stat *ub; 2057}; 2058#endif 2059int 2060sys_lstat(struct thread *td, struct lstat_args *uap) 2061{ 2062 struct stat sb; 2063 int error; 2064 2065 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2066 UIO_USERSPACE, &sb, NULL); 2067 if (error == 0) 2068 error = copyout(&sb, uap->ub, sizeof (sb)); 2069 return (error); 2070} 2071 2072/* 2073 * Implementation of the NetBSD [l]stat() functions. 
2074 */ 2075void 2076cvtnstat( struct stat *sb, struct nstat *nsb) 2077{ 2078 2079 bzero(nsb, sizeof *nsb); 2080 nsb->st_dev = sb->st_dev; 2081 nsb->st_ino = sb->st_ino; 2082 nsb->st_mode = sb->st_mode; 2083 nsb->st_nlink = sb->st_nlink; 2084 nsb->st_uid = sb->st_uid; 2085 nsb->st_gid = sb->st_gid; 2086 nsb->st_rdev = sb->st_rdev; 2087 nsb->st_atim = sb->st_atim; 2088 nsb->st_mtim = sb->st_mtim; 2089 nsb->st_ctim = sb->st_ctim; 2090 nsb->st_size = sb->st_size; 2091 nsb->st_blocks = sb->st_blocks; 2092 nsb->st_blksize = sb->st_blksize; 2093 nsb->st_flags = sb->st_flags; 2094 nsb->st_gen = sb->st_gen; 2095 nsb->st_birthtim = sb->st_birthtim; 2096} 2097 2098#ifndef _SYS_SYSPROTO_H_ 2099struct nstat_args { 2100 char *path; 2101 struct nstat *ub; 2102}; 2103#endif 2104int 2105sys_nstat(struct thread *td, struct nstat_args *uap) 2106{ 2107 struct stat sb; 2108 struct nstat nsb; 2109 int error; 2110 2111 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2112 &sb, NULL); 2113 if (error != 0) 2114 return (error); 2115 cvtnstat(&sb, &nsb); 2116 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2117} 2118 2119/* 2120 * NetBSD lstat. Get file status; this version does not follow links. 2121 */ 2122#ifndef _SYS_SYSPROTO_H_ 2123struct lstat_args { 2124 char *path; 2125 struct stat *ub; 2126}; 2127#endif 2128int 2129sys_nlstat(struct thread *td, struct nlstat_args *uap) 2130{ 2131 struct stat sb; 2132 struct nstat nsb; 2133 int error; 2134 2135 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2136 UIO_USERSPACE, &sb, NULL); 2137 if (error != 0) 2138 return (error); 2139 cvtnstat(&sb, &nsb); 2140 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2141} 2142 2143/* 2144 * Get configurable pathname variables. 
2145 */ 2146#ifndef _SYS_SYSPROTO_H_ 2147struct pathconf_args { 2148 char *path; 2149 int name; 2150}; 2151#endif 2152int 2153sys_pathconf(struct thread *td, struct pathconf_args *uap) 2154{ 2155 2156 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2157} 2158 2159#ifndef _SYS_SYSPROTO_H_ 2160struct lpathconf_args { 2161 char *path; 2162 int name; 2163}; 2164#endif 2165int 2166sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2167{ 2168 2169 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2170 NOFOLLOW)); 2171} 2172 2173int 2174kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2175 u_long flags) 2176{ 2177 struct nameidata nd; 2178 int error; 2179 2180 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2181 pathseg, path, td); 2182 if ((error = namei(&nd)) != 0) 2183 return (error); 2184 NDFREE(&nd, NDF_ONLY_PNBUF); 2185 2186 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2187 vput(nd.ni_vp); 2188 return (error); 2189} 2190 2191/* 2192 * Return target name of a symbolic link. 
2193 */ 2194#ifndef _SYS_SYSPROTO_H_ 2195struct readlink_args { 2196 char *path; 2197 char *buf; 2198 size_t count; 2199}; 2200#endif 2201int 2202sys_readlink(struct thread *td, struct readlink_args *uap) 2203{ 2204 2205 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2206 uap->buf, UIO_USERSPACE, uap->count)); 2207} 2208#ifndef _SYS_SYSPROTO_H_ 2209struct readlinkat_args { 2210 int fd; 2211 char *path; 2212 char *buf; 2213 size_t bufsize; 2214}; 2215#endif 2216int 2217sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2218{ 2219 2220 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2221 uap->buf, UIO_USERSPACE, uap->bufsize)); 2222} 2223 2224int 2225kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2226 char *buf, enum uio_seg bufseg, size_t count) 2227{ 2228 struct vnode *vp; 2229 struct iovec aiov; 2230 struct uio auio; 2231 struct nameidata nd; 2232 int error; 2233 2234 if (count > IOSIZE_MAX) 2235 return (EINVAL); 2236 2237 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2238 pathseg, path, fd, td); 2239 2240 if ((error = namei(&nd)) != 0) 2241 return (error); 2242 NDFREE(&nd, NDF_ONLY_PNBUF); 2243 vp = nd.ni_vp; 2244#ifdef MAC 2245 error = mac_vnode_check_readlink(td->td_ucred, vp); 2246 if (error != 0) { 2247 vput(vp); 2248 return (error); 2249 } 2250#endif 2251 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2252 error = EINVAL; 2253 else { 2254 aiov.iov_base = buf; 2255 aiov.iov_len = count; 2256 auio.uio_iov = &aiov; 2257 auio.uio_iovcnt = 1; 2258 auio.uio_offset = 0; 2259 auio.uio_rw = UIO_READ; 2260 auio.uio_segflg = bufseg; 2261 auio.uio_td = td; 2262 auio.uio_resid = count; 2263 error = VOP_READLINK(vp, &auio, td->td_ucred); 2264 td->td_retval[0] = count - auio.uio_resid; 2265 } 2266 vput(vp); 2267 return (error); 2268} 2269 2270/* 2271 * Common implementation code for chflags() and fchflags(). 
2272 */ 2273static int 2274setfflags(struct thread *td, struct vnode *vp, u_long flags) 2275{ 2276 struct mount *mp; 2277 struct vattr vattr; 2278 int error; 2279 2280 /* We can't support the value matching VNOVAL. */ 2281 if (flags == VNOVAL) 2282 return (EOPNOTSUPP); 2283 2284 /* 2285 * Prevent non-root users from setting flags on devices. When 2286 * a device is reused, users can retain ownership of the device 2287 * if they are allowed to set flags and programs assume that 2288 * chown can't fail when done as root. 2289 */ 2290 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2291 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2292 if (error != 0) 2293 return (error); 2294 } 2295 2296 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2297 return (error); 2298 VATTR_NULL(&vattr); 2299 vattr.va_flags = flags; 2300 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2301#ifdef MAC 2302 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2303 if (error == 0) 2304#endif 2305 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2306 VOP_UNLOCK(vp, 0); 2307 vn_finished_write(mp); 2308 return (error); 2309} 2310 2311/* 2312 * Change flags of a file given a path name. 
2313 */ 2314#ifndef _SYS_SYSPROTO_H_ 2315struct chflags_args { 2316 const char *path; 2317 u_long flags; 2318}; 2319#endif 2320int 2321sys_chflags(struct thread *td, struct chflags_args *uap) 2322{ 2323 2324 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2325 uap->flags, 0)); 2326} 2327 2328#ifndef _SYS_SYSPROTO_H_ 2329struct chflagsat_args { 2330 int fd; 2331 const char *path; 2332 u_long flags; 2333 int atflag; 2334} 2335#endif 2336int 2337sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2338{ 2339 int fd = uap->fd; 2340 const char *path = uap->path; 2341 u_long flags = uap->flags; 2342 int atflag = uap->atflag; 2343 2344 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2345 return (EINVAL); 2346 2347 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2348} 2349 2350/* 2351 * Same as chflags() but doesn't follow symlinks. 2352 */ 2353#ifndef _SYS_SYSPROTO_H_ 2354struct lchflags_args { 2355 const char *path; 2356 u_long flags; 2357}; 2358#endif 2359int 2360sys_lchflags(struct thread *td, struct lchflags_args *uap) 2361{ 2362 2363 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2364 uap->flags, AT_SYMLINK_NOFOLLOW)); 2365} 2366 2367static int 2368kern_chflagsat(struct thread *td, int fd, const char *path, 2369 enum uio_seg pathseg, u_long flags, int atflag) 2370{ 2371 struct nameidata nd; 2372 cap_rights_t rights; 2373 int error, follow; 2374 2375 AUDIT_ARG_FFLAGS(flags); 2376 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2377 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2378 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2379 if ((error = namei(&nd)) != 0) 2380 return (error); 2381 NDFREE(&nd, NDF_ONLY_PNBUF); 2382 error = setfflags(td, nd.ni_vp, flags); 2383 vrele(nd.ni_vp); 2384 return (error); 2385} 2386 2387/* 2388 * Change flags of a file given a file descriptor. 
2389 */ 2390#ifndef _SYS_SYSPROTO_H_ 2391struct fchflags_args { 2392 int fd; 2393 u_long flags; 2394}; 2395#endif 2396int 2397sys_fchflags(struct thread *td, struct fchflags_args *uap) 2398{ 2399 struct file *fp; 2400 cap_rights_t rights; 2401 int error; 2402 2403 AUDIT_ARG_FD(uap->fd); 2404 AUDIT_ARG_FFLAGS(uap->flags); 2405 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2406 &fp); 2407 if (error != 0) 2408 return (error); 2409#ifdef AUDIT 2410 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2411 AUDIT_ARG_VNODE1(fp->f_vnode); 2412 VOP_UNLOCK(fp->f_vnode, 0); 2413#endif 2414 error = setfflags(td, fp->f_vnode, uap->flags); 2415 fdrop(fp, td); 2416 return (error); 2417} 2418 2419/* 2420 * Common implementation code for chmod(), lchmod() and fchmod(). 2421 */ 2422int 2423setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2424{ 2425 struct mount *mp; 2426 struct vattr vattr; 2427 int error; 2428 2429 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2430 return (error); 2431 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2432 VATTR_NULL(&vattr); 2433 vattr.va_mode = mode & ALLPERMS; 2434#ifdef MAC 2435 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2436 if (error == 0) 2437#endif 2438 error = VOP_SETATTR(vp, &vattr, cred); 2439 VOP_UNLOCK(vp, 0); 2440 vn_finished_write(mp); 2441 return (error); 2442} 2443 2444/* 2445 * Change mode of a file given path name. 
2446 */ 2447#ifndef _SYS_SYSPROTO_H_ 2448struct chmod_args { 2449 char *path; 2450 int mode; 2451}; 2452#endif 2453int 2454sys_chmod(struct thread *td, struct chmod_args *uap) 2455{ 2456 2457 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2458 uap->mode, 0)); 2459} 2460 2461#ifndef _SYS_SYSPROTO_H_ 2462struct fchmodat_args { 2463 int dirfd; 2464 char *path; 2465 mode_t mode; 2466 int flag; 2467} 2468#endif 2469int 2470sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2471{ 2472 int flag = uap->flag; 2473 int fd = uap->fd; 2474 char *path = uap->path; 2475 mode_t mode = uap->mode; 2476 2477 if (flag & ~AT_SYMLINK_NOFOLLOW) 2478 return (EINVAL); 2479 2480 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2481} 2482 2483/* 2484 * Change mode of a file given path name (don't follow links.) 2485 */ 2486#ifndef _SYS_SYSPROTO_H_ 2487struct lchmod_args { 2488 char *path; 2489 int mode; 2490}; 2491#endif 2492int 2493sys_lchmod(struct thread *td, struct lchmod_args *uap) 2494{ 2495 2496 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2497 uap->mode, AT_SYMLINK_NOFOLLOW)); 2498} 2499 2500int 2501kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2502 mode_t mode, int flag) 2503{ 2504 struct nameidata nd; 2505 cap_rights_t rights; 2506 int error, follow; 2507 2508 AUDIT_ARG_MODE(mode); 2509 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2510 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2511 cap_rights_init(&rights, CAP_FCHMOD), td); 2512 if ((error = namei(&nd)) != 0) 2513 return (error); 2514 NDFREE(&nd, NDF_ONLY_PNBUF); 2515 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2516 vrele(nd.ni_vp); 2517 return (error); 2518} 2519 2520/* 2521 * Change mode of a file given a file descriptor. 
2522 */ 2523#ifndef _SYS_SYSPROTO_H_ 2524struct fchmod_args { 2525 int fd; 2526 int mode; 2527}; 2528#endif 2529int 2530sys_fchmod(struct thread *td, struct fchmod_args *uap) 2531{ 2532 struct file *fp; 2533 cap_rights_t rights; 2534 int error; 2535 2536 AUDIT_ARG_FD(uap->fd); 2537 AUDIT_ARG_MODE(uap->mode); 2538 2539 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2540 if (error != 0) 2541 return (error); 2542 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2543 fdrop(fp, td); 2544 return (error); 2545} 2546 2547/* 2548 * Common implementation for chown(), lchown(), and fchown() 2549 */ 2550int 2551setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2552 gid_t gid) 2553{ 2554 struct mount *mp; 2555 struct vattr vattr; 2556 int error; 2557 2558 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2559 return (error); 2560 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2561 VATTR_NULL(&vattr); 2562 vattr.va_uid = uid; 2563 vattr.va_gid = gid; 2564#ifdef MAC 2565 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2566 vattr.va_gid); 2567 if (error == 0) 2568#endif 2569 error = VOP_SETATTR(vp, &vattr, cred); 2570 VOP_UNLOCK(vp, 0); 2571 vn_finished_write(mp); 2572 return (error); 2573} 2574 2575/* 2576 * Set ownership given a path name. 
2577 */ 2578#ifndef _SYS_SYSPROTO_H_ 2579struct chown_args { 2580 char *path; 2581 int uid; 2582 int gid; 2583}; 2584#endif 2585int 2586sys_chown(struct thread *td, struct chown_args *uap) 2587{ 2588 2589 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2590 uap->gid, 0)); 2591} 2592 2593#ifndef _SYS_SYSPROTO_H_ 2594struct fchownat_args { 2595 int fd; 2596 const char * path; 2597 uid_t uid; 2598 gid_t gid; 2599 int flag; 2600}; 2601#endif 2602int 2603sys_fchownat(struct thread *td, struct fchownat_args *uap) 2604{ 2605 int flag; 2606 2607 flag = uap->flag; 2608 if (flag & ~AT_SYMLINK_NOFOLLOW) 2609 return (EINVAL); 2610 2611 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2612 uap->gid, uap->flag)); 2613} 2614 2615int 2616kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2617 int uid, int gid, int flag) 2618{ 2619 struct nameidata nd; 2620 cap_rights_t rights; 2621 int error, follow; 2622 2623 AUDIT_ARG_OWNER(uid, gid); 2624 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2625 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2626 cap_rights_init(&rights, CAP_FCHOWN), td); 2627 2628 if ((error = namei(&nd)) != 0) 2629 return (error); 2630 NDFREE(&nd, NDF_ONLY_PNBUF); 2631 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2632 vrele(nd.ni_vp); 2633 return (error); 2634} 2635 2636/* 2637 * Set ownership given a path name, do not cross symlinks. 2638 */ 2639#ifndef _SYS_SYSPROTO_H_ 2640struct lchown_args { 2641 char *path; 2642 int uid; 2643 int gid; 2644}; 2645#endif 2646int 2647sys_lchown(struct thread *td, struct lchown_args *uap) 2648{ 2649 2650 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2651 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2652} 2653 2654/* 2655 * Set ownership given a file descriptor. 
2656 */ 2657#ifndef _SYS_SYSPROTO_H_ 2658struct fchown_args { 2659 int fd; 2660 int uid; 2661 int gid; 2662}; 2663#endif 2664int 2665sys_fchown(struct thread *td, struct fchown_args *uap) 2666{ 2667 struct file *fp; 2668 cap_rights_t rights; 2669 int error; 2670 2671 AUDIT_ARG_FD(uap->fd); 2672 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2673 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2674 if (error != 0) 2675 return (error); 2676 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2677 fdrop(fp, td); 2678 return (error); 2679} 2680 2681/* 2682 * Common implementation code for utimes(), lutimes(), and futimes(). 2683 */ 2684static int 2685getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2686 struct timespec *tsp) 2687{ 2688 struct timeval tv[2]; 2689 const struct timeval *tvp; 2690 int error; 2691 2692 if (usrtvp == NULL) { 2693 vfs_timestamp(&tsp[0]); 2694 tsp[1] = tsp[0]; 2695 } else { 2696 if (tvpseg == UIO_SYSSPACE) { 2697 tvp = usrtvp; 2698 } else { 2699 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2700 return (error); 2701 tvp = tv; 2702 } 2703 2704 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2705 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2706 return (EINVAL); 2707 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2708 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2709 } 2710 return (0); 2711} 2712 2713/* 2714 * Common implementation code for futimens(), utimensat(). 
2715 */ 2716#define UTIMENS_NULL 0x1 2717#define UTIMENS_EXIT 0x2 2718static int 2719getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2720 struct timespec *tsp, int *retflags) 2721{ 2722 struct timespec tsnow; 2723 int error; 2724 2725 vfs_timestamp(&tsnow); 2726 *retflags = 0; 2727 if (usrtsp == NULL) { 2728 tsp[0] = tsnow; 2729 tsp[1] = tsnow; 2730 *retflags |= UTIMENS_NULL; 2731 return (0); 2732 } 2733 if (tspseg == UIO_SYSSPACE) { 2734 tsp[0] = usrtsp[0]; 2735 tsp[1] = usrtsp[1]; 2736 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2737 return (error); 2738 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2739 *retflags |= UTIMENS_EXIT; 2740 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2741 *retflags |= UTIMENS_NULL; 2742 if (tsp[0].tv_nsec == UTIME_OMIT) 2743 tsp[0].tv_sec = VNOVAL; 2744 else if (tsp[0].tv_nsec == UTIME_NOW) 2745 tsp[0] = tsnow; 2746 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2747 return (EINVAL); 2748 if (tsp[1].tv_nsec == UTIME_OMIT) 2749 tsp[1].tv_sec = VNOVAL; 2750 else if (tsp[1].tv_nsec == UTIME_NOW) 2751 tsp[1] = tsnow; 2752 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2753 return (EINVAL); 2754 2755 return (0); 2756} 2757 2758/* 2759 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2760 * and utimensat(). 
2761 */ 2762static int 2763setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 2764 int numtimes, int nullflag) 2765{ 2766 struct mount *mp; 2767 struct vattr vattr; 2768 int error, setbirthtime; 2769 2770 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2771 return (error); 2772 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2773 setbirthtime = 0; 2774 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2775 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2776 setbirthtime = 1; 2777 VATTR_NULL(&vattr); 2778 vattr.va_atime = ts[0]; 2779 vattr.va_mtime = ts[1]; 2780 if (setbirthtime) 2781 vattr.va_birthtime = ts[1]; 2782 if (numtimes > 2) 2783 vattr.va_birthtime = ts[2]; 2784 if (nullflag) 2785 vattr.va_vaflags |= VA_UTIMES_NULL; 2786#ifdef MAC 2787 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 2788 vattr.va_mtime); 2789#endif 2790 if (error == 0) 2791 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2792 VOP_UNLOCK(vp, 0); 2793 vn_finished_write(mp); 2794 return (error); 2795} 2796 2797/* 2798 * Set the access and modification times of a file. 
2799 */ 2800#ifndef _SYS_SYSPROTO_H_ 2801struct utimes_args { 2802 char *path; 2803 struct timeval *tptr; 2804}; 2805#endif 2806int 2807sys_utimes(struct thread *td, struct utimes_args *uap) 2808{ 2809 2810 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2811 uap->tptr, UIO_USERSPACE)); 2812} 2813 2814#ifndef _SYS_SYSPROTO_H_ 2815struct futimesat_args { 2816 int fd; 2817 const char * path; 2818 const struct timeval * times; 2819}; 2820#endif 2821int 2822sys_futimesat(struct thread *td, struct futimesat_args *uap) 2823{ 2824 2825 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 2826 uap->times, UIO_USERSPACE)); 2827} 2828 2829int 2830kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2831 struct timeval *tptr, enum uio_seg tptrseg) 2832{ 2833 struct nameidata nd; 2834 struct timespec ts[2]; 2835 cap_rights_t rights; 2836 int error; 2837 2838 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2839 return (error); 2840 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 2841 cap_rights_init(&rights, CAP_FUTIMES), td); 2842 2843 if ((error = namei(&nd)) != 0) 2844 return (error); 2845 NDFREE(&nd, NDF_ONLY_PNBUF); 2846 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2847 vrele(nd.ni_vp); 2848 return (error); 2849} 2850 2851/* 2852 * Set the access and modification times of a file. 
2853 */ 2854#ifndef _SYS_SYSPROTO_H_ 2855struct lutimes_args { 2856 char *path; 2857 struct timeval *tptr; 2858}; 2859#endif 2860int 2861sys_lutimes(struct thread *td, struct lutimes_args *uap) 2862{ 2863 2864 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 2865 UIO_USERSPACE)); 2866} 2867 2868int 2869kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 2870 struct timeval *tptr, enum uio_seg tptrseg) 2871{ 2872 struct timespec ts[2]; 2873 struct nameidata nd; 2874 int error; 2875 2876 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2877 return (error); 2878 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 2879 if ((error = namei(&nd)) != 0) 2880 return (error); 2881 NDFREE(&nd, NDF_ONLY_PNBUF); 2882 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2883 vrele(nd.ni_vp); 2884 return (error); 2885} 2886 2887/* 2888 * Set the access and modification times of a file. 2889 */ 2890#ifndef _SYS_SYSPROTO_H_ 2891struct futimes_args { 2892 int fd; 2893 struct timeval *tptr; 2894}; 2895#endif 2896int 2897sys_futimes(struct thread *td, struct futimes_args *uap) 2898{ 2899 2900 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 2901} 2902 2903int 2904kern_futimes(struct thread *td, int fd, struct timeval *tptr, 2905 enum uio_seg tptrseg) 2906{ 2907 struct timespec ts[2]; 2908 struct file *fp; 2909 cap_rights_t rights; 2910 int error; 2911 2912 AUDIT_ARG_FD(fd); 2913 error = getutimes(tptr, tptrseg, ts); 2914 if (error != 0) 2915 return (error); 2916 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2917 if (error != 0) 2918 return (error); 2919#ifdef AUDIT 2920 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2921 AUDIT_ARG_VNODE1(fp->f_vnode); 2922 VOP_UNLOCK(fp->f_vnode, 0); 2923#endif 2924 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 2925 fdrop(fp, td); 2926 return (error); 2927} 2928 2929int 2930sys_futimens(struct thread *td, struct futimens_args *uap) 2931{ 2932 2933 return 
(kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 2934} 2935 2936int 2937kern_futimens(struct thread *td, int fd, struct timespec *tptr, 2938 enum uio_seg tptrseg) 2939{ 2940 struct timespec ts[2]; 2941 struct file *fp; 2942 cap_rights_t rights; 2943 int error, flags; 2944 2945 AUDIT_ARG_FD(fd); 2946 error = getutimens(tptr, tptrseg, ts, &flags); 2947 if (error != 0) 2948 return (error); 2949 if (flags & UTIMENS_EXIT) 2950 return (0); 2951 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2952 if (error != 0) 2953 return (error); 2954#ifdef AUDIT 2955 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2956 AUDIT_ARG_VNODE1(fp->f_vnode); 2957 VOP_UNLOCK(fp->f_vnode, 0); 2958#endif 2959 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 2960 fdrop(fp, td); 2961 return (error); 2962} 2963 2964int 2965sys_utimensat(struct thread *td, struct utimensat_args *uap) 2966{ 2967 2968 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 2969 uap->times, UIO_USERSPACE, uap->flag)); 2970} 2971 2972int 2973kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2974 struct timespec *tptr, enum uio_seg tptrseg, int flag) 2975{ 2976 struct nameidata nd; 2977 struct timespec ts[2]; 2978 cap_rights_t rights; 2979 int error, flags; 2980 2981 if (flag & ~AT_SYMLINK_NOFOLLOW) 2982 return (EINVAL); 2983 2984 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 2985 return (error); 2986 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2987 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 2988 cap_rights_init(&rights, CAP_FUTIMES), td); 2989 if ((error = namei(&nd)) != 0) 2990 return (error); 2991 /* 2992 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 2993 * POSIX states: 2994 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 2995 * "Search permission is denied by a component of the path prefix." 
2996 */ 2997 NDFREE(&nd, NDF_ONLY_PNBUF); 2998 if ((flags & UTIMENS_EXIT) == 0) 2999 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3000 vrele(nd.ni_vp); 3001 return (error); 3002} 3003 3004/* 3005 * Truncate a file given its path name. 3006 */ 3007#ifndef _SYS_SYSPROTO_H_ 3008struct truncate_args { 3009 char *path; 3010 int pad; 3011 off_t length; 3012}; 3013#endif 3014int 3015sys_truncate(struct thread *td, struct truncate_args *uap) 3016{ 3017 3018 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3019} 3020 3021int 3022kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3023{ 3024 struct mount *mp; 3025 struct vnode *vp; 3026 void *rl_cookie; 3027 struct vattr vattr; 3028 struct nameidata nd; 3029 int error; 3030 3031 if (length < 0) 3032 return(EINVAL); 3033 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3034 if ((error = namei(&nd)) != 0) 3035 return (error); 3036 vp = nd.ni_vp; 3037 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3038 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3039 vn_rangelock_unlock(vp, rl_cookie); 3040 vrele(vp); 3041 return (error); 3042 } 3043 NDFREE(&nd, NDF_ONLY_PNBUF); 3044 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3045 if (vp->v_type == VDIR) 3046 error = EISDIR; 3047#ifdef MAC 3048 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3049 } 3050#endif 3051 else if ((error = vn_writechk(vp)) == 0 && 3052 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3053 VATTR_NULL(&vattr); 3054 vattr.va_size = length; 3055 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3056 } 3057 VOP_UNLOCK(vp, 0); 3058 vn_finished_write(mp); 3059 vn_rangelock_unlock(vp, rl_cookie); 3060 vrele(vp); 3061 return (error); 3062} 3063 3064#if defined(COMPAT_43) 3065/* 3066 * Truncate a file given its path name. 
3067 */ 3068#ifndef _SYS_SYSPROTO_H_ 3069struct otruncate_args { 3070 char *path; 3071 long length; 3072}; 3073#endif 3074int 3075otruncate(struct thread *td, struct otruncate_args *uap) 3076{ 3077 3078 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3079} 3080#endif /* COMPAT_43 */ 3081 3082#if defined(COMPAT_FREEBSD6) 3083/* Versions with the pad argument */ 3084int 3085freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3086{ 3087 3088 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3089} 3090 3091int 3092freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3093{ 3094 3095 return (kern_ftruncate(td, uap->fd, uap->length)); 3096} 3097#endif 3098 3099int 3100kern_fsync(struct thread *td, int fd, bool fullsync) 3101{ 3102 struct vnode *vp; 3103 struct mount *mp; 3104 struct file *fp; 3105 cap_rights_t rights; 3106 int error, lock_flags; 3107 3108 AUDIT_ARG_FD(fd); 3109 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3110 if (error != 0) 3111 return (error); 3112 vp = fp->f_vnode; 3113#if 0 3114 if (!fullsync) 3115 /* XXXKIB: compete outstanding aio writes */; 3116#endif 3117 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3118 if (error != 0) 3119 goto drop; 3120 if (MNT_SHARED_WRITES(mp) || 3121 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3122 lock_flags = LK_SHARED; 3123 } else { 3124 lock_flags = LK_EXCLUSIVE; 3125 } 3126 vn_lock(vp, lock_flags | LK_RETRY); 3127 AUDIT_ARG_VNODE1(vp); 3128 if (vp->v_object != NULL) { 3129 VM_OBJECT_WLOCK(vp->v_object); 3130 vm_object_page_clean(vp->v_object, 0, 0, 0); 3131 VM_OBJECT_WUNLOCK(vp->v_object); 3132 } 3133 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3134 VOP_UNLOCK(vp, 0); 3135 vn_finished_write(mp); 3136drop: 3137 fdrop(fp, td); 3138 return (error); 3139} 3140 3141/* 3142 * Sync an open file. 
3143 */ 3144#ifndef _SYS_SYSPROTO_H_ 3145struct fsync_args { 3146 int fd; 3147}; 3148#endif 3149int 3150sys_fsync(struct thread *td, struct fsync_args *uap) 3151{ 3152 3153 return (kern_fsync(td, uap->fd, true)); 3154} 3155 3156int 3157sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3158{ 3159 3160 return (kern_fsync(td, uap->fd, false)); 3161} 3162 3163/* 3164 * Rename files. Source and destination must either both be directories, or 3165 * both not be directories. If target is a directory, it must be empty. 3166 */ 3167#ifndef _SYS_SYSPROTO_H_ 3168struct rename_args { 3169 char *from; 3170 char *to; 3171}; 3172#endif 3173int 3174sys_rename(struct thread *td, struct rename_args *uap) 3175{ 3176 3177 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3178 uap->to, UIO_USERSPACE)); 3179} 3180 3181#ifndef _SYS_SYSPROTO_H_ 3182struct renameat_args { 3183 int oldfd; 3184 char *old; 3185 int newfd; 3186 char *new; 3187}; 3188#endif 3189int 3190sys_renameat(struct thread *td, struct renameat_args *uap) 3191{ 3192 3193 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3194 UIO_USERSPACE)); 3195} 3196 3197int 3198kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3199 enum uio_seg pathseg) 3200{ 3201 struct mount *mp = NULL; 3202 struct vnode *tvp, *fvp, *tdvp; 3203 struct nameidata fromnd, tond; 3204 cap_rights_t rights; 3205 int error; 3206 3207again: 3208 bwillwrite(); 3209#ifdef MAC 3210 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3211 AUDITVNODE1, pathseg, old, oldfd, 3212 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3213#else 3214 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3215 pathseg, old, oldfd, 3216 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3217#endif 3218 3219 if ((error = namei(&fromnd)) != 0) 3220 return (error); 3221#ifdef MAC 3222 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3223 fromnd.ni_vp, 
&fromnd.ni_cnd); 3224 VOP_UNLOCK(fromnd.ni_dvp, 0); 3225 if (fromnd.ni_dvp != fromnd.ni_vp) 3226 VOP_UNLOCK(fromnd.ni_vp, 0); 3227#endif 3228 fvp = fromnd.ni_vp; 3229 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3230 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3231 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3232 if (fromnd.ni_vp->v_type == VDIR) 3233 tond.ni_cnd.cn_flags |= WILLBEDIR; 3234 if ((error = namei(&tond)) != 0) { 3235 /* Translate error code for rename("dir1", "dir2/."). */ 3236 if (error == EISDIR && fvp->v_type == VDIR) 3237 error = EINVAL; 3238 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3239 vrele(fromnd.ni_dvp); 3240 vrele(fvp); 3241 goto out1; 3242 } 3243 tdvp = tond.ni_dvp; 3244 tvp = tond.ni_vp; 3245 error = vn_start_write(fvp, &mp, V_NOWAIT); 3246 if (error != 0) { 3247 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3248 NDFREE(&tond, NDF_ONLY_PNBUF); 3249 if (tvp != NULL) 3250 vput(tvp); 3251 if (tdvp == tvp) 3252 vrele(tdvp); 3253 else 3254 vput(tdvp); 3255 vrele(fromnd.ni_dvp); 3256 vrele(fvp); 3257 vrele(tond.ni_startdir); 3258 if (fromnd.ni_startdir != NULL) 3259 vrele(fromnd.ni_startdir); 3260 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3261 if (error != 0) 3262 return (error); 3263 goto again; 3264 } 3265 if (tvp != NULL) { 3266 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3267 error = ENOTDIR; 3268 goto out; 3269 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3270 error = EISDIR; 3271 goto out; 3272 } 3273#ifdef CAPABILITIES 3274 if (newfd != AT_FDCWD) { 3275 /* 3276 * If the target already exists we require CAP_UNLINKAT 3277 * from 'newfd'. 
3278 */ 3279 error = cap_check(&tond.ni_filecaps.fc_rights, 3280 cap_rights_init(&rights, CAP_UNLINKAT)); 3281 if (error != 0) 3282 goto out; 3283 } 3284#endif 3285 } 3286 if (fvp == tdvp) { 3287 error = EINVAL; 3288 goto out; 3289 } 3290 /* 3291 * If the source is the same as the destination (that is, if they 3292 * are links to the same vnode), then there is nothing to do. 3293 */ 3294 if (fvp == tvp) 3295 error = -1; 3296#ifdef MAC 3297 else 3298 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3299 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3300#endif 3301out: 3302 if (error == 0) { 3303 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3304 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3305 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3306 NDFREE(&tond, NDF_ONLY_PNBUF); 3307 } else { 3308 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3309 NDFREE(&tond, NDF_ONLY_PNBUF); 3310 if (tvp != NULL) 3311 vput(tvp); 3312 if (tdvp == tvp) 3313 vrele(tdvp); 3314 else 3315 vput(tdvp); 3316 vrele(fromnd.ni_dvp); 3317 vrele(fvp); 3318 } 3319 vrele(tond.ni_startdir); 3320 vn_finished_write(mp); 3321out1: 3322 if (fromnd.ni_startdir) 3323 vrele(fromnd.ni_startdir); 3324 if (error == -1) 3325 return (0); 3326 return (error); 3327} 3328 3329/* 3330 * Make a directory file. 
3331 */ 3332#ifndef _SYS_SYSPROTO_H_ 3333struct mkdir_args { 3334 char *path; 3335 int mode; 3336}; 3337#endif 3338int 3339sys_mkdir(struct thread *td, struct mkdir_args *uap) 3340{ 3341 3342 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3343 uap->mode)); 3344} 3345 3346#ifndef _SYS_SYSPROTO_H_ 3347struct mkdirat_args { 3348 int fd; 3349 char *path; 3350 mode_t mode; 3351}; 3352#endif 3353int 3354sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3355{ 3356 3357 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3358} 3359 3360int 3361kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3362 int mode) 3363{ 3364 struct mount *mp; 3365 struct vnode *vp; 3366 struct vattr vattr; 3367 struct nameidata nd; 3368 cap_rights_t rights; 3369 int error; 3370 3371 AUDIT_ARG_MODE(mode); 3372restart: 3373 bwillwrite(); 3374 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3375 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3376 td); 3377 nd.ni_cnd.cn_flags |= WILLBEDIR; 3378 if ((error = namei(&nd)) != 0) 3379 return (error); 3380 vp = nd.ni_vp; 3381 if (vp != NULL) { 3382 NDFREE(&nd, NDF_ONLY_PNBUF); 3383 /* 3384 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3385 * the strange behaviour of leaving the vnode unlocked 3386 * if the target is the same vnode as the parent. 
3387 */ 3388 if (vp == nd.ni_dvp) 3389 vrele(nd.ni_dvp); 3390 else 3391 vput(nd.ni_dvp); 3392 vrele(vp); 3393 return (EEXIST); 3394 } 3395 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3396 NDFREE(&nd, NDF_ONLY_PNBUF); 3397 vput(nd.ni_dvp); 3398 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3399 return (error); 3400 goto restart; 3401 } 3402 VATTR_NULL(&vattr); 3403 vattr.va_type = VDIR; 3404 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3405#ifdef MAC 3406 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3407 &vattr); 3408 if (error != 0) 3409 goto out; 3410#endif 3411 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3412#ifdef MAC 3413out: 3414#endif 3415 NDFREE(&nd, NDF_ONLY_PNBUF); 3416 vput(nd.ni_dvp); 3417 if (error == 0) 3418 vput(nd.ni_vp); 3419 vn_finished_write(mp); 3420 return (error); 3421} 3422 3423/* 3424 * Remove a directory file. 3425 */ 3426#ifndef _SYS_SYSPROTO_H_ 3427struct rmdir_args { 3428 char *path; 3429}; 3430#endif 3431int 3432sys_rmdir(struct thread *td, struct rmdir_args *uap) 3433{ 3434 3435 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3436} 3437 3438int 3439kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3440{ 3441 struct mount *mp; 3442 struct vnode *vp; 3443 struct nameidata nd; 3444 cap_rights_t rights; 3445 int error; 3446 3447restart: 3448 bwillwrite(); 3449 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3450 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3451 if ((error = namei(&nd)) != 0) 3452 return (error); 3453 vp = nd.ni_vp; 3454 if (vp->v_type != VDIR) { 3455 error = ENOTDIR; 3456 goto out; 3457 } 3458 /* 3459 * No rmdir "." please. 3460 */ 3461 if (nd.ni_dvp == vp) { 3462 error = EINVAL; 3463 goto out; 3464 } 3465 /* 3466 * The root of a mounted filesystem cannot be deleted. 
3467 */ 3468 if (vp->v_vflag & VV_ROOT) { 3469 error = EBUSY; 3470 goto out; 3471 } 3472#ifdef MAC 3473 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3474 &nd.ni_cnd); 3475 if (error != 0) 3476 goto out; 3477#endif 3478 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3479 NDFREE(&nd, NDF_ONLY_PNBUF); 3480 vput(vp); 3481 if (nd.ni_dvp == vp) 3482 vrele(nd.ni_dvp); 3483 else 3484 vput(nd.ni_dvp); 3485 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3486 return (error); 3487 goto restart; 3488 } 3489 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3490 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3491 vn_finished_write(mp); 3492out: 3493 NDFREE(&nd, NDF_ONLY_PNBUF); 3494 vput(vp); 3495 if (nd.ni_dvp == vp) 3496 vrele(nd.ni_dvp); 3497 else 3498 vput(nd.ni_dvp); 3499 return (error); 3500} 3501 3502#ifdef COMPAT_43 3503/* 3504 * Read a block of directory entries in a filesystem independent format. 3505 */ 3506#ifndef _SYS_SYSPROTO_H_ 3507struct ogetdirentries_args { 3508 int fd; 3509 char *buf; 3510 u_int count; 3511 long *basep; 3512}; 3513#endif 3514int 3515ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3516{ 3517 long loff; 3518 int error; 3519 3520 error = kern_ogetdirentries(td, uap, &loff); 3521 if (error == 0) 3522 error = copyout(&loff, uap->basep, sizeof(long)); 3523 return (error); 3524} 3525 3526int 3527kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3528 long *ploff) 3529{ 3530 struct vnode *vp; 3531 struct file *fp; 3532 struct uio auio, kuio; 3533 struct iovec aiov, kiov; 3534 struct dirent *dp, *edp; 3535 cap_rights_t rights; 3536 caddr_t dirbuf; 3537 int error, eofflag, readcnt; 3538 long loff; 3539 off_t foffset; 3540 3541 /* XXX arbitrary sanity limit on `count'. 
*/ 3542 if (uap->count > 64 * 1024) 3543 return (EINVAL); 3544 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3545 if (error != 0) 3546 return (error); 3547 if ((fp->f_flag & FREAD) == 0) { 3548 fdrop(fp, td); 3549 return (EBADF); 3550 } 3551 vp = fp->f_vnode; 3552 foffset = foffset_lock(fp, 0); 3553unionread: 3554 if (vp->v_type != VDIR) { 3555 foffset_unlock(fp, foffset, 0); 3556 fdrop(fp, td); 3557 return (EINVAL); 3558 } 3559 aiov.iov_base = uap->buf; 3560 aiov.iov_len = uap->count; 3561 auio.uio_iov = &aiov; 3562 auio.uio_iovcnt = 1; 3563 auio.uio_rw = UIO_READ; 3564 auio.uio_segflg = UIO_USERSPACE; 3565 auio.uio_td = td; 3566 auio.uio_resid = uap->count; 3567 vn_lock(vp, LK_SHARED | LK_RETRY); 3568 loff = auio.uio_offset = foffset; 3569#ifdef MAC 3570 error = mac_vnode_check_readdir(td->td_ucred, vp); 3571 if (error != 0) { 3572 VOP_UNLOCK(vp, 0); 3573 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3574 fdrop(fp, td); 3575 return (error); 3576 } 3577#endif 3578# if (BYTE_ORDER != LITTLE_ENDIAN) 3579 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3580 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3581 NULL, NULL); 3582 foffset = auio.uio_offset; 3583 } else 3584# endif 3585 { 3586 kuio = auio; 3587 kuio.uio_iov = &kiov; 3588 kuio.uio_segflg = UIO_SYSSPACE; 3589 kiov.iov_len = uap->count; 3590 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3591 kiov.iov_base = dirbuf; 3592 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3593 NULL, NULL); 3594 foffset = kuio.uio_offset; 3595 if (error == 0) { 3596 readcnt = uap->count - kuio.uio_resid; 3597 edp = (struct dirent *)&dirbuf[readcnt]; 3598 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3599# if (BYTE_ORDER == LITTLE_ENDIAN) 3600 /* 3601 * The expected low byte of 3602 * dp->d_namlen is our dp->d_type. 3603 * The high MBZ byte of dp->d_namlen 3604 * is our dp->d_namlen. 
3605 */ 3606 dp->d_type = dp->d_namlen; 3607 dp->d_namlen = 0; 3608# else 3609 /* 3610 * The dp->d_type is the high byte 3611 * of the expected dp->d_namlen, 3612 * so must be zero'ed. 3613 */ 3614 dp->d_type = 0; 3615# endif 3616 if (dp->d_reclen > 0) { 3617 dp = (struct dirent *) 3618 ((char *)dp + dp->d_reclen); 3619 } else { 3620 error = EIO; 3621 break; 3622 } 3623 } 3624 if (dp >= edp) 3625 error = uiomove(dirbuf, readcnt, &auio); 3626 } 3627 free(dirbuf, M_TEMP); 3628 } 3629 if (error != 0) { 3630 VOP_UNLOCK(vp, 0); 3631 foffset_unlock(fp, foffset, 0); 3632 fdrop(fp, td); 3633 return (error); 3634 } 3635 if (uap->count == auio.uio_resid && 3636 (vp->v_vflag & VV_ROOT) && 3637 (vp->v_mount->mnt_flag & MNT_UNION)) { 3638 struct vnode *tvp = vp; 3639 vp = vp->v_mount->mnt_vnodecovered; 3640 VREF(vp); 3641 fp->f_vnode = vp; 3642 fp->f_data = vp; 3643 foffset = 0; 3644 vput(tvp); 3645 goto unionread; 3646 } 3647 VOP_UNLOCK(vp, 0); 3648 foffset_unlock(fp, foffset, 0); 3649 fdrop(fp, td); 3650 td->td_retval[0] = uap->count - auio.uio_resid; 3651 if (error == 0) 3652 *ploff = loff; 3653 return (error); 3654} 3655#endif /* COMPAT_43 */ 3656 3657/* 3658 * Read a block of directory entries in a filesystem independent format. 
3659 */ 3660#ifndef _SYS_SYSPROTO_H_ 3661struct getdirentries_args { 3662 int fd; 3663 char *buf; 3664 u_int count; 3665 long *basep; 3666}; 3667#endif 3668int 3669sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 3670{ 3671 long base; 3672 int error; 3673 3674 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3675 NULL, UIO_USERSPACE); 3676 if (error != 0) 3677 return (error); 3678 if (uap->basep != NULL) 3679 error = copyout(&base, uap->basep, sizeof(long)); 3680 return (error); 3681} 3682 3683int 3684kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3685 long *basep, ssize_t *residp, enum uio_seg bufseg) 3686{ 3687 struct vnode *vp; 3688 struct file *fp; 3689 struct uio auio; 3690 struct iovec aiov; 3691 cap_rights_t rights; 3692 long loff; 3693 int error, eofflag; 3694 off_t foffset; 3695 3696 AUDIT_ARG_FD(fd); 3697 if (count > IOSIZE_MAX) 3698 return (EINVAL); 3699 auio.uio_resid = count; 3700 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3701 if (error != 0) 3702 return (error); 3703 if ((fp->f_flag & FREAD) == 0) { 3704 fdrop(fp, td); 3705 return (EBADF); 3706 } 3707 vp = fp->f_vnode; 3708 foffset = foffset_lock(fp, 0); 3709unionread: 3710 if (vp->v_type != VDIR) { 3711 error = EINVAL; 3712 goto fail; 3713 } 3714 aiov.iov_base = buf; 3715 aiov.iov_len = count; 3716 auio.uio_iov = &aiov; 3717 auio.uio_iovcnt = 1; 3718 auio.uio_rw = UIO_READ; 3719 auio.uio_segflg = bufseg; 3720 auio.uio_td = td; 3721 vn_lock(vp, LK_SHARED | LK_RETRY); 3722 AUDIT_ARG_VNODE1(vp); 3723 loff = auio.uio_offset = foffset; 3724#ifdef MAC 3725 error = mac_vnode_check_readdir(td->td_ucred, vp); 3726 if (error == 0) 3727#endif 3728 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3729 NULL); 3730 foffset = auio.uio_offset; 3731 if (error != 0) { 3732 VOP_UNLOCK(vp, 0); 3733 goto fail; 3734 } 3735 if (count == auio.uio_resid && 3736 (vp->v_vflag & VV_ROOT) && 3737 (vp->v_mount->mnt_flag & MNT_UNION)) 
{ 3738 struct vnode *tvp = vp; 3739 3740 vp = vp->v_mount->mnt_vnodecovered; 3741 VREF(vp); 3742 fp->f_vnode = vp; 3743 fp->f_data = vp; 3744 foffset = 0; 3745 vput(tvp); 3746 goto unionread; 3747 } 3748 VOP_UNLOCK(vp, 0); 3749 *basep = loff; 3750 if (residp != NULL) 3751 *residp = auio.uio_resid; 3752 td->td_retval[0] = count - auio.uio_resid; 3753fail: 3754 foffset_unlock(fp, foffset, 0); 3755 fdrop(fp, td); 3756 return (error); 3757} 3758 3759#ifndef _SYS_SYSPROTO_H_ 3760struct getdents_args { 3761 int fd; 3762 char *buf; 3763 size_t count; 3764}; 3765#endif 3766int 3767sys_getdents(struct thread *td, struct getdents_args *uap) 3768{ 3769 struct getdirentries_args ap; 3770 3771 ap.fd = uap->fd; 3772 ap.buf = uap->buf; 3773 ap.count = uap->count; 3774 ap.basep = NULL; 3775 return (sys_getdirentries(td, &ap)); 3776} 3777 3778/* 3779 * Set the mode mask for creation of filesystem nodes. 3780 */ 3781#ifndef _SYS_SYSPROTO_H_ 3782struct umask_args { 3783 int newmask; 3784}; 3785#endif 3786int 3787sys_umask(struct thread *td, struct umask_args *uap) 3788{ 3789 struct filedesc *fdp; 3790 3791 fdp = td->td_proc->p_fd; 3792 FILEDESC_XLOCK(fdp); 3793 td->td_retval[0] = fdp->fd_cmask; 3794 fdp->fd_cmask = uap->newmask & ALLPERMS; 3795 FILEDESC_XUNLOCK(fdp); 3796 return (0); 3797} 3798 3799/* 3800 * Void all references to file by ripping underlying filesystem away from 3801 * vnode. 
3802 */ 3803#ifndef _SYS_SYSPROTO_H_ 3804struct revoke_args { 3805 char *path; 3806}; 3807#endif 3808int 3809sys_revoke(struct thread *td, struct revoke_args *uap) 3810{ 3811 struct vnode *vp; 3812 struct vattr vattr; 3813 struct nameidata nd; 3814 int error; 3815 3816 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3817 uap->path, td); 3818 if ((error = namei(&nd)) != 0) 3819 return (error); 3820 vp = nd.ni_vp; 3821 NDFREE(&nd, NDF_ONLY_PNBUF); 3822 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 3823 error = EINVAL; 3824 goto out; 3825 } 3826#ifdef MAC 3827 error = mac_vnode_check_revoke(td->td_ucred, vp); 3828 if (error != 0) 3829 goto out; 3830#endif 3831 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 3832 if (error != 0) 3833 goto out; 3834 if (td->td_ucred->cr_uid != vattr.va_uid) { 3835 error = priv_check(td, PRIV_VFS_ADMIN); 3836 if (error != 0) 3837 goto out; 3838 } 3839 if (vcount(vp) > 1) 3840 VOP_REVOKE(vp, REVOKEALL); 3841out: 3842 vput(vp); 3843 return (error); 3844} 3845 3846/* 3847 * Convert a user file descriptor to a kernel file entry and check that, if it 3848 * is a capability, the correct rights are present. A reference on the file 3849 * entry is held upon returning. 3850 */ 3851int 3852getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 3853{ 3854 struct file *fp; 3855 int error; 3856 3857 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 3858 if (error != 0) 3859 return (error); 3860 3861 /* 3862 * The file could be not of the vnode type, or it may be not 3863 * yet fully initialized, in which case the f_vnode pointer 3864 * may be set, but f_ops is still badfileops. E.g., 3865 * devfs_open() transiently create such situation to 3866 * facilitate csw d_fdopen(). 3867 * 3868 * Dupfdopen() handling in kern_openat() installs the 3869 * half-baked file into the process descriptor table, allowing 3870 * other thread to dereference it. Guard against the race by 3871 * checking f_ops. 
3872 */ 3873 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 3874 fdrop(fp, td); 3875 return (EINVAL); 3876 } 3877 *fpp = fp; 3878 return (0); 3879} 3880 3881 3882/* 3883 * Get an (NFS) file handle. 3884 */ 3885#ifndef _SYS_SYSPROTO_H_ 3886struct lgetfh_args { 3887 char *fname; 3888 fhandle_t *fhp; 3889}; 3890#endif 3891int 3892sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 3893{ 3894 struct nameidata nd; 3895 fhandle_t fh; 3896 struct vnode *vp; 3897 int error; 3898 3899 error = priv_check(td, PRIV_VFS_GETFH); 3900 if (error != 0) 3901 return (error); 3902 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3903 uap->fname, td); 3904 error = namei(&nd); 3905 if (error != 0) 3906 return (error); 3907 NDFREE(&nd, NDF_ONLY_PNBUF); 3908 vp = nd.ni_vp; 3909 bzero(&fh, sizeof(fh)); 3910 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3911 error = VOP_VPTOFH(vp, &fh.fh_fid); 3912 vput(vp); 3913 if (error == 0) 3914 error = copyout(&fh, uap->fhp, sizeof (fh)); 3915 return (error); 3916} 3917 3918#ifndef _SYS_SYSPROTO_H_ 3919struct getfh_args { 3920 char *fname; 3921 fhandle_t *fhp; 3922}; 3923#endif 3924int 3925sys_getfh(struct thread *td, struct getfh_args *uap) 3926{ 3927 struct nameidata nd; 3928 fhandle_t fh; 3929 struct vnode *vp; 3930 int error; 3931 3932 error = priv_check(td, PRIV_VFS_GETFH); 3933 if (error != 0) 3934 return (error); 3935 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3936 uap->fname, td); 3937 error = namei(&nd); 3938 if (error != 0) 3939 return (error); 3940 NDFREE(&nd, NDF_ONLY_PNBUF); 3941 vp = nd.ni_vp; 3942 bzero(&fh, sizeof(fh)); 3943 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3944 error = VOP_VPTOFH(vp, &fh.fh_fid); 3945 vput(vp); 3946 if (error == 0) 3947 error = copyout(&fh, uap->fhp, sizeof (fh)); 3948 return (error); 3949} 3950 3951/* 3952 * syscall for the rpc.lockd to use to translate a NFS file handle into an 3953 * open descriptor. 
3954 * 3955 * warning: do not remove the priv_check() call or this becomes one giant 3956 * security hole. 3957 */ 3958#ifndef _SYS_SYSPROTO_H_ 3959struct fhopen_args { 3960 const struct fhandle *u_fhp; 3961 int flags; 3962}; 3963#endif 3964int 3965sys_fhopen(struct thread *td, struct fhopen_args *uap) 3966{ 3967 struct mount *mp; 3968 struct vnode *vp; 3969 struct fhandle fhp; 3970 struct file *fp; 3971 int fmode, error; 3972 int indx; 3973 3974 error = priv_check(td, PRIV_VFS_FHOPEN); 3975 if (error != 0) 3976 return (error); 3977 indx = -1; 3978 fmode = FFLAGS(uap->flags); 3979 /* why not allow a non-read/write open for our lockd? */ 3980 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3981 return (EINVAL); 3982 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3983 if (error != 0) 3984 return(error); 3985 /* find the mount point */ 3986 mp = vfs_busyfs(&fhp.fh_fsid); 3987 if (mp == NULL) 3988 return (ESTALE); 3989 /* now give me my vnode, it gets returned to me locked */ 3990 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 3991 vfs_unbusy(mp); 3992 if (error != 0) 3993 return (error); 3994 3995 error = falloc_noinstall(td, &fp); 3996 if (error != 0) { 3997 vput(vp); 3998 return (error); 3999 } 4000 /* 4001 * An extra reference on `fp' has been held for us by 4002 * falloc_noinstall(). 
4003 */ 4004 4005#ifdef INVARIANTS 4006 td->td_dupfd = -1; 4007#endif 4008 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4009 if (error != 0) { 4010 KASSERT(fp->f_ops == &badfileops, 4011 ("VOP_OPEN in fhopen() set f_ops")); 4012 KASSERT(td->td_dupfd < 0, 4013 ("fhopen() encountered fdopen()")); 4014 4015 vput(vp); 4016 goto bad; 4017 } 4018#ifdef INVARIANTS 4019 td->td_dupfd = 0; 4020#endif 4021 fp->f_vnode = vp; 4022 fp->f_seqcount = 1; 4023 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4024 &vnops); 4025 VOP_UNLOCK(vp, 0); 4026 if ((fmode & O_TRUNC) != 0) { 4027 error = fo_truncate(fp, 0, td->td_ucred, td); 4028 if (error != 0) 4029 goto bad; 4030 } 4031 4032 error = finstall(td, fp, &indx, fmode, NULL); 4033bad: 4034 fdrop(fp, td); 4035 td->td_retval[0] = indx; 4036 return (error); 4037} 4038 4039/* 4040 * Stat an (NFS) file handle. 4041 */ 4042#ifndef _SYS_SYSPROTO_H_ 4043struct fhstat_args { 4044 struct fhandle *u_fhp; 4045 struct stat *sb; 4046}; 4047#endif 4048int 4049sys_fhstat(struct thread *td, struct fhstat_args *uap) 4050{ 4051 struct stat sb; 4052 struct fhandle fh; 4053 int error; 4054 4055 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4056 if (error != 0) 4057 return (error); 4058 error = kern_fhstat(td, fh, &sb); 4059 if (error == 0) 4060 error = copyout(&sb, uap->sb, sizeof(sb)); 4061 return (error); 4062} 4063 4064int 4065kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4066{ 4067 struct mount *mp; 4068 struct vnode *vp; 4069 int error; 4070 4071 error = priv_check(td, PRIV_VFS_FHSTAT); 4072 if (error != 0) 4073 return (error); 4074 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4075 return (ESTALE); 4076 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4077 vfs_unbusy(mp); 4078 if (error != 0) 4079 return (error); 4080 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4081 vput(vp); 4082 return (error); 4083} 4084 4085/* 4086 * Implement fstatfs() for (NFS) file handles. 
4087 */ 4088#ifndef _SYS_SYSPROTO_H_ 4089struct fhstatfs_args { 4090 struct fhandle *u_fhp; 4091 struct statfs *buf; 4092}; 4093#endif 4094int 4095sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4096{ 4097 struct statfs *sfp; 4098 fhandle_t fh; 4099 int error; 4100 4101 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4102 if (error != 0) 4103 return (error); 4104 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4105 error = kern_fhstatfs(td, fh, sfp); 4106 if (error == 0) 4107 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4108 free(sfp, M_STATFS); 4109 return (error); 4110} 4111 4112int 4113kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4114{ 4115 struct statfs *sp; 4116 struct mount *mp; 4117 struct vnode *vp; 4118 int error; 4119 4120 error = priv_check(td, PRIV_VFS_FHSTATFS); 4121 if (error != 0) 4122 return (error); 4123 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4124 return (ESTALE); 4125 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4126 if (error != 0) { 4127 vfs_unbusy(mp); 4128 return (error); 4129 } 4130 vput(vp); 4131 error = prison_canseemount(td->td_ucred, mp); 4132 if (error != 0) 4133 goto out; 4134#ifdef MAC 4135 error = mac_mount_check_stat(td->td_ucred, mp); 4136 if (error != 0) 4137 goto out; 4138#endif 4139 /* 4140 * Set these in case the underlying filesystem fails to do so. 4141 */ 4142 sp = &mp->mnt_stat; 4143 sp->f_version = STATFS_VERSION; 4144 sp->f_namemax = NAME_MAX; 4145 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4146 error = VFS_STATFS(mp, sp); 4147 if (error == 0) 4148 *buf = *sp; 4149out: 4150 vfs_unbusy(mp); 4151 return (error); 4152} 4153 4154int 4155kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4156{ 4157 struct file *fp; 4158 struct mount *mp; 4159 struct vnode *vp; 4160 cap_rights_t rights; 4161 off_t olen, ooffset; 4162 int error; 4163 4164 if (offset < 0 || len <= 0) 4165 return (EINVAL); 4166 /* Check for wrap. 
*/ 4167 if (offset > OFF_MAX - len) 4168 return (EFBIG); 4169 error = fget(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp); 4170 if (error != 0) 4171 return (error); 4172 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4173 error = ESPIPE; 4174 goto out; 4175 } 4176 if ((fp->f_flag & FWRITE) == 0) { 4177 error = EBADF; 4178 goto out; 4179 } 4180 if (fp->f_type != DTYPE_VNODE) { 4181 error = ENODEV; 4182 goto out; 4183 } 4184 vp = fp->f_vnode; 4185 if (vp->v_type != VREG) { 4186 error = ENODEV; 4187 goto out; 4188 } 4189 4190 /* Allocating blocks may take a long time, so iterate. */ 4191 for (;;) { 4192 olen = len; 4193 ooffset = offset; 4194 4195 bwillwrite(); 4196 mp = NULL; 4197 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4198 if (error != 0) 4199 break; 4200 error = vn_lock(vp, LK_EXCLUSIVE); 4201 if (error != 0) { 4202 vn_finished_write(mp); 4203 break; 4204 } 4205#ifdef MAC 4206 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4207 if (error == 0) 4208#endif 4209 error = VOP_ALLOCATE(vp, &offset, &len); 4210 VOP_UNLOCK(vp, 0); 4211 vn_finished_write(mp); 4212 4213 if (olen + ooffset != offset + len) { 4214 panic("offset + len changed from %jx/%jx to %jx/%jx", 4215 ooffset, olen, offset, len); 4216 } 4217 if (error != 0 || len == 0) 4218 break; 4219 KASSERT(olen > len, ("Iteration did not make progress?")); 4220 maybe_yield(); 4221 } 4222 out: 4223 fdrop(fp, td); 4224 return (error); 4225} 4226 4227int 4228sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4229{ 4230 int error; 4231 4232 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4233 return (kern_posix_error(td, error)); 4234} 4235 4236/* 4237 * Unlike madvise(2), we do not make a best effort to remember every 4238 * possible caching hint. Instead, we remember the last setting with 4239 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4240 * region of any current setting. 
4241 */ 4242int 4243kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4244 int advice) 4245{ 4246 struct fadvise_info *fa, *new; 4247 struct file *fp; 4248 struct vnode *vp; 4249 cap_rights_t rights; 4250 off_t end; 4251 int error; 4252 4253 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4254 return (EINVAL); 4255 switch (advice) { 4256 case POSIX_FADV_SEQUENTIAL: 4257 case POSIX_FADV_RANDOM: 4258 case POSIX_FADV_NOREUSE: 4259 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4260 break; 4261 case POSIX_FADV_NORMAL: 4262 case POSIX_FADV_WILLNEED: 4263 case POSIX_FADV_DONTNEED: 4264 new = NULL; 4265 break; 4266 default: 4267 return (EINVAL); 4268 } 4269 /* XXX: CAP_POSIX_FADVISE? */ 4270 error = fget(td, fd, cap_rights_init(&rights), &fp); 4271 if (error != 0) 4272 goto out; 4273 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4274 error = ESPIPE; 4275 goto out; 4276 } 4277 if (fp->f_type != DTYPE_VNODE) { 4278 error = ENODEV; 4279 goto out; 4280 } 4281 vp = fp->f_vnode; 4282 if (vp->v_type != VREG) { 4283 error = ENODEV; 4284 goto out; 4285 } 4286 if (len == 0) 4287 end = OFF_MAX; 4288 else 4289 end = offset + len - 1; 4290 switch (advice) { 4291 case POSIX_FADV_SEQUENTIAL: 4292 case POSIX_FADV_RANDOM: 4293 case POSIX_FADV_NOREUSE: 4294 /* 4295 * Try to merge any existing non-standard region with 4296 * this new region if possible, otherwise create a new 4297 * non-standard region for this request. 
4298 */ 4299 mtx_pool_lock(mtxpool_sleep, fp); 4300 fa = fp->f_advice; 4301 if (fa != NULL && fa->fa_advice == advice && 4302 ((fa->fa_start <= end && fa->fa_end >= offset) || 4303 (end != OFF_MAX && fa->fa_start == end + 1) || 4304 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4305 if (offset < fa->fa_start) 4306 fa->fa_start = offset; 4307 if (end > fa->fa_end) 4308 fa->fa_end = end; 4309 } else { 4310 new->fa_advice = advice; 4311 new->fa_start = offset; 4312 new->fa_end = end; 4313 fp->f_advice = new; 4314 new = fa; 4315 } 4316 mtx_pool_unlock(mtxpool_sleep, fp); 4317 break; 4318 case POSIX_FADV_NORMAL: 4319 /* 4320 * If a the "normal" region overlaps with an existing 4321 * non-standard region, trim or remove the 4322 * non-standard region. 4323 */ 4324 mtx_pool_lock(mtxpool_sleep, fp); 4325 fa = fp->f_advice; 4326 if (fa != NULL) { 4327 if (offset <= fa->fa_start && end >= fa->fa_end) { 4328 new = fa; 4329 fp->f_advice = NULL; 4330 } else if (offset <= fa->fa_start && 4331 end >= fa->fa_start) 4332 fa->fa_start = end + 1; 4333 else if (offset <= fa->fa_end && end >= fa->fa_end) 4334 fa->fa_end = offset - 1; 4335 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4336 /* 4337 * If the "normal" region is a middle 4338 * portion of the existing 4339 * non-standard region, just remove 4340 * the whole thing rather than picking 4341 * one side or the other to 4342 * preserve. 
4343 */ 4344 new = fa; 4345 fp->f_advice = NULL; 4346 } 4347 } 4348 mtx_pool_unlock(mtxpool_sleep, fp); 4349 break; 4350 case POSIX_FADV_WILLNEED: 4351 case POSIX_FADV_DONTNEED: 4352 error = VOP_ADVISE(vp, offset, end, advice); 4353 break; 4354 } 4355out: 4356 if (fp != NULL) 4357 fdrop(fp, td); 4358 free(new, M_FADVISE); 4359 return (error); 4360} 4361 4362int 4363sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4364{ 4365 int error; 4366 4367 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4368 uap->advice); 4369 return (kern_posix_error(td, error)); 4370} 4371