kern_descrip.c revision 174167
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94 35 */ 36 37#include <sys/cdefs.h> 38__FBSDID("$FreeBSD: head/sys/kern/kern_descrip.c 174167 2007-12-02 10:10:27Z rwatson $"); 39 40#include "opt_compat.h" 41#include "opt_ddb.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45 46#include <sys/conf.h> 47#include <sys/domain.h> 48#include <sys/fcntl.h> 49#include <sys/file.h> 50#include <sys/filedesc.h> 51#include <sys/filio.h> 52#include <sys/jail.h> 53#include <sys/kernel.h> 54#include <sys/limits.h> 55#include <sys/lock.h> 56#include <sys/malloc.h> 57#include <sys/mount.h> 58#include <sys/mqueue.h> 59#include <sys/mutex.h> 60#include <sys/namei.h> 61#include <sys/priv.h> 62#include <sys/proc.h> 63#include <sys/protosw.h> 64#include <sys/resourcevar.h> 65#include <sys/signalvar.h> 66#include <sys/socketvar.h> 67#include <sys/stat.h> 68#include <sys/sx.h> 69#include <sys/syscallsubr.h> 70#include <sys/sysctl.h> 71#include <sys/sysproto.h> 72#include <sys/unistd.h> 73#include <sys/user.h> 74#include <sys/vnode.h> 75 76#include <security/audit/audit.h> 77 78#include <vm/uma.h> 79 80#include <ddb/ddb.h> 81 82static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); 83static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", 84 "file desc to leader structures"); 85static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); 86 87static uma_zone_t file_zone; 88 89 90/* How to treat 'new' parameter when allocating a fd for do_dup(). 
*/
enum dup_type { DUP_VARIABLE, DUP_FIXED };

/* Forward declarations for file-local helpers. */
static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);
static int	fd_first_free(struct filedesc *, int, int);
static int	fd_last_used(struct filedesc *, int, int);
static void	fdgrowtable(struct filedesc *, int);
static int	fdrop_locked(struct file *fp, struct thread *td);
static void	fdunused(struct filedesc *fdp, int fd);
static void	fdused(struct filedesc *fdp, int fd);

/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 *
 * The in-use bitmap is an array of NDSLOTTYPE words ("slots"):
 *   NDSLOTSIZE  - size of one slot in bytes
 *   NDENTRIES   - number of descriptors (bits) tracked by one slot
 *   NDSLOT(x)   - index of the slot holding the bit for descriptor x
 *   NDBIT(x)    - mask selecting descriptor x's bit within its slot
 *   NDSLOTS(x)  - number of slots needed to track x descriptors (rounded up)
 */
#define NDFILE		20
#define NDSLOTSIZE	sizeof(NDSLOTTYPE)
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
#define NDSLOT(x)	((x) / NDENTRIES)
#define NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor: one file pointer plus one
 * byte of per-descriptor flags.
 */
#define OFILESIZE (sizeof(struct file *) + sizeof(char))

/*
 * Basic allocation of descriptors:
 * one of the above, plus arrays for NDFILE descriptors.
 */
struct filedesc0 {
	struct	filedesc fd_fd;
	/*
	 * These arrays are used when the number of open files is
	 * <= NDFILE, and are then pointed to by the pointers above.
	 */
	struct	file *fd_dfiles[NDFILE];	/* initial file pointer table */
	char	fd_dfileflags[NDFILE];		/* initial per-fd flag bytes */
	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];	/* initial in-use bitmap */
};

/*
 * Descriptor management.
139 */ 140struct filelist filehead; /* head of list of open files */ 141int openfiles; /* actual number of open files */ 142struct sx filelist_lock; /* sx to protect filelist */ 143struct mtx sigio_lock; /* mtx to protect pointers to sigio */ 144void (*mq_fdclose)(struct thread *td, int fd, struct file *fp); 145 146/* A mutex to protect the association between a proc and filedesc. */ 147static struct mtx fdesc_mtx; 148 149/* 150 * Find the first zero bit in the given bitmap, starting at low and not 151 * exceeding size - 1. 152 */ 153static int 154fd_first_free(struct filedesc *fdp, int low, int size) 155{ 156 NDSLOTTYPE *map = fdp->fd_map; 157 NDSLOTTYPE mask; 158 int off, maxoff; 159 160 if (low >= size) 161 return (low); 162 163 off = NDSLOT(low); 164 if (low % NDENTRIES) { 165 mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES))); 166 if ((mask &= ~map[off]) != 0UL) 167 return (off * NDENTRIES + ffsl(mask) - 1); 168 ++off; 169 } 170 for (maxoff = NDSLOTS(size); off < maxoff; ++off) 171 if (map[off] != ~0UL) 172 return (off * NDENTRIES + ffsl(~map[off]) - 1); 173 return (size); 174} 175 176/* 177 * Find the highest non-zero bit in the given bitmap, starting at low and 178 * not exceeding size - 1. 
179 */ 180static int 181fd_last_used(struct filedesc *fdp, int low, int size) 182{ 183 NDSLOTTYPE *map = fdp->fd_map; 184 NDSLOTTYPE mask; 185 int off, minoff; 186 187 if (low >= size) 188 return (-1); 189 190 off = NDSLOT(size); 191 if (size % NDENTRIES) { 192 mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES)); 193 if ((mask &= map[off]) != 0) 194 return (off * NDENTRIES + flsl(mask) - 1); 195 --off; 196 } 197 for (minoff = NDSLOT(low); off >= minoff; --off) 198 if (map[off] != 0) 199 return (off * NDENTRIES + flsl(map[off]) - 1); 200 return (low - 1); 201} 202 203static int 204fdisused(struct filedesc *fdp, int fd) 205{ 206 KASSERT(fd >= 0 && fd < fdp->fd_nfiles, 207 ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles)); 208 return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0); 209} 210 211/* 212 * Mark a file descriptor as used. 213 */ 214static void 215fdused(struct filedesc *fdp, int fd) 216{ 217 218 FILEDESC_XLOCK_ASSERT(fdp); 219 KASSERT(!fdisused(fdp, fd), 220 ("fd already used")); 221 222 fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd); 223 if (fd > fdp->fd_lastfile) 224 fdp->fd_lastfile = fd; 225 if (fd == fdp->fd_freefile) 226 fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles); 227} 228 229/* 230 * Mark a file descriptor as unused. 231 */ 232static void 233fdunused(struct filedesc *fdp, int fd) 234{ 235 236 FILEDESC_XLOCK_ASSERT(fdp); 237 KASSERT(fdisused(fdp, fd), 238 ("fd is already unused")); 239 KASSERT(fdp->fd_ofiles[fd] == NULL, 240 ("fd is still in use")); 241 242 fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd); 243 if (fd < fdp->fd_freefile) 244 fdp->fd_freefile = fd; 245 if (fd == fdp->fd_lastfile) 246 fdp->fd_lastfile = fd_last_used(fdp, 0, fd); 247} 248 249/* 250 * System calls on descriptors. 
251 */ 252#ifndef _SYS_SYSPROTO_H_ 253struct getdtablesize_args { 254 int dummy; 255}; 256#endif 257/* ARGSUSED */ 258int 259getdtablesize(struct thread *td, struct getdtablesize_args *uap) 260{ 261 struct proc *p = td->td_proc; 262 263 PROC_LOCK(p); 264 td->td_retval[0] = 265 min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 266 PROC_UNLOCK(p); 267 return (0); 268} 269 270/* 271 * Duplicate a file descriptor to a particular value. 272 * 273 * Note: keep in mind that a potential race condition exists when closing 274 * descriptors from a shared descriptor table (via rfork). 275 */ 276#ifndef _SYS_SYSPROTO_H_ 277struct dup2_args { 278 u_int from; 279 u_int to; 280}; 281#endif 282/* ARGSUSED */ 283int 284dup2(struct thread *td, struct dup2_args *uap) 285{ 286 287 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to, 288 td->td_retval)); 289} 290 291/* 292 * Duplicate a file descriptor. 293 */ 294#ifndef _SYS_SYSPROTO_H_ 295struct dup_args { 296 u_int fd; 297}; 298#endif 299/* ARGSUSED */ 300int 301dup(struct thread *td, struct dup_args *uap) 302{ 303 304 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval)); 305} 306 307/* 308 * The file control system call. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/* ARGSUSED */
int
fcntl(struct thread *td, struct fcntl_args *uap)
{
	struct flock fl;
	intptr_t arg;
	int error;

	error = 0;
	switch (uap->cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		/*
		 * For the locking commands the user argument is a pointer to
		 * a struct flock; copy it in and hand kern_fcntl() the
		 * address of the kernel copy instead.
		 */
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
	if (error)
		return (error);
	/* F_GETLK returns its result through the flock; copy it back out. */
	if (uap->cmd == F_GETLK)
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	return (error);
}

/*
 * Look up the open file for descriptor fd in fdp.  Returns NULL when fd
 * is outside the table (negative values are caught by the unsigned
 * comparison) or the slot is empty.  No reference is taken here.
 */
static inline struct file *
fdtofp(int fd, struct filedesc *fdp)
{
	struct file *fp;

	FILEDESC_LOCK_ASSERT(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL)
		return (NULL);
	return (fp);
}

/*
 * Implementation of fcntl() working on an argument that is already in
 * kernel space: for F_GETLK, F_SETLK and F_SETLKW, arg is treated as a
 * pointer to a struct flock and dereferenced directly; for the other
 * commands it is used as an integer.
 *
 * Results that are plain integers are stored in td->td_retval[0].
 * Returns 0 on success or an errno value; unrecognized commands yield
 * EINVAL and a descriptor that is not open yields EBADF.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;
	int vfslocked;

	vfslocked = 0;
	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;

	switch (cmd) {
	case F_DUPFD:
		/* Validate fd up front; do_dup() re-checks under its own lock. */
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		FILEDESC_SUNLOCK(fdp);
		newmin = arg;
		PROC_LOCK(p);
		if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
		    newmin >= maxfilesperproc) {
			PROC_UNLOCK(p);
			error = EINVAL;
			break;
		}
		PROC_UNLOCK(p);
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		/* Translate the internal UF_EXCLOSE bit to FD_CLOEXEC. */
		pop = &fdp->fd_ofileflags[fd];
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFD:
		FILEDESC_XLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_XUNLOCK(fdp);
			error = EBADF;
			break;
		}
		/* Only the close-on-exec bit is affected; others are kept. */
		pop = &fdp->fd_ofileflags[fd];
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_XUNLOCK(fdp);
		break;

	case F_GETFL:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		FILE_LOCK(fp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		FILEDESC_SUNLOCK(fdp);
		break;

	case F_SETFL:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		/*
		 * Hold fp before dropping the filedesc lock, since it is
		 * still used below; every exit path drops it with fdrop().
		 * Only the bits in FCNTLFLAGS are replaced.
		 */
		FILE_LOCK(fp);
		fhold_locked(fp);
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		FILEDESC_SUNLOCK(fdp);
		/* Push the new non-blocking and async state to the object. */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: clear FNONBLOCK again and report the error. */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		/* Locks are only supported on vnode-backed descriptors. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/*
			 * Make l_start relative to the current offset,
			 * rejecting a sum that would exceed OFF_MAX.
			 */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_SUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		switch (flp->l_type) {
		case F_RDLCK:
			/* A read lock requires the file to be open for reading. */
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			/* A write lock requires the file to be open for writing. */
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		/* Check for race with close */
		FILEDESC_SLOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			/*
			 * fd no longer refers to fp: issue an F_UNLCK with
			 * l_start 0 and l_len 0 on the vnode for this
			 * process leader.
			 */
			FILEDESC_SUNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
					   F_UNLCK, flp, F_POSIX);
			VFS_UNLOCK_GIANT(vfslocked);
			vfslocked = 0;
		} else
			FILEDESC_SUNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		FILEDESC_SLOCK(fdp);
		if ((fp = fdtofp(fd, fdp)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_SUNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_SUNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Reject an offset sum outside [OFF_MIN, OFF_MAX]. */
			if ((flp->l_start > 0 &&
			    fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_SUNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_SUNLOCK(fdp);
		vp = fp->f_vnode;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		VFS_UNLOCK_GIANT(vfslocked);
		vfslocked = 0;
		fdrop(fp, td);
		break;
	default:
		error = EINVAL;
		break;
	}
	/* Every path above resets vfslocked to 0 after unlocking. */
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}

/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * With DUP_FIXED, 'new' is the exact descriptor to install the duplicate
 * at, closing whatever was open there.  With DUP_VARIABLE, 'new' is the
 * lowest acceptable descriptor and is passed to fdalloc() as the search
 * start.  On success the chosen descriptor is stored in *retval and 0 is
 * returned.  Errors: EBADF for a negative or unopened 'old' (or a
 * negative 'new'), EMFILE when 'new' is at or above the per-process
 * limit, or whatever fdalloc() returns.
 */
static int
do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval)
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, holdleaders, maxfd;

	KASSERT((type == DUP_VARIABLE || type == DUP_FIXED),
	    ("invalid dup type %d", type));

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0)
		return (EBADF);
	PROC_LOCK(p);
	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	if (new >= maxfd)
		return (EMFILE);

	FILEDESC_XLOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}
	/* dup2(fd, fd) on an open descriptor is a successful no-op. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_XUNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * If the caller specified a file descriptor, make sure the file
	 * table is large enough to hold it, and grab it.  Otherwise, just
	 * allocate a new descriptor the usual way.  Since the filedesc
	 * lock may be temporarily dropped in the process, we have to look
	 * out for a race.
	 */
	if (type == DUP_FIXED) {
		if (new >= fdp->fd_nfiles)
			fdgrowtable(fdp, new + 1);
		if (fdp->fd_ofiles[new] == NULL)
			fdused(fdp, new);
	} else {
		if ((error = fdalloc(td, new, &new)) != 0) {
			FILEDESC_XUNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* we've allocated a descriptor which we won't use */
		if (fdp->fd_ofiles[new] == NULL)
			fdunused(fdp, new);
		FILEDESC_XUNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new,
	    ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten.  We cannot close
	 * it without introducing an ownership race for the slot, since we
	 * need to drop the filedesc lock to call closef().
	 *
	 * XXX this duplicates parts of close().
	 */
	delfp = fdp->fd_ofiles[new];
	holdleaders = 0;
	if (delfp != NULL) {
		if (td->td_proc->p_fdtol != NULL) {
			/*
			 * Ask fdfree() to sleep to ensure that all relevant
			 * process leaders can be traversed in closef().
			 */
			fdp->fd_holdleaderscount++;
			holdleaders = 1;
		}
	}

	/*
	 * Duplicate the source descriptor
	 */
	fdp->fd_ofiles[new] = fp;
	/* The duplicate copies old's flags except close-on-exec. */
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 *
	 * XXX this duplicates parts of close().
	 */
	if (delfp != NULL) {
		knote_fdclose(td, new);
		if (delfp->f_type == DTYPE_MQUEUE)
			mq_fdclose(td, new, delfp);
		FILEDESC_XUNLOCK(fdp);
		(void) closef(delfp, td);
		if (holdleaders) {
			/* Undo the hold taken above; wake a waiter if flagged. */
			FILEDESC_XLOCK(fdp);
			fdp->fd_holdleaderscount--;
			if (fdp->fd_holdleaderscount == 0 &&
			    fdp->fd_holdleaderswakeup != 0) {
				fdp->fd_holdleaderswakeup = 0;
				wakeup(&fdp->fd_holdleaderscount);
			}
			FILEDESC_XUNLOCK(fdp);
		}
	} else {
		FILEDESC_XUNLOCK(fdp);
	}
	return (0);
}

/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
779 */ 780void 781funsetown(struct sigio **sigiop) 782{ 783 struct sigio *sigio; 784 785 SIGIO_LOCK(); 786 sigio = *sigiop; 787 if (sigio == NULL) { 788 SIGIO_UNLOCK(); 789 return; 790 } 791 *(sigio->sio_myref) = NULL; 792 if ((sigio)->sio_pgid < 0) { 793 struct pgrp *pg = (sigio)->sio_pgrp; 794 PGRP_LOCK(pg); 795 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio, 796 sigio, sio_pgsigio); 797 PGRP_UNLOCK(pg); 798 } else { 799 struct proc *p = (sigio)->sio_proc; 800 PROC_LOCK(p); 801 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio, 802 sigio, sio_pgsigio); 803 PROC_UNLOCK(p); 804 } 805 SIGIO_UNLOCK(); 806 crfree(sigio->sio_ucred); 807 FREE(sigio, M_SIGIO); 808} 809 810/* 811 * Free a list of sigio structures. 812 * We only need to lock the SIGIO_LOCK because we have made ourselves 813 * inaccessible to callers of fsetown and therefore do not need to lock 814 * the proc or pgrp struct for the list manipulation. 815 */ 816void 817funsetownlst(struct sigiolst *sigiolst) 818{ 819 struct proc *p; 820 struct pgrp *pg; 821 struct sigio *sigio; 822 823 sigio = SLIST_FIRST(sigiolst); 824 if (sigio == NULL) 825 return; 826 p = NULL; 827 pg = NULL; 828 829 /* 830 * Every entry of the list should belong 831 * to a single proc or pgrp. 
832 */ 833 if (sigio->sio_pgid < 0) { 834 pg = sigio->sio_pgrp; 835 PGRP_LOCK_ASSERT(pg, MA_NOTOWNED); 836 } else /* if (sigio->sio_pgid > 0) */ { 837 p = sigio->sio_proc; 838 PROC_LOCK_ASSERT(p, MA_NOTOWNED); 839 } 840 841 SIGIO_LOCK(); 842 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) { 843 *(sigio->sio_myref) = NULL; 844 if (pg != NULL) { 845 KASSERT(sigio->sio_pgid < 0, 846 ("Proc sigio in pgrp sigio list")); 847 KASSERT(sigio->sio_pgrp == pg, 848 ("Bogus pgrp in sigio list")); 849 PGRP_LOCK(pg); 850 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, 851 sio_pgsigio); 852 PGRP_UNLOCK(pg); 853 } else /* if (p != NULL) */ { 854 KASSERT(sigio->sio_pgid > 0, 855 ("Pgrp sigio in proc sigio list")); 856 KASSERT(sigio->sio_proc == p, 857 ("Bogus proc in sigio list")); 858 PROC_LOCK(p); 859 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, 860 sio_pgsigio); 861 PROC_UNLOCK(p); 862 } 863 SIGIO_UNLOCK(); 864 crfree(sigio->sio_ucred); 865 FREE(sigio, M_SIGIO); 866 SIGIO_LOCK(); 867 } 868 SIGIO_UNLOCK(); 869} 870 871/* 872 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg). 873 * 874 * After permission checking, add a sigio structure to the sigio list for 875 * the process or process group. 876 */ 877int 878fsetown(pid_t pgid, struct sigio **sigiop) 879{ 880 struct proc *proc; 881 struct pgrp *pgrp; 882 struct sigio *sigio; 883 int ret; 884 885 if (pgid == 0) { 886 funsetown(sigiop); 887 return (0); 888 } 889 890 ret = 0; 891 892 /* Allocate and fill in the new sigio out of locks. */ 893 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK); 894 sigio->sio_pgid = pgid; 895 sigio->sio_ucred = crhold(curthread->td_ucred); 896 sigio->sio_myref = sigiop; 897 898 sx_slock(&proctree_lock); 899 if (pgid > 0) { 900 proc = pfind(pgid); 901 if (proc == NULL) { 902 ret = ESRCH; 903 goto fail; 904 } 905 906 /* 907 * Policy - Don't allow a process to FSETOWN a process 908 * in another session. 
909 * 910 * Remove this test to allow maximum flexibility or 911 * restrict FSETOWN to the current process or process 912 * group for maximum safety. 913 */ 914 PROC_UNLOCK(proc); 915 if (proc->p_session != curthread->td_proc->p_session) { 916 ret = EPERM; 917 goto fail; 918 } 919 920 pgrp = NULL; 921 } else /* if (pgid < 0) */ { 922 pgrp = pgfind(-pgid); 923 if (pgrp == NULL) { 924 ret = ESRCH; 925 goto fail; 926 } 927 PGRP_UNLOCK(pgrp); 928 929 /* 930 * Policy - Don't allow a process to FSETOWN a process 931 * in another session. 932 * 933 * Remove this test to allow maximum flexibility or 934 * restrict FSETOWN to the current process or process 935 * group for maximum safety. 936 */ 937 if (pgrp->pg_session != curthread->td_proc->p_session) { 938 ret = EPERM; 939 goto fail; 940 } 941 942 proc = NULL; 943 } 944 funsetown(sigiop); 945 if (pgid > 0) { 946 PROC_LOCK(proc); 947 /* 948 * Since funsetownlst() is called without the proctree 949 * locked, we need to check for P_WEXIT. 950 * XXX: is ESRCH correct? 951 */ 952 if ((proc->p_flag & P_WEXIT) != 0) { 953 PROC_UNLOCK(proc); 954 ret = ESRCH; 955 goto fail; 956 } 957 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio); 958 sigio->sio_proc = proc; 959 PROC_UNLOCK(proc); 960 } else { 961 PGRP_LOCK(pgrp); 962 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio); 963 sigio->sio_pgrp = pgrp; 964 PGRP_UNLOCK(pgrp); 965 } 966 sx_sunlock(&proctree_lock); 967 SIGIO_LOCK(); 968 *sigiop = sigio; 969 SIGIO_UNLOCK(); 970 return (0); 971 972fail: 973 sx_sunlock(&proctree_lock); 974 crfree(sigio->sio_ucred); 975 FREE(sigio, M_SIGIO); 976 return (ret); 977} 978 979/* 980 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg). 981 */ 982pid_t 983fgetown(sigiop) 984 struct sigio **sigiop; 985{ 986 pid_t pgid; 987 988 SIGIO_LOCK(); 989 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0; 990 SIGIO_UNLOCK(); 991 return (pgid); 992} 993 994/* 995 * Close a file descriptor. 
996 */ 997#ifndef _SYS_SYSPROTO_H_ 998struct close_args { 999 int fd; 1000}; 1001#endif 1002/* ARGSUSED */ 1003int 1004close(td, uap) 1005 struct thread *td; 1006 struct close_args *uap; 1007{ 1008 1009 return (kern_close(td, uap->fd)); 1010} 1011 1012int 1013kern_close(td, fd) 1014 struct thread *td; 1015 int fd; 1016{ 1017 struct filedesc *fdp; 1018 struct file *fp; 1019 int error; 1020 int holdleaders; 1021 1022 error = 0; 1023 holdleaders = 0; 1024 fdp = td->td_proc->p_fd; 1025 1026 AUDIT_SYSCLOSE(td, fd); 1027 1028 FILEDESC_XLOCK(fdp); 1029 if ((unsigned)fd >= fdp->fd_nfiles || 1030 (fp = fdp->fd_ofiles[fd]) == NULL) { 1031 FILEDESC_XUNLOCK(fdp); 1032 return (EBADF); 1033 } 1034 fdp->fd_ofiles[fd] = NULL; 1035 fdp->fd_ofileflags[fd] = 0; 1036 fdunused(fdp, fd); 1037 if (td->td_proc->p_fdtol != NULL) { 1038 /* 1039 * Ask fdfree() to sleep to ensure that all relevant 1040 * process leaders can be traversed in closef(). 1041 */ 1042 fdp->fd_holdleaderscount++; 1043 holdleaders = 1; 1044 } 1045 1046 /* 1047 * We now hold the fp reference that used to be owned by the 1048 * descriptor array. We have to unlock the FILEDESC *AFTER* 1049 * knote_fdclose to prevent a race of the fd getting opened, a knote 1050 * added, and deleteing a knote for the new fd. 1051 */ 1052 knote_fdclose(td, fd); 1053 if (fp->f_type == DTYPE_MQUEUE) 1054 mq_fdclose(td, fd, fp); 1055 FILEDESC_XUNLOCK(fdp); 1056 1057 error = closef(fp, td); 1058 if (holdleaders) { 1059 FILEDESC_XLOCK(fdp); 1060 fdp->fd_holdleaderscount--; 1061 if (fdp->fd_holdleaderscount == 0 && 1062 fdp->fd_holdleaderswakeup != 0) { 1063 fdp->fd_holdleaderswakeup = 0; 1064 wakeup(&fdp->fd_holdleaderscount); 1065 } 1066 FILEDESC_XUNLOCK(fdp); 1067 } 1068 return (error); 1069} 1070 1071#if defined(COMPAT_43) 1072/* 1073 * Return status information about a file descriptor. 
1074 */ 1075#ifndef _SYS_SYSPROTO_H_ 1076struct ofstat_args { 1077 int fd; 1078 struct ostat *sb; 1079}; 1080#endif 1081/* ARGSUSED */ 1082int 1083ofstat(struct thread *td, struct ofstat_args *uap) 1084{ 1085 struct ostat oub; 1086 struct stat ub; 1087 int error; 1088 1089 error = kern_fstat(td, uap->fd, &ub); 1090 if (error == 0) { 1091 cvtstat(&ub, &oub); 1092 error = copyout(&oub, uap->sb, sizeof(oub)); 1093 } 1094 return (error); 1095} 1096#endif /* COMPAT_43 */ 1097 1098/* 1099 * Return status information about a file descriptor. 1100 */ 1101#ifndef _SYS_SYSPROTO_H_ 1102struct fstat_args { 1103 int fd; 1104 struct stat *sb; 1105}; 1106#endif 1107/* ARGSUSED */ 1108int 1109fstat(struct thread *td, struct fstat_args *uap) 1110{ 1111 struct stat ub; 1112 int error; 1113 1114 error = kern_fstat(td, uap->fd, &ub); 1115 if (error == 0) 1116 error = copyout(&ub, uap->sb, sizeof(ub)); 1117 return (error); 1118} 1119 1120int 1121kern_fstat(struct thread *td, int fd, struct stat *sbp) 1122{ 1123 struct file *fp; 1124 int error; 1125 1126 AUDIT_ARG(fd, fd); 1127 1128 if ((error = fget(td, fd, &fp)) != 0) 1129 return (error); 1130 1131 AUDIT_ARG(file, td->td_proc, fp); 1132 1133 error = fo_stat(fp, sbp, td->td_ucred, td); 1134 fdrop(fp, td); 1135 return (error); 1136} 1137 1138/* 1139 * Return status information about a file descriptor. 1140 */ 1141#ifndef _SYS_SYSPROTO_H_ 1142struct nfstat_args { 1143 int fd; 1144 struct nstat *sb; 1145}; 1146#endif 1147/* ARGSUSED */ 1148int 1149nfstat(struct thread *td, struct nfstat_args *uap) 1150{ 1151 struct nstat nub; 1152 struct stat ub; 1153 int error; 1154 1155 error = kern_fstat(td, uap->fd, &ub); 1156 if (error == 0) { 1157 cvtnstat(&ub, &nub); 1158 error = copyout(&nub, uap->sb, sizeof(nub)); 1159 } 1160 return (error); 1161} 1162 1163/* 1164 * Return pathconf information about a file descriptor. 
1165 */ 1166#ifndef _SYS_SYSPROTO_H_ 1167struct fpathconf_args { 1168 int fd; 1169 int name; 1170}; 1171#endif 1172/* ARGSUSED */ 1173int 1174fpathconf(struct thread *td, struct fpathconf_args *uap) 1175{ 1176 struct file *fp; 1177 struct vnode *vp; 1178 int error; 1179 1180 if ((error = fget(td, uap->fd, &fp)) != 0) 1181 return (error); 1182 1183 /* If asynchronous I/O is available, it works for all descriptors. */ 1184 if (uap->name == _PC_ASYNC_IO) { 1185 td->td_retval[0] = async_io_version; 1186 goto out; 1187 } 1188 vp = fp->f_vnode; 1189 if (vp != NULL) { 1190 int vfslocked; 1191 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1192 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1193 error = VOP_PATHCONF(vp, uap->name, td->td_retval); 1194 VOP_UNLOCK(vp, 0, td); 1195 VFS_UNLOCK_GIANT(vfslocked); 1196 } else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) { 1197 if (uap->name != _PC_PIPE_BUF) { 1198 error = EINVAL; 1199 } else { 1200 td->td_retval[0] = PIPE_BUF; 1201 error = 0; 1202 } 1203 } else { 1204 error = EOPNOTSUPP; 1205 } 1206out: 1207 fdrop(fp, td); 1208 return (error); 1209} 1210 1211/* 1212 * Grow the file table to accomodate (at least) nfd descriptors. This may 1213 * block and drop the filedesc lock, but it will reacquire it before 1214 * returning. 
*/
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
	struct file **ntable;
	char *nfileflags;
	int nnfiles, onfiles;
	NDSLOTTYPE *nmap;

	FILEDESC_XLOCK_ASSERT(fdp);

	KASSERT(fdp->fd_nfiles > 0,
	    ("zero-length file table"));

	/* compute the size of the new table */
	onfiles = fdp->fd_nfiles;
	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
	if (nnfiles <= onfiles)
		/* the table is already large enough */
		return;

	/* allocate a new table and (if required) new bitmaps */
	FILEDESC_XUNLOCK(fdp);
	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
	    M_FILEDESC, M_ZERO | M_WAITOK);
	/* The flag bytes share the allocation, right after the pointers. */
	nfileflags = (char *)&ntable[nnfiles];
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
		MALLOC(nmap, NDSLOTTYPE *, NDSLOTS(nnfiles) * NDSLOTSIZE,
		    M_FILEDESC, M_ZERO | M_WAITOK);
	else
		nmap = NULL;
	FILEDESC_XLOCK(fdp);

	/*
	 * We now have new tables ready to go.  Since we dropped the
	 * filedesc lock to call malloc(), watch out for a race.
	 */
	onfiles = fdp->fd_nfiles;
	if (onfiles >= nnfiles) {
		/* we lost the race, but that's OK */
		free(ntable, M_FILEDESC);
		if (nmap != NULL)
			free(nmap, M_FILEDESC);
		return;
	}
	/* Carry the existing entries over; the remainder is zeroed. */
	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
	/*
	 * Only a table larger than NDFILE entries is freed; presumably a
	 * table of NDFILE entries or fewer is the one embedded in
	 * struct filedesc0 rather than a malloc'ed one.
	 */
	if (onfiles > NDFILE)
		free(fdp->fd_ofiles, M_FILEDESC);
	fdp->fd_ofiles = ntable;
	fdp->fd_ofileflags = nfileflags;
	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
		/* Same rule for the bitmap: free it only if it outgrew NDFILE. */
		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
			free(fdp->fd_map, M_FILEDESC);
		fdp->fd_map = nmap;
	}
	fdp->fd_nfiles = nnfiles;
}

/*
 * Allocate a file descriptor for the process.
1277 */ 1278int 1279fdalloc(struct thread *td, int minfd, int *result) 1280{ 1281 struct proc *p = td->td_proc; 1282 struct filedesc *fdp = p->p_fd; 1283 int fd = -1, maxfd; 1284 1285 FILEDESC_XLOCK_ASSERT(fdp); 1286 1287 if (fdp->fd_freefile > minfd) 1288 minfd = fdp->fd_freefile; 1289 1290 PROC_LOCK(p); 1291 maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); 1292 PROC_UNLOCK(p); 1293 1294 /* 1295 * Search the bitmap for a free descriptor. If none is found, try 1296 * to grow the file table. Keep at it until we either get a file 1297 * descriptor or run into process or system limits; fdgrowtable() 1298 * may drop the filedesc lock, so we're in a race. 1299 */ 1300 for (;;) { 1301 fd = fd_first_free(fdp, minfd, fdp->fd_nfiles); 1302 if (fd >= maxfd) 1303 return (EMFILE); 1304 if (fd < fdp->fd_nfiles) 1305 break; 1306 fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd)); 1307 } 1308 1309 /* 1310 * Perform some sanity checks, then mark the file descriptor as 1311 * used and return it to the caller. 1312 */ 1313 KASSERT(!fdisused(fdp, fd), 1314 ("fd_first_free() returned non-free descriptor")); 1315 KASSERT(fdp->fd_ofiles[fd] == NULL, 1316 ("free descriptor isn't")); 1317 fdp->fd_ofileflags[fd] = 0; /* XXX needed? */ 1318 fdused(fdp, fd); 1319 *result = fd; 1320 return (0); 1321} 1322 1323/* 1324 * Check to see whether n user file descriptors are available to the process 1325 * p. 
*/
int
fdavail(struct thread *td, int n)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp);

	PROC_LOCK(p);
	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
	PROC_UNLOCK(p);
	/*
	 * Slots between the current table size and the limit count as
	 * available; if they alone cover the request we are done.
	 */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	/*
	 * Otherwise count the empty slots from fd_freefile up to the
	 * smaller of the table size and the limit.
	 */
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}

/*
 * Create a new open file structure and allocate a file descriptor for the
 * process that refers to it.  We add one reference to the file for the
 * descriptor table and one reference for resultfp. This is to prevent us
 * being preempted and the entry in the descriptor table closed after we
 * release the FILEDESC lock.
 *
 * Returns 0 on success, ENFILE when the system-wide open file limit is
 * reached, or the error from fdalloc().  On success *resultfp and *resultfd
 * are filled in when the respective pointer is non-NULL.
 */
int
falloc(struct thread *td, struct file **resultfp, int *resultfd)
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;
	/* The last twentieth of maxfiles requires PRIV_MAXFILES (see below). */
	int maxuserfiles = maxfiles - (maxfiles / 20);
	/* State for rate-limiting the console message below. */
	static struct timeval lastfail;
	static int curfail;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	sx_xlock(&filelist_lock);

	if ((openfiles >= maxuserfiles &&
	    priv_check(td, PRIV_MAXFILES) != 0) ||
	    openfiles >= maxfiles) {
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
				td->td_ucred->cr_ruid);
		}
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		return (ENFILE);
	}
	openfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
	fp->f_count = 1;
	if (resultfp)
		fp->f_count++;
	fp->f_cred = crhold(td->td_ucred);
	/*
	 * Until the caller installs real ops the file is unusable:
	 * _fget() answers EBADF for a file whose f_ops is &badfileops.
	 */
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fp->f_vnode = NULL;
	FILEDESC_XLOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_XUNLOCK(p->p_fd);
		/*
		 * Give back the descriptor table's reference and, if one
		 * was taken, the reference meant for resultfp.
		 */
		fdrop(fp, td);
		if (resultfp)
			fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_XUNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}

/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 *
 * fdp may be NULL, in which case no directory references are copied.  The
 * new table starts out empty, with NDFILE slots backed by storage inside
 * the filedesc0 allocation itself.
 */
struct filedesc *
fdinit(struct filedesc *fdp)
{
	struct filedesc0 *newfdp;

	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
	if (fdp != NULL) {
		FILEDESC_XLOCK(fdp);
		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
		if (newfdp->fd_fd.fd_cdir)
			VREF(newfdp->fd_fd.fd_cdir);
		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
		if (newfdp->fd_fd.fd_rdir)
			VREF(newfdp->fd_fd.fd_rdir);
		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
		if (newfdp->fd_fd.fd_jdir)
			VREF(newfdp->fd_fd.fd_jdir);
		FILEDESC_XUNLOCK(fdp);
	}

	/* Create the file descriptor table. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_holdcnt = 1;
	newfdp->fd_fd.fd_cmask = CMASK;
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
	newfdp->fd_fd.fd_lastfile = -1;
	return (&newfdp->fd_fd);
}

/*
 * Take a hold on the filedesc structure of process p so that it is not
 * freed while being examined.  Returns NULL if the process has no filedesc.
 * The hold is released with fddrop().
 */
static struct filedesc *
fdhold(struct proc *p)
{
	struct filedesc *fdp;

	mtx_lock(&fdesc_mtx);
	fdp = p->p_fd;
	if (fdp != NULL)
		fdp->fd_holdcnt++;
	mtx_unlock(&fdesc_mtx);
	return (fdp);
}

/*
 * Release a hold on a filedesc structure.  When the last hold goes away
 * the lock is destroyed and the structure is freed.
 */
static void
fddrop(struct filedesc *fdp)
{
	int i;

	mtx_lock(&fdesc_mtx);
	i = --fdp->fd_holdcnt;
	mtx_unlock(&fdesc_mtx);
	if (i > 0)
		return;

	FILEDESC_LOCK_DESTROY(fdp);
	FREE(fdp, M_FILEDESC);
}

/*
 * Share a filedesc structure.  Adds one to fd_refcnt and returns fdp.
 */
struct filedesc *
fdshare(struct filedesc *fdp)
{

	FILEDESC_XLOCK(fdp);
	fdp->fd_refcnt++;
	FILEDESC_XUNLOCK(fdp);
	return (fdp);
}

/*
 * Unshare a filedesc structure, if necessary by making a copy
 */
void
fdunshare(struct proc *p, struct thread *td)
{

	FILEDESC_XLOCK(p->p_fd);
	if (p->p_fd->fd_refcnt > 1) {
		struct filedesc *tmp;

		FILEDESC_XUNLOCK(p->p_fd);
		/*
		 * Copy first, then drop our reference on the shared table
		 * via fdfree(), and only then switch over to the copy.
		 */
		tmp = fdcopy(p->p_fd);
		fdfree(td);
		p->p_fd = tmp;
	} else
		FILEDESC_XUNLOCK(p->p_fd);
}

/*
 * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
 * this is to ease callers, not catch errors.
 */
struct filedesc *
fdcopy(struct filedesc *fdp)
{
	struct filedesc *newfdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	newfdp = fdinit(fdp);
	FILEDESC_SLOCK(fdp);
	/*
	 * Size the new table to hold fd_lastfile.  The source lock is
	 * released while growing, so fd_lastfile is re-checked each time
	 * around the loop.
	 */
	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
		FILEDESC_SUNLOCK(fdp);
		FILEDESC_XLOCK(newfdp);
		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
		FILEDESC_XUNLOCK(newfdp);
		FILEDESC_SLOCK(fdp);
	}
	/* copy everything except kqueue descriptors */
	newfdp->fd_freefile = -1;
	for (i = 0; i <= fdp->fd_lastfile; ++i) {
		if (fdisused(fdp, i) &&
		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE) {
			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
			fhold(newfdp->fd_ofiles[i]);
			newfdp->fd_lastfile = i;
		} else {
			/* Remember the first slot left empty in the copy. */
			if (newfdp->fd_freefile == -1)
				newfdp->fd_freefile = i;
		}
	}
	FILEDESC_SUNLOCK(fdp);
	/* Mark every copied slot as used in the new table. */
	FILEDESC_XLOCK(newfdp);
	for (i = 0; i <= newfdp->fd_lastfile; ++i)
		if (newfdp->fd_ofiles[i] != NULL)
			fdused(newfdp, i);
	FILEDESC_XUNLOCK(newfdp);
	FILEDESC_SLOCK(fdp);
	/*
	 * No hole was seen during the copy: i is now one past the new
	 * table's fd_lastfile, so that becomes the first free slot.
	 */
	if (newfdp->fd_freefile == -1)
		newfdp->fd_freefile = i;
	newfdp->fd_cmask = fdp->fd_cmask;
	FILEDESC_SUNLOCK(fdp);
	return (newfdp);
}

/*
 * Release a filedesc structure.
 *
 * Drops the calling process's reference on its descriptor table.  If the
 * process has a filedesc_to_leader entry, that is released first.  When
 * the last reference goes away every open file is closed, the directory
 * vnodes are released and the hold taken in fdinit() is dropped.
 */
void
fdfree(struct thread *td)
{
	struct filedesc *fdp;
	struct file **fpp;
	int i, locked;
	struct filedesc_to_leader *fdtol;
	struct file *fp;
	struct vnode *cdir, *jdir, *rdir, *vp;
	struct flock lf;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/* Check for special need to clear POSIX style locks */
	fdtol = td->td_proc->p_fdtol;
	if (fdtol != NULL) {
		FILEDESC_XLOCK(fdp);
		KASSERT(fdtol->fdl_refcount > 0,
			("filedesc_to_refcount botch: fdl_refcount=%d",
			 fdtol->fdl_refcount));
		if (fdtol->fdl_refcount == 1 &&
		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
			for (i = 0, fpp = fdp->fd_ofiles;
			     i <= fdp->fd_lastfile;
			     i++, fpp++) {
				if (*fpp == NULL ||
				    (*fpp)->f_type != DTYPE_VNODE)
					continue;
				fp = *fpp;
				/*
				 * Keep the file alive while the filedesc
				 * lock is released for the unlock request.
				 */
				fhold(fp);
				FILEDESC_XUNLOCK(fdp);
				lf.l_whence = SEEK_SET;
				lf.l_start = 0;
				lf.l_len = 0;
				lf.l_type = F_UNLCK;
				vp = fp->f_vnode;
				locked = VFS_LOCK_GIANT(vp->v_mount);
				(void) VOP_ADVLOCK(vp,
						   (caddr_t)td->td_proc->
						   p_leader,
						   F_UNLCK,
						   &lf,
						   F_POSIX);
				VFS_UNLOCK_GIANT(locked);
				FILEDESC_XLOCK(fdp);
				fdrop(fp, td);
				/*
				 * Recompute the cursor from fd_ofiles, which
				 * is re-read here because the lock was
				 * dropped above.
				 */
				fpp = fdp->fd_ofiles + i;
			}
		}
	retry:
		if (fdtol->fdl_refcount == 1) {
			if (fdp->fd_holdleaderscount > 0 &&
			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
				/*
				 * close() or do_dup() has cleared a reference
				 * in a shared file descriptor table.
				 */
				fdp->fd_holdleaderswakeup = 1;
				sx_sleep(&fdp->fd_holdleaderscount,
				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
				goto retry;
			}
			if (fdtol->fdl_holdcount > 0) {
				/*
				 * Ensure that fdtol->fdl_leader remains
				 * valid in closef().
				 */
				fdtol->fdl_wakeup = 1;
				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
				    "fdlhold", 0);
				goto retry;
			}
		}
		fdtol->fdl_refcount--;
		if (fdtol->fdl_refcount == 0 &&
		    fdtol->fdl_holdcount == 0) {
			/* Unlink the entry from the circular leader list. */
			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
		} else
			fdtol = NULL;
		td->td_proc->p_fdtol = NULL;
		FILEDESC_XUNLOCK(fdp);
		/* Only non-NULL when the entry was unlinked above. */
		if (fdtol != NULL)
			FREE(fdtol, M_FILEDESC_TO_LEADER);
	}
	FILEDESC_XLOCK(fdp);
	i = --fdp->fd_refcnt;
	FILEDESC_XUNLOCK(fdp);
	if (i > 0)
		return;
	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	fpp = fdp->fd_ofiles;
	/* Visits slots 0 .. fd_lastfile (fd_lastfile + 1 iterations). */
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}
	FILEDESC_XLOCK(fdp);

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/*
	 * Only tables that outgrew the initial NDFILE slots were allocated
	 * separately (see fdgrowtable()); the initial ones are part of the
	 * structure itself.
	 */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
		FREE(fdp->fd_map, M_FILEDESC);

	fdp->fd_nfiles = 0;

	/* Detach the directory vnodes under the lock; release them after. */
	cdir = fdp->fd_cdir;
	fdp->fd_cdir = NULL;
	rdir = fdp->fd_rdir;
	fdp->fd_rdir = NULL;
	jdir = fdp->fd_jdir;
	fdp->fd_jdir = NULL;
	FILEDESC_XUNLOCK(fdp);

	if (cdir) {
		locked = VFS_LOCK_GIANT(cdir->v_mount);
		vrele(cdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (rdir) {
		locked = VFS_LOCK_GIANT(rdir->v_mount);
		vrele(rdir);
		VFS_UNLOCK_GIANT(locked);
	}
	if (jdir) {
		locked = VFS_LOCK_GIANT(jdir->v_mount);
		vrele(jdir);
		VFS_UNLOCK_GIANT(locked);
	}

	/* Drop the hold that fdinit() started the structure with. */
	fddrop(fdp);
}

/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file which would otherwise be
off-limits to the process. We check for filesystems where 1718 * the vnode can change out from under us after execve (like [lin]procfs). 1719 * 1720 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is 1721 * sufficient. We also don't check for setugidness since we know we are. 1722 */ 1723static int 1724is_unsafe(struct file *fp) 1725{ 1726 if (fp->f_type == DTYPE_VNODE) { 1727 struct vnode *vp = fp->f_vnode; 1728 1729 if ((vp->v_vflag & VV_PROCDEP) != 0) 1730 return (1); 1731 } 1732 return (0); 1733} 1734 1735/* 1736 * Make this setguid thing safe, if at all possible. 1737 */ 1738void 1739setugidsafety(struct thread *td) 1740{ 1741 struct filedesc *fdp; 1742 int i; 1743 1744 /* Certain daemons might not have file descriptors. */ 1745 fdp = td->td_proc->p_fd; 1746 if (fdp == NULL) 1747 return; 1748 1749 /* 1750 * Note: fdp->fd_ofiles may be reallocated out from under us while 1751 * we are blocked in a close. Be careful! 1752 */ 1753 FILEDESC_XLOCK(fdp); 1754 for (i = 0; i <= fdp->fd_lastfile; i++) { 1755 if (i > 2) 1756 break; 1757 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { 1758 struct file *fp; 1759 1760 knote_fdclose(td, i); 1761 /* 1762 * NULL-out descriptor prior to close to avoid 1763 * a race while close blocks. 1764 */ 1765 fp = fdp->fd_ofiles[i]; 1766 fdp->fd_ofiles[i] = NULL; 1767 fdp->fd_ofileflags[i] = 0; 1768 fdunused(fdp, i); 1769 FILEDESC_XUNLOCK(fdp); 1770 (void) closef(fp, td); 1771 FILEDESC_XLOCK(fdp); 1772 } 1773 } 1774 FILEDESC_XUNLOCK(fdp); 1775} 1776 1777/* 1778 * If a specific file object occupies a specific file descriptor, close the 1779 * file descriptor entry and drop a reference on the file object. This is a 1780 * convenience function to handle a subsequent error in a function that calls 1781 * falloc() that handles the race that another thread might have closed the 1782 * file descriptor out from under the thread creating the file object. 
1783 */ 1784void 1785fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td) 1786{ 1787 1788 FILEDESC_XLOCK(fdp); 1789 if (fdp->fd_ofiles[idx] == fp) { 1790 fdp->fd_ofiles[idx] = NULL; 1791 fdunused(fdp, idx); 1792 FILEDESC_XUNLOCK(fdp); 1793 fdrop(fp, td); 1794 } else 1795 FILEDESC_XUNLOCK(fdp); 1796} 1797 1798/* 1799 * Close any files on exec? 1800 */ 1801void 1802fdcloseexec(struct thread *td) 1803{ 1804 struct filedesc *fdp; 1805 int i; 1806 1807 /* Certain daemons might not have file descriptors. */ 1808 fdp = td->td_proc->p_fd; 1809 if (fdp == NULL) 1810 return; 1811 1812 FILEDESC_XLOCK(fdp); 1813 1814 /* 1815 * We cannot cache fd_ofiles or fd_ofileflags since operations 1816 * may block and rip them out from under us. 1817 */ 1818 for (i = 0; i <= fdp->fd_lastfile; i++) { 1819 if (fdp->fd_ofiles[i] != NULL && 1820 (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE || 1821 (fdp->fd_ofileflags[i] & UF_EXCLOSE))) { 1822 struct file *fp; 1823 1824 knote_fdclose(td, i); 1825 /* 1826 * NULL-out descriptor prior to close to avoid 1827 * a race while close blocks. 1828 */ 1829 fp = fdp->fd_ofiles[i]; 1830 fdp->fd_ofiles[i] = NULL; 1831 fdp->fd_ofileflags[i] = 0; 1832 fdunused(fdp, i); 1833 if (fp->f_type == DTYPE_MQUEUE) 1834 mq_fdclose(td, i, fp); 1835 FILEDESC_XUNLOCK(fdp); 1836 (void) closef(fp, td); 1837 FILEDESC_XLOCK(fdp); 1838 } 1839 } 1840 FILEDESC_XUNLOCK(fdp); 1841} 1842 1843/* 1844 * It is unsafe for set[ug]id processes to be started with file 1845 * descriptors 0..2 closed, as these descriptors are given implicit 1846 * significance in the Standard C library. fdcheckstd() will create a 1847 * descriptor referencing /dev/null for each of stdin, stdout, and 1848 * stderr that is not already open. 
1849 */ 1850int 1851fdcheckstd(struct thread *td) 1852{ 1853 struct filedesc *fdp; 1854 register_t retval, save; 1855 int i, error, devnull; 1856 1857 fdp = td->td_proc->p_fd; 1858 if (fdp == NULL) 1859 return (0); 1860 KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared")); 1861 devnull = -1; 1862 error = 0; 1863 for (i = 0; i < 3; i++) { 1864 if (fdp->fd_ofiles[i] != NULL) 1865 continue; 1866 if (devnull < 0) { 1867 save = td->td_retval[0]; 1868 error = kern_open(td, "/dev/null", UIO_SYSSPACE, 1869 O_RDWR, 0); 1870 devnull = td->td_retval[0]; 1871 KASSERT(devnull == i, ("oof, we didn't get our fd")); 1872 td->td_retval[0] = save; 1873 if (error) 1874 break; 1875 } else { 1876 error = do_dup(td, DUP_FIXED, devnull, i, &retval); 1877 if (error != 0) 1878 break; 1879 } 1880 } 1881 return (error); 1882} 1883 1884/* 1885 * Internal form of close. Decrement reference count on file structure. 1886 * Note: td may be NULL when closing a file that was being passed in a 1887 * message. 1888 * 1889 * XXXRW: Giant is not required for the caller, but often will be held; this 1890 * makes it moderately likely the Giant will be recursed in the VFS case. 1891 */ 1892int 1893closef(struct file *fp, struct thread *td) 1894{ 1895 struct vnode *vp; 1896 struct flock lf; 1897 struct filedesc_to_leader *fdtol; 1898 struct filedesc *fdp; 1899 1900 /* 1901 * POSIX record locking dictates that any close releases ALL 1902 * locks owned by this process. This is handled by setting 1903 * a flag in the unlock to free ONLY locks obeying POSIX 1904 * semantics, and not to free BSD-style file locks. 1905 * If the descriptor was in a message, POSIX-style locks 1906 * aren't passed with the descriptor, and the thread pointer 1907 * will be NULL. Callers should be careful only to pass a 1908 * NULL thread pointer when there really is no owning 1909 * context that might have locks, or the locks will be 1910 * leaked. 
1911 */ 1912 if (fp->f_type == DTYPE_VNODE && td != NULL) { 1913 int vfslocked; 1914 1915 vp = fp->f_vnode; 1916 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1917 if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { 1918 lf.l_whence = SEEK_SET; 1919 lf.l_start = 0; 1920 lf.l_len = 0; 1921 lf.l_type = F_UNLCK; 1922 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, 1923 F_UNLCK, &lf, F_POSIX); 1924 } 1925 fdtol = td->td_proc->p_fdtol; 1926 if (fdtol != NULL) { 1927 /* 1928 * Handle special case where file descriptor table is 1929 * shared between multiple process leaders. 1930 */ 1931 fdp = td->td_proc->p_fd; 1932 FILEDESC_XLOCK(fdp); 1933 for (fdtol = fdtol->fdl_next; 1934 fdtol != td->td_proc->p_fdtol; 1935 fdtol = fdtol->fdl_next) { 1936 if ((fdtol->fdl_leader->p_flag & 1937 P_ADVLOCK) == 0) 1938 continue; 1939 fdtol->fdl_holdcount++; 1940 FILEDESC_XUNLOCK(fdp); 1941 lf.l_whence = SEEK_SET; 1942 lf.l_start = 0; 1943 lf.l_len = 0; 1944 lf.l_type = F_UNLCK; 1945 vp = fp->f_vnode; 1946 (void) VOP_ADVLOCK(vp, 1947 (caddr_t)fdtol->fdl_leader, 1948 F_UNLCK, &lf, F_POSIX); 1949 FILEDESC_XLOCK(fdp); 1950 fdtol->fdl_holdcount--; 1951 if (fdtol->fdl_holdcount == 0 && 1952 fdtol->fdl_wakeup != 0) { 1953 fdtol->fdl_wakeup = 0; 1954 wakeup(fdtol); 1955 } 1956 } 1957 FILEDESC_XUNLOCK(fdp); 1958 } 1959 VFS_UNLOCK_GIANT(vfslocked); 1960 } 1961 return (fdrop(fp, td)); 1962} 1963 1964/* 1965 * Extract the file pointer associated with the specified descriptor for the 1966 * current user process. 1967 * 1968 * If the descriptor doesn't exist, EBADF is returned. 1969 * 1970 * If the descriptor exists but doesn't match 'flags' then return EBADF for 1971 * read attempts and EINVAL for write attempts. 1972 * 1973 * If 'hold' is set (non-zero) the file's refcount will be bumped on return. 1974 * It should be dropped with fdrop(). If it is not set, then the refcount 1975 * will not be bumped however the thread's filedesc struct will be returned 1976 * locked (for fgetsock). 
1977 * 1978 * If an error occured the non-zero error is returned and *fpp is set to 1979 * NULL. Otherwise *fpp is set and zero is returned. 1980 */ 1981static __inline int 1982_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold) 1983{ 1984 struct filedesc *fdp; 1985 struct file *fp; 1986 1987 *fpp = NULL; 1988 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) 1989 return (EBADF); 1990 FILEDESC_SLOCK(fdp); 1991 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) { 1992 FILEDESC_SUNLOCK(fdp); 1993 return (EBADF); 1994 } 1995 1996 /* 1997 * FREAD and FWRITE failure return EBADF as per POSIX. 1998 * 1999 * Only one flag, or 0, may be specified. 2000 */ 2001 if (flags == FREAD && (fp->f_flag & FREAD) == 0) { 2002 FILEDESC_SUNLOCK(fdp); 2003 return (EBADF); 2004 } 2005 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) { 2006 FILEDESC_SUNLOCK(fdp); 2007 return (EBADF); 2008 } 2009 if (hold) { 2010 fhold(fp); 2011 FILEDESC_SUNLOCK(fdp); 2012 } 2013 *fpp = fp; 2014 return (0); 2015} 2016 2017int 2018fget(struct thread *td, int fd, struct file **fpp) 2019{ 2020 2021 return(_fget(td, fd, fpp, 0, 1)); 2022} 2023 2024int 2025fget_read(struct thread *td, int fd, struct file **fpp) 2026{ 2027 2028 return(_fget(td, fd, fpp, FREAD, 1)); 2029} 2030 2031int 2032fget_write(struct thread *td, int fd, struct file **fpp) 2033{ 2034 2035 return(_fget(td, fd, fpp, FWRITE, 1)); 2036} 2037 2038/* 2039 * Like fget() but loads the underlying vnode, or returns an error if the 2040 * descriptor does not represent a vnode. Note that pipes use vnodes but 2041 * never have VM objects. The returned vnode will be vref()'d. 2042 * 2043 * XXX: what about the unused flags ? 
2044 */ 2045static __inline int 2046_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags) 2047{ 2048 struct file *fp; 2049 int error; 2050 2051 *vpp = NULL; 2052 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2053 return (error); 2054 if (fp->f_vnode == NULL) { 2055 error = EINVAL; 2056 } else { 2057 *vpp = fp->f_vnode; 2058 vref(*vpp); 2059 } 2060 FILEDESC_SUNLOCK(td->td_proc->p_fd); 2061 return (error); 2062} 2063 2064int 2065fgetvp(struct thread *td, int fd, struct vnode **vpp) 2066{ 2067 2068 return (_fgetvp(td, fd, vpp, 0)); 2069} 2070 2071int 2072fgetvp_read(struct thread *td, int fd, struct vnode **vpp) 2073{ 2074 2075 return (_fgetvp(td, fd, vpp, FREAD)); 2076} 2077 2078#ifdef notyet 2079int 2080fgetvp_write(struct thread *td, int fd, struct vnode **vpp) 2081{ 2082 2083 return (_fgetvp(td, fd, vpp, FWRITE)); 2084} 2085#endif 2086 2087/* 2088 * Like fget() but loads the underlying socket, or returns an error if the 2089 * descriptor does not represent a socket. 2090 * 2091 * We bump the ref count on the returned socket. XXX Also obtain the SX lock 2092 * in the future. 2093 * 2094 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely 2095 * on their file descriptor reference to prevent the socket from being free'd 2096 * during use. 2097 */ 2098int 2099fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp) 2100{ 2101 struct file *fp; 2102 int error; 2103 2104 *spp = NULL; 2105 if (fflagp != NULL) 2106 *fflagp = 0; 2107 if ((error = _fget(td, fd, &fp, 0, 0)) != 0) 2108 return (error); 2109 if (fp->f_type != DTYPE_SOCKET) { 2110 error = ENOTSOCK; 2111 } else { 2112 *spp = fp->f_data; 2113 if (fflagp) 2114 *fflagp = fp->f_flag; 2115 SOCK_LOCK(*spp); 2116 soref(*spp); 2117 SOCK_UNLOCK(*spp); 2118 } 2119 FILEDESC_SUNLOCK(td->td_proc->p_fd); 2120 return (error); 2121} 2122 2123/* 2124 * Drop the reference count on the socket and XXX release the SX lock in the 2125 * future. The last reference closes the socket. 
*
 * XXXRW: fputsock() is deprecated, see comment for fgetsock().
 */
void
fputsock(struct socket *so)
{

	/*
	 * NOTE(review): neither lock taken here is released in this
	 * function; presumably sorele() drops both -- confirm against its
	 * definition.
	 */
	ACCEPT_LOCK();
	SOCK_LOCK(so);
	sorele(so);
}

/*
 * Drop one reference on fp.  Takes the file lock and hands over to
 * fdrop_locked(), which releases it.
 */
int
fdrop(struct file *fp, struct thread *td)
{

	FILE_LOCK(fp);
	return (fdrop_locked(fp, td));
}

/*
 * Drop reference on struct file passed in, may call fo_close() if the
 * reference hits zero.
 * Expects struct file locked, and will unlock it.
 *
 * Returns 0 while other references remain; for the last reference returns
 * the result of fo_close(), or 0 if the file still had badfileops.
 */
static int
fdrop_locked(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK_ASSERT(fp, MA_OWNED);

	if (--fp->f_count > 0) {
		FILE_UNLOCK(fp);
		return (0);
	}

	/*
	 * We might have just dropped the last reference to a file
	 * object that is for a UNIX domain socket whose message
	 * buffers are being examined in unp_gc().  If that is the
	 * case, FWAIT will be set in f_gcflag and we need to wait for
	 * unp_gc() to finish its scan.
	 */
	while (fp->f_gcflag & FWAIT)
		msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0);

	/* We have the last ref so we can proceed without the file lock. */
	FILE_UNLOCK(fp);
	if (fp->f_count < 0)
		panic("fdrop: count < 0");
	/* A file that never got real ops has nothing to close. */
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	else
		error = 0;

	/* Undo the global bookkeeping done in falloc(). */
	sx_xlock(&filelist_lock);
	LIST_REMOVE(fp, f_list);
	openfiles--;
	sx_xunlock(&filelist_lock);
	crfree(fp->f_cred);
	uma_zfree(file_zone, fp);

	return (error);
}

/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on the entire file
 * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
*/
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;
	int	how;
};
#endif
/*
 * uap->how selects the operation: LOCK_UN unlocks, LOCK_EX or LOCK_SH
 * requests a write or read lock, and LOCK_NB makes the request
 * non-blocking.  Returns EOPNOTSUPP for descriptors that are not vnodes,
 * EBADF when none of LOCK_UN, LOCK_EX or LOCK_SH is given, and otherwise
 * the result of VOP_ADVLOCK().
 */
/* ARGSUSED */
int
flock(struct thread *td, struct flock_args *uap)
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int vfslocked;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	vp = fp->f_vnode;
	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		FILE_LOCK(fp);
		fp->f_flag &= ~FHASLOCK;
		FILE_UNLOCK(fp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		error = EBADF;
		goto done2;
	}
	/*
	 * NOTE(review): FHASLOCK is set before the lock is requested and is
	 * not cleared here if VOP_ADVLOCK() fails.
	 */
	FILE_LOCK(fp);
	fp->f_flag |= FHASLOCK;
	FILE_UNLOCK(fp);
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	/* Giant (if taken) is still held while the reference is dropped. */
	fdrop(fp, td);
	VFS_UNLOCK_GIANT(vfslocked);
	return (error);
}
/*
 * Duplicate the specified descriptor to a free descriptor.
 *
 * 'error' is the code being handled and selects the action (see below);
 * dfd is the descriptor to take the file from and indx the slot to
 * install it in.  Returns 0 on success, EBADF if dfd is out of range or
 * not open, EACCES if 'mode' asks for more access than dfd was opened
 * with, and otherwise 'error' itself.
 *
 * NOTE(review): indx is not range-checked here; presumably the caller
 * guarantees it is a valid slot -- confirm.
 */
int
dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
{
	struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_XLOCK(fdp);
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_XUNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and store it in
	 * (indx).  (dfd) is effectively closed by this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		FILE_LOCK(wfp);
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILE_UNLOCK(wfp);
			FILEDESC_XUNLOCK(fdp);
			return (EACCES);
		}
		/* fp is whatever occupied indx before (possibly NULL). */
		fp = fdp->fd_ofiles[indx];
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		if (fp == NULL)
			fdused(fdp, indx);
		/* The new table entry needs its own reference on wfp. */
		fhold_locked(wfp);
		FILE_UNLOCK(wfp);
		FILEDESC_XUNLOCK(fdp);
		if (fp != NULL)
			/*
			 * We now own the reference to fp that the ofiles[]
			 * array used to own.  Release it.
			 */
			fdrop(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 * The reference moves with the pointer, so no fhold() is
		 * needed here.
		 */
		fp = fdp->fd_ofiles[indx];
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;
		fdunused(fdp, dfd);
		if (fp == NULL)
			fdused(fdp, indx);
		FILEDESC_XUNLOCK(fdp);

		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop(fp, td);
		return (0);

	default:
		FILEDESC_XUNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}

/*
 * Scan all active processes to see if any of them have a current or root
 * directory of `olddp'. If so, replace them with the new mount point.
*/
void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{
	struct filedesc *fdp;
	struct proc *p;
	int nrele;

	/*
	 * A single reference presumably means that only the caller refers
	 * to olddp, so no process can be using it -- nothing to do.
	 */
	if (vrefcnt(olddp) == 1)
		return;
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		/* Count the olddp references to release after unlocking. */
		nrele = 0;
		FILEDESC_XLOCK(fdp);
		if (fdp->fd_cdir == olddp) {
			vref(newdp);
			fdp->fd_cdir = newdp;
			nrele++;
		}
		if (fdp->fd_rdir == olddp) {
			vref(newdp);
			fdp->fd_rdir = newdp;
			nrele++;
		}
		FILEDESC_XUNLOCK(fdp);
		fddrop(fdp);
		while (nrele--)
			vrele(olddp);
	}
	sx_sunlock(&allproc_lock);
	/* The global root vnode is switched over the same way. */
	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
}

/*
 * Allocate a filedesc_to_leader entry for `leader' with a reference count
 * of one.  If `old' is given the new entry is linked into old's circular
 * list directly after it, under fdp's lock; otherwise the new entry forms
 * a list of its own.
 */
struct filedesc_to_leader *
filedesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
{
	struct filedesc_to_leader *fdtol;

	MALLOC(fdtol, struct filedesc_to_leader *,
	       sizeof(struct filedesc_to_leader),
	       M_FILEDESC_TO_LEADER,
	       M_WAITOK);
	fdtol->fdl_refcount = 1;
	fdtol->fdl_holdcount = 0;
	fdtol->fdl_wakeup = 0;
	fdtol->fdl_leader = leader;
	if (old != NULL) {
		FILEDESC_XLOCK(fdp);
		fdtol->fdl_next = old->fdl_next;
		fdtol->fdl_prev = old;
		old->fdl_next = fdtol;
		fdtol->fdl_next->fdl_prev = fdtol;
		FILEDESC_XUNLOCK(fdp);
	} else {
		fdtol->fdl_next = fdtol;
		fdtol->fdl_prev = fdtol;
	}
	return (fdtol);
}

/*
 * Get file structures globally.
 *
 * Without an output buffer only a size estimate is reported.  Otherwise
 * one struct xfile is written for every open descriptor of every process
 * that the requesting thread is allowed to see.
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	/*
	 * Note: because the number of file descriptors is calculated
	 * in different ways for sizing vs returning the data,
	 * there is information leakage from the first loop.  However,
	 * it is of a similar order of magnitude to the leakage from
	 * global system statistics such as kern.openfiles.
	 */
	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	if (req->oldptr == NULL) {
		/* Sizing pass: sum the reference counts of all files. */
		n = 16;		/* A slight overestimate. */
		sx_slock(&filelist_lock);
		LIST_FOREACH(fp, &filehead, f_list) {
			/*
			 * We should grab the lock, but this is an
			 * estimate, so does it really matter?
			 */
			/* mtx_lock(fp->f_mtxp); */
			n += fp->f_count;
			/* mtx_unlock(f->f_mtxp); */
		}
		sx_sunlock(&filelist_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_state == PRS_NEW)
			continue;
		PROC_LOCK(p);
		if (p_cansee(req->td, p) != 0) {
			PROC_UNLOCK(p);
			continue;
		}
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		fdp = fdhold(p);
		if (fdp == NULL)
			continue;
		FILEDESC_SLOCK(fdp);
		/* The scan stops early once fd_refcnt has dropped to zero. */
		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_vnode = fp->f_vnode;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = fp->f_msgcount;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_SUNLOCK(fdp);
		fddrop(fdp);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}

SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

/*
 * Get per-process file descriptors for use by procstat(1), et al.
2496 */ 2497static int 2498sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) 2499{ 2500 char *fullpath, *freepath; 2501 struct kinfo_file *kif; 2502 struct filedesc *fdp; 2503 int error, i, *name; 2504 struct socket *so; 2505 struct vnode *vp; 2506 struct file *fp; 2507 struct proc *p; 2508 int vfslocked; 2509 2510 name = (int *)arg1; 2511 if ((p = pfind((pid_t)name[0])) == NULL) 2512 return (ESRCH); 2513 if ((error = p_candebug(curthread, p))) { 2514 PROC_UNLOCK(p); 2515 return (error); 2516 } 2517 fdp = fdhold(p); 2518 PROC_UNLOCK(p); 2519 kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); 2520 FILEDESC_SLOCK(fdp); 2521 for (i = 0; i < fdp->fd_nfiles; i++) { 2522 if ((fp = fdp->fd_ofiles[i]) == NULL) 2523 continue; 2524 bzero(kif, sizeof(*kif)); 2525 kif->kf_structsize = sizeof(*kif); 2526 FILE_LOCK(fp); 2527 vp = NULL; 2528 so = NULL; 2529 kif->kf_fd = i; 2530 switch (fp->f_type) { 2531 case DTYPE_VNODE: 2532 kif->kf_type = KF_TYPE_VNODE; 2533 vp = fp->f_vnode; 2534 vref(vp); 2535 break; 2536 2537 case DTYPE_SOCKET: 2538 kif->kf_type = KF_TYPE_SOCKET; 2539 so = fp->f_data; 2540 break; 2541 2542 case DTYPE_PIPE: 2543 kif->kf_type = KF_TYPE_PIPE; 2544 break; 2545 2546 case DTYPE_FIFO: 2547 kif->kf_type = KF_TYPE_FIFO; 2548 vp = fp->f_vnode; 2549 vref(vp); 2550 break; 2551 2552 case DTYPE_KQUEUE: 2553 kif->kf_type = KF_TYPE_KQUEUE; 2554 break; 2555 2556 case DTYPE_CRYPTO: 2557 kif->kf_type = KF_TYPE_CRYPTO; 2558 break; 2559 2560 case DTYPE_MQUEUE: 2561 kif->kf_type = KF_TYPE_MQUEUE; 2562 break; 2563 2564 default: 2565 kif->kf_type = KF_TYPE_UNKNOWN; 2566 break; 2567 } 2568 kif->kf_ref_count = fp->f_count; 2569 if (fp->f_flag & FREAD) 2570 kif->kf_flags |= KF_FLAG_READ; 2571 if (fp->f_flag & FWRITE) 2572 kif->kf_flags |= KF_FLAG_WRITE; 2573 if (fp->f_flag & FAPPEND) 2574 kif->kf_flags |= KF_FLAG_APPEND; 2575 if (fp->f_flag & FASYNC) 2576 kif->kf_flags |= KF_FLAG_ASYNC; 2577 if (fp->f_flag & FFSYNC) 2578 kif->kf_flags |= KF_FLAG_FSYNC; 2579 if (fp->f_flag & FNONBLOCK) 2580 
kif->kf_flags |= KF_FLAG_NONBLOCK; 2581 if (fp->f_flag & O_DIRECT) 2582 kif->kf_flags |= KF_FLAG_DIRECT; 2583 if (fp->f_flag & FHASLOCK) 2584 kif->kf_flags |= KF_FLAG_HASLOCK; 2585 kif->kf_offset = fp->f_offset; 2586 FILE_UNLOCK(fp); 2587 if (vp != NULL) { 2588 switch (vp->v_type) { 2589 case VNON: 2590 kif->kf_vnode_type = KF_VTYPE_VNON; 2591 break; 2592 case VREG: 2593 kif->kf_vnode_type = KF_VTYPE_VREG; 2594 break; 2595 case VDIR: 2596 kif->kf_vnode_type = KF_VTYPE_VDIR; 2597 break; 2598 case VBLK: 2599 kif->kf_vnode_type = KF_VTYPE_VBLK; 2600 break; 2601 case VCHR: 2602 kif->kf_vnode_type = KF_VTYPE_VCHR; 2603 break; 2604 case VLNK: 2605 kif->kf_vnode_type = KF_VTYPE_VLNK; 2606 break; 2607 case VSOCK: 2608 kif->kf_vnode_type = KF_VTYPE_VSOCK; 2609 break; 2610 case VFIFO: 2611 kif->kf_vnode_type = KF_VTYPE_VFIFO; 2612 break; 2613 case VBAD: 2614 kif->kf_vnode_type = KF_VTYPE_VBAD; 2615 break; 2616 default: 2617 kif->kf_vnode_type = KF_VTYPE_UNKNOWN; 2618 break; 2619 } 2620 /* 2621 * It is OK to drop the filedesc lock here as we will 2622 * re-validate and re-evaluate its properties when 2623 * the loop continues. 
2624 */ 2625 freepath = NULL; 2626 fullpath = "-"; 2627 FILEDESC_SUNLOCK(fdp); 2628 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2629 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); 2630 vn_fullpath(curthread, vp, &fullpath, &freepath); 2631 vput(vp); 2632 VFS_UNLOCK_GIANT(vfslocked); 2633 strlcpy(kif->kf_path, fullpath, 2634 sizeof(kif->kf_path)); 2635 if (freepath != NULL) 2636 free(freepath, M_TEMP); 2637 FILEDESC_SLOCK(fdp); 2638 } 2639 if (so != NULL) { 2640 struct sockaddr *sa; 2641 2642 if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa) 2643 == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) { 2644 bcopy(sa, &kif->kf_sa_local, sa->sa_len); 2645 free(sa, M_SONAME); 2646 } 2647 if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa) 2648 == 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) { 2649 bcopy(sa, &kif->kf_sa_peer, sa->sa_len); 2650 free(sa, M_SONAME); 2651 } 2652 kif->kf_sock_domain = 2653 so->so_proto->pr_domain->dom_family; 2654 kif->kf_sock_type = so->so_type; 2655 kif->kf_sock_protocol = so->so_proto->pr_protocol; 2656 } 2657 error = SYSCTL_OUT(req, kif, sizeof(*kif)); 2658 if (error) 2659 break; 2660 } 2661 FILEDESC_SUNLOCK(fdp); 2662 fddrop(fdp); 2663 free(kif, M_TEMP); 2664 return (0); 2665} 2666 2667static SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD, 2668 sysctl_kern_proc_filedesc, "Process filedesc entries"); 2669 2670#ifdef DDB 2671/* 2672 * For the purposes of debugging, generate a human-readable string for the 2673 * file type. 
2674 */ 2675static const char * 2676file_type_to_name(short type) 2677{ 2678 2679 switch (type) { 2680 case 0: 2681 return ("zero"); 2682 case DTYPE_VNODE: 2683 return ("vnod"); 2684 case DTYPE_SOCKET: 2685 return ("sock"); 2686 case DTYPE_PIPE: 2687 return ("pipe"); 2688 case DTYPE_FIFO: 2689 return ("fifo"); 2690 case DTYPE_KQUEUE: 2691 return ("kque"); 2692 case DTYPE_CRYPTO: 2693 return ("crpt"); 2694 case DTYPE_MQUEUE: 2695 return ("mque"); 2696 default: 2697 return ("unkn"); 2698 } 2699} 2700 2701/* 2702 * For the purposes of debugging, identify a process (if any, perhaps one of 2703 * many) that references the passed file in its file descriptor array. Return 2704 * NULL if none. 2705 */ 2706static struct proc * 2707file_to_first_proc(struct file *fp) 2708{ 2709 struct filedesc *fdp; 2710 struct proc *p; 2711 int n; 2712 2713 FOREACH_PROC_IN_SYSTEM(p) { 2714 if (p->p_state == PRS_NEW) 2715 continue; 2716 fdp = p->p_fd; 2717 if (fdp == NULL) 2718 continue; 2719 for (n = 0; n < fdp->fd_nfiles; n++) { 2720 if (fp == fdp->fd_ofiles[n]) 2721 return (p); 2722 } 2723 } 2724 return (NULL); 2725} 2726 2727static void 2728db_print_file(struct file *fp, int header) 2729{ 2730 struct proc *p; 2731 2732 if (header) 2733 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n", 2734 "File", "Type", "Data", "Flag", "GCFl", "Count", 2735 "MCount", "Vnode", "FPID", "FCmd"); 2736 p = file_to_first_proc(fp); 2737 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp, 2738 file_type_to_name(fp->f_type), fp->f_data, fp->f_flag, 2739 fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode, 2740 p != NULL ? p->p_pid : -1, p != NULL ? 
p->p_comm : "-"); 2741} 2742 2743DB_SHOW_COMMAND(file, db_show_file) 2744{ 2745 struct file *fp; 2746 2747 if (!have_addr) { 2748 db_printf("usage: show file <addr>\n"); 2749 return; 2750 } 2751 fp = (struct file *)addr; 2752 db_print_file(fp, 1); 2753} 2754 2755DB_SHOW_COMMAND(files, db_show_files) 2756{ 2757 struct file *fp; 2758 int header; 2759 2760 header = 1; 2761 LIST_FOREACH(fp, &filehead, f_list) { 2762 db_print_file(fp, header); 2763 header = 0; 2764 } 2765} 2766#endif 2767 2768SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW, 2769 &maxfilesperproc, 0, "Maximum files allowed open per process"); 2770 2771SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, 2772 &maxfiles, 0, "Maximum number of files"); 2773 2774SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD, 2775 &openfiles, 0, "System-wide number of open files"); 2776 2777/* ARGSUSED*/ 2778static void 2779filelistinit(void *dummy) 2780{ 2781 2782 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL, 2783 NULL, NULL, UMA_ALIGN_PTR, 0); 2784 sx_init(&filelist_lock, "filelist lock"); 2785 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); 2786 mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF); 2787} 2788SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) 2789 2790/*-------------------------------------------------------------------*/ 2791 2792static int 2793badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) 2794{ 2795 2796 return (EBADF); 2797} 2798 2799static int 2800badfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td) 2801{ 2802 2803 return (EBADF); 2804} 2805 2806static int 2807badfo_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) 2808{ 2809 2810 return (0); 2811} 2812 2813static int 2814badfo_kqfilter(struct file *fp, struct knote *kn) 2815{ 2816 2817 return (EBADF); 2818} 2819 2820static int 2821badfo_stat(struct file *fp, 
struct stat *sb, struct ucred *active_cred, struct thread *td) 2822{ 2823 2824 return (EBADF); 2825} 2826 2827static int 2828badfo_close(struct file *fp, struct thread *td) 2829{ 2830 2831 return (EBADF); 2832} 2833 2834struct fileops badfileops = { 2835 .fo_read = badfo_readwrite, 2836 .fo_write = badfo_readwrite, 2837 .fo_ioctl = badfo_ioctl, 2838 .fo_poll = badfo_poll, 2839 .fo_kqfilter = badfo_kqfilter, 2840 .fo_stat = badfo_stat, 2841 .fo_close = badfo_close, 2842}; 2843 2844 2845/*-------------------------------------------------------------------*/ 2846 2847/* 2848 * File Descriptor pseudo-device driver (/dev/fd/). 2849 * 2850 * Opening minor device N dup()s the file (if any) connected to file 2851 * descriptor N belonging to the calling process. Note that this driver 2852 * consists of only the ``open()'' routine, because all subsequent 2853 * references to this file will be direct to the other driver. 2854 * 2855 * XXX: we could give this one a cloning event handler if necessary. 2856 */ 2857 2858/* ARGSUSED */ 2859static int 2860fdopen(struct cdev *dev, int mode, int type, struct thread *td) 2861{ 2862 2863 /* 2864 * XXX Kludge: set curthread->td_dupfd to contain the value of the 2865 * the file descriptor being sought for duplication. The error 2866 * return ensures that the vnode for this device will be released 2867 * by vn_open. Open will detect this special error and take the 2868 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN 2869 * will simply report the error. 
2870 */ 2871 td->td_dupfd = dev2unit(dev); 2872 return (ENODEV); 2873} 2874 2875static struct cdevsw fildesc_cdevsw = { 2876 .d_version = D_VERSION, 2877 .d_flags = D_NEEDGIANT, 2878 .d_open = fdopen, 2879 .d_name = "FD", 2880}; 2881 2882static void 2883fildesc_drvinit(void *unused) 2884{ 2885 struct cdev *dev; 2886 2887 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0"); 2888 make_dev_alias(dev, "stdin"); 2889 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1"); 2890 make_dev_alias(dev, "stdout"); 2891 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2"); 2892 make_dev_alias(dev, "stderr"); 2893} 2894 2895SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL) 2896