kern_descrip.c revision 194881
118334Speter/*-
290075Sobrien * Copyright (c) 1982, 1986, 1989, 1991, 1993
3132718Skan *	The Regents of the University of California.  All rights reserved.
418334Speter * (c) UNIX System Laboratories, Inc.
590075Sobrien * All or some portions of this file are derived from material licensed
618334Speter * to the University of California by American Telephone and Telegraph
790075Sobrien * Co. or Unix System Laboratories, Inc. and are reproduced herein with
890075Sobrien * the permission of UNIX System Laboratories, Inc.
990075Sobrien *
1090075Sobrien * Redistribution and use in source and binary forms, with or without
1118334Speter * modification, are permitted provided that the following conditions
1290075Sobrien * are met:
1390075Sobrien * 1. Redistributions of source code must retain the above copyright
1490075Sobrien *    notice, this list of conditions and the following disclaimer.
1590075Sobrien * 2. Redistributions in binary form must reproduce the above copyright
1618334Speter *    notice, this list of conditions and the following disclaimer in the
1718334Speter *    documentation and/or other materials provided with the distribution.
1890075Sobrien * 4. Neither the name of the University nor the names of its contributors
1990075Sobrien *    may be used to endorse or promote products derived from this software
2090075Sobrien *    without specific prior written permission.
2118334Speter *
2218334Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2318334Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2418334Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2518334Speter * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2618334Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2718334Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2818334Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2918334Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3018334Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3118334Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3218334Speter * SUCH DAMAGE.
3350397Sobrien *
34132718Skan *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
35132718Skan */
3690075Sobrien
3718334Speter#include <sys/cdefs.h>
3818334Speter__FBSDID("$FreeBSD: head/sys/kern/kern_descrip.c 194881 2009-06-24 18:44:38Z lulf $");
3990075Sobrien
4018334Speter#include "opt_compat.h"
4118334Speter#include "opt_ddb.h"
4250397Sobrien#include "opt_ktrace.h"
4350397Sobrien
4452284Sobrien#include <sys/param.h>
4590075Sobrien#include <sys/systm.h>
4690075Sobrien
4718334Speter#include <sys/conf.h>
4818334Speter#include <sys/domain.h>
4918334Speter#include <sys/fcntl.h>
5018334Speter#include <sys/file.h>
5118334Speter#include <sys/filedesc.h>
52132718Skan#include <sys/filio.h>
53132718Skan#include <sys/jail.h>
5490075Sobrien#include <sys/kernel.h>
55132718Skan#include <sys/limits.h>
56132718Skan#include <sys/lock.h>
57132718Skan#include <sys/malloc.h>
58132718Skan#include <sys/mount.h>
59132718Skan#include <sys/mqueue.h>
60132718Skan#include <sys/mutex.h>
61132718Skan#include <sys/namei.h>
62132718Skan#include <sys/priv.h>
63132718Skan#include <sys/proc.h>
64132718Skan#include <sys/protosw.h>
65132718Skan#include <sys/resourcevar.h>
66132718Skan#include <sys/signalvar.h>
67132718Skan#include <sys/socketvar.h>
68132718Skan#include <sys/stat.h>
69132718Skan#include <sys/sx.h>
70132718Skan#include <sys/syscallsubr.h>
71132718Skan#include <sys/sysctl.h>
72132718Skan#include <sys/sysproto.h>
73132718Skan#include <sys/tty.h>
74132718Skan#include <sys/unistd.h>
75132718Skan#include <sys/user.h>
76132718Skan#include <sys/vnode.h>
77132718Skan#ifdef KTRACE
78132718Skan#include <sys/ktrace.h>
79132718Skan#endif
80132718Skan
81132718Skan#include <security/audit/audit.h>
82132718Skan
83132718Skan#include <vm/uma.h>
84132718Skan
85132718Skan#include <ddb/ddb.h>
86132718Skan
87132718Skanstatic MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table");
8818334Speterstatic MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader",
8918334Speter		     "file desc to leader structures");
9018334Speterstatic MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
9118334Speter
9218334Speterstatic uma_zone_t file_zone;
93132718Skan
9418334Speter
9518334Speter/* Flags for do_dup() */
9618334Speter#define DUP_FIXED	0x1	/* Force fixed allocation */
9790075Sobrien#define DUP_FCNTL	0x2	/* fcntl()-style errors */
9852284Sobrien
9952284Sobrienstatic int do_dup(struct thread *td, int flags, int old, int new,
10018334Speter    register_t *retval);
10190075Sobrienstatic int	fd_first_free(struct filedesc *, int, int);
10218334Speterstatic int	fd_last_used(struct filedesc *, int, int);
10318334Speterstatic void	fdgrowtable(struct filedesc *, int);
104117395Skanstatic void	fdunused(struct filedesc *fdp, int fd);
10518334Speterstatic void	fdused(struct filedesc *fdp, int fd);
10618334Speter
/*
 * A process is initially started out with NDFILE descriptors stored within
 * this structure, selected to be enough for typical applications based on
 * the historical limit of 20 open files (and the usage of descriptors by
 * shells).  If these descriptors are exhausted, a larger descriptor table
 * may be allocated, up to a process' resource limit; the internal arrays
 * are then unused.
 *
 * The remaining macros describe the in-use bitmap, which is an array of
 * NDSLOTTYPE words holding one bit per descriptor.
 */
#define	NDFILE		20
/* Size in bytes of one bitmap word. */
#define	NDSLOTSIZE	sizeof(NDSLOTTYPE)
/* Number of descriptor bits carried by one bitmap word. */
#define	NDENTRIES	(NDSLOTSIZE * __CHAR_BIT)
/* Index of the bitmap word that holds descriptor x. */
#define	NDSLOT(x)	((x) / NDENTRIES)
/* Mask selecting descriptor x's bit inside its bitmap word. */
#define	NDBIT(x)	((NDSLOTTYPE)1 << ((x) % NDENTRIES))
/* Number of bitmap words needed to cover x descriptors. */
#define	NDSLOTS(x)	(((x) + NDENTRIES - 1) / NDENTRIES)

/*
 * Storage required per open file descriptor: one file pointer plus one
 * byte of per-descriptor flags.
 */
#define	OFILESIZE (sizeof(struct file *) + sizeof(char))
12618334Speter
12718334Speter/*
12818334Speter * Storage to hold unused ofiles that need to be reclaimed.
12918334Speter */
13018334Speterstruct freetable {
13118334Speter	struct file	**ft_table;
13218334Speter	SLIST_ENTRY(freetable) ft_next;
13352284Sobrien};
13418334Speter
13518334Speter/*
13618334Speter * Basic allocation of descriptors:
13752284Sobrien * one of the above, plus arrays for NDFILE descriptors.
13818334Speter */
13918334Speterstruct filedesc0 {
14018334Speter	struct	filedesc fd_fd;
14152284Sobrien	/*
14218334Speter	 * ofiles which need to be reclaimed on free.
14318334Speter	 */
14418334Speter	SLIST_HEAD(,freetable) fd_free;
14518334Speter	/*
14618334Speter	 * These arrays are used when the number of open files is
14718334Speter	 * <= NDFILE, and are then pointed to by the pointers above.
14818334Speter	 */
14918334Speter	struct	file *fd_dfiles[NDFILE];
15018334Speter	char	fd_dfileflags[NDFILE];
15196263Sobrien	NDSLOTTYPE fd_dmap[NDSLOTS(NDFILE)];
15296263Sobrien};
15396263Sobrien
15496263Sobrien/*
15596263Sobrien * Descriptor management.
15618334Speter */
15718334Spetervolatile int openfiles;			/* actual number of open files */
15818334Speterstruct mtx sigio_lock;		/* mtx to protect pointers to sigio */
15918334Spetervoid	(*mq_fdclose)(struct thread *td, int fd, struct file *fp);
16018334Speter
16118334Speter/* A mutex to protect the association between a proc and filedesc. */
16218334Speterstatic struct mtx	fdesc_mtx;
16318334Speter
16418334Speter/*
16518334Speter * Find the first zero bit in the given bitmap, starting at low and not
16618334Speter * exceeding size - 1.
16752284Sobrien */
16852284Sobrienstatic int
16918334Speterfd_first_free(struct filedesc *fdp, int low, int size)
17018334Speter{
17118334Speter	NDSLOTTYPE *map = fdp->fd_map;
17218334Speter	NDSLOTTYPE mask;
17318334Speter	int off, maxoff;
17418334Speter
17518334Speter	if (low >= size)
176117395Skan		return (low);
177117395Skan
178117395Skan	off = NDSLOT(low);
179117395Skan	if (low % NDENTRIES) {
180132718Skan		mask = ~(~(NDSLOTTYPE)0 >> (NDENTRIES - (low % NDENTRIES)));
181117395Skan		if ((mask &= ~map[off]) != 0UL)
182117395Skan			return (off * NDENTRIES + ffsl(mask) - 1);
183117395Skan		++off;
184117395Skan	}
185117395Skan	for (maxoff = NDSLOTS(size); off < maxoff; ++off)
186117395Skan		if (map[off] != ~0UL)
187117395Skan			return (off * NDENTRIES + ffsl(~map[off]) - 1);
188117395Skan	return (size);
189117395Skan}
190117395Skan
191117395Skan/*
192117395Skan * Find the highest non-zero bit in the given bitmap, starting at low and
193117395Skan * not exceeding size - 1.
194117395Skan */
195117395Skanstatic int
196117395Skanfd_last_used(struct filedesc *fdp, int low, int size)
19718334Speter{
19818334Speter	NDSLOTTYPE *map = fdp->fd_map;
19918334Speter	NDSLOTTYPE mask;
20018334Speter	int off, minoff;
201132718Skan
20218334Speter	if (low >= size)
203132718Skan		return (-1);
20452750Sobrien
20518334Speter	off = NDSLOT(size);
20618334Speter	if (size % NDENTRIES) {
20718334Speter		mask = ~(~(NDSLOTTYPE)0 << (size % NDENTRIES));
20818334Speter		if ((mask &= map[off]) != 0)
20918334Speter			return (off * NDENTRIES + flsl(mask) - 1);
21018334Speter		--off;
21118334Speter	}
21218334Speter	for (minoff = NDSLOT(low); off >= minoff; --off)
21318334Speter		if (map[off] != 0)
21418334Speter			return (off * NDENTRIES + flsl(map[off]) - 1);
21518334Speter	return (low - 1);
216132718Skan}
21718334Speter
21890075Sobrienstatic int
21990075Sobrienfdisused(struct filedesc *fdp, int fd)
22018334Speter{
22118334Speter        KASSERT(fd >= 0 && fd < fdp->fd_nfiles,
22218334Speter            ("file descriptor %d out of range (0, %d)", fd, fdp->fd_nfiles));
22318334Speter	return ((fdp->fd_map[NDSLOT(fd)] & NDBIT(fd)) != 0);
22418334Speter}
22518334Speter
22618334Speter/*
22718334Speter * Mark a file descriptor as used.
22818334Speter */
22918334Speterstatic void
23018334Speterfdused(struct filedesc *fdp, int fd)
23118334Speter{
23250397Sobrien
23390075Sobrien	FILEDESC_XLOCK_ASSERT(fdp);
23418334Speter	KASSERT(!fdisused(fdp, fd),
23518334Speter	    ("fd already used"));
23618334Speter
23718334Speter	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
238117395Skan	if (fd > fdp->fd_lastfile)
23918334Speter		fdp->fd_lastfile = fd;
240117395Skan	if (fd == fdp->fd_freefile)
24118334Speter		fdp->fd_freefile = fd_first_free(fdp, fd, fdp->fd_nfiles);
24218334Speter}
24318334Speter
24418334Speter/*
24518334Speter * Mark a file descriptor as unused.
24618334Speter */
24718334Speterstatic void
24818334Speterfdunused(struct filedesc *fdp, int fd)
24918334Speter{
25018334Speter
25118334Speter	FILEDESC_XLOCK_ASSERT(fdp);
25218334Speter	KASSERT(fdisused(fdp, fd),
25318334Speter	    ("fd is already unused"));
25418334Speter	KASSERT(fdp->fd_ofiles[fd] == NULL,
25518334Speter	    ("fd is still in use"));
25618334Speter
25718334Speter	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
25818334Speter	if (fd < fdp->fd_freefile)
25918334Speter		fdp->fd_freefile = fd;
26018334Speter	if (fd == fdp->fd_lastfile)
26118334Speter		fdp->fd_lastfile = fd_last_used(fdp, 0, fd);
26218334Speter}
26318334Speter
26418334Speter/*
26518334Speter * System calls on descriptors.
26618334Speter */
26718334Speter#ifndef _SYS_SYSPROTO_H_
26818334Speterstruct getdtablesize_args {
26918334Speter	int	dummy;
27018334Speter};
27118334Speter#endif
27218334Speter/* ARGSUSED */
27318334Speterint
27418334Spetergetdtablesize(struct thread *td, struct getdtablesize_args *uap)
27518334Speter{
27618334Speter	struct proc *p = td->td_proc;
27718334Speter
27818334Speter	PROC_LOCK(p);
27918334Speter	td->td_retval[0] =
28018334Speter	    min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
28118334Speter	PROC_UNLOCK(p);
28218334Speter	return (0);
28318334Speter}
28418334Speter
28518334Speter/*
286132718Skan * Duplicate a file descriptor to a particular value.
287132718Skan *
288132718Skan * Note: keep in mind that a potential race condition exists when closing
289132718Skan * descriptors from a shared descriptor table (via rfork).
290132718Skan */
29118334Speter#ifndef _SYS_SYSPROTO_H_
29218334Speterstruct dup2_args {
29318334Speter	u_int	from;
29418334Speter	u_int	to;
295132718Skan};
296132718Skan#endif
29718334Speter/* ARGSUSED */
29818334Speterint
29918334Speterdup2(struct thread *td, struct dup2_args *uap)
300132718Skan{
30118334Speter
302132718Skan	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
303132718Skan		    td->td_retval));
304132718Skan}
305132718Skan
30618334Speter/*
30718334Speter * Duplicate a file descriptor.
30850397Sobrien */
30950397Sobrien#ifndef _SYS_SYSPROTO_H_
31050397Sobrienstruct dup_args {
311132718Skan	u_int	fd;
312132718Skan};
313132718Skan#endif
314132718Skan/* ARGSUSED */
315132718Skanint
31650397Sobriendup(struct thread *td, struct dup_args *uap)
31718334Speter{
31818334Speter
31918334Speter	return (do_dup(td, 0, (int)uap->fd, 0, td->td_retval));
32018334Speter}
32118334Speter
32218334Speter/*
32318334Speter * The file control system call.
32418334Speter */
32518334Speter#ifndef _SYS_SYSPROTO_H_
32618334Speterstruct fcntl_args {
32718334Speter	int	fd;
32818334Speter	int	cmd;
32918334Speter	long	arg;
33018334Speter};
33152284Sobrien#endif
33252284Sobrien/* ARGSUSED */
33352284Sobrienint
33452284Sobrienfcntl(struct thread *td, struct fcntl_args *uap)
335132718Skan{
336132718Skan	struct flock fl;
337132718Skan	struct oflock ofl;
33852284Sobrien	intptr_t arg;
33918334Speter	int error;
34018334Speter	int cmd;
34118334Speter
34218334Speter	error = 0;
34318334Speter	cmd = uap->cmd;
34418334Speter	switch (uap->cmd) {
34518334Speter	case F_OGETLK:
34618334Speter	case F_OSETLK:
34718334Speter	case F_OSETLKW:
34818334Speter		/*
34918334Speter		 * Convert old flock structure to new.
35018334Speter		 */
35118334Speter		error = copyin((void *)(intptr_t)uap->arg, &ofl, sizeof(ofl));
35218334Speter		fl.l_start = ofl.l_start;
35318334Speter		fl.l_len = ofl.l_len;
35418334Speter		fl.l_pid = ofl.l_pid;
35518334Speter		fl.l_type = ofl.l_type;
35618334Speter		fl.l_whence = ofl.l_whence;
35718334Speter		fl.l_sysid = 0;
35818334Speter
35918334Speter		switch (uap->cmd) {
36018334Speter		case F_OGETLK:
36118334Speter		    cmd = F_GETLK;
36218334Speter		    break;
36318334Speter		case F_OSETLK:
36418334Speter		    cmd = F_SETLK;
36518334Speter		    break;
36618334Speter		case F_OSETLKW:
36718334Speter		    cmd = F_SETLKW;
36818334Speter		    break;
36918334Speter		}
37018334Speter		arg = (intptr_t)&fl;
37118334Speter		break;
37218334Speter        case F_GETLK:
37318334Speter        case F_SETLK:
37418334Speter        case F_SETLKW:
37518334Speter	case F_SETLK_REMOTE:
37618334Speter                error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
37718334Speter                arg = (intptr_t)&fl;
37818334Speter                break;
37918334Speter	default:
38018334Speter		arg = uap->arg;
38118334Speter		break;
38218334Speter	}
38318334Speter	if (error)
38418334Speter		return (error);
38518334Speter	error = kern_fcntl(td, uap->fd, cmd, arg);
38618334Speter	if (error)
38790075Sobrien		return (error);
388132718Skan	if (uap->cmd == F_OGETLK) {
38990075Sobrien		ofl.l_start = fl.l_start;
39018334Speter		ofl.l_len = fl.l_len;
39118334Speter		ofl.l_pid = fl.l_pid;
39218334Speter		ofl.l_type = fl.l_type;
39318334Speter		ofl.l_whence = fl.l_whence;
39418334Speter		error = copyout(&ofl, (void *)(intptr_t)uap->arg, sizeof(ofl));
39518334Speter	} else if (uap->cmd == F_GETLK) {
39618334Speter		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
39718334Speter	}
39818334Speter	return (error);
39918334Speter}
40018334Speter
40118334Speterstatic inline struct file *
40218334Speterfdtofp(int fd, struct filedesc *fdp)
40318334Speter{
40418334Speter	struct file *fp;
40518334Speter
40618334Speter	FILEDESC_LOCK_ASSERT(fdp);
40718334Speter	if ((unsigned)fd >= fdp->fd_nfiles ||
40818334Speter	    (fp = fdp->fd_ofiles[fd]) == NULL)
40918334Speter		return (NULL);
41018334Speter	return (fp);
41118334Speter}
41218334Speter
413132718Skanint
41418334Speterkern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
41518334Speter{
41618334Speter	struct filedesc *fdp;
41718334Speter	struct flock *flp;
41818334Speter	struct file *fp;
41918334Speter	struct proc *p;
42018334Speter	char *pop;
42118334Speter	struct vnode *vp;
42218334Speter	int error, flg, tmp;
423132718Skan	int vfslocked;
424132718Skan
42518334Speter	vfslocked = 0;
42618334Speter	error = 0;
42718334Speter	flg = F_POSIX;
42890075Sobrien	p = td->td_proc;
42918334Speter	fdp = p->p_fd;
43018334Speter
43118334Speter	switch (cmd) {
43218334Speter	case F_DUPFD:
43318334Speter		tmp = arg;
43418334Speter		error = do_dup(td, DUP_FCNTL, fd, tmp, td->td_retval);
43518334Speter		break;
43618334Speter
43718334Speter	case F_DUP2FD:
438132718Skan		tmp = arg;
439132718Skan		error = do_dup(td, DUP_FIXED, fd, tmp, td->td_retval);
44018334Speter		break;
44118334Speter
44218334Speter	case F_GETFD:
44390075Sobrien		FILEDESC_SLOCK(fdp);
44418334Speter		if ((fp = fdtofp(fd, fdp)) == NULL) {
44518334Speter			FILEDESC_SUNLOCK(fdp);
44618334Speter			error = EBADF;
44718334Speter			break;
44818334Speter		}
44918334Speter		pop = &fdp->fd_ofileflags[fd];
45018334Speter		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
45190075Sobrien		FILEDESC_SUNLOCK(fdp);
45290075Sobrien		break;
45318334Speter
45450397Sobrien	case F_SETFD:
45518334Speter		FILEDESC_XLOCK(fdp);
45618334Speter		if ((fp = fdtofp(fd, fdp)) == NULL) {
45718334Speter			FILEDESC_XUNLOCK(fdp);
45818334Speter			error = EBADF;
45918334Speter			break;
46018334Speter		}
46118334Speter		pop = &fdp->fd_ofileflags[fd];
46218334Speter		*pop = (*pop &~ UF_EXCLOSE) |
46318334Speter		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
46418334Speter		FILEDESC_XUNLOCK(fdp);
46518334Speter		break;
46618334Speter
46718334Speter	case F_GETFL:
468132718Skan		FILEDESC_SLOCK(fdp);
46918334Speter		if ((fp = fdtofp(fd, fdp)) == NULL) {
47090075Sobrien			FILEDESC_SUNLOCK(fdp);
47190075Sobrien			error = EBADF;
47218334Speter			break;
47318334Speter		}
47418334Speter		td->td_retval[0] = OFLAGS(fp->f_flag);
47518334Speter		FILEDESC_SUNLOCK(fdp);
47650397Sobrien		break;
47750397Sobrien
47818334Speter	case F_SETFL:
47918334Speter		FILEDESC_SLOCK(fdp);
48090075Sobrien		if ((fp = fdtofp(fd, fdp)) == NULL) {
48190075Sobrien			FILEDESC_SUNLOCK(fdp);
48290075Sobrien			error = EBADF;
48390075Sobrien			break;
48490075Sobrien		}
48590075Sobrien		fhold(fp);
48690075Sobrien		FILEDESC_SUNLOCK(fdp);
48790075Sobrien		do {
48890075Sobrien			tmp = flg = fp->f_flag;
48990075Sobrien			tmp &= ~FCNTLFLAGS;
490132718Skan			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
491132718Skan		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
492132718Skan		tmp = fp->f_flag & FNONBLOCK;
49318334Speter		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
494132718Skan		if (error) {
495117395Skan			fdrop(fp, td);
496132718Skan			break;
497117395Skan		}
49818334Speter		tmp = fp->f_flag & FASYNC;
49918334Speter		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
50018334Speter		if (error == 0) {
50118334Speter			fdrop(fp, td);
50218334Speter			break;
50318334Speter		}
504132718Skan		atomic_clear_int(&fp->f_flag, FNONBLOCK);
505132718Skan		tmp = 0;
50618334Speter		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
50718334Speter		fdrop(fp, td);
50818334Speter		break;
50952284Sobrien
51018334Speter	case F_GETOWN:
51118334Speter		FILEDESC_SLOCK(fdp);
51218334Speter		if ((fp = fdtofp(fd, fdp)) == NULL) {
51318334Speter			FILEDESC_SUNLOCK(fdp);
51450397Sobrien			error = EBADF;
51518334Speter			break;
51618334Speter		}
51718334Speter		fhold(fp);
51818334Speter		FILEDESC_SUNLOCK(fdp);
51918334Speter		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
52090075Sobrien		if (error == 0)
52118334Speter			td->td_retval[0] = tmp;
52218334Speter		fdrop(fp, td);
52318334Speter		break;
52418334Speter
52518334Speter	case F_SETOWN:
52618334Speter		FILEDESC_SLOCK(fdp);
52718334Speter		if ((fp = fdtofp(fd, fdp)) == NULL) {
52818334Speter			FILEDESC_SUNLOCK(fdp);
529132718Skan			error = EBADF;
530132718Skan			break;
531132718Skan		}
532132718Skan		fhold(fp);
53318334Speter		FILEDESC_SUNLOCK(fdp);
534132718Skan		tmp = arg;
53518334Speter		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
53618334Speter		fdrop(fp, td);
53718334Speter		break;
538132718Skan
53918334Speter	case F_SETLK_REMOTE:
54018334Speter		error = priv_check(td, PRIV_NFS_LOCKD);
54118334Speter		if (error)
54218334Speter			return (error);
54318334Speter		flg = F_REMOTE;
54418334Speter		goto do_setlk;
54590075Sobrien
54690075Sobrien	case F_SETLKW:
54718334Speter		flg |= F_WAIT;
54818334Speter		/* FALLTHROUGH F_SETLK */
54918334Speter
55018334Speter	case F_SETLK:
551132718Skan	do_setlk:
552132718Skan		FILEDESC_SLOCK(fdp);
55318334Speter		if ((fp = fdtofp(fd, fdp)) == NULL) {
55418334Speter			FILEDESC_SUNLOCK(fdp);
55518334Speter			error = EBADF;
55690075Sobrien			break;
55718334Speter		}
55818334Speter		if (fp->f_type != DTYPE_VNODE) {
55990075Sobrien			FILEDESC_SUNLOCK(fdp);
56090075Sobrien			error = EBADF;
56190075Sobrien			break;
56290075Sobrien		}
56390075Sobrien		flp = (struct flock *)arg;
56490075Sobrien		if (flp->l_whence == SEEK_CUR) {
56590075Sobrien			if (fp->f_offset < 0 ||
56690075Sobrien			    (flp->l_start > 0 &&
56790075Sobrien			     fp->f_offset > OFF_MAX - flp->l_start)) {
56890075Sobrien				FILEDESC_SUNLOCK(fdp);
56990075Sobrien				error = EOVERFLOW;
57090075Sobrien				break;
57190075Sobrien			}
57290075Sobrien			flp->l_start += fp->f_offset;
57390075Sobrien		}
57490075Sobrien
57590075Sobrien		/*
57690075Sobrien		 * VOP_ADVLOCK() may block.
57790075Sobrien		 */
57818334Speter		fhold(fp);
57918334Speter		FILEDESC_SUNLOCK(fdp);
58018334Speter		vp = fp->f_vnode;
58118334Speter		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
582132718Skan		switch (flp->l_type) {
583132718Skan		case F_RDLCK:
584132718Skan			if ((fp->f_flag & FREAD) == 0) {
58518334Speter				error = EBADF;
586132718Skan				break;
587132718Skan			}
588132718Skan			PROC_LOCK(p->p_leader);
589132718Skan			p->p_leader->p_flag |= P_ADVLOCK;
590132718Skan			PROC_UNLOCK(p->p_leader);
59118334Speter			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
59250397Sobrien			    flp, flg);
593132718Skan			break;
594132718Skan		case F_WRLCK:
595132718Skan			if ((fp->f_flag & FWRITE) == 0) {
596132718Skan				error = EBADF;
597132718Skan				break;
598132718Skan			}
59950397Sobrien			PROC_LOCK(p->p_leader);
60050397Sobrien			p->p_leader->p_flag |= P_ADVLOCK;
60118334Speter			PROC_UNLOCK(p->p_leader);
60218334Speter			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
60318334Speter			    flp, flg);
60418334Speter			break;
60518334Speter		case F_UNLCK:
606117395Skan			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
607117395Skan			    flp, flg);
608117395Skan			break;
609117395Skan		case F_UNLCKSYS:
610117395Skan			/*
61118334Speter			 * Temporary api for testing remote lock
61218334Speter			 * infrastructure.
613132718Skan			 */
61418334Speter			if (flg != F_REMOTE) {
61518334Speter				error = EINVAL;
61618334Speter				break;
617117395Skan			}
618117395Skan			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
61918334Speter			    F_UNLCKSYS, flp, flg);
62018334Speter			break;
62118334Speter		default:
622132718Skan			error = EINVAL;
62318334Speter			break;
62418334Speter		}
62518334Speter		VFS_UNLOCK_GIANT(vfslocked);
62618334Speter		vfslocked = 0;
62718334Speter		/* Check for race with close */
62818334Speter		FILEDESC_SLOCK(fdp);
62918334Speter		if ((unsigned) fd >= fdp->fd_nfiles ||
63018334Speter		    fp != fdp->fd_ofiles[fd]) {
631132718Skan			FILEDESC_SUNLOCK(fdp);
632132718Skan			flp->l_whence = SEEK_SET;
633132718Skan			flp->l_start = 0;
634132718Skan			flp->l_len = 0;
635132718Skan			flp->l_type = F_UNLCK;
636132718Skan			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
637132718Skan			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
638132718Skan					   F_UNLCK, flp, F_POSIX);
639132718Skan			VFS_UNLOCK_GIANT(vfslocked);
640132718Skan			vfslocked = 0;
641132718Skan		} else
642132718Skan			FILEDESC_SUNLOCK(fdp);
643132718Skan		fdrop(fp, td);
644132718Skan		break;
645132718Skan
646132718Skan	case F_GETLK:
647132718Skan		FILEDESC_SLOCK(fdp);
648132718Skan		if ((fp = fdtofp(fd, fdp)) == NULL) {
649132718Skan			FILEDESC_SUNLOCK(fdp);
650146895Skan			error = EBADF;
651132718Skan			break;
652132718Skan		}
653132718Skan		if (fp->f_type != DTYPE_VNODE) {
654132718Skan			FILEDESC_SUNLOCK(fdp);
655132718Skan			error = EBADF;
656132718Skan			break;
657132718Skan		}
658132718Skan		flp = (struct flock *)arg;
659132718Skan		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
660132718Skan		    flp->l_type != F_UNLCK) {
661132718Skan			FILEDESC_SUNLOCK(fdp);
662132718Skan			error = EINVAL;
663132718Skan			break;
664132718Skan		}
665132718Skan		if (flp->l_whence == SEEK_CUR) {
666132718Skan			if ((flp->l_start > 0 &&
667132718Skan			    fp->f_offset > OFF_MAX - flp->l_start) ||
668132718Skan			    (flp->l_start < 0 &&
669132718Skan			     fp->f_offset < OFF_MIN - flp->l_start)) {
670132718Skan				FILEDESC_SUNLOCK(fdp);
671132718Skan				error = EOVERFLOW;
672132718Skan				break;
673132718Skan			}
674132718Skan			flp->l_start += fp->f_offset;
675132718Skan		}
676132718Skan		/*
677132718Skan		 * VOP_ADVLOCK() may block.
678132718Skan		 */
679132718Skan		fhold(fp);
680132718Skan		FILEDESC_SUNLOCK(fdp);
681132718Skan		vp = fp->f_vnode;
682132718Skan		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
683132718Skan		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
684132718Skan		    F_POSIX);
685132718Skan		VFS_UNLOCK_GIANT(vfslocked);
686132718Skan		vfslocked = 0;
687132718Skan		fdrop(fp, td);
688132718Skan		break;
689132718Skan	default:
690132718Skan		error = EINVAL;
691132718Skan		break;
692132718Skan	}
693132718Skan	VFS_UNLOCK_GIANT(vfslocked);
694132718Skan	return (error);
695132718Skan}
696132718Skan
697132718Skan/*
698132718Skan * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
699132718Skan */
700132718Skanstatic int
701132718Skando_dup(struct thread *td, int flags, int old, int new,
702132718Skan    register_t *retval)
703132718Skan{
704132718Skan	struct filedesc *fdp;
705132718Skan	struct proc *p;
706132718Skan	struct file *fp;
707132718Skan	struct file *delfp;
708132718Skan	int error, holdleaders, maxfd;
709132718Skan
710132718Skan	p = td->td_proc;
711132718Skan	fdp = p->p_fd;
712132718Skan
713132718Skan	/*
714132718Skan	 * Verify we have a valid descriptor to dup from and possibly to
715132718Skan	 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
716132718Skan	 * return EINVAL when the new descriptor is out of bounds.
717132718Skan	 */
718132718Skan	if (old < 0)
719132718Skan		return (EBADF);
720132718Skan	if (new < 0)
721132718Skan		return (flags & DUP_FCNTL ? EINVAL : EBADF);
722132718Skan	PROC_LOCK(p);
723132718Skan	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
724132718Skan	PROC_UNLOCK(p);
725132718Skan	if (new >= maxfd)
726132718Skan		return (flags & DUP_FCNTL ? EINVAL : EMFILE);
727132718Skan
728132718Skan	FILEDESC_XLOCK(fdp);
729132718Skan	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
730132718Skan		FILEDESC_XUNLOCK(fdp);
731132718Skan		return (EBADF);
732132718Skan	}
733132718Skan	if (flags & DUP_FIXED && old == new) {
734132718Skan		*retval = new;
735132718Skan		FILEDESC_XUNLOCK(fdp);
736132718Skan		return (0);
737132718Skan	}
738132718Skan	fp = fdp->fd_ofiles[old];
739132718Skan	fhold(fp);
740132718Skan
741132718Skan	/*
742132718Skan	 * If the caller specified a file descriptor, make sure the file
743132718Skan	 * table is large enough to hold it, and grab it.  Otherwise, just
744132718Skan	 * allocate a new descriptor the usual way.  Since the filedesc
745132718Skan	 * lock may be temporarily dropped in the process, we have to look
746132718Skan	 * out for a race.
747132718Skan	 */
748132718Skan	if (flags & DUP_FIXED) {
749132718Skan		if (new >= fdp->fd_nfiles)
750132718Skan			fdgrowtable(fdp, new + 1);
751132718Skan		if (fdp->fd_ofiles[new] == NULL)
752132718Skan			fdused(fdp, new);
753132718Skan	} else {
754132718Skan		if ((error = fdalloc(td, new, &new)) != 0) {
755132718Skan			FILEDESC_XUNLOCK(fdp);
756132718Skan			fdrop(fp, td);
757132718Skan			return (error);
758132718Skan		}
759132718Skan	}
760132718Skan
761132718Skan	/*
762132718Skan	 * If the old file changed out from under us then treat it as a
763132718Skan	 * bad file descriptor.  Userland should do its own locking to
764132718Skan	 * avoid this case.
765132718Skan	 */
766132718Skan	if (fdp->fd_ofiles[old] != fp) {
767132718Skan		/* we've allocated a descriptor which we won't use */
768132718Skan		if (fdp->fd_ofiles[new] == NULL)
769132718Skan			fdunused(fdp, new);
770132718Skan		FILEDESC_XUNLOCK(fdp);
771132718Skan		fdrop(fp, td);
772132718Skan		return (EBADF);
773132718Skan	}
774132718Skan	KASSERT(old != new,
775132718Skan	    ("new fd is same as old"));
776132718Skan
777132718Skan	/*
778132718Skan	 * Save info on the descriptor being overwritten.  We cannot close
779132718Skan	 * it without introducing an ownership race for the slot, since we
780132718Skan	 * need to drop the filedesc lock to call closef().
781132718Skan	 *
782132718Skan	 * XXX this duplicates parts of close().
783132718Skan	 */
784132718Skan	delfp = fdp->fd_ofiles[new];
785132718Skan	holdleaders = 0;
786132718Skan	if (delfp != NULL) {
787132718Skan		if (td->td_proc->p_fdtol != NULL) {
788132718Skan			/*
789132718Skan			 * Ask fdfree() to sleep to ensure that all relevant
790132718Skan			 * process leaders can be traversed in closef().
791132718Skan			 */
792132718Skan			fdp->fd_holdleaderscount++;
793132718Skan			holdleaders = 1;
794132718Skan		}
795132718Skan	}
796132718Skan
797132718Skan	/*
798132718Skan	 * Duplicate the source descriptor
799132718Skan	 */
800132718Skan	fdp->fd_ofiles[new] = fp;
801132718Skan	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
802132718Skan	if (new > fdp->fd_lastfile)
803132718Skan		fdp->fd_lastfile = new;
804132718Skan	*retval = new;
805132718Skan
806132718Skan	/*
807132718Skan	 * If we dup'd over a valid file, we now own the reference to it
808132718Skan	 * and must dispose of it using closef() semantics (as if a
809132718Skan	 * close() were performed on it).
810132718Skan	 *
811132718Skan	 * XXX this duplicates parts of close().
812132718Skan	 */
813132718Skan	if (delfp != NULL) {
814132718Skan		knote_fdclose(td, new);
815132718Skan		if (delfp->f_type == DTYPE_MQUEUE)
81618334Speter			mq_fdclose(td, new, delfp);
81718334Speter		FILEDESC_XUNLOCK(fdp);
81818334Speter		(void) closef(delfp, td);
819132718Skan		if (holdleaders) {
82018334Speter			FILEDESC_XLOCK(fdp);
82118334Speter			fdp->fd_holdleaderscount--;
82218334Speter			if (fdp->fd_holdleaderscount == 0 &&
82318334Speter			    fdp->fd_holdleaderswakeup != 0) {
824132718Skan				fdp->fd_holdleaderswakeup = 0;
82518334Speter				wakeup(&fdp->fd_holdleaderscount);
82618334Speter			}
82718334Speter			FILEDESC_XUNLOCK(fdp);
82818334Speter		}
82918334Speter	} else {
830132718Skan		FILEDESC_XUNLOCK(fdp);
83118334Speter	}
832132718Skan	return (0);
833132718Skan}
834132718Skan
835132718Skan/*
836132718Skan * If sigio is on the list associated with a process or process group,
837132718Skan * disable signalling from the device, remove sigio from the list and
838132718Skan * free sigio.
839132718Skan */
840132718Skanvoid
841132718Skanfunsetown(struct sigio **sigiop)
842132718Skan{
843132718Skan	struct sigio *sigio;
844132718Skan
845132718Skan	SIGIO_LOCK();
846132718Skan	sigio = *sigiop;
84718334Speter	if (sigio == NULL) {
84818334Speter		SIGIO_UNLOCK();
84918334Speter		return;
85018334Speter	}
85118334Speter	*(sigio->sio_myref) = NULL;
85218334Speter	if ((sigio)->sio_pgid < 0) {
85318334Speter		struct pgrp *pg = (sigio)->sio_pgrp;
85418334Speter		PGRP_LOCK(pg);
85518334Speter		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
85618334Speter			     sigio, sio_pgsigio);
85718334Speter		PGRP_UNLOCK(pg);
85818334Speter	} else {
85918334Speter		struct proc *p = (sigio)->sio_proc;
86018334Speter		PROC_LOCK(p);
86118334Speter		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
86218334Speter			     sigio, sio_pgsigio);
863132718Skan		PROC_UNLOCK(p);
864132718Skan	}
86518334Speter	SIGIO_UNLOCK();
86618334Speter	crfree(sigio->sio_ucred);
86718334Speter	free(sigio, M_SIGIO);
86818334Speter}
86918334Speter
87018334Speter/*
87118334Speter * Free a list of sigio structures.
87218334Speter * We only need to lock the SIGIO_LOCK because we have made ourselves
873132718Skan * inaccessible to callers of fsetown and therefore do not need to lock
874132718Skan * the proc or pgrp struct for the list manipulation.
87518334Speter */
87618334Spetervoid
87718334Speterfunsetownlst(struct sigiolst *sigiolst)
87818334Speter{
87918334Speter	struct proc *p;
880132718Skan	struct pgrp *pg;
88118334Speter	struct sigio *sigio;
88218334Speter
88318334Speter	sigio = SLIST_FIRST(sigiolst);
88418334Speter	if (sigio == NULL)
88518334Speter		return;
88618334Speter	p = NULL;
88718334Speter	pg = NULL;
88818334Speter
889132718Skan	/*
89018334Speter	 * Every entry of the list should belong
89118334Speter	 * to a single proc or pgrp.
89218334Speter	 */
89318334Speter	if (sigio->sio_pgid < 0) {
89418334Speter		pg = sigio->sio_pgrp;
89518334Speter		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
89618334Speter	} else /* if (sigio->sio_pgid > 0) */ {
89718334Speter		p = sigio->sio_proc;
89818334Speter		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
89918334Speter	}
90018334Speter
90118334Speter	SIGIO_LOCK();
90218334Speter	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
90318334Speter		*(sigio->sio_myref) = NULL;
90418334Speter		if (pg != NULL) {
90518334Speter			KASSERT(sigio->sio_pgid < 0,
90618334Speter			    ("Proc sigio in pgrp sigio list"));
90718334Speter			KASSERT(sigio->sio_pgrp == pg,
90818334Speter			    ("Bogus pgrp in sigio list"));
909117395Skan			PGRP_LOCK(pg);
91018334Speter			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
91118334Speter			    sio_pgsigio);
91218334Speter			PGRP_UNLOCK(pg);
91318334Speter		} else /* if (p != NULL) */ {
914117395Skan			KASSERT(sigio->sio_pgid > 0,
91518334Speter			    ("Pgrp sigio in proc sigio list"));
91618334Speter			KASSERT(sigio->sio_proc == p,
917132718Skan			    ("Bogus proc in sigio list"));
918132718Skan			PROC_LOCK(p);
919132718Skan			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
920132718Skan			    sio_pgsigio);
921132718Skan			PROC_UNLOCK(p);
922132718Skan		}
923132718Skan		SIGIO_UNLOCK();
92418334Speter		crfree(sigio->sio_ucred);
92518334Speter		free(sigio, M_SIGIO);
92618334Speter		SIGIO_LOCK();
92718334Speter	}
92818334Speter	SIGIO_UNLOCK();
92918334Speter}
93018334Speter
93118334Speter/*
93218334Speter * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
93318334Speter *
93418334Speter * After permission checking, add a sigio structure to the sigio list for
93518334Speter * the process or process group.
93618334Speter */
937132718Skanint
938132718Skanfsetown(pid_t pgid, struct sigio **sigiop)
93918334Speter{
94018334Speter	struct proc *proc;
94118334Speter	struct pgrp *pgrp;
94218334Speter	struct sigio *sigio;
94318334Speter	int ret;
94418334Speter
94518334Speter	if (pgid == 0) {
94618334Speter		funsetown(sigiop);
94718334Speter		return (0);
94818334Speter	}
94918334Speter
95018334Speter	ret = 0;
95118334Speter
95218334Speter	/* Allocate and fill in the new sigio out of locks. */
953132718Skan	sigio = malloc(sizeof(struct sigio), M_SIGIO, M_WAITOK);
954132718Skan	sigio->sio_pgid = pgid;
95518334Speter	sigio->sio_ucred = crhold(curthread->td_ucred);
95618334Speter	sigio->sio_myref = sigiop;
95718334Speter
95818334Speter	sx_slock(&proctree_lock);
95918334Speter	if (pgid > 0) {
96018334Speter		proc = pfind(pgid);
96118334Speter		if (proc == NULL) {
96218334Speter			ret = ESRCH;
96318334Speter			goto fail;
96418334Speter		}
96518334Speter
96618334Speter		/*
96718334Speter		 * Policy - Don't allow a process to FSETOWN a process
96818334Speter		 * in another session.
96918334Speter		 *
97018334Speter		 * Remove this test to allow maximum flexibility or
97118334Speter		 * restrict FSETOWN to the current process or process
97218334Speter		 * group for maximum safety.
97318334Speter		 */
97418334Speter		PROC_UNLOCK(proc);
975132718Skan		if (proc->p_session != curthread->td_proc->p_session) {
97618334Speter			ret = EPERM;
97718334Speter			goto fail;
97818334Speter		}
97990075Sobrien
98090075Sobrien		pgrp = NULL;
98190075Sobrien	} else /* if (pgid < 0) */ {
98290075Sobrien		pgrp = pgfind(-pgid);
98318334Speter		if (pgrp == NULL) {
98418334Speter			ret = ESRCH;
98590075Sobrien			goto fail;
98618334Speter		}
98718334Speter		PGRP_UNLOCK(pgrp);
98818334Speter
98990075Sobrien		/*
99090075Sobrien		 * Policy - Don't allow a process to FSETOWN a process
99190075Sobrien		 * in another session.
99218334Speter		 *
99318334Speter		 * Remove this test to allow maximum flexibility or
99490075Sobrien		 * restrict FSETOWN to the current process or process
99518334Speter		 * group for maximum safety.
99618334Speter		 */
997132718Skan		if (pgrp->pg_session != curthread->td_proc->p_session) {
99818334Speter			ret = EPERM;
99950397Sobrien			goto fail;
100018334Speter		}
100150397Sobrien
100218334Speter		proc = NULL;
100390075Sobrien	}
100418334Speter	funsetown(sigiop);
100518334Speter	if (pgid > 0) {
100618334Speter		PROC_LOCK(proc);
100718334Speter		/*
100818334Speter		 * Since funsetownlst() is called without the proctree
100918334Speter		 * locked, we need to check for P_WEXIT.
101018334Speter		 * XXX: is ESRCH correct?
101190075Sobrien		 */
101218334Speter		if ((proc->p_flag & P_WEXIT) != 0) {
101318334Speter			PROC_UNLOCK(proc);
101418334Speter			ret = ESRCH;
101518334Speter			goto fail;
101690075Sobrien		}
101790075Sobrien		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
101890075Sobrien		sigio->sio_proc = proc;
101990075Sobrien		PROC_UNLOCK(proc);
102090075Sobrien	} else {
102190075Sobrien		PGRP_LOCK(pgrp);
102290075Sobrien		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
102318334Speter		sigio->sio_pgrp = pgrp;
102490075Sobrien		PGRP_UNLOCK(pgrp);
1025132718Skan	}
102618334Speter	sx_sunlock(&proctree_lock);
102790075Sobrien	SIGIO_LOCK();
102890075Sobrien	*sigiop = sigio;
102990075Sobrien	SIGIO_UNLOCK();
103090075Sobrien	return (0);
103190075Sobrien
103218334Speterfail:
103318334Speter	sx_sunlock(&proctree_lock);
103490075Sobrien	crfree(sigio->sio_ucred);
103590075Sobrien	free(sigio, M_SIGIO);
103690075Sobrien	return (ret);
103718334Speter}
103890075Sobrien
1039132718Skan/*
104090075Sobrien * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
104190075Sobrien */
104290075Sobrienpid_t
104390075Sobrienfgetown(sigiop)
104490075Sobrien	struct sigio **sigiop;
104590075Sobrien{
104618334Speter	pid_t pgid;
1047132718Skan
104890075Sobrien	SIGIO_LOCK();
104990075Sobrien	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
105018334Speter	SIGIO_UNLOCK();
105190075Sobrien	return (pgid);
105218334Speter}
105318334Speter
105418334Speter/*
105518334Speter * Close a file descriptor.
105690075Sobrien */
105790075Sobrien#ifndef _SYS_SYSPROTO_H_
105890075Sobrienstruct close_args {
105918334Speter	int     fd;
106018334Speter};
106190075Sobrien#endif
106290075Sobrien/* ARGSUSED */
106390075Sobrienint
106450397Sobrienclose(td, uap)
106518334Speter	struct thread *td;
106618334Speter	struct close_args *uap;
106718334Speter{
106818334Speter
106918334Speter	return (kern_close(td, uap->fd));
107018334Speter}
107190075Sobrien
107218334Speterint
107318334Speterkern_close(td, fd)
107418334Speter	struct thread *td;
107518334Speter	int fd;
107690075Sobrien{
107718334Speter	struct filedesc *fdp;
107890075Sobrien	struct file *fp;
107918334Speter	int error;
108018334Speter	int holdleaders;
108118334Speter
108218334Speter	error = 0;
108318334Speter	holdleaders = 0;
108452284Sobrien	fdp = td->td_proc->p_fd;
1085132718Skan
108652284Sobrien	AUDIT_SYSCLOSE(td, fd);
1087132718Skan
108852284Sobrien	FILEDESC_XLOCK(fdp);
108952284Sobrien	if ((unsigned)fd >= fdp->fd_nfiles ||
109018334Speter	    (fp = fdp->fd_ofiles[fd]) == NULL) {
109118334Speter		FILEDESC_XUNLOCK(fdp);
109218334Speter		return (EBADF);
109318334Speter	}
109418334Speter	fdp->fd_ofiles[fd] = NULL;
109518334Speter	fdp->fd_ofileflags[fd] = 0;
109618334Speter	fdunused(fdp, fd);
109718334Speter	if (td->td_proc->p_fdtol != NULL) {
109818334Speter		/*
109918334Speter		 * Ask fdfree() to sleep to ensure that all relevant
110018334Speter		 * process leaders can be traversed in closef().
110118334Speter		 */
110290075Sobrien		fdp->fd_holdleaderscount++;
110390075Sobrien		holdleaders = 1;
110418334Speter	}
110590075Sobrien
110690075Sobrien	/*
110790075Sobrien	 * We now hold the fp reference that used to be owned by the
110890075Sobrien	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
110990075Sobrien	 * knote_fdclose to prevent a race of the fd getting opened, a knote
111018334Speter	 * added, and deleteing a knote for the new fd.
111118334Speter	 */
111218334Speter	knote_fdclose(td, fd);
111318334Speter	if (fp->f_type == DTYPE_MQUEUE)
111418334Speter		mq_fdclose(td, fd, fp);
111518334Speter	FILEDESC_XUNLOCK(fdp);
111618334Speter
111718334Speter	error = closef(fp, td);
111818334Speter	if (holdleaders) {
111918334Speter		FILEDESC_XLOCK(fdp);
112018334Speter		fdp->fd_holdleaderscount--;
1121117395Skan		if (fdp->fd_holdleaderscount == 0 &&
112218334Speter		    fdp->fd_holdleaderswakeup != 0) {
112318334Speter			fdp->fd_holdleaderswakeup = 0;
112418334Speter			wakeup(&fdp->fd_holdleaderscount);
112518334Speter		}
112618334Speter		FILEDESC_XUNLOCK(fdp);
112718334Speter	}
112818334Speter	return (error);
112918334Speter}
113018334Speter
113118334Speter/*
113218334Speter * Close open file descriptors.
113318334Speter */
113490075Sobrien#ifndef _SYS_SYSPROTO_H_
113590075Sobrienstruct closefrom_args {
113690075Sobrien	int	lowfd;
113790075Sobrien};
113890075Sobrien#endif
113990075Sobrien/* ARGSUSED */
114090075Sobrienint
1141132718Skanclosefrom(struct thread *td, struct closefrom_args *uap)
114290075Sobrien{
114390075Sobrien	struct filedesc *fdp;
114490075Sobrien	int fd;
114590075Sobrien
114690075Sobrien	fdp = td->td_proc->p_fd;
114790075Sobrien	AUDIT_ARG(fd, uap->lowfd);
114890075Sobrien
114990075Sobrien	/*
115090075Sobrien	 * Treat negative starting file descriptor values identical to
115190075Sobrien	 * closefrom(0) which closes all files.
115290075Sobrien	 */
115390075Sobrien	if (uap->lowfd < 0)
115490075Sobrien		uap->lowfd = 0;
115590075Sobrien	FILEDESC_SLOCK(fdp);
115690075Sobrien	for (fd = uap->lowfd; fd < fdp->fd_nfiles; fd++) {
115790075Sobrien		if (fdp->fd_ofiles[fd] != NULL) {
115890075Sobrien			FILEDESC_SUNLOCK(fdp);
115990075Sobrien			(void)kern_close(td, fd);
116090075Sobrien			FILEDESC_SLOCK(fdp);
116190075Sobrien		}
116290075Sobrien	}
116390075Sobrien	FILEDESC_SUNLOCK(fdp);
1164132718Skan	return (0);
116590075Sobrien}
116690075Sobrien
116790075Sobrien#if defined(COMPAT_43)
116890075Sobrien/*
116990075Sobrien * Return status information about a file descriptor.
117090075Sobrien */
117190075Sobrien#ifndef _SYS_SYSPROTO_H_
117290075Sobrienstruct ofstat_args {
117390075Sobrien	int	fd;
117490075Sobrien	struct	ostat *sb;
117590075Sobrien};
117690075Sobrien#endif
117790075Sobrien/* ARGSUSED */
117890075Sobrienint
117990075Sobrienofstat(struct thread *td, struct ofstat_args *uap)
118090075Sobrien{
118190075Sobrien	struct ostat oub;
1182117395Skan	struct stat ub;
1183117395Skan	int error;
1184117395Skan
1185117395Skan	error = kern_fstat(td, uap->fd, &ub);
1186117395Skan	if (error == 0) {
118790075Sobrien		cvtstat(&ub, &oub);
118890075Sobrien		error = copyout(&oub, uap->sb, sizeof(oub));
118990075Sobrien	}
119090075Sobrien	return (error);
119190075Sobrien}
119290075Sobrien#endif /* COMPAT_43 */
119390075Sobrien
119490075Sobrien/*
119590075Sobrien * Return status information about a file descriptor.
119690075Sobrien */
1197117395Skan#ifndef _SYS_SYSPROTO_H_
119890075Sobrienstruct fstat_args {
119990075Sobrien	int	fd;
120090075Sobrien	struct	stat *sb;
1201117395Skan};
120290075Sobrien#endif
1203117395Skan/* ARGSUSED */
120490075Sobrienint
120590075Sobrienfstat(struct thread *td, struct fstat_args *uap)
120690075Sobrien{
120790075Sobrien	struct stat ub;
120890075Sobrien	int error;
120990075Sobrien
121090075Sobrien	error = kern_fstat(td, uap->fd, &ub);
121190075Sobrien	if (error == 0)
121290075Sobrien		error = copyout(&ub, uap->sb, sizeof(ub));
121390075Sobrien	return (error);
121490075Sobrien}
121590075Sobrien
121618334Speterint
121790075Sobrienkern_fstat(struct thread *td, int fd, struct stat *sbp)
121818334Speter{
121990075Sobrien	struct file *fp;
122090075Sobrien	int error;
122190075Sobrien
122290075Sobrien	AUDIT_ARG(fd, fd);
122390075Sobrien
122490075Sobrien	if ((error = fget(td, fd, &fp)) != 0)
1225132718Skan		return (error);
122618334Speter
1227132718Skan	AUDIT_ARG(file, td->td_proc, fp);
122818334Speter
122990075Sobrien	error = fo_stat(fp, sbp, td->td_ucred, td);
123018334Speter	fdrop(fp, td);
123118334Speter#ifdef KTRACE
123218334Speter	if (error == 0 && KTRPOINT(td, KTR_STRUCT))
123318334Speter		ktrstat(sbp);
123418334Speter#endif
123518334Speter	return (error);
123618334Speter}
123718334Speter
123818334Speter/*
123918334Speter * Return status information about a file descriptor.
1240132718Skan */
124118334Speter#ifndef _SYS_SYSPROTO_H_
124218334Speterstruct nfstat_args {
124318334Speter	int	fd;
1244132718Skan	struct	nstat *sb;
124518334Speter};
124618334Speter#endif
124718334Speter/* ARGSUSED */
124818334Speterint
124918334Speternfstat(struct thread *td, struct nfstat_args *uap)
125018334Speter{
125118334Speter	struct nstat nub;
125218334Speter	struct stat ub;
125318334Speter	int error;
125418334Speter
125518334Speter	error = kern_fstat(td, uap->fd, &ub);
125650397Sobrien	if (error == 0) {
125750397Sobrien		cvtnstat(&ub, &nub);
125890075Sobrien		error = copyout(&nub, uap->sb, sizeof(nub));
125990075Sobrien	}
126050397Sobrien	return (error);
126190075Sobrien}
126290075Sobrien
1263132718Skan/*
126418334Speter * Return pathconf information about a file descriptor.
126518334Speter */
126618334Speter#ifndef _SYS_SYSPROTO_H_
126718334Speterstruct fpathconf_args {
126818334Speter	int	fd;
126918334Speter	int	name;
127018334Speter};
127118334Speter#endif
127218334Speter/* ARGSUSED */
127318334Speterint
127418334Speterfpathconf(struct thread *td, struct fpathconf_args *uap)
127518334Speter{
127618334Speter	struct file *fp;
127718334Speter	struct vnode *vp;
127818334Speter	int error;
127918334Speter
128018334Speter	if ((error = fget(td, uap->fd, &fp)) != 0)
128118334Speter		return (error);
128218334Speter
128318334Speter	/* If asynchronous I/O is available, it works for all descriptors. */
128418334Speter	if (uap->name == _PC_ASYNC_IO) {
128590075Sobrien		td->td_retval[0] = async_io_version;
128618334Speter		goto out;
128718334Speter	}
128818334Speter	vp = fp->f_vnode;
128918334Speter	if (vp != NULL) {
129018334Speter		int vfslocked;
129190075Sobrien		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
129290075Sobrien		vn_lock(vp, LK_SHARED | LK_RETRY);
129390075Sobrien		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
129418334Speter		VOP_UNLOCK(vp, 0);
129590075Sobrien		VFS_UNLOCK_GIANT(vfslocked);
129650397Sobrien	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
129790075Sobrien		if (uap->name != _PC_PIPE_BUF) {
129890075Sobrien			error = EINVAL;
129918334Speter		} else {
130018334Speter			td->td_retval[0] = PIPE_BUF;
130118334Speter		error = 0;
130218334Speter		}
130318334Speter	} else {
130490075Sobrien		error = EOPNOTSUPP;
130590075Sobrien	}
130690075Sobrienout:
130718334Speter	fdrop(fp, td);
130818334Speter	return (error);
130990075Sobrien}
131018334Speter
131118334Speter/*
131218334Speter * Grow the file table to accomodate (at least) nfd descriptors.  This may
131318334Speter * block and drop the filedesc lock, but it will reacquire it before
131418334Speter * returning.
131518334Speter */
1316132718Skanstatic void
131718334Speterfdgrowtable(struct filedesc *fdp, int nfd)
131890075Sobrien{
131990075Sobrien	struct filedesc0 *fdp0;
132090075Sobrien	struct freetable *fo;
132190075Sobrien	struct file **ntable;
132218334Speter	struct file **otable;
132318334Speter	char *nfileflags;
132418334Speter	int nnfiles, onfiles;
132518334Speter	NDSLOTTYPE *nmap;
132618334Speter
132790075Sobrien	FILEDESC_XLOCK_ASSERT(fdp);
132818334Speter
1329117395Skan	KASSERT(fdp->fd_nfiles > 0,
133018334Speter	    ("zero-length file table"));
133118334Speter
133218334Speter	/* compute the size of the new table */
133390075Sobrien	onfiles = fdp->fd_nfiles;
133418334Speter	nnfiles = NDSLOTS(nfd) * NDENTRIES; /* round up */
133518334Speter	if (nnfiles <= onfiles)
133618334Speter		/* the table is already large enough */
133790075Sobrien		return;
133890075Sobrien
133918334Speter	/* allocate a new table and (if required) new bitmaps */
134018334Speter	FILEDESC_XUNLOCK(fdp);
134118334Speter	ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
134218334Speter	    M_FILEDESC, M_ZERO | M_WAITOK);
134390075Sobrien	nfileflags = (char *)&ntable[nnfiles];
134490075Sobrien	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
134590075Sobrien		nmap = malloc(NDSLOTS(nnfiles) * NDSLOTSIZE,
134690075Sobrien		    M_FILEDESC, M_ZERO | M_WAITOK);
134790075Sobrien	else
1348117395Skan		nmap = NULL;
134918334Speter	FILEDESC_XLOCK(fdp);
135090075Sobrien
135190075Sobrien	/*
135290075Sobrien	 * We now have new tables ready to go.  Since we dropped the
135390075Sobrien	 * filedesc lock to call malloc(), watch out for a race.
135490075Sobrien	 */
135590075Sobrien	onfiles = fdp->fd_nfiles;
135690075Sobrien	if (onfiles >= nnfiles) {
135718334Speter		/* we lost the race, but that's OK */
135890075Sobrien		free(ntable, M_FILEDESC);
135918334Speter		if (nmap != NULL)
136090075Sobrien			free(nmap, M_FILEDESC);
136190075Sobrien		return;
136290075Sobrien	}
136390075Sobrien	bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
136418334Speter	bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
1365117395Skan	otable = fdp->fd_ofiles;
1366117395Skan	fdp->fd_ofileflags = nfileflags;
136718334Speter	fdp->fd_ofiles = ntable;
1368117395Skan	/*
136918334Speter	 * We must preserve ofiles until the process exits because we can't
137018334Speter	 * be certain that no threads have references to the old table via
137118334Speter	 * _fget().
137218334Speter	 */
137318334Speter	if (onfiles > NDFILE) {
137418334Speter		fo = (struct freetable *)&otable[onfiles];
137518334Speter		fdp0 = (struct filedesc0 *)fdp;
137618334Speter		fo->ft_table = otable;
137718334Speter		SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
137818334Speter	}
137918334Speter	if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
138018334Speter		bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
138118334Speter		if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
138218334Speter			free(fdp->fd_map, M_FILEDESC);
138318334Speter		fdp->fd_map = nmap;
1384132718Skan	}
138518334Speter	fdp->fd_nfiles = nnfiles;
138690075Sobrien}
138790075Sobrien
138818334Speter/*
138918334Speter * Allocate a file descriptor for the process.
139018334Speter */
139118334Speterint
139218334Speterfdalloc(struct thread *td, int minfd, int *result)
139318334Speter{
139418334Speter	struct proc *p = td->td_proc;
139518334Speter	struct filedesc *fdp = p->p_fd;
139618334Speter	int fd = -1, maxfd;
139718334Speter
139890075Sobrien	FILEDESC_XLOCK_ASSERT(fdp);
139918334Speter
140090075Sobrien	if (fdp->fd_freefile > minfd)
140118334Speter		minfd = fdp->fd_freefile;
140218334Speter
140318334Speter	PROC_LOCK(p);
140418334Speter	maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
140590075Sobrien	PROC_UNLOCK(p);
140618334Speter
140718334Speter	/*
140818334Speter	 * Search the bitmap for a free descriptor.  If none is found, try
140918334Speter	 * to grow the file table.  Keep at it until we either get a file
141018334Speter	 * descriptor or run into process or system limits; fdgrowtable()
141118334Speter	 * may drop the filedesc lock, so we're in a race.
141218334Speter	 */
141318334Speter	for (;;) {
141418334Speter		fd = fd_first_free(fdp, minfd, fdp->fd_nfiles);
141518334Speter		if (fd >= maxfd)
141618334Speter			return (EMFILE);
1417132718Skan		if (fd < fdp->fd_nfiles)
141818334Speter			break;
141918334Speter		fdgrowtable(fdp, min(fdp->fd_nfiles * 2, maxfd));
142018334Speter	}
142118334Speter
142218334Speter	/*
142318334Speter	 * Perform some sanity checks, then mark the file descriptor as
142418334Speter	 * used and return it to the caller.
142518334Speter	 */
142618334Speter	KASSERT(!fdisused(fdp, fd),
142718334Speter	    ("fd_first_free() returned non-free descriptor"));
142818334Speter	KASSERT(fdp->fd_ofiles[fd] == NULL,
142918334Speter	    ("free descriptor isn't"));
143018334Speter	fdp->fd_ofileflags[fd] = 0; /* XXX needed? */
143118334Speter	fdused(fdp, fd);
143218334Speter	*result = fd;
143318334Speter	return (0);
143418334Speter}
143518334Speter
143618334Speter/*
1437132718Skan * Check to see whether n user file descriptors are available to the process
143818334Speter * p.
143918334Speter */
144018334Speterint
144118334Speterfdavail(struct thread *td, int n)
144218334Speter{
144318334Speter	struct proc *p = td->td_proc;
144418334Speter	struct filedesc *fdp = td->td_proc->p_fd;
144518334Speter	struct file **fpp;
144618334Speter	int i, lim, last;
144718334Speter
144818334Speter	FILEDESC_LOCK_ASSERT(fdp);
144918334Speter
145018334Speter	PROC_LOCK(p);
145118334Speter	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
145218334Speter	PROC_UNLOCK(p);
145318334Speter	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
145418334Speter		return (1);
145518334Speter	last = min(fdp->fd_nfiles, lim);
145618334Speter	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
145718334Speter	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
145818334Speter		if (*fpp == NULL && --n <= 0)
145918334Speter			return (1);
146018334Speter	}
146118334Speter	return (0);
146218334Speter}
146318334Speter
146418334Speter/*
146518334Speter * Create a new open file structure and allocate a file decriptor for the
146618334Speter * process that refers to it.  We add one reference to the file for the
146718334Speter * descriptor table and one reference for resultfp. This is to prevent us
146818334Speter * being preempted and the entry in the descriptor table closed after we
146918334Speter * release the FILEDESC lock.
147018334Speter */
147118334Speterint
147218334Speterfalloc(struct thread *td, struct file **resultfp, int *resultfd)
147318334Speter{
147418334Speter	struct proc *p = td->td_proc;
147518334Speter	struct file *fp;
147618334Speter	int error, i;
147718334Speter	int maxuserfiles = maxfiles - (maxfiles / 20);
147818334Speter	static struct timeval lastfail;
147990075Sobrien	static int curfail;
148018334Speter
148118334Speter	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1482117395Skan	if ((openfiles >= maxuserfiles &&
148318334Speter	    priv_check(td, PRIV_MAXFILES) != 0) ||
148418334Speter	    openfiles >= maxfiles) {
148518334Speter		if (ppsratecheck(&lastfail, &curfail, 1)) {
148618334Speter			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
148718334Speter				td->td_ucred->cr_ruid);
148818334Speter		}
148918334Speter		uma_zfree(file_zone, fp);
149018334Speter		return (ENFILE);
149118334Speter	}
149218334Speter	atomic_add_int(&openfiles, 1);
1493117395Skan
149418334Speter	/*
149518334Speter	 * If the process has file descriptor zero open, add the new file
149618334Speter	 * descriptor to the list of open files at that point, otherwise
149718334Speter	 * put it at the front of the list of open files.
149818334Speter	 */
149918334Speter	refcount_init(&fp->f_count, 1);
150018334Speter	if (resultfp)
150118334Speter		fhold(fp);
150218334Speter	fp->f_cred = crhold(td->td_ucred);
150390075Sobrien	fp->f_ops = &badfileops;
150490075Sobrien	fp->f_data = NULL;
1505117395Skan	fp->f_vnode = NULL;
150618334Speter	FILEDESC_XLOCK(p->p_fd);
150718334Speter	if ((error = fdalloc(td, 0, &i))) {
150818334Speter		FILEDESC_XUNLOCK(p->p_fd);
150918334Speter		fdrop(fp, td);
151018334Speter		if (resultfp)
151118334Speter			fdrop(fp, td);
151218334Speter		return (error);
151318334Speter	}
151418334Speter	p->p_fd->fd_ofiles[i] = fp;
151518334Speter	FILEDESC_XUNLOCK(p->p_fd);
151618334Speter	if (resultfp)
151718334Speter		*resultfp = fp;
151818334Speter	if (resultfd)
151918334Speter		*resultfd = i;
152018334Speter	return (0);
1521132718Skan}
152218334Speter
152318334Speter/*
152418334Speter * Build a new filedesc structure from another.
152518334Speter * Copy the current, root, and jail root vnode references.
152618334Speter */
152718334Speterstruct filedesc *
152818334Speterfdinit(struct filedesc *fdp)
152918334Speter{
153018334Speter	struct filedesc0 *newfdp;
153118334Speter
153250397Sobrien	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
153350397Sobrien	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
153450397Sobrien	if (fdp != NULL) {
153550397Sobrien		FILEDESC_XLOCK(fdp);
153650397Sobrien		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
153750397Sobrien		if (newfdp->fd_fd.fd_cdir)
153850397Sobrien			VREF(newfdp->fd_fd.fd_cdir);
153950397Sobrien		newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
154018334Speter		if (newfdp->fd_fd.fd_rdir)
154118334Speter			VREF(newfdp->fd_fd.fd_rdir);
154218334Speter		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
154318334Speter		if (newfdp->fd_fd.fd_jdir)
154418334Speter			VREF(newfdp->fd_fd.fd_jdir);
154518334Speter		FILEDESC_XUNLOCK(fdp);
154618334Speter	}
154718334Speter
154818334Speter	/* Create the file descriptor table. */
154918334Speter	newfdp->fd_fd.fd_refcnt = 1;
155018334Speter	newfdp->fd_fd.fd_holdcnt = 1;
155150397Sobrien	newfdp->fd_fd.fd_cmask = CMASK;
155250397Sobrien	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
155318334Speter	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
155418334Speter	newfdp->fd_fd.fd_nfiles = NDFILE;
155518334Speter	newfdp->fd_fd.fd_map = newfdp->fd_dmap;
155618334Speter	newfdp->fd_fd.fd_lastfile = -1;
155718334Speter	return (&newfdp->fd_fd);
155818334Speter}
155918334Speter
156018334Speterstatic struct filedesc *
156118334Speterfdhold(struct proc *p)
156218334Speter{
156318334Speter	struct filedesc *fdp;
156418334Speter
156518334Speter	mtx_lock(&fdesc_mtx);
156618334Speter	fdp = p->p_fd;
156718334Speter	if (fdp != NULL)
156890075Sobrien		fdp->fd_holdcnt++;
156990075Sobrien	mtx_unlock(&fdesc_mtx);
157090075Sobrien	return (fdp);
1571132718Skan}
157290075Sobrien
157390075Sobrienstatic void
157490075Sobrienfddrop(struct filedesc *fdp)
157590075Sobrien{
157690075Sobrien	struct filedesc0 *fdp0;
1577132718Skan	struct freetable *ft;
157890075Sobrien	int i;
157990075Sobrien
158090075Sobrien	mtx_lock(&fdesc_mtx);
158190075Sobrien	i = --fdp->fd_holdcnt;
1582132718Skan	mtx_unlock(&fdesc_mtx);
158390075Sobrien	if (i > 0)
158490075Sobrien		return;
158590075Sobrien
158690075Sobrien	FILEDESC_LOCK_DESTROY(fdp);
158790075Sobrien	fdp0 = (struct filedesc0 *)fdp;
158890075Sobrien	while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
158990075Sobrien		SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
159090075Sobrien		free(ft->ft_table, M_FILEDESC);
1591132718Skan	}
1592132718Skan	free(fdp, M_FILEDESC);
1593132718Skan}
1594132718Skan
1595132718Skan/*
1596132718Skan * Share a filedesc structure.
1597132718Skan */
1598132718Skanstruct filedesc *
1599132718Skanfdshare(struct filedesc *fdp)
1600132718Skan{
1601132718Skan
1602132718Skan	FILEDESC_XLOCK(fdp);
1603132718Skan	fdp->fd_refcnt++;
1604132718Skan	FILEDESC_XUNLOCK(fdp);
160590075Sobrien	return (fdp);
160690075Sobrien}
160790075Sobrien
160890075Sobrien/*
1609132718Skan * Unshare a filedesc structure, if necessary by making a copy
1610132718Skan */
1611132718Skanvoid
1612103445Skanfdunshare(struct proc *p, struct thread *td)
1613103445Skan{
161490075Sobrien
161590075Sobrien	FILEDESC_XLOCK(p->p_fd);
161690075Sobrien	if (p->p_fd->fd_refcnt > 1) {
161790075Sobrien		struct filedesc *tmp;
161890075Sobrien
161990075Sobrien		FILEDESC_XUNLOCK(p->p_fd);
162090075Sobrien		tmp = fdcopy(p->p_fd);
1621117395Skan		fdfree(td);
1622132718Skan		p->p_fd = tmp;
1623117395Skan	} else
1624117395Skan		FILEDESC_XUNLOCK(p->p_fd);
1625117395Skan}
1626117395Skan
1627117395Skan/*
1628132718Skan * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
1629117395Skan * this is to ease callers, not catch errors.
1630117395Skan */
1631117395Skanstruct filedesc *
163290075Sobrienfdcopy(struct filedesc *fdp)
163390075Sobrien{
163490075Sobrien	struct filedesc *newfdp;
163590075Sobrien	int i;
163618334Speter
163718334Speter	/* Certain daemons might not have file descriptors. */
163818334Speter	if (fdp == NULL)
163918334Speter		return (NULL);
164018334Speter
164118334Speter	newfdp = fdinit(fdp);
1642132718Skan	FILEDESC_SLOCK(fdp);
164318334Speter	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
164490075Sobrien		FILEDESC_SUNLOCK(fdp);
164590075Sobrien		FILEDESC_XLOCK(newfdp);
1646132718Skan		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
1647132718Skan		FILEDESC_XUNLOCK(newfdp);
164818334Speter		FILEDESC_SLOCK(fdp);
164918334Speter	}
165018334Speter	/* copy everything except kqueue descriptors */
165118334Speter	newfdp->fd_freefile = -1;
165218334Speter	for (i = 0; i <= fdp->fd_lastfile; ++i) {
165318334Speter		if (fdisused(fdp, i) &&
165418334Speter		    fdp->fd_ofiles[i]->f_type != DTYPE_KQUEUE &&
165518334Speter		    fdp->fd_ofiles[i]->f_ops != &badfileops) {
165618334Speter			newfdp->fd_ofiles[i] = fdp->fd_ofiles[i];
165718334Speter			newfdp->fd_ofileflags[i] = fdp->fd_ofileflags[i];
165818334Speter			fhold(newfdp->fd_ofiles[i]);
165918334Speter			newfdp->fd_lastfile = i;
166018334Speter		} else {
166118334Speter			if (newfdp->fd_freefile == -1)
166218334Speter				newfdp->fd_freefile = i;
166318334Speter		}
166418334Speter	}
166518334Speter	newfdp->fd_cmask = fdp->fd_cmask;
166618334Speter	FILEDESC_SUNLOCK(fdp);
166718334Speter	FILEDESC_XLOCK(newfdp);
166818334Speter	for (i = 0; i <= newfdp->fd_lastfile; ++i)
166918334Speter		if (newfdp->fd_ofiles[i] != NULL)
167018334Speter			fdused(newfdp, i);
167118334Speter	if (newfdp->fd_freefile == -1)
167218334Speter		newfdp->fd_freefile = i;
167318334Speter	FILEDESC_XUNLOCK(newfdp);
167418334Speter	return (newfdp);
167518334Speter}
167618334Speter
167718334Speter/*
167818334Speter * Release a filedesc structure.
167918334Speter */
168018334Spetervoid
168196263Sobrienfdfree(struct thread *td)
168296263Sobrien{
168396263Sobrien	struct filedesc *fdp;
168418334Speter	struct file **fpp;
168518334Speter	int i, locked;
168618334Speter	struct filedesc_to_leader *fdtol;
1687132718Skan	struct file *fp;
1688132718Skan	struct vnode *cdir, *jdir, *rdir, *vp;
1689132718Skan	struct flock lf;
1690132718Skan
1691132718Skan	/* Certain daemons might not have file descriptors. */
1692132718Skan	fdp = td->td_proc->p_fd;
1693132718Skan	if (fdp == NULL)
1694132718Skan		return;
1695132718Skan
1696132718Skan	/* Check for special need to clear POSIX style locks */
1697132718Skan	fdtol = td->td_proc->p_fdtol;
1698132718Skan	if (fdtol != NULL) {
1699132718Skan		FILEDESC_XLOCK(fdp);
1700132718Skan		KASSERT(fdtol->fdl_refcount > 0,
1701132718Skan			("filedesc_to_refcount botch: fdl_refcount=%d",
1702132718Skan			 fdtol->fdl_refcount));
1703132718Skan		if (fdtol->fdl_refcount == 1 &&
1704132718Skan		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
1705132718Skan			for (i = 0, fpp = fdp->fd_ofiles;
1706132718Skan			     i <= fdp->fd_lastfile;
1707132718Skan			     i++, fpp++) {
1708132718Skan				if (*fpp == NULL ||
1709132718Skan				    (*fpp)->f_type != DTYPE_VNODE)
1710132718Skan					continue;
1711132718Skan				fp = *fpp;
1712132718Skan				fhold(fp);
1713132718Skan				FILEDESC_XUNLOCK(fdp);
1714132718Skan				lf.l_whence = SEEK_SET;
1715132718Skan				lf.l_start = 0;
1716132718Skan				lf.l_len = 0;
1717132718Skan				lf.l_type = F_UNLCK;
1718132718Skan				vp = fp->f_vnode;
1719132718Skan				locked = VFS_LOCK_GIANT(vp->v_mount);
1720132718Skan				(void) VOP_ADVLOCK(vp,
1721132718Skan						   (caddr_t)td->td_proc->
1722132718Skan						   p_leader,
1723132718Skan						   F_UNLCK,
1724132718Skan						   &lf,
1725132718Skan						   F_POSIX);
1726132718Skan				VFS_UNLOCK_GIANT(locked);
1727132718Skan				FILEDESC_XLOCK(fdp);
1728132718Skan				fdrop(fp, td);
1729132718Skan				fpp = fdp->fd_ofiles + i;
1730132718Skan			}
1731132718Skan		}
173218334Speter	retry:
173318334Speter		if (fdtol->fdl_refcount == 1) {
173418334Speter			if (fdp->fd_holdleaderscount > 0 &&
173518334Speter			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
173618334Speter				/*
173718334Speter				 * close() or do_dup() has cleared a reference
1738117395Skan				 * in a shared file descriptor table.
173918334Speter				 */
1740117395Skan				fdp->fd_holdleaderswakeup = 1;
174118334Speter				sx_sleep(&fdp->fd_holdleaderscount,
174218334Speter				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
174318334Speter				goto retry;
174418334Speter			}
174518334Speter			if (fdtol->fdl_holdcount > 0) {
174618334Speter				/*
174718334Speter				 * Ensure that fdtol->fdl_leader remains
174818334Speter				 * valid in closef().
174990075Sobrien				 */
175090075Sobrien				fdtol->fdl_wakeup = 1;
175190075Sobrien				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
175290075Sobrien				    "fdlhold", 0);
175390075Sobrien				goto retry;
175490075Sobrien			}
175518334Speter		}
175690075Sobrien		fdtol->fdl_refcount--;
175790075Sobrien		if (fdtol->fdl_refcount == 0 &&
175890075Sobrien		    fdtol->fdl_holdcount == 0) {
175918334Speter			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
1760132718Skan			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
1761132718Skan		} else
1762132718Skan			fdtol = NULL;
1763132718Skan		td->td_proc->p_fdtol = NULL;
1764132718Skan		FILEDESC_XUNLOCK(fdp);
1765132718Skan		if (fdtol != NULL)
1766132718Skan			free(fdtol, M_FILEDESC_TO_LEADER);
1767132718Skan	}
1768132718Skan	FILEDESC_XLOCK(fdp);
1769132718Skan	i = --fdp->fd_refcnt;
1770132718Skan	FILEDESC_XUNLOCK(fdp);
1771132718Skan	if (i > 0)
177290075Sobrien		return;
177390075Sobrien
177490075Sobrien	fpp = fdp->fd_ofiles;
177518334Speter	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
177618334Speter		if (*fpp) {
177718334Speter			FILEDESC_XLOCK(fdp);
177818334Speter			fp = *fpp;
177918334Speter			*fpp = NULL;
178018334Speter			FILEDESC_XUNLOCK(fdp);
178118334Speter			(void) closef(fp, td);
178218334Speter		}
178318334Speter	}
178418334Speter	FILEDESC_XLOCK(fdp);
178518334Speter
178618334Speter	/* XXX This should happen earlier. */
178718334Speter	mtx_lock(&fdesc_mtx);
178818334Speter	td->td_proc->p_fd = NULL;
178918334Speter	mtx_unlock(&fdesc_mtx);
179018334Speter
179118334Speter	if (fdp->fd_nfiles > NDFILE)
179218334Speter		free(fdp->fd_ofiles, M_FILEDESC);
179318334Speter	if (NDSLOTS(fdp->fd_nfiles) > NDSLOTS(NDFILE))
179418334Speter		free(fdp->fd_map, M_FILEDESC);
179518334Speter
1796132718Skan	fdp->fd_nfiles = 0;
179718334Speter
179890075Sobrien	cdir = fdp->fd_cdir;
179990075Sobrien	fdp->fd_cdir = NULL;
180018334Speter	rdir = fdp->fd_rdir;
180118334Speter	fdp->fd_rdir = NULL;
180218334Speter	jdir = fdp->fd_jdir;
180318334Speter	fdp->fd_jdir = NULL;
180418334Speter	FILEDESC_XUNLOCK(fdp);
180518334Speter
180618334Speter	if (cdir) {
180718334Speter		locked = VFS_LOCK_GIANT(cdir->v_mount);
180818334Speter		vrele(cdir);
180990075Sobrien		VFS_UNLOCK_GIANT(locked);
181090075Sobrien	}
181118334Speter	if (rdir) {
181218334Speter		locked = VFS_LOCK_GIANT(rdir->v_mount);
181318334Speter		vrele(rdir);
181418334Speter		VFS_UNLOCK_GIANT(locked);
181518334Speter	}
181618334Speter	if (jdir) {
181718334Speter		locked = VFS_LOCK_GIANT(jdir->v_mount);
181818334Speter		vrele(jdir);
181918334Speter		VFS_UNLOCK_GIANT(locked);
182018334Speter	}
182118334Speter
182218334Speter	fddrop(fdp);
182318334Speter}
182418334Speter
182518334Speter/*
182618334Speter * For setugid programs, we don't want to people to use that setugidness
182718334Speter * to generate error messages which write to a file which otherwise would
182890075Sobrien * otherwise be off-limits to the process.  We check for filesystems where
182918334Speter * the vnode can change out from under us after execve (like [lin]procfs).
183018334Speter *
183118334Speter * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
183218334Speter * sufficient.  We also don't check for setugidness since we know we are.
183318334Speter */
183418334Speterstatic int
183518334Speteris_unsafe(struct file *fp)
183618334Speter{
183790075Sobrien	if (fp->f_type == DTYPE_VNODE) {
183818334Speter		struct vnode *vp = fp->f_vnode;
183918334Speter
184018334Speter		if ((vp->v_vflag & VV_PROCDEP) != 0)
184118334Speter			return (1);
184218334Speter	}
184318334Speter	return (0);
184418334Speter}
184518334Speter
184690075Sobrien/*
184718334Speter * Make this setguid thing safe, if at all possible.
184818334Speter */
184918334Spetervoid
185018334Spetersetugidsafety(struct thread *td)
185118334Speter{
185218334Speter	struct filedesc *fdp;
185390075Sobrien	int i;
185490075Sobrien
185590075Sobrien	/* Certain daemons might not have file descriptors. */
185618334Speter	fdp = td->td_proc->p_fd;
185718334Speter	if (fdp == NULL)
185818334Speter		return;
185918334Speter
186018334Speter	/*
186118334Speter	 * Note: fdp->fd_ofiles may be reallocated out from under us while
186290075Sobrien	 * we are blocked in a close.  Be careful!
186390075Sobrien	 */
186490075Sobrien	FILEDESC_XLOCK(fdp);
186518334Speter	for (i = 0; i <= fdp->fd_lastfile; i++) {
186618334Speter		if (i > 2)
186718334Speter			break;
186818334Speter		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
186918334Speter			struct file *fp;
187018334Speter
187118334Speter			knote_fdclose(td, i);
187218334Speter			/*
187318334Speter			 * NULL-out descriptor prior to close to avoid
187418334Speter			 * a race while close blocks.
187518334Speter			 */
187618334Speter			fp = fdp->fd_ofiles[i];
187718334Speter			fdp->fd_ofiles[i] = NULL;
187818334Speter			fdp->fd_ofileflags[i] = 0;
187990075Sobrien			fdunused(fdp, i);
188090075Sobrien			FILEDESC_XUNLOCK(fdp);
188190075Sobrien			(void) closef(fp, td);
188290075Sobrien			FILEDESC_XLOCK(fdp);
188318334Speter		}
188418334Speter	}
188518334Speter	FILEDESC_XUNLOCK(fdp);
188618334Speter}
188718334Speter
188818334Speter/*
188990075Sobrien * If a specific file object occupies a specific file descriptor, close the
189090075Sobrien * file descriptor entry and drop a reference on the file object.  This is a
189118334Speter * convenience function to handle a subsequent error in a function that calls
189218334Speter * falloc() that handles the race that another thread might have closed the
189318334Speter * file descriptor out from under the thread creating the file object.
189418334Speter */
189518334Spetervoid
189618334Speterfdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
189718334Speter{
189818334Speter
189918334Speter	FILEDESC_XLOCK(fdp);
190018334Speter	if (fdp->fd_ofiles[idx] == fp) {
190118334Speter		fdp->fd_ofiles[idx] = NULL;
190218334Speter		fdunused(fdp, idx);
190318334Speter		FILEDESC_XUNLOCK(fdp);
190418334Speter		fdrop(fp, td);
190518334Speter	} else
190618334Speter		FILEDESC_XUNLOCK(fdp);
190718334Speter}
190818334Speter
190918334Speter/*
191018334Speter * Close any files on exec?
191118334Speter */
191218334Spetervoid
191318334Speterfdcloseexec(struct thread *td)
191418334Speter{
191518334Speter	struct filedesc *fdp;
191618334Speter	int i;
191718334Speter
191818334Speter	/* Certain daemons might not have file descriptors. */
191918334Speter	fdp = td->td_proc->p_fd;
192018334Speter	if (fdp == NULL)
192118334Speter		return;
192218334Speter
192318334Speter	FILEDESC_XLOCK(fdp);
192418334Speter
192518334Speter	/*
192618334Speter	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1927132718Skan	 * may block and rip them out from under us.
192850397Sobrien	 */
192918334Speter	for (i = 0; i <= fdp->fd_lastfile; i++) {
1930132718Skan		if (fdp->fd_ofiles[i] != NULL &&
1931132718Skan		    (fdp->fd_ofiles[i]->f_type == DTYPE_MQUEUE ||
193290075Sobrien		    (fdp->fd_ofileflags[i] & UF_EXCLOSE))) {
193318334Speter			struct file *fp;
193418334Speter
193518334Speter			knote_fdclose(td, i);
193618334Speter			/*
193718334Speter			 * NULL-out descriptor prior to close to avoid
193818334Speter			 * a race while close blocks.
193918334Speter			 */
194018334Speter			fp = fdp->fd_ofiles[i];
194118334Speter			fdp->fd_ofiles[i] = NULL;
194218334Speter			fdp->fd_ofileflags[i] = 0;
194318334Speter			fdunused(fdp, i);
194418334Speter			if (fp->f_type == DTYPE_MQUEUE)
194518334Speter				mq_fdclose(td, i, fp);
194618334Speter			FILEDESC_XUNLOCK(fdp);
194718334Speter			(void) closef(fp, td);
194818334Speter			FILEDESC_XLOCK(fdp);
194918334Speter		}
195018334Speter	}
195118334Speter	FILEDESC_XUNLOCK(fdp);
195218334Speter}
195318334Speter
195418334Speter/*
195518334Speter * It is unsafe for set[ug]id processes to be started with file
195618334Speter * descriptors 0..2 closed, as these descriptors are given implicit
195718334Speter * significance in the Standard C library.  fdcheckstd() will create a
195818334Speter * descriptor referencing /dev/null for each of stdin, stdout, and
195918334Speter * stderr that is not already open.
196018334Speter */
196118334Speterint
196218334Speterfdcheckstd(struct thread *td)
196318334Speter{
196418334Speter	struct filedesc *fdp;
196518334Speter	register_t retval, save;
196618334Speter	int i, error, devnull;
196718334Speter
196818334Speter	fdp = td->td_proc->p_fd;
196918334Speter	if (fdp == NULL)
1970132718Skan		return (0);
197118334Speter	KASSERT(fdp->fd_refcnt == 1, ("the fdtable should not be shared"));
197218334Speter	devnull = -1;
197318334Speter	error = 0;
197418334Speter	for (i = 0; i < 3; i++) {
197518334Speter		if (fdp->fd_ofiles[i] != NULL)
197618334Speter			continue;
197718334Speter		if (devnull < 0) {
197818334Speter			save = td->td_retval[0];
197918334Speter			error = kern_open(td, "/dev/null", UIO_SYSSPACE,
198090075Sobrien			    O_RDWR, 0);
198190075Sobrien			devnull = td->td_retval[0];
198290075Sobrien			KASSERT(devnull == i, ("oof, we didn't get our fd"));
198390075Sobrien			td->td_retval[0] = save;
198418334Speter			if (error)
198590075Sobrien				break;
198618334Speter		} else {
198718334Speter			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
198818334Speter			if (error != 0)
198918334Speter				break;
199018334Speter		}
199118334Speter	}
199218334Speter	return (error);
199318334Speter}
199418334Speter
199518334Speter/*
199618334Speter * Internal form of close.  Decrement reference count on file structure.
199718334Speter * Note: td may be NULL when closing a file that was being passed in a
199818334Speter * message.
199918334Speter *
200018334Speter * XXXRW: Giant is not required for the caller, but often will be held; this
200118334Speter * makes it moderately likely the Giant will be recursed in the VFS case.
200218334Speter */
200318334Speterint
200418334Speterclosef(struct file *fp, struct thread *td)
200518334Speter{
200618334Speter	struct vnode *vp;
200718334Speter	struct flock lf;
200818334Speter	struct filedesc_to_leader *fdtol;
200918334Speter	struct filedesc *fdp;
201018334Speter
201118334Speter	/*
201218334Speter	 * POSIX record locking dictates that any close releases ALL
201318334Speter	 * locks owned by this process.  This is handled by setting
201418334Speter	 * a flag in the unlock to free ONLY locks obeying POSIX
201518334Speter	 * semantics, and not to free BSD-style file locks.
201618334Speter	 * If the descriptor was in a message, POSIX-style locks
201718334Speter	 * aren't passed with the descriptor, and the thread pointer
201818334Speter	 * will be NULL.  Callers should be careful only to pass a
201918334Speter	 * NULL thread pointer when there really is no owning
202018334Speter	 * context that might have locks, or the locks will be
202118334Speter	 * leaked.
202218334Speter	 */
202318334Speter	if (fp->f_type == DTYPE_VNODE && td != NULL) {
202418334Speter		int vfslocked;
202518334Speter
202618334Speter		vp = fp->f_vnode;
202718334Speter		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
202818334Speter		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
202918334Speter			lf.l_whence = SEEK_SET;
203018334Speter			lf.l_start = 0;
203118334Speter			lf.l_len = 0;
203218334Speter			lf.l_type = F_UNLCK;
203318334Speter			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
203418334Speter					   F_UNLCK, &lf, F_POSIX);
203518334Speter		}
203618334Speter		fdtol = td->td_proc->p_fdtol;
203718334Speter		if (fdtol != NULL) {
203818334Speter			/*
203918334Speter			 * Handle special case where file descriptor table is
204018334Speter			 * shared between multiple process leaders.
204118334Speter			 */
204218334Speter			fdp = td->td_proc->p_fd;
204318334Speter			FILEDESC_XLOCK(fdp);
204418334Speter			for (fdtol = fdtol->fdl_next;
204518334Speter			     fdtol != td->td_proc->p_fdtol;
204618334Speter			     fdtol = fdtol->fdl_next) {
204718334Speter				if ((fdtol->fdl_leader->p_flag &
204818334Speter				     P_ADVLOCK) == 0)
204918334Speter					continue;
205018334Speter				fdtol->fdl_holdcount++;
205118334Speter				FILEDESC_XUNLOCK(fdp);
205218334Speter				lf.l_whence = SEEK_SET;
205318334Speter				lf.l_start = 0;
205418334Speter				lf.l_len = 0;
205590075Sobrien				lf.l_type = F_UNLCK;
205618334Speter				vp = fp->f_vnode;
205718334Speter				(void) VOP_ADVLOCK(vp,
205818334Speter						   (caddr_t)fdtol->fdl_leader,
205918334Speter						   F_UNLCK, &lf, F_POSIX);
206018334Speter				FILEDESC_XLOCK(fdp);
206118334Speter				fdtol->fdl_holdcount--;
206218334Speter				if (fdtol->fdl_holdcount == 0 &&
206318334Speter				    fdtol->fdl_wakeup != 0) {
206418334Speter					fdtol->fdl_wakeup = 0;
206518334Speter					wakeup(fdtol);
206618334Speter				}
206718334Speter			}
206818334Speter			FILEDESC_XUNLOCK(fdp);
206918334Speter		}
207018334Speter		VFS_UNLOCK_GIANT(vfslocked);
207118334Speter	}
2072117395Skan	return (fdrop(fp, td));
2073117395Skan}
2074117395Skan
2075117395Skan/*
2076132718Skan * Initialize the file pointer with the specified properties.
2077117395Skan *
2078117395Skan * The ops are set with release semantics to be certain that the flags, type,
2079117395Skan * and data are visible when ops is.  This is to prevent ops methods from being
2080132718Skan * called with bad data.
2081117395Skan */
2082117395Skanvoid
2083117395Skanfinit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
2084132718Skan{
2085117395Skan	fp->f_data = data;
2086132718Skan	fp->f_flag = flag;
2087117395Skan	fp->f_type = type;
2088117395Skan	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
2089117395Skan}
2090117395Skan
2091117395Skanstruct file *
209218334Speterfget_unlocked(struct filedesc *fdp, int fd)
209318334Speter{
209418334Speter	struct file *fp;
209518334Speter	u_int count;
2096132718Skan
209718334Speter	if (fd < 0 || fd >= fdp->fd_nfiles)
209890075Sobrien		return (NULL);
209918334Speter	/*
210018334Speter	 * Fetch the descriptor locklessly.  We avoid fdrop() races by
210118334Speter	 * never raising a refcount above 0.  To accomplish this we have
210290075Sobrien	 * to use a cmpset loop rather than an atomic_add.  The descriptor
210390075Sobrien	 * must be re-verified once we acquire a reference to be certain
210418334Speter	 * that the identity is still correct and we did not lose a race
210518334Speter	 * due to preemption.
210618334Speter	 */
210718334Speter	for (;;) {
210818334Speter		fp = fdp->fd_ofiles[fd];
210918334Speter		if (fp == NULL)
211018334Speter			break;
211118334Speter		count = fp->f_count;
211218334Speter		if (count == 0)
211390075Sobrien			continue;
211490075Sobrien		/*
211590075Sobrien		 * Use an acquire barrier to prevent caching of fd_ofiles
211690075Sobrien		 * so it is refreshed for verification.
211790075Sobrien		 */
211890075Sobrien		if (atomic_cmpset_acq_int(&fp->f_count, count, count + 1) != 1)
211990075Sobrien			continue;
212090075Sobrien		if (fp == fdp->fd_ofiles[fd])
212190075Sobrien			break;
212290075Sobrien		fdrop(fp, curthread);
212390075Sobrien	}
212490075Sobrien
212590075Sobrien	return (fp);
212690075Sobrien}
212790075Sobrien
212890075Sobrien/*
212990075Sobrien * Extract the file pointer associated with the specified descriptor for the
213090075Sobrien * current user process.
213190075Sobrien *
213290075Sobrien * If the descriptor doesn't exist or doesn't match 'flags', EBADF is
213390075Sobrien * returned.
213490075Sobrien *
213590075Sobrien * If an error occured the non-zero error is returned and *fpp is set to
213690075Sobrien * NULL.  Otherwise *fpp is held and set and zero is returned.  Caller is
213790075Sobrien * responsible for fdrop().
213890075Sobrien */
213990075Sobrienstatic __inline int
214090075Sobrien_fget(struct thread *td, int fd, struct file **fpp, int flags)
214190075Sobrien{
214290075Sobrien	struct filedesc *fdp;
214390075Sobrien	struct file *fp;
214490075Sobrien
214590075Sobrien	*fpp = NULL;
214690075Sobrien	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
214718334Speter		return (EBADF);
214850397Sobrien	if ((fp = fget_unlocked(fdp, fd)) == NULL)
214950397Sobrien		return (EBADF);
215050397Sobrien	if (fp->f_ops == &badfileops) {
215118334Speter		fdrop(fp, td);
215218334Speter		return (EBADF);
215350397Sobrien	}
215418334Speter	/*
215590075Sobrien	 * FREAD and FWRITE failure return EBADF as per POSIX.
215618334Speter	 *
215718334Speter	 * Only one flag, or 0, may be specified.
215818334Speter	 */
215918334Speter	if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
216018334Speter	    (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
216118334Speter		fdrop(fp, td);
216218334Speter		return (EBADF);
216318334Speter	}
216418334Speter	*fpp = fp;
216518334Speter	return (0);
216618334Speter}
216718334Speter
/*
 * Look up descriptor fd for td without requiring any access mode; see
 * _fget() for the return convention.
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return (_fget(td, fd, fpp, 0));
}
217490075Sobrien
217590075Sobrienint
217690075Sobrienfget_read(struct thread *td, int fd, struct file **fpp)
217790075Sobrien{
217890075Sobrien
217918334Speter	return(_fget(td, fd, fpp, FREAD));
218018334Speter}
2181132718Skan
218218334Speterint
218318334Speterfget_write(struct thread *td, int fd, struct file **fpp)
218490075Sobrien{
218590075Sobrien
218690075Sobrien	return(_fget(td, fd, fpp, FWRITE));
218718334Speter}
218890075Sobrien
218918334Speter/*
219018334Speter * Like fget() but loads the underlying vnode, or returns an error if the
219118334Speter * descriptor does not represent a vnode.  Note that pipes use vnodes but
219290075Sobrien * never have VM objects.  The returned vnode will be vref()'d.
219318334Speter *
219418334Speter * XXX: what about the unused flags ?
219518334Speter */
219618334Speterstatic __inline int
219718334Speter_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
219818334Speter{
219918334Speter	struct file *fp;
220018334Speter	int error;
220118334Speter
220218334Speter	*vpp = NULL;
220352284Sobrien	if ((error = _fget(td, fd, &fp, flags)) != 0)
220452284Sobrien		return (error);
220552284Sobrien	if (fp->f_vnode == NULL) {
220652284Sobrien		error = EINVAL;
220718334Speter	} else {
220818334Speter		*vpp = fp->f_vnode;
2209117395Skan		vref(*vpp);
221018334Speter	}
221118334Speter	fdrop(fp, td);
221218334Speter
221318334Speter	return (error);
2214117395Skan}
2215117395Skan
/*
 * Return the referenced vnode behind descriptor fd without requiring any
 * access mode; see _fgetvp().
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
222218334Speter
222318334Speterint
222418334Speterfgetvp_read(struct thread *td, int fd, struct vnode **vpp)
2225117395Skan{
222618334Speter
2227117395Skan	return (_fgetvp(td, fd, vpp, FREAD));
2228117395Skan}
2229117395Skan
#ifdef notyet
/*
 * Return the referenced vnode behind descriptor fd, requiring that the file
 * be open for writing (FWRITE); see _fgetvp().  Only compiled when
 * "notyet" is defined.
 */
int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, FWRITE);
	return (error);
}
#endif
2238117395Skan
223952284Sobrien/*
224052284Sobrien * Like fget() but loads the underlying socket, or returns an error if the
224152284Sobrien * descriptor does not represent a socket.
224252284Sobrien *
224318334Speter * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
224418334Speter * in the future.
224518334Speter *
2246132718Skan * Note: fgetsock() and fputsock() are deprecated, as consumers should rely
224752284Sobrien * on their file descriptor reference to prevent the socket from being free'd
224852284Sobrien * during use.
224952284Sobrien */
225052284Sobrienint
225118334Speterfgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
225218334Speter{
225318334Speter	struct file *fp;
225418334Speter	int error;
225518334Speter
225618334Speter	*spp = NULL;
225718334Speter	if (fflagp != NULL)
225818334Speter		*fflagp = 0;
225952284Sobrien	if ((error = _fget(td, fd, &fp, 0)) != 0)
226052284Sobrien		return (error);
226152284Sobrien	if (fp->f_type != DTYPE_SOCKET) {
226252284Sobrien		error = ENOTSOCK;
226318334Speter	} else {
226418334Speter		*spp = fp->f_data;
226518334Speter		if (fflagp)
226618334Speter			*fflagp = fp->f_flag;
226718334Speter		SOCK_LOCK(*spp);
226818334Speter		soref(*spp);
226918334Speter		SOCK_UNLOCK(*spp);
227018334Speter	}
227118334Speter	fdrop(fp, td);
227218334Speter
227318334Speter	return (error);
227452284Sobrien}
227552284Sobrien
/*
 * Drop the reference count on the socket and XXX release the SX lock in the
 * future.  The last reference closes the socket.
 *
 * Note: fputsock() is deprecated, see comment for fgetsock().
 */
void
fputsock(struct socket *so)
{

	/*
	 * Both locks are acquired here and not released in this function;
	 * NOTE(review): presumably sorele() drops them -- confirm against
	 * its definition.
	 */
	ACCEPT_LOCK();
	SOCK_LOCK(so);
	sorele(so);
}
229018334Speter
229118334Speter/*
229218334Speter * Handle the last reference to a file being closed.
229318334Speter */
229418334Speterint
229518334Speter_fdrop(struct file *fp, struct thread *td)
229618334Speter{
229718334Speter	int error;
2298132718Skan
229918334Speter	error = 0;
230018334Speter	if (fp->f_count != 0)
230118334Speter		panic("fdrop: count %d", fp->f_count);
230218334Speter	if (fp->f_ops != &badfileops)
230318334Speter		error = fo_close(fp, td);
230418334Speter	/*
230518334Speter	 * The f_cdevpriv cannot be assigned non-NULL value while we
230690075Sobrien	 * are destroying the file.
230718334Speter	 */
230818334Speter	if (fp->f_cdevpriv != NULL)
230918334Speter		devfs_fpdrop(fp);
231018334Speter	atomic_subtract_int(&openfiles, 1);
231118334Speter	crfree(fp->f_cred);
231218334Speter	uma_zfree(file_zone, fp);
231318334Speter
231418334Speter	return (error);
231518334Speter}
231690075Sobrien
231790075Sobrien/*
231890075Sobrien * Apply an advisory lock on a file descriptor.
231918334Speter *
232018334Speter * Just attempt to get a record lock of the requested type on the entire file
232118334Speter * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
232218334Speter */
232318334Speter#ifndef _SYS_SYSPROTO_H_
232418334Speterstruct flock_args {
232518334Speter	int	fd;
232618334Speter	int	how;
232718334Speter};
232818334Speter#endif
232918334Speter/* ARGSUSED */
233018334Speterint
233190075Sobrienflock(struct thread *td, struct flock_args *uap)
233290075Sobrien{
233390075Sobrien	struct file *fp;
233490075Sobrien	struct vnode *vp;
233590075Sobrien	struct flock lf;
233652284Sobrien	int vfslocked;
233718334Speter	int error;
233818334Speter
233918334Speter	if ((error = fget(td, uap->fd, &fp)) != 0)
234090075Sobrien		return (error);
234118334Speter	if (fp->f_type != DTYPE_VNODE) {
234218334Speter		fdrop(fp, td);
2343117395Skan		return (EOPNOTSUPP);
2344117395Skan	}
2345117395Skan
234618334Speter	vp = fp->f_vnode;
234718334Speter	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
234818334Speter	lf.l_whence = SEEK_SET;
234918334Speter	lf.l_start = 0;
235018334Speter	lf.l_len = 0;
235118334Speter	if (uap->how & LOCK_UN) {
235290075Sobrien		lf.l_type = F_UNLCK;
235390075Sobrien		atomic_clear_int(&fp->f_flag, FHASLOCK);
235490075Sobrien		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
235590075Sobrien		goto done2;
235690075Sobrien	}
235790075Sobrien	if (uap->how & LOCK_EX)
235890075Sobrien		lf.l_type = F_WRLCK;
235918334Speter	else if (uap->how & LOCK_SH)
236018334Speter		lf.l_type = F_RDLCK;
236118334Speter	else {
236290075Sobrien		error = EBADF;
236390075Sobrien		goto done2;
236490075Sobrien	}
236590075Sobrien	atomic_set_int(&fp->f_flag, FHASLOCK);
236690075Sobrien	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
236790075Sobrien	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
236890075Sobriendone2:
236918334Speter	fdrop(fp, td);
237018334Speter	VFS_UNLOCK_GIANT(vfslocked);
237118334Speter	return (error);
237218334Speter}
237318334Speter/*
237418334Speter * Duplicate the specified descriptor to a free descriptor.
237518334Speter */
237618334Speterint
237718334Speterdupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode, int error)
237818334Speter{
237918334Speter	struct file *wfp;
238018334Speter	struct file *fp;
238118334Speter
238218334Speter	/*
238318334Speter	 * If the to-be-dup'd fd number is greater than the allowed number
238418334Speter	 * of file descriptors, or the fd to be dup'd has already been
238518334Speter	 * closed, then reject.
238618334Speter	 */
238718334Speter	FILEDESC_XLOCK(fdp);
238818334Speter	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
238918334Speter	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
239018334Speter		FILEDESC_XUNLOCK(fdp);
239118334Speter		return (EBADF);
239218334Speter	}
239318334Speter
239418334Speter	/*
239518334Speter	 * There are two cases of interest here.
239618334Speter	 *
239718334Speter	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
239818334Speter	 *
239918334Speter	 * For ENXIO steal away the file structure from (dfd) and store it in
240090075Sobrien	 * (indx).  (dfd) is effectively closed by this operation.
240190075Sobrien	 *
240218334Speter	 * Any other error code is just returned.
240390075Sobrien	 */
240490075Sobrien	switch (error) {
240590075Sobrien	case ENODEV:
240690075Sobrien		/*
240790075Sobrien		 * Check that the mode the file is being opened for is a
240890075Sobrien		 * subset of the mode of the existing descriptor.
240990075Sobrien		 */
241090075Sobrien		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
241190075Sobrien			FILEDESC_XUNLOCK(fdp);
241290075Sobrien			return (EACCES);
241390075Sobrien		}
241490075Sobrien		fp = fdp->fd_ofiles[indx];
241590075Sobrien		fdp->fd_ofiles[indx] = wfp;
241690075Sobrien		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
241718334Speter		if (fp == NULL)
241818334Speter			fdused(fdp, indx);
241918334Speter		fhold(wfp);
242018334Speter		FILEDESC_XUNLOCK(fdp);
242118334Speter		if (fp != NULL)
242218334Speter			/*
242318334Speter			 * We now own the reference to fp that the ofiles[]
242418334Speter			 * array used to own.  Release it.
242518334Speter			 */
242618334Speter			fdrop(fp, td);
242718334Speter		return (0);
242818334Speter
242918334Speter	case ENXIO:
243052284Sobrien		/*
243152284Sobrien		 * Steal away the file pointer from dfd and stuff it into indx.
243218334Speter		 */
243318334Speter		fp = fdp->fd_ofiles[indx];
243418334Speter		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
243518334Speter		fdp->fd_ofiles[dfd] = NULL;
2436132718Skan		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2437132718Skan		fdp->fd_ofileflags[dfd] = 0;
2438132718Skan		fdunused(fdp, dfd);
2439132718Skan		if (fp == NULL)
244018334Speter			fdused(fdp, indx);
244190075Sobrien		FILEDESC_XUNLOCK(fdp);
244290075Sobrien
244390075Sobrien		/*
244490075Sobrien		 * We now own the reference to fp that the ofiles[] array
244518334Speter		 * used to own.  Release it.
244618334Speter		 */
244718334Speter		if (fp != NULL)
244818334Speter			fdrop(fp, td);
244918334Speter		return (0);
245018334Speter
245118334Speter	default:
245218334Speter		FILEDESC_XUNLOCK(fdp);
245390075Sobrien		return (error);
245418334Speter	}
245518334Speter	/* NOTREACHED */
245618334Speter}
245718334Speter
245818334Speter/*
245918334Speter * Scan all active processes and prisons to see if any of them have a current
246018334Speter * or root directory of `olddp'. If so, replace them with the new mount point.
246118334Speter */
246218334Spetervoid
246318334Spetermountcheckdirs(struct vnode *olddp, struct vnode *newdp)
246418334Speter{
246518334Speter	struct filedesc *fdp;
246618334Speter	struct prison *pr;
2467117395Skan	struct proc *p;
246818334Speter	int nrele;
246918334Speter
247018334Speter	if (vrefcnt(olddp) == 1)
247118334Speter		return;
247218334Speter	nrele = 0;
247318334Speter	sx_slock(&allproc_lock);
247418334Speter	FOREACH_PROC_IN_SYSTEM(p) {
247590075Sobrien		fdp = fdhold(p);
247618334Speter		if (fdp == NULL)
247718334Speter			continue;
247818334Speter		FILEDESC_XLOCK(fdp);
247918334Speter		if (fdp->fd_cdir == olddp) {
248090075Sobrien			vref(newdp);
248190075Sobrien			fdp->fd_cdir = newdp;
248252284Sobrien			nrele++;
248318334Speter		}
248418334Speter		if (fdp->fd_rdir == olddp) {
248518334Speter			vref(newdp);
248618334Speter			fdp->fd_rdir = newdp;
248790075Sobrien			nrele++;
248852284Sobrien		}
248952284Sobrien		if (fdp->fd_jdir == olddp) {
249052284Sobrien			vref(newdp);
249152284Sobrien			fdp->fd_jdir = newdp;
249290075Sobrien			nrele++;
249390075Sobrien		}
249490075Sobrien		FILEDESC_XUNLOCK(fdp);
249590075Sobrien		fddrop(fdp);
249690075Sobrien	}
249718334Speter	sx_sunlock(&allproc_lock);
249818334Speter	if (rootvnode == olddp) {
249918334Speter		vref(newdp);
2500117395Skan		rootvnode = newdp;
250118334Speter		nrele++;
250218334Speter	}
250318334Speter	mtx_lock(&prison0.pr_mtx);
250418334Speter	if (prison0.pr_root == olddp) {
250518334Speter		vref(newdp);
250618334Speter		prison0.pr_root = newdp;
250718334Speter		nrele++;
250818334Speter	}
250918334Speter	mtx_unlock(&prison0.pr_mtx);
251090075Sobrien	sx_slock(&allprison_lock);
251118334Speter	TAILQ_FOREACH(pr, &allprison, pr_list) {
251250397Sobrien		mtx_lock(&pr->pr_mtx);
251318334Speter		if (pr->pr_root == olddp) {
251418334Speter			vref(newdp);
251518334Speter			pr->pr_root = newdp;
251618334Speter			nrele++;
251718334Speter		}
251818334Speter		mtx_unlock(&pr->pr_mtx);
251990075Sobrien	}
252090075Sobrien	sx_sunlock(&allprison_lock);
252190075Sobrien	while (nrele--)
252218334Speter		vrele(olddp);
252318334Speter}
252418334Speter
252518334Speterstruct filedesc_to_leader *
252618334Speterfiledesc_to_leader_alloc(struct filedesc_to_leader *old, struct filedesc *fdp, struct proc *leader)
252718334Speter{
252818334Speter	struct filedesc_to_leader *fdtol;
252918334Speter
2530132718Skan	fdtol = malloc(sizeof(struct filedesc_to_leader),
253118334Speter	       M_FILEDESC_TO_LEADER,
253218334Speter	       M_WAITOK);
253318334Speter	fdtol->fdl_refcount = 1;
253450397Sobrien	fdtol->fdl_holdcount = 0;
253550397Sobrien	fdtol->fdl_wakeup = 0;
253650397Sobrien	fdtol->fdl_leader = leader;
253718334Speter	if (old != NULL) {
253818334Speter		FILEDESC_XLOCK(fdp);
253952284Sobrien		fdtol->fdl_next = old->fdl_next;
254052284Sobrien		fdtol->fdl_prev = old;
2541132718Skan		old->fdl_next = fdtol;
2542132718Skan		fdtol->fdl_next->fdl_prev = fdtol;
254318334Speter		FILEDESC_XUNLOCK(fdp);
254418334Speter	} else {
254518334Speter		fdtol->fdl_next = fdtol;
254618334Speter		fdtol->fdl_prev = fdtol;
254718334Speter	}
254818334Speter	return (fdtol);
254918334Speter}
2550132718Skan
255118334Speter/*
255290075Sobrien * Get file structures globally.
255318334Speter */
255418334Speterstatic int
255518334Spetersysctl_kern_file(SYSCTL_HANDLER_ARGS)
255618334Speter{
255718334Speter	struct xfile xf;
255818334Speter	struct filedesc *fdp;
255918334Speter	struct file *fp;
256018334Speter	struct proc *p;
256190075Sobrien	int error, n;
256218334Speter
256318334Speter	error = sysctl_wire_old_buffer(req, 0);
256418334Speter	if (error != 0)
256518334Speter		return (error);
256618334Speter	if (req->oldptr == NULL) {
256718334Speter		n = 0;
256818334Speter		sx_slock(&allproc_lock);
256918334Speter		FOREACH_PROC_IN_SYSTEM(p) {
257018334Speter			if (p->p_state == PRS_NEW)
257150397Sobrien				continue;
257250397Sobrien			fdp = fdhold(p);
257350397Sobrien			if (fdp == NULL)
257450397Sobrien				continue;
257550397Sobrien			/* overestimates sparse tables. */
257650397Sobrien			if (fdp->fd_lastfile > 0)
257750397Sobrien				n += fdp->fd_lastfile;
257850397Sobrien			fddrop(fdp);
257950397Sobrien		}
258018334Speter		sx_sunlock(&allproc_lock);
258118334Speter		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
258218334Speter	}
258318334Speter	error = 0;
258418334Speter	bzero(&xf, sizeof(xf));
258518334Speter	xf.xf_size = sizeof(xf);
258618334Speter	sx_slock(&allproc_lock);
2587132718Skan	FOREACH_PROC_IN_SYSTEM(p) {
258818334Speter		if (p->p_state == PRS_NEW)
258918334Speter			continue;
259090075Sobrien		PROC_LOCK(p);
259118334Speter		if (p_cansee(req->td, p) != 0) {
259290075Sobrien			PROC_UNLOCK(p);
259390075Sobrien			continue;
259418334Speter		}
259518334Speter		xf.xf_pid = p->p_pid;
259618334Speter		xf.xf_uid = p->p_ucred->cr_uid;
259718334Speter		PROC_UNLOCK(p);
259818334Speter		fdp = fdhold(p);
259990075Sobrien		if (fdp == NULL)
260090075Sobrien			continue;
260190075Sobrien		FILEDESC_SLOCK(fdp);
260218334Speter		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
260318334Speter			if ((fp = fdp->fd_ofiles[n]) == NULL)
2604132718Skan				continue;
260518334Speter			xf.xf_fd = n;
2606132718Skan			xf.xf_file = fp;
260718334Speter			xf.xf_data = fp->f_data;
260818334Speter			xf.xf_vnode = fp->f_vnode;
260918334Speter			xf.xf_type = fp->f_type;
261018334Speter			xf.xf_count = fp->f_count;
261118334Speter			xf.xf_msgcount = 0;
261250397Sobrien			xf.xf_offset = fp->f_offset;
261350397Sobrien			xf.xf_flag = fp->f_flag;
261450397Sobrien			error = SYSCTL_OUT(req, &xf, sizeof(xf));
261590075Sobrien			if (error)
261650397Sobrien				break;
261750397Sobrien		}
261818334Speter		FILEDESC_SUNLOCK(fdp);
261918334Speter		fddrop(fdp);
262018334Speter		if (error)
262118334Speter			break;
262218334Speter	}
262318334Speter	sx_sunlock(&allproc_lock);
262418334Speter	return (error);
262590075Sobrien}
262690075Sobrien
262790075SobrienSYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
262850397Sobrien    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
262950397Sobrien
263018334Speter#ifdef KINFO_OFILE_SIZE
263118334SpeterCTASSERT(sizeof(struct kinfo_ofile) == KINFO_OFILE_SIZE);
263290075Sobrien#endif
263390075Sobrien
263418334Speter#ifdef COMPAT_FREEBSD7
263518334Speterstatic int
263618334Speterexport_vnode_for_osysctl(struct vnode *vp, int type,
263718334Speter    struct kinfo_ofile *kif, struct filedesc *fdp, struct sysctl_req *req)
263818334Speter{
263918334Speter	int error;
264018334Speter	char *fullpath, *freepath;
264118334Speter	int vfslocked;
264218334Speter
2643132718Skan	bzero(kif, sizeof(*kif));
264418334Speter	kif->kf_structsize = sizeof(*kif);
2645132718Skan
2646132718Skan	vref(vp);
2647132718Skan	kif->kf_fd = type;
2648132718Skan	kif->kf_type = KF_TYPE_VNODE;
2649132718Skan	/* This function only handles directories. */
2650132718Skan	if (vp->v_type != VDIR) {
2651132718Skan		vrele(vp);
2652132718Skan		return (ENOTDIR);
2653132718Skan	}
2654132718Skan	kif->kf_vnode_type = KF_VTYPE_VDIR;
2655132718Skan
2656132718Skan	/*
265718334Speter	 * This is not a true file descriptor, so we set a bogus refcount
265818334Speter	 * and offset to indicate these fields should be ignored.
265918334Speter	 */
266018334Speter	kif->kf_ref_count = -1;
266118334Speter	kif->kf_offset = -1;
2662132718Skan
266318334Speter	freepath = NULL;
266418334Speter	fullpath = "-";
266518334Speter	FILEDESC_SUNLOCK(fdp);
266618334Speter	vn_fullpath(curthread, vp, &fullpath, &freepath);
2667132718Skan	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
266818334Speter	vrele(vp);
2669132718Skan	VFS_UNLOCK_GIANT(vfslocked);
2670132718Skan	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
267118334Speter	if (freepath != NULL)
267218334Speter		free(freepath, M_TEMP);
2673132718Skan	error = SYSCTL_OUT(req, kif, sizeof(*kif));
2674132718Skan	FILEDESC_SLOCK(fdp);
267518334Speter	return (error);
2676132718Skan}
267718334Speter
267818334Speter/*
267918334Speter * Get per-process file descriptors for use by procstat(1), et al.
268018334Speter */
2681117395Skanstatic int
268218334Spetersysctl_kern_proc_ofiledesc(SYSCTL_HANDLER_ARGS)
2683117395Skan{
2684132718Skan	char *fullpath, *freepath;
268518334Speter	struct kinfo_ofile *kif;
268690075Sobrien	struct filedesc *fdp;
2687117395Skan	int error, i, *name;
268818334Speter	struct socket *so;
268918334Speter	struct vnode *vp;
269018334Speter	struct file *fp;
269150397Sobrien	struct proc *p;
269250397Sobrien	struct tty *tp;
269350397Sobrien	int vfslocked;
269490075Sobrien
269550397Sobrien	name = (int *)arg1;
2696117395Skan	if ((p = pfind((pid_t)name[0])) == NULL)
269750397Sobrien		return (ESRCH);
269850397Sobrien	if ((error = p_candebug(curthread, p))) {
269950397Sobrien		PROC_UNLOCK(p);
270050397Sobrien		return (error);
270118334Speter	}
270218334Speter	fdp = fdhold(p);
270318334Speter	PROC_UNLOCK(p);
270418334Speter	if (fdp == NULL)
270518334Speter		return (ENOENT);
270618334Speter	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
270718334Speter	FILEDESC_SLOCK(fdp);
270890075Sobrien	if (fdp->fd_cdir != NULL)
270918334Speter		export_vnode_for_osysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
271018334Speter				fdp, req);
2711117395Skan	if (fdp->fd_rdir != NULL)
271218334Speter		export_vnode_for_osysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
271318334Speter				fdp, req);
271418334Speter	if (fdp->fd_jdir != NULL)
271518334Speter		export_vnode_for_osysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
271618334Speter				fdp, req);
271718334Speter	for (i = 0; i < fdp->fd_nfiles; i++) {
271818334Speter		if ((fp = fdp->fd_ofiles[i]) == NULL)
271918334Speter			continue;
2720132718Skan		bzero(kif, sizeof(*kif));
272118334Speter		kif->kf_structsize = sizeof(*kif);
272218334Speter		vp = NULL;
272318334Speter		so = NULL;
2724117395Skan		tp = NULL;
272518334Speter		kif->kf_fd = i;
272618334Speter		switch (fp->f_type) {
272718334Speter		case DTYPE_VNODE:
272818334Speter			kif->kf_type = KF_TYPE_VNODE;
272918334Speter			vp = fp->f_vnode;
273018334Speter			break;
2731132718Skan
273218334Speter		case DTYPE_SOCKET:
273318334Speter			kif->kf_type = KF_TYPE_SOCKET;
273418334Speter			so = fp->f_data;
2735117395Skan			break;
273618334Speter
273718334Speter		case DTYPE_PIPE:
273818334Speter			kif->kf_type = KF_TYPE_PIPE;
273918334Speter			break;
274018334Speter
274118334Speter		case DTYPE_FIFO:
274218334Speter			kif->kf_type = KF_TYPE_FIFO;
274318334Speter			vp = fp->f_vnode;
274418334Speter			break;
274518334Speter
2746117395Skan		case DTYPE_KQUEUE:
274718334Speter			kif->kf_type = KF_TYPE_KQUEUE;
274818334Speter			break;
274918334Speter
275018334Speter		case DTYPE_CRYPTO:
275118334Speter			kif->kf_type = KF_TYPE_CRYPTO;
2752117395Skan			break;
275318334Speter
275418334Speter		case DTYPE_MQUEUE:
275518334Speter			kif->kf_type = KF_TYPE_MQUEUE;
275618334Speter			break;
2757132718Skan
275818334Speter		case DTYPE_SHM:
2759117395Skan			kif->kf_type = KF_TYPE_SHM;
276018334Speter			break;
276118334Speter
276218334Speter		case DTYPE_SEM:
276318334Speter			kif->kf_type = KF_TYPE_SEM;
276418334Speter			break;
276518334Speter
2766132718Skan		case DTYPE_PTS:
276718334Speter			kif->kf_type = KF_TYPE_PTS;
276890075Sobrien			tp = fp->f_data;
276990075Sobrien			break;
277090075Sobrien
277190075Sobrien		default:
277290075Sobrien			kif->kf_type = KF_TYPE_UNKNOWN;
277318334Speter			break;
277418334Speter		}
2775117395Skan		kif->kf_ref_count = fp->f_count;
277618334Speter		if (fp->f_flag & FREAD)
277718334Speter			kif->kf_flags |= KF_FLAG_READ;
277818334Speter		if (fp->f_flag & FWRITE)
277918334Speter			kif->kf_flags |= KF_FLAG_WRITE;
278018334Speter		if (fp->f_flag & FAPPEND)
278118334Speter			kif->kf_flags |= KF_FLAG_APPEND;
278218334Speter		if (fp->f_flag & FASYNC)
278318334Speter			kif->kf_flags |= KF_FLAG_ASYNC;
278418334Speter		if (fp->f_flag & FFSYNC)
278518334Speter			kif->kf_flags |= KF_FLAG_FSYNC;
278618334Speter		if (fp->f_flag & FNONBLOCK)
278718334Speter			kif->kf_flags |= KF_FLAG_NONBLOCK;
278818334Speter		if (fp->f_flag & O_DIRECT)
278918334Speter			kif->kf_flags |= KF_FLAG_DIRECT;
279018334Speter		if (fp->f_flag & FHASLOCK)
279118334Speter			kif->kf_flags |= KF_FLAG_HASLOCK;
279218334Speter		kif->kf_offset = fp->f_offset;
2793132718Skan		if (vp != NULL) {
279418334Speter			vref(vp);
279518334Speter			switch (vp->v_type) {
279618334Speter			case VNON:
279718334Speter				kif->kf_vnode_type = KF_VTYPE_VNON;
279818334Speter				break;
279918334Speter			case VREG:
280018334Speter				kif->kf_vnode_type = KF_VTYPE_VREG;
280118334Speter				break;
280218334Speter			case VDIR:
280390075Sobrien				kif->kf_vnode_type = KF_VTYPE_VDIR;
280490075Sobrien				break;
280590075Sobrien			case VBLK:
280690075Sobrien				kif->kf_vnode_type = KF_VTYPE_VBLK;
280718334Speter				break;
280818334Speter			case VCHR:
280990075Sobrien				kif->kf_vnode_type = KF_VTYPE_VCHR;
281090075Sobrien				break;
281190075Sobrien			case VLNK:
281290075Sobrien				kif->kf_vnode_type = KF_VTYPE_VLNK;
281390075Sobrien				break;
281490075Sobrien			case VSOCK:
2815132718Skan				kif->kf_vnode_type = KF_VTYPE_VSOCK;
281690075Sobrien				break;
281790075Sobrien			case VFIFO:
281890075Sobrien				kif->kf_vnode_type = KF_VTYPE_VFIFO;
281990075Sobrien				break;
282090075Sobrien			case VBAD:
282190075Sobrien				kif->kf_vnode_type = KF_VTYPE_VBAD;
282290075Sobrien				break;
282390075Sobrien			default:
282490075Sobrien				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
282590075Sobrien				break;
282690075Sobrien			}
282790075Sobrien			/*
282890075Sobrien			 * It is OK to drop the filedesc lock here as we will
282990075Sobrien			 * re-validate and re-evaluate its properties when
2830117395Skan			 * the loop continues.
2831117395Skan			 */
283290075Sobrien			freepath = NULL;
283390075Sobrien			fullpath = "-";
283490075Sobrien			FILEDESC_SUNLOCK(fdp);
283590075Sobrien			vn_fullpath(curthread, vp, &fullpath, &freepath);
283690075Sobrien			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
283718334Speter			vrele(vp);
283818334Speter			VFS_UNLOCK_GIANT(vfslocked);
283918334Speter			strlcpy(kif->kf_path, fullpath,
284018334Speter			    sizeof(kif->kf_path));
284190075Sobrien			if (freepath != NULL)
284218334Speter				free(freepath, M_TEMP);
284318334Speter			FILEDESC_SLOCK(fdp);
284418334Speter		}
284518334Speter		if (so != NULL) {
2846117395Skan			struct sockaddr *sa;
284718334Speter
284818334Speter			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
284918334Speter			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
285018334Speter				bcopy(sa, &kif->kf_sa_local, sa->sa_len);
285118334Speter				free(sa, M_SONAME);
285218334Speter			}
285318334Speter			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
285490075Sobrien			    == 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
285518334Speter				bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
285618334Speter				free(sa, M_SONAME);
285790075Sobrien			}
285890075Sobrien			kif->kf_sock_domain =
285990075Sobrien			    so->so_proto->pr_domain->dom_family;
286018334Speter			kif->kf_sock_type = so->so_type;
286190075Sobrien			kif->kf_sock_protocol = so->so_proto->pr_protocol;
286218334Speter		}
286318334Speter		if (tp != NULL) {
286490075Sobrien			strlcpy(kif->kf_path, tty_devname(tp),
286590075Sobrien			    sizeof(kif->kf_path));
286690075Sobrien		}
286718334Speter		error = SYSCTL_OUT(req, kif, sizeof(*kif));
286818334Speter		if (error)
286918334Speter			break;
287018334Speter	}
287118334Speter	FILEDESC_SUNLOCK(fdp);
287218334Speter	fddrop(fdp);
287318334Speter	free(kif, M_TEMP);
287418334Speter	return (0);
287518334Speter}
287618334Speter
287718334Speterstatic SYSCTL_NODE(_kern_proc, KERN_PROC_OFILEDESC, ofiledesc, CTLFLAG_RD,
287818334Speter    sysctl_kern_proc_ofiledesc, "Process ofiledesc entries");
287918334Speter#endif	/* COMPAT_FREEBSD7 */
288018334Speter
#ifdef KINFO_FILE_SIZE
/* Fail the build if struct kinfo_file drifts from its declared size. */
CTASSERT(sizeof(struct kinfo_file) == KINFO_FILE_SIZE);
#endif
288418334Speter
288518334Speterstatic int
288618334Speterexport_vnode_for_sysctl(struct vnode *vp, int type,
288718334Speter    struct kinfo_file *kif, struct filedesc *fdp, struct sysctl_req *req)
288818334Speter{
288918334Speter	int error;
289018334Speter	char *fullpath, *freepath;
289118334Speter	int vfslocked;
289218334Speter
289318334Speter	bzero(kif, sizeof(*kif));
289418334Speter
289518334Speter	vref(vp);
289618334Speter	kif->kf_fd = type;
289718334Speter	kif->kf_type = KF_TYPE_VNODE;
289818334Speter	/* This function only handles directories. */
289918334Speter	if (vp->v_type != VDIR) {
290018334Speter		vrele(vp);
290118334Speter		return (ENOTDIR);
290218334Speter	}
290318334Speter	kif->kf_vnode_type = KF_VTYPE_VDIR;
290418334Speter
290518334Speter	/*
290618334Speter	 * This is not a true file descriptor, so we set a bogus refcount
290718334Speter	 * and offset to indicate these fields should be ignored.
290818334Speter	 */
290918334Speter	kif->kf_ref_count = -1;
291018334Speter	kif->kf_offset = -1;
291118334Speter
291218334Speter	freepath = NULL;
291318334Speter	fullpath = "-";
291418334Speter	FILEDESC_SUNLOCK(fdp);
291518334Speter	vn_fullpath(curthread, vp, &fullpath, &freepath);
2916132718Skan	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
291718334Speter	vrele(vp);
291818334Speter	VFS_UNLOCK_GIANT(vfslocked);
291918334Speter	strlcpy(kif->kf_path, fullpath, sizeof(kif->kf_path));
292018334Speter	if (freepath != NULL)
292118334Speter		free(freepath, M_TEMP);
292218334Speter	/* Pack record size down */
292318334Speter	kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
292418334Speter	    strlen(kif->kf_path) + 1;
292518334Speter	kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
292618334Speter	error = SYSCTL_OUT(req, kif, kif->kf_structsize);
2927132718Skan	FILEDESC_SLOCK(fdp);
292818334Speter	return (error);
292918334Speter}
293018334Speter
293118334Speter/*
293218334Speter * Get per-process file descriptors for use by procstat(1), et al.
2933132718Skan */
293418334Speterstatic int
293590075Sobriensysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
293618334Speter{
293718334Speter	char *fullpath, *freepath;
293818334Speter	struct kinfo_file *kif;
293990075Sobrien	struct filedesc *fdp;
294090075Sobrien	int error, i, *name;
294190075Sobrien	struct socket *so;
294290075Sobrien	struct vnode *vp;
294390075Sobrien	struct file *fp;
294490075Sobrien	struct proc *p;
294518334Speter	struct tty *tp;
294618334Speter	int vfslocked;
294718334Speter	size_t oldidx;
294818334Speter
294918334Speter	name = (int *)arg1;
295018334Speter	if ((p = pfind((pid_t)name[0])) == NULL)
295118334Speter		return (ESRCH);
295218334Speter	if ((error = p_candebug(curthread, p))) {
295318334Speter		PROC_UNLOCK(p);
295418334Speter		return (error);
295518334Speter	}
2956132718Skan	fdp = fdhold(p);
295718334Speter	PROC_UNLOCK(p);
2958132718Skan	if (fdp == NULL)
295918334Speter		return (ENOENT);
296090075Sobrien	kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK);
296118334Speter	FILEDESC_SLOCK(fdp);
296218334Speter	if (fdp->fd_cdir != NULL)
296318334Speter		export_vnode_for_sysctl(fdp->fd_cdir, KF_FD_TYPE_CWD, kif,
296418334Speter				fdp, req);
296518334Speter	if (fdp->fd_rdir != NULL)
296618334Speter		export_vnode_for_sysctl(fdp->fd_rdir, KF_FD_TYPE_ROOT, kif,
296718334Speter				fdp, req);
296890075Sobrien	if (fdp->fd_jdir != NULL)
296918334Speter		export_vnode_for_sysctl(fdp->fd_jdir, KF_FD_TYPE_JAIL, kif,
297018334Speter				fdp, req);
297118334Speter	for (i = 0; i < fdp->fd_nfiles; i++) {
297218334Speter		if ((fp = fdp->fd_ofiles[i]) == NULL)
297318334Speter			continue;
297418334Speter		bzero(kif, sizeof(*kif));
297518334Speter		vp = NULL;
297618334Speter		so = NULL;
2977132718Skan		tp = NULL;
297818334Speter		kif->kf_fd = i;
297990075Sobrien		switch (fp->f_type) {
2980132718Skan		case DTYPE_VNODE:
298118334Speter			kif->kf_type = KF_TYPE_VNODE;
298218334Speter			vp = fp->f_vnode;
298318334Speter			break;
2984117395Skan
2985117395Skan		case DTYPE_SOCKET:
2986117395Skan			kif->kf_type = KF_TYPE_SOCKET;
2987132718Skan			so = fp->f_data;
2988117395Skan			break;
2989117395Skan
299018334Speter		case DTYPE_PIPE:
299118334Speter			kif->kf_type = KF_TYPE_PIPE;
299218334Speter			break;
299318334Speter
299418334Speter		case DTYPE_FIFO:
299518334Speter			kif->kf_type = KF_TYPE_FIFO;
299618334Speter			vp = fp->f_vnode;
299718334Speter			break;
299818334Speter
299918334Speter		case DTYPE_KQUEUE:
300018334Speter			kif->kf_type = KF_TYPE_KQUEUE;
300118334Speter			break;
300290075Sobrien
300318334Speter		case DTYPE_CRYPTO:
300418334Speter			kif->kf_type = KF_TYPE_CRYPTO;
300518334Speter			break;
300618334Speter
300718334Speter		case DTYPE_MQUEUE:
300890075Sobrien			kif->kf_type = KF_TYPE_MQUEUE;
300918334Speter			break;
301018334Speter
301118334Speter		case DTYPE_SHM:
301218334Speter			kif->kf_type = KF_TYPE_SHM;
301318334Speter			break;
301490075Sobrien
301518334Speter		case DTYPE_SEM:
301618334Speter			kif->kf_type = KF_TYPE_SEM;
301718334Speter			break;
3018132718Skan
301918334Speter		case DTYPE_PTS:
302018334Speter			kif->kf_type = KF_TYPE_PTS;
302118334Speter			tp = fp->f_data;
302218334Speter			break;
302318334Speter
302418334Speter		default:
302518334Speter			kif->kf_type = KF_TYPE_UNKNOWN;
302690075Sobrien			break;
3027132718Skan		}
3028132718Skan		kif->kf_ref_count = fp->f_count;
3029132718Skan		if (fp->f_flag & FREAD)
3030132718Skan			kif->kf_flags |= KF_FLAG_READ;
303118334Speter		if (fp->f_flag & FWRITE)
303218334Speter			kif->kf_flags |= KF_FLAG_WRITE;
303318334Speter		if (fp->f_flag & FAPPEND)
303418334Speter			kif->kf_flags |= KF_FLAG_APPEND;
303518334Speter		if (fp->f_flag & FASYNC)
303618334Speter			kif->kf_flags |= KF_FLAG_ASYNC;
303718334Speter		if (fp->f_flag & FFSYNC)
303818334Speter			kif->kf_flags |= KF_FLAG_FSYNC;
303918334Speter		if (fp->f_flag & FNONBLOCK)
304018334Speter			kif->kf_flags |= KF_FLAG_NONBLOCK;
304118334Speter		if (fp->f_flag & O_DIRECT)
304218334Speter			kif->kf_flags |= KF_FLAG_DIRECT;
304318334Speter		if (fp->f_flag & FHASLOCK)
304418334Speter			kif->kf_flags |= KF_FLAG_HASLOCK;
304518334Speter		kif->kf_offset = fp->f_offset;
304618334Speter		if (vp != NULL) {
304718334Speter			vref(vp);
304890075Sobrien			switch (vp->v_type) {
304918334Speter			case VNON:
305018334Speter				kif->kf_vnode_type = KF_VTYPE_VNON;
305118334Speter				break;
305218334Speter			case VREG:
305318334Speter				kif->kf_vnode_type = KF_VTYPE_VREG;
305418334Speter				break;
305518334Speter			case VDIR:
305618334Speter				kif->kf_vnode_type = KF_VTYPE_VDIR;
305750397Sobrien				break;
305850397Sobrien			case VBLK:
305996263Sobrien				kif->kf_vnode_type = KF_VTYPE_VBLK;
306096263Sobrien				break;
306150397Sobrien			case VCHR:
306290075Sobrien				kif->kf_vnode_type = KF_VTYPE_VCHR;
306390075Sobrien				break;
306490075Sobrien			case VLNK:
306590075Sobrien				kif->kf_vnode_type = KF_VTYPE_VLNK;
306690075Sobrien				break;
306790075Sobrien			case VSOCK:
306890075Sobrien				kif->kf_vnode_type = KF_VTYPE_VSOCK;
306990075Sobrien				break;
307096263Sobrien			case VFIFO:
307196263Sobrien				kif->kf_vnode_type = KF_VTYPE_VFIFO;
307296263Sobrien				break;
307396263Sobrien			case VBAD:
307496263Sobrien				kif->kf_vnode_type = KF_VTYPE_VBAD;
307596263Sobrien				break;
307696263Sobrien			default:
307796263Sobrien				kif->kf_vnode_type = KF_VTYPE_UNKNOWN;
307896263Sobrien				break;
307996263Sobrien			}
308090075Sobrien			/*
308190075Sobrien			 * It is OK to drop the filedesc lock here as we will
308290075Sobrien			 * re-validate and re-evaluate its properties when
308390075Sobrien			 * the loop continues.
308496263Sobrien			 */
308596263Sobrien			freepath = NULL;
308696263Sobrien			fullpath = "-";
308796263Sobrien			FILEDESC_SUNLOCK(fdp);
308852284Sobrien			vn_fullpath(curthread, vp, &fullpath, &freepath);
308952284Sobrien			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
309090075Sobrien			vrele(vp);
309118334Speter			VFS_UNLOCK_GIANT(vfslocked);
309218334Speter			strlcpy(kif->kf_path, fullpath,
309318334Speter			    sizeof(kif->kf_path));
309418334Speter			if (freepath != NULL)
309518334Speter				free(freepath, M_TEMP);
309618334Speter			FILEDESC_SLOCK(fdp);
309718334Speter		}
309818334Speter		if (so != NULL) {
309950397Sobrien			struct sockaddr *sa;
310050397Sobrien
310150397Sobrien			if (so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa)
310250397Sobrien			    == 0 && sa->sa_len <= sizeof(kif->kf_sa_local)) {
310350397Sobrien				bcopy(sa, &kif->kf_sa_local, sa->sa_len);
310418334Speter				free(sa, M_SONAME);
310518334Speter			}
310618334Speter			if (so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa)
310718334Speter			    == 00 && sa->sa_len <= sizeof(kif->kf_sa_peer)) {
310818334Speter				bcopy(sa, &kif->kf_sa_peer, sa->sa_len);
310918334Speter				free(sa, M_SONAME);
311018334Speter			}
311118334Speter			kif->kf_sock_domain =
311218334Speter			    so->so_proto->pr_domain->dom_family;
311318334Speter			kif->kf_sock_type = so->so_type;
311418334Speter			kif->kf_sock_protocol = so->so_proto->pr_protocol;
311518334Speter		}
311618334Speter		if (tp != NULL) {
311718334Speter			strlcpy(kif->kf_path, tty_devname(tp),
311818334Speter			    sizeof(kif->kf_path));
311918334Speter		}
312018334Speter		/* Pack record size down */
312118334Speter		kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
312218334Speter		    strlen(kif->kf_path) + 1;
312318334Speter		kif->kf_structsize = roundup(kif->kf_structsize,
312418334Speter		    sizeof(uint64_t));
3125117395Skan		oldidx = req->oldidx;
3126117395Skan		error = SYSCTL_OUT(req, kif, kif->kf_structsize);
3127117395Skan		if (error) {
3128117395Skan			if (error == ENOMEM) {
3129117395Skan				/*
3130117395Skan				 * The hack to keep the ABI of sysctl
3131117395Skan				 * kern.proc.filedesc intact, but not
3132132718Skan				 * to account a partially copied
3133117395Skan				 * kinfo_file into the oldidx.
3134117395Skan				 */
3135117395Skan				req->oldidx = oldidx;
3136117395Skan				error = 0;
3137117395Skan			}
3138117395Skan			break;
3139117395Skan		}
3140117395Skan	}
3141132718Skan	FILEDESC_SUNLOCK(fdp);
3142132718Skan	fddrop(fdp);
3143132718Skan	free(kif, M_TEMP);
3144132718Skan	return (error);
3145132718Skan}
3146132718Skan
3147132718Skanstatic SYSCTL_NODE(_kern_proc, KERN_PROC_FILEDESC, filedesc, CTLFLAG_RD,
3148132718Skan    sysctl_kern_proc_filedesc, "Process filedesc entries");
3149132718Skan
3150132718Skan#ifdef DDB
3151132718Skan/*
3152132718Skan * For the purposes of debugging, generate a human-readable string for the
3153132718Skan * file type.
3154132718Skan */
3155132718Skanstatic const char *
3156132718Skanfile_type_to_name(short type)
3157132718Skan{
3158132718Skan
3159132718Skan	switch (type) {
3160132718Skan	case 0:
316118334Speter		return ("zero");
3162132718Skan	case DTYPE_VNODE:
3163132718Skan		return ("vnod");
3164132718Skan	case DTYPE_SOCKET:
316518334Speter		return ("sock");
316618334Speter	case DTYPE_PIPE:
316718334Speter		return ("pipe");
316818334Speter	case DTYPE_FIFO:
316918334Speter		return ("fifo");
317018334Speter	case DTYPE_KQUEUE:
317118334Speter		return ("kque");
317218334Speter	case DTYPE_CRYPTO:
317318334Speter		return ("crpt");
317418334Speter	case DTYPE_MQUEUE:
317590075Sobrien		return ("mque");
317618334Speter	case DTYPE_SHM:
317718334Speter		return ("shm");
317818334Speter	case DTYPE_SEM:
317918334Speter		return ("ksem");
318018334Speter	default:
318118334Speter		return ("unkn");
318218334Speter	}
318318334Speter}
318418334Speter
318518334Speter/*
318618334Speter * For the purposes of debugging, identify a process (if any, perhaps one of
318790075Sobrien * many) that references the passed file in its file descriptor array. Return
318890075Sobrien * NULL if none.
318990075Sobrien */
3190132718Skanstatic struct proc *
319190075Sobrienfile_to_first_proc(struct file *fp)
319290075Sobrien{
319390075Sobrien	struct filedesc *fdp;
319490075Sobrien	struct proc *p;
319590075Sobrien	int n;
319690075Sobrien
319790075Sobrien	FOREACH_PROC_IN_SYSTEM(p) {
319890075Sobrien		if (p->p_state == PRS_NEW)
319990075Sobrien			continue;
320090075Sobrien		fdp = p->p_fd;
320190075Sobrien		if (fdp == NULL)
320290075Sobrien			continue;
320390075Sobrien		for (n = 0; n < fdp->fd_nfiles; n++) {
320418334Speter			if (fp == fdp->fd_ofiles[n])
320518334Speter				return (p);
320618334Speter		}
320718334Speter	}
320818334Speter	return (NULL);
320918334Speter}
321018334Speter
3211132718Skanstatic void
321218334Speterdb_print_file(struct file *fp, int header)
321390075Sobrien{
321418334Speter	struct proc *p;
321518334Speter
321618334Speter	if (header)
321718334Speter		db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
321818334Speter		    "File", "Type", "Data", "Flag", "GCFl", "Count",
321918334Speter		    "MCount", "Vnode", "FPID", "FCmd");
322018334Speter	p = file_to_first_proc(fp);
322118334Speter	db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
322218334Speter	    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
322318334Speter	    0, fp->f_count, 0, fp->f_vnode,
322418334Speter	    p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
322518334Speter}
322618334Speter
322718334SpeterDB_SHOW_COMMAND(file, db_show_file)
322818334Speter{
322918334Speter	struct file *fp;
323018334Speter
323118334Speter	if (!have_addr) {
323218334Speter		db_printf("usage: show file <addr>\n");
323318334Speter		return;
323418334Speter	}
323518334Speter	fp = (struct file *)addr;
323618334Speter	db_print_file(fp, 1);
323718334Speter}
323818334Speter
323918334SpeterDB_SHOW_COMMAND(files, db_show_files)
324090075Sobrien{
324118334Speter	struct filedesc *fdp;
324218334Speter	struct file *fp;
324318334Speter	struct proc *p;
324418334Speter	int header;
3245132718Skan	int n;
324618334Speter
324718334Speter	header = 1;
324818334Speter	FOREACH_PROC_IN_SYSTEM(p) {
324918334Speter		if (p->p_state == PRS_NEW)
325018334Speter			continue;
325118334Speter		if ((fdp = p->p_fd) == NULL)
325218334Speter			continue;
325318334Speter		for (n = 0; n < fdp->fd_nfiles; ++n) {
325418334Speter			if ((fp = fdp->fd_ofiles[n]) == NULL)
325518334Speter				continue;
325618334Speter			db_print_file(fp, header);
325718334Speter			header = 0;
325818334Speter		}
325918334Speter	}
326018334Speter}
326118334Speter#endif
326218334Speter
326318334SpeterSYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
326418334Speter    &maxfilesperproc, 0, "Maximum files allowed open per process");
326518334Speter
326618334SpeterSYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
326718334Speter    &maxfiles, 0, "Maximum number of files");
326850397Sobrien
326950397SobrienSYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
327018334Speter    __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
327118334Speter
327218334Speter/* ARGSUSED*/
327318334Speterstatic void
327418334Speterfilelistinit(void *dummy)
327518334Speter{
327618334Speter
327718334Speter	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
327818334Speter	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
327918334Speter	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
328018334Speter	mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
328118334Speter}
328218334SpeterSYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
328318334Speter
328418334Speter/*-------------------------------------------------------------------*/
328518334Speter
/*
 * Read/write method of badfileops: performs no I/O and always fails
 * with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EBADF);
}
329218334Speter
/*
 * Truncate method of badfileops: always fails with EINVAL, unlike the
 * other methods of the table.
 */
static int
badfo_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{

	return (EINVAL);
}
329918334Speter
330018334Speterstatic int
330118334Speterbadfo_ioctl(struct file *fp, u_long com, void *data, struct ucred *active_cred, struct thread *td)
330218334Speter{
330318334Speter
330418334Speter	return (EBADF);
330518334Speter}
330618334Speter
/*
 * Poll method of badfileops: reports no events by returning 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
331318334Speter
/*
 * Kqueue filter method of badfileops: always fails with EBADF.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (EBADF);
}
332018334Speter
/*
 * Stat method of badfileops: leaves 'sb' untouched and always fails
 * with EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EBADF);
}
332718334Speter
/*
 * Close method of badfileops: always fails with EBADF.
 */
static int
badfo_close(struct file *fp, struct thread *td)
{

	return (EBADF);
}
333418334Speter
333590075Sobrienstruct fileops badfileops = {
333618334Speter	.fo_read = badfo_readwrite,
333718334Speter	.fo_write = badfo_readwrite,
333818334Speter	.fo_truncate = badfo_truncate,
333918334Speter	.fo_ioctl = badfo_ioctl,
334018334Speter	.fo_poll = badfo_poll,
334118334Speter	.fo_kqfilter = badfo_kqfilter,
334218334Speter	.fo_stat = badfo_stat,
334318334Speter	.fo_close = badfo_close,
334418334Speter};
334518334Speter
334618334Speter
334718334Speter/*-------------------------------------------------------------------*/
334818334Speter
334918334Speter/*
335018334Speter * File Descriptor pseudo-device driver (/dev/fd/).
335152284Sobrien *
335218334Speter * Opening minor device N dup()s the file (if any) connected to file
335318334Speter * descriptor N belonging to the calling process.  Note that this driver
335418334Speter * consists of only the ``open()'' routine, because all subsequent
335518334Speter * references to this file will be direct to the other driver.
335690075Sobrien *
335790075Sobrien * XXX: we could give this one a cloning event handler if necessary.
335818334Speter */
335918334Speter
336018334Speter/* ARGSUSED */
336118334Speterstatic int
336218334Speterfdopen(struct cdev *dev, int mode, int type, struct thread *td)
336318334Speter{
336418334Speter
336518334Speter	/*
336618334Speter	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
336718334Speter	 * the file descriptor being sought for duplication. The error
336852284Sobrien	 * return ensures that the vnode for this device will be released
336918334Speter	 * by vn_open. Open will detect this special error and take the
337018334Speter	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
337118334Speter	 * will simply report the error.
337218334Speter	 */
337318334Speter	td->td_dupfd = dev2unit(dev);
337418334Speter	return (ENODEV);
337518334Speter}
337618334Speter
337718334Speterstatic struct cdevsw fildesc_cdevsw = {
337818334Speter	.d_version =	D_VERSION,
337952284Sobrien	.d_open =	fdopen,
338018334Speter	.d_name =	"FD",
338118334Speter};
338218334Speter
338318334Speterstatic void
338418334Speterfildesc_drvinit(void *unused)
338518334Speter{
3386132718Skan	struct cdev *dev;
3387132718Skan
338818334Speter	dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
338990075Sobrien	make_dev_alias(dev, "stdin");
339090075Sobrien	dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
339190075Sobrien	make_dev_alias(dev, "stdout");
339218334Speter	dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
339318334Speter	make_dev_alias(dev, "stderr");
339418334Speter}
339518334Speter
339618334SpeterSYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, fildesc_drvinit, NULL);
339718334Speter