kern_descrip.c revision 105408
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 105408 2002-10-18 17:42:28Z jhb $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/lock.h>
51#include <sys/kernel.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54#include <sys/sysctl.h>
55#include <sys/vnode.h>
56#include <sys/mount.h>
57#include <sys/proc.h>
58#include <sys/namei.h>
59#include <sys/file.h>
60#include <sys/stat.h>
61#include <sys/filio.h>
62#include <sys/fcntl.h>
63#include <sys/unistd.h>
64#include <sys/resourcevar.h>
65#include <sys/event.h>
66#include <sys/sx.h>
67#include <sys/socketvar.h>
68#include <sys/signalvar.h>
69
70#include <machine/limits.h>
71
72#include <vm/vm.h>
73#include <vm/vm_extern.h>
74#include <vm/uma.h>
75
76static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
77static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
78
79uma_zone_t file_zone;
80
81static	 d_open_t  fdopen;
82#define	NUMFDESC 64
83
84#define	CDEV_MAJOR 22
/*
 * Character device switch for the "FD" device, registered under major
 * number CDEV_MAJOR.  Only the open slot has a driver-specific handler
 * (fdopen); every other slot is filled with the generic no*() entry.
 * The initializer is positional, so the order of the entries matters.
 */
static struct cdevsw fildesc_cdevsw = {
	/* open */	fdopen,
	/* close */	noclose,
	/* read */	noread,
	/* write */	nowrite,
	/* ioctl */	noioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"FD",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
};
100
/*
 * How to treat 'new' parameter when allocating a fd for do_dup().
 *
 * DUP_VARIABLE: 'new' is only a starting point; do_dup() asks fdalloc()
 *		 for a free descriptor and uses whatever it returns
 *		 (dup() and fcntl(F_DUPFD)).
 * DUP_FIXED:	 'new' is the exact descriptor to install the file at
 *		 (dup2()).
 */
enum dup_type { DUP_VARIABLE, DUP_FIXED };
103
104static int do_dup(struct thread *td, enum dup_type type, int old, int new,
105    register_t *retval);
106static int badfo_readwrite(struct file *fp, struct uio *uio,
107    struct ucred *active_cred, int flags, struct thread *td);
108static int badfo_ioctl(struct file *fp, u_long com, void *data,
109    struct ucred *active_cred, struct thread *td);
110static int badfo_poll(struct file *fp, int events,
111    struct ucred *active_cred, struct thread *td);
112static int badfo_kqfilter(struct file *fp, struct knote *kn);
113static int badfo_stat(struct file *fp, struct stat *sb,
114    struct ucred *active_cred, struct thread *td);
115static int badfo_close(struct file *fp, struct thread *td);
116
117/*
118 * Descriptor management.
119 */
120struct filelist filehead;	/* head of list of open files */
121int nfiles;			/* actual number of open files */
122extern int cmask;
123struct sx filelist_lock;	/* sx to protect filelist */
124struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
125
126/*
127 * System calls on descriptors.
128 */
129#ifndef _SYS_SYSPROTO_H_
130struct getdtablesize_args {
131	int	dummy;
132};
133#endif
134/*
135 * MPSAFE
136 */
137/* ARGSUSED */
138int
139getdtablesize(td, uap)
140	struct thread *td;
141	struct getdtablesize_args *uap;
142{
143	struct proc *p = td->td_proc;
144
145	mtx_lock(&Giant);
146	td->td_retval[0] =
147	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
148	mtx_unlock(&Giant);
149	return (0);
150}
151
152/*
153 * Duplicate a file descriptor to a particular value.
154 *
155 * note: keep in mind that a potential race condition exists when closing
156 * descriptors from a shared descriptor table (via rfork).
157 */
158#ifndef _SYS_SYSPROTO_H_
159struct dup2_args {
160	u_int	from;
161	u_int	to;
162};
163#endif
164/*
165 * MPSAFE
166 */
167/* ARGSUSED */
168int
169dup2(td, uap)
170	struct thread *td;
171	struct dup2_args *uap;
172{
173
174	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
175		    td->td_retval));
176}
177
178/*
179 * Duplicate a file descriptor.
180 */
181#ifndef _SYS_SYSPROTO_H_
182struct dup_args {
183	u_int	fd;
184};
185#endif
186/*
187 * MPSAFE
188 */
189/* ARGSUSED */
190int
191dup(td, uap)
192	struct thread *td;
193	struct dup_args *uap;
194{
195
196	return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
197}
198
199/*
200 * The file control system call.
201 */
202#ifndef _SYS_SYSPROTO_H_
203struct fcntl_args {
204	int	fd;
205	int	cmd;
206	long	arg;
207};
208#endif
209/*
210 * MPSAFE
211 */
212/* ARGSUSED */
213int
214fcntl(td, uap)
215	struct thread *td;
216	register struct fcntl_args *uap;
217{
218	struct flock fl;
219	intptr_t arg;
220	int error;
221
222	error = 0;
223	switch (uap->cmd) {
224	case F_GETLK:
225	case F_SETLK:
226	case F_SETLKW:
227		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
228		arg = (intptr_t)&fl;
229		break;
230	default:
231		arg = uap->arg;
232		break;
233	}
234	if (error)
235		return (error);
236	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
237	if (error)
238		return (error);
239	if (uap->cmd == F_GETLK)
240		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
241	return (error);
242}
243
/*
 * Kernel back end of fcntl(): perform command 'cmd' on descriptor 'fd'
 * of the process that 'td' belongs to.
 *
 * 'arg' is the command's integer argument.  For F_GETLK, F_SETLK and
 * F_SETLKW it is instead cast to a struct flock pointer and dereferenced
 * directly, so the caller must pass a kernel address (fcntl() above
 * copies the user's flock in first).
 *
 * Returns 0 or an errno value.  Commands that produce a value
 * (F_DUPFD, F_GETFD, F_GETFL, F_GETOWN) store it in td->td_retval[0].
 *
 * Locking: Giant is held for the whole call.  The filedesc lock is
 * taken to look the descriptor up, and every case of the switch is
 * responsible for dropping it again; cases that go on to call
 * fo_ioctl() or VOP_ADVLOCK() first take a reference on the file with
 * fhold() and release it with fdrop() when done.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	register struct filedesc *fdp;
	struct flock *flp;
	register struct file *fp;
	struct proc *p;
	register char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	/* The unsigned cast makes a negative fd fail the range check. */
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte (holds UF_EXCLOSE). */
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		/* do_dup() takes the filedesc lock itself, so drop it here. */
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			error = EINVAL;
			break;
		}
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		/* Translate the kernel's UF_EXCLOSE into FD_CLOEXEC. */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		/* Only the close-on-exec bit is affected. */
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		/* Replace the fcntl-settable flags; the access mode is kept. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		/* Tell the file object about the new non-blocking state... */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		/* ...and about the new async state. */
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/*
		 * FIOASYNC failed: clear FNONBLOCK again and tell the
		 * object so; the error from FIOASYNC is what is returned.
		 */
		fp->f_flag &= ~FNONBLOCK;
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		/* Same as F_SETLK, except the request is flagged F_WAIT. */
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		/* Advisory locks are only supported on vnodes. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/*
			 * Make l_start absolute by adding the current file
			 * offset, refusing if that would overflow off_t.
			 */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = (struct vnode *)fp->f_data;

		switch (flp->l_type) {
		case F_RDLCK:
			/* A read lock needs the file open for reading. */
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			/* Record on the process that it has taken a lock. */
			PROC_LOCK(p);
			p->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			/* A write lock needs the file open for writing. */
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p);
			p->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Every inner case lands here, so the hold is always dropped. */
		fdrop(fp, td);
		break;

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Reject offsets that would overflow either way. */
			if ((flp->l_start > 0 &&
			    fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = (struct vnode *)fp->f_data;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
447
448/*
449 * Common code for dup, dup2, and fcntl(F_DUPFD).
450 * filedesc must be locked, but will be unlocked as a side effect.
451 */
452static int
453do_dup(td, type, old, new, retval)
454	enum dup_type type;
455	int old, new;
456	register_t *retval;
457	struct thread *td;
458{
459	register struct filedesc *fdp;
460	struct proc *p;
461	struct file *fp;
462	struct file *delfp;
463	int error, newfd;
464
465	p = td->td_proc;
466	fdp = p->p_fd;
467
468	/*
469	 * Verify we have a valid descriptor to dup from and possibly to
470	 * dup to.
471	 */
472	FILEDESC_LOCK(fdp);
473	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL ||
474	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
475	    new >= maxfilesperproc) {
476		FILEDESC_UNLOCK(fdp);
477		return (EBADF);
478	}
479	if (type == DUP_FIXED && old == new) {
480		*retval = new;
481		FILEDESC_UNLOCK(fdp);
482		return (0);
483	}
484	fp = fdp->fd_ofiles[old];
485	fhold(fp);
486
487	/*
488	 * Expand the table for the new descriptor if needed.  This may
489	 * block and drop and reacquire the filedesc lock.
490	 */
491	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
492		error = fdalloc(td, new, &newfd);
493		if (error) {
494			FILEDESC_UNLOCK(fdp);
495			return (error);
496		}
497	}
498	if (type == DUP_VARIABLE)
499		new = newfd;
500
501	/*
502	 * If the old file changed out from under us then treat it as a
503	 * bad file descriptor.  Userland should do its own locking to
504	 * avoid this case.
505	 */
506	if (fdp->fd_ofiles[old] != fp) {
507		if (fdp->fd_ofiles[new] == NULL) {
508			if (new < fdp->fd_freefile)
509				fdp->fd_freefile = new;
510			while (fdp->fd_lastfile > 0 &&
511			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
512				fdp->fd_lastfile--;
513		}
514		FILEDESC_UNLOCK(fdp);
515		fdrop(fp, td);
516		return (EBADF);
517	}
518	KASSERT(old != new, ("new fd is same as old"));
519
520	/*
521	 * Save info on the descriptor being overwritten.  We have
522	 * to do the unmap now, but we cannot close it without
523	 * introducing an ownership race for the slot.
524	 */
525	delfp = fdp->fd_ofiles[new];
526	KASSERT(delfp == NULL || type == DUP_FIXED,
527	    ("dup() picked an open file"));
528#if 0
529	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
530		(void) munmapfd(td, new);
531#endif
532
533	/*
534	 * Duplicate the source descriptor, update lastfile
535	 */
536	fdp->fd_ofiles[new] = fp;
537 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
538	if (new > fdp->fd_lastfile)
539		fdp->fd_lastfile = new;
540	FILEDESC_UNLOCK(fdp);
541	*retval = new;
542
543	/*
544	 * If we dup'd over a valid file, we now own the reference to it
545	 * and must dispose of it using closef() semantics (as if a
546	 * close() were performed on it).
547	 */
548	if (delfp) {
549		mtx_lock(&Giant);
550		(void) closef(delfp, td);
551		mtx_unlock(&Giant);
552	}
553	return (0);
554}
555
556/*
557 * If sigio is on the list associated with a process or process group,
558 * disable signalling from the device, remove sigio from the list and
559 * free sigio.
560 */
561void
562funsetown(sigiop)
563	struct sigio **sigiop;
564{
565	struct sigio *sigio;
566
567	SIGIO_LOCK();
568	sigio = *sigiop;
569	if (sigio == NULL) {
570		SIGIO_UNLOCK();
571		return;
572	}
573	*(sigio->sio_myref) = NULL;
574	if ((sigio)->sio_pgid < 0) {
575		struct pgrp *pg = (sigio)->sio_pgrp;
576		PGRP_LOCK(pg);
577		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
578			     sigio, sio_pgsigio);
579		PGRP_UNLOCK(pg);
580	} else {
581		struct proc *p = (sigio)->sio_proc;
582		PROC_LOCK(p);
583		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
584			     sigio, sio_pgsigio);
585		PROC_UNLOCK(p);
586	}
587	SIGIO_UNLOCK();
588	crfree(sigio->sio_ucred);
589	FREE(sigio, M_SIGIO);
590}
591
592/*
593 * Free a list of sigio structures.
594 * We only need to lock the SIGIO_LOCK because we have made ourselves
595 * inaccessable to callers of fsetown and therefore do not need to lock
596 * the proc or pgrp struct for the list manipulation.
597 */
598void
599funsetownlst(sigiolst)
600	struct sigiolst *sigiolst;
601{
602	struct proc *p;
603	struct pgrp *pg;
604	struct sigio *sigio;
605
606	sigio = SLIST_FIRST(sigiolst);
607	if (sigio == NULL)
608		return;
609	p = NULL;
610	pg = NULL;
611
612	/*
613	 * Every entry of the list should belong
614	 * to a single proc or pgrp.
615	 */
616	if (sigio->sio_pgid < 0) {
617		pg = sigio->sio_pgrp;
618		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
619	} else /* if (sigio->sio_pgid > 0) */ {
620		p = sigio->sio_proc;
621		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
622	}
623
624	SIGIO_LOCK();
625	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
626		*(sigio->sio_myref) = NULL;
627		if (pg != NULL) {
628			KASSERT(sigio->sio_pgid < 0,
629			    ("Proc sigio in pgrp sigio list"));
630			KASSERT(sigio->sio_pgrp == pg,
631			    ("Bogus pgrp in sigio list"));
632			PGRP_LOCK(pg);
633			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
634			    sio_pgsigio);
635			PGRP_UNLOCK(pg);
636		} else /* if (p != NULL) */ {
637			KASSERT(sigio->sio_pgid > 0,
638			    ("Pgrp sigio in proc sigio list"));
639			KASSERT(sigio->sio_proc == p,
640			    ("Bogus proc in sigio list"));
641			PROC_LOCK(p);
642			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
643			    sio_pgsigio);
644			PROC_UNLOCK(p);
645		}
646		SIGIO_UNLOCK();
647		crfree(sigio->sio_ucred);
648		FREE(sigio, M_SIGIO);
649		SIGIO_LOCK();
650	}
651	SIGIO_UNLOCK();
652}
653
654/*
655 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
656 *
657 * After permission checking, add a sigio structure to the sigio list for
658 * the process or process group.
659 */
660int
661fsetown(pgid, sigiop)
662	pid_t pgid;
663	struct sigio **sigiop;
664{
665	struct proc *proc;
666	struct pgrp *pgrp;
667	struct sigio *sigio;
668	int ret;
669
670	if (pgid == 0) {
671		funsetown(sigiop);
672		return (0);
673	}
674
675	ret = 0;
676
677	/* Allocate and fill in the new sigio out of locks. */
678	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
679	sigio->sio_pgid = pgid;
680	sigio->sio_ucred = crhold(curthread->td_ucred);
681	sigio->sio_myref = sigiop;
682
683	sx_slock(&proctree_lock);
684	if (pgid > 0) {
685		proc = pfind(pgid);
686		if (proc == NULL) {
687			ret = ESRCH;
688			goto fail;
689		}
690
691		/*
692		 * Policy - Don't allow a process to FSETOWN a process
693		 * in another session.
694		 *
695		 * Remove this test to allow maximum flexibility or
696		 * restrict FSETOWN to the current process or process
697		 * group for maximum safety.
698		 */
699		PROC_UNLOCK(proc);
700		if (proc->p_session != curthread->td_proc->p_session) {
701			ret = EPERM;
702			goto fail;
703		}
704
705		pgrp = NULL;
706	} else /* if (pgid < 0) */ {
707		pgrp = pgfind(-pgid);
708		if (pgrp == NULL) {
709			ret = ESRCH;
710			goto fail;
711		}
712		PGRP_UNLOCK(pgrp);
713
714		/*
715		 * Policy - Don't allow a process to FSETOWN a process
716		 * in another session.
717		 *
718		 * Remove this test to allow maximum flexibility or
719		 * restrict FSETOWN to the current process or process
720		 * group for maximum safety.
721		 */
722		if (pgrp->pg_session != curthread->td_proc->p_session) {
723			ret = EPERM;
724			goto fail;
725		}
726
727		proc = NULL;
728	}
729	funsetown(sigiop);
730	if (pgid > 0) {
731		PROC_LOCK(proc);
732		/*
733		 * Since funsetownlst() is called without the proctree
734		 * locked, we need to check for P_WEXIT.
735		 * XXX: is ESRCH correct?
736		 */
737		if ((proc->p_flag & P_WEXIT) != 0) {
738			PROC_UNLOCK(proc);
739			ret = ESRCH;
740			goto fail;
741		}
742		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
743		sigio->sio_proc = proc;
744		PROC_UNLOCK(proc);
745	} else {
746		PGRP_LOCK(pgrp);
747		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
748		sigio->sio_pgrp = pgrp;
749		PGRP_UNLOCK(pgrp);
750	}
751	sx_sunlock(&proctree_lock);
752	SIGIO_LOCK();
753	*sigiop = sigio;
754	SIGIO_UNLOCK();
755	return (0);
756
757fail:
758	sx_sunlock(&proctree_lock);
759	crfree(sigio->sio_ucred);
760	FREE(sigio, M_SIGIO);
761	return (ret);
762}
763
764/*
765 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
766 */
767pid_t
768fgetown(sigiop)
769	struct sigio **sigiop;
770{
771	pid_t pgid;
772
773	SIGIO_LOCK();
774	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
775	SIGIO_UNLOCK();
776	return (pgid);
777}
778
779/*
780 * Close a file descriptor.
781 */
782#ifndef _SYS_SYSPROTO_H_
783struct close_args {
784        int     fd;
785};
786#endif
787/*
788 * MPSAFE
789 */
790/* ARGSUSED */
791int
792close(td, uap)
793	struct thread *td;
794	struct close_args *uap;
795{
796	register struct filedesc *fdp;
797	register struct file *fp;
798	int fd, error;
799
800	fd = uap->fd;
801	error = 0;
802	fdp = td->td_proc->p_fd;
803	mtx_lock(&Giant);
804	FILEDESC_LOCK(fdp);
805	if ((unsigned)fd >= fdp->fd_nfiles ||
806	    (fp = fdp->fd_ofiles[fd]) == NULL) {
807		FILEDESC_UNLOCK(fdp);
808		error = EBADF;
809		goto done2;
810	}
811#if 0
812	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
813		(void) munmapfd(td, fd);
814#endif
815	fdp->fd_ofiles[fd] = NULL;
816	fdp->fd_ofileflags[fd] = 0;
817
818	/*
819	 * we now hold the fp reference that used to be owned by the descriptor
820	 * array.
821	 */
822	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
823		fdp->fd_lastfile--;
824	if (fd < fdp->fd_freefile)
825		fdp->fd_freefile = fd;
826	if (fd < fdp->fd_knlistsize) {
827		FILEDESC_UNLOCK(fdp);
828		knote_fdclose(td, fd);
829	} else
830		FILEDESC_UNLOCK(fdp);
831
832	error = closef(fp, td);
833done2:
834	mtx_unlock(&Giant);
835	return (error);
836}
837
838#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
839/*
840 * Return status information about a file descriptor.
841 */
842#ifndef _SYS_SYSPROTO_H_
843struct ofstat_args {
844	int	fd;
845	struct	ostat *sb;
846};
847#endif
848/*
849 * MPSAFE
850 */
851/* ARGSUSED */
852int
853ofstat(td, uap)
854	struct thread *td;
855	register struct ofstat_args *uap;
856{
857	struct file *fp;
858	struct stat ub;
859	struct ostat oub;
860	int error;
861
862	mtx_lock(&Giant);
863	if ((error = fget(td, uap->fd, &fp)) != 0)
864		goto done2;
865	error = fo_stat(fp, &ub, td->td_ucred, td);
866	if (error == 0) {
867		cvtstat(&ub, &oub);
868		error = copyout(&oub, uap->sb, sizeof(oub));
869	}
870	fdrop(fp, td);
871done2:
872	mtx_unlock(&Giant);
873	return (error);
874}
875#endif /* COMPAT_43 || COMPAT_SUNOS */
876
877/*
878 * Return status information about a file descriptor.
879 */
880#ifndef _SYS_SYSPROTO_H_
881struct fstat_args {
882	int	fd;
883	struct	stat *sb;
884};
885#endif
886/*
887 * MPSAFE
888 */
889/* ARGSUSED */
890int
891fstat(td, uap)
892	struct thread *td;
893	struct fstat_args *uap;
894{
895	struct file *fp;
896	struct stat ub;
897	int error;
898
899	mtx_lock(&Giant);
900	if ((error = fget(td, uap->fd, &fp)) != 0)
901		goto done2;
902	error = fo_stat(fp, &ub, td->td_ucred, td);
903	if (error == 0)
904		error = copyout(&ub, uap->sb, sizeof(ub));
905	fdrop(fp, td);
906done2:
907	mtx_unlock(&Giant);
908	return (error);
909}
910
911/*
912 * Return status information about a file descriptor.
913 */
914#ifndef _SYS_SYSPROTO_H_
915struct nfstat_args {
916	int	fd;
917	struct	nstat *sb;
918};
919#endif
920/*
921 * MPSAFE
922 */
923/* ARGSUSED */
924int
925nfstat(td, uap)
926	struct thread *td;
927	register struct nfstat_args *uap;
928{
929	struct file *fp;
930	struct stat ub;
931	struct nstat nub;
932	int error;
933
934	mtx_lock(&Giant);
935	if ((error = fget(td, uap->fd, &fp)) != 0)
936		goto done2;
937	error = fo_stat(fp, &ub, td->td_ucred, td);
938	if (error == 0) {
939		cvtnstat(&ub, &nub);
940		error = copyout(&nub, uap->sb, sizeof(nub));
941	}
942	fdrop(fp, td);
943done2:
944	mtx_unlock(&Giant);
945	return (error);
946}
947
948/*
949 * Return pathconf information about a file descriptor.
950 */
951#ifndef _SYS_SYSPROTO_H_
952struct fpathconf_args {
953	int	fd;
954	int	name;
955};
956#endif
957/*
958 * MPSAFE
959 */
960/* ARGSUSED */
961int
962fpathconf(td, uap)
963	struct thread *td;
964	register struct fpathconf_args *uap;
965{
966	struct file *fp;
967	struct vnode *vp;
968	int error;
969
970	if ((error = fget(td, uap->fd, &fp)) != 0)
971		return (error);
972	switch (fp->f_type) {
973	case DTYPE_PIPE:
974	case DTYPE_SOCKET:
975		if (uap->name != _PC_PIPE_BUF) {
976			error = EINVAL;
977		} else {
978			td->td_retval[0] = PIPE_BUF;
979			error = 0;
980		}
981		break;
982	case DTYPE_FIFO:
983	case DTYPE_VNODE:
984		vp = (struct vnode *)fp->f_data;
985		mtx_lock(&Giant);
986		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
987		mtx_unlock(&Giant);
988		break;
989	default:
990		error = EOPNOTSUPP;
991		break;
992	}
993	fdrop(fp, td);
994	return (error);
995}
996
997/*
998 * Allocate a file descriptor for the process.
999 */
1000static int fdexpand;
1001SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
1002
1003int
1004fdalloc(td, want, result)
1005	struct thread *td;
1006	int want;
1007	int *result;
1008{
1009	struct proc *p = td->td_proc;
1010	register struct filedesc *fdp = td->td_proc->p_fd;
1011	register int i;
1012	int lim, last, nfiles;
1013	struct file **newofile, **oldofile;
1014	char *newofileflags;
1015
1016	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1017
1018	/*
1019	 * Search for a free descriptor starting at the higher
1020	 * of want or fd_freefile.  If that fails, consider
1021	 * expanding the ofile array.
1022	 */
1023	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1024	for (;;) {
1025		last = min(fdp->fd_nfiles, lim);
1026		i = max(want, fdp->fd_freefile);
1027		for (; i < last; i++) {
1028			if (fdp->fd_ofiles[i] == NULL) {
1029				fdp->fd_ofileflags[i] = 0;
1030				if (i > fdp->fd_lastfile)
1031					fdp->fd_lastfile = i;
1032				if (want <= fdp->fd_freefile)
1033					fdp->fd_freefile = i;
1034				*result = i;
1035				return (0);
1036			}
1037		}
1038
1039		/*
1040		 * No space in current array.  Expand?
1041		 */
1042		if (i >= lim)
1043			return (EMFILE);
1044		if (fdp->fd_nfiles < NDEXTENT)
1045			nfiles = NDEXTENT;
1046		else
1047			nfiles = 2 * fdp->fd_nfiles;
1048		while (nfiles < want)
1049			nfiles <<= 1;
1050		FILEDESC_UNLOCK(fdp);
1051		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
1052
1053		/*
1054		 * Deal with file-table extend race that might have
1055		 * occurred while filedesc was unlocked.
1056		 */
1057		FILEDESC_LOCK(fdp);
1058		if (fdp->fd_nfiles >= nfiles) {
1059			free(newofile, M_FILEDESC);
1060			continue;
1061		}
1062		newofileflags = (char *) &newofile[nfiles];
1063		/*
1064		 * Copy the existing ofile and ofileflags arrays
1065		 * and zero the new portion of each array.
1066		 */
1067		i = fdp->fd_nfiles * sizeof(struct file *);
1068		bcopy(fdp->fd_ofiles, newofile,	i);
1069		bzero((char *)newofile + i,
1070		    nfiles * sizeof(struct file *) - i);
1071		i = fdp->fd_nfiles * sizeof(char);
1072		bcopy(fdp->fd_ofileflags, newofileflags, i);
1073		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1074		if (fdp->fd_nfiles > NDFILE)
1075			oldofile = fdp->fd_ofiles;
1076		else
1077			oldofile = NULL;
1078		fdp->fd_ofiles = newofile;
1079		fdp->fd_ofileflags = newofileflags;
1080		fdp->fd_nfiles = nfiles;
1081		fdexpand++;
1082		if (oldofile != NULL)
1083			free(oldofile, M_FILEDESC);
1084	}
1085	return (0);
1086}
1087
1088/*
1089 * Check to see whether n user file descriptors
1090 * are available to the process p.
1091 */
1092int
1093fdavail(td, n)
1094	struct thread *td;
1095	register int n;
1096{
1097	struct proc *p = td->td_proc;
1098	register struct filedesc *fdp = td->td_proc->p_fd;
1099	register struct file **fpp;
1100	register int i, lim, last;
1101
1102	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1103
1104	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1105	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1106		return (1);
1107	last = min(fdp->fd_nfiles, lim);
1108	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1109	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1110		if (*fpp == NULL && --n <= 0)
1111			return (1);
1112	}
1113	return (0);
1114}
1115
1116/*
1117 * Create a new open file structure and allocate
1118 * a file decriptor for the process that refers to it.
1119 */
1120int
1121falloc(td, resultfp, resultfd)
1122	register struct thread *td;
1123	struct file **resultfp;
1124	int *resultfd;
1125{
1126	struct proc *p = td->td_proc;
1127	register struct file *fp, *fq;
1128	int error, i;
1129
1130	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1131	sx_xlock(&filelist_lock);
1132	if (nfiles >= maxfiles) {
1133		sx_xunlock(&filelist_lock);
1134		uma_zfree(file_zone, fp);
1135		tablefull("file");
1136		return (ENFILE);
1137	}
1138	nfiles++;
1139
1140	/*
1141	 * If the process has file descriptor zero open, add the new file
1142	 * descriptor to the list of open files at that point, otherwise
1143	 * put it at the front of the list of open files.
1144	 */
1145	fp->f_mtxp = mtx_pool_alloc();
1146	fp->f_gcflag = 0;
1147	fp->f_count = 1;
1148	fp->f_cred = crhold(td->td_ucred);
1149	fp->f_ops = &badfileops;
1150	fp->f_seqcount = 1;
1151	FILEDESC_LOCK(p->p_fd);
1152	if ((fq = p->p_fd->fd_ofiles[0])) {
1153		LIST_INSERT_AFTER(fq, fp, f_list);
1154	} else {
1155		LIST_INSERT_HEAD(&filehead, fp, f_list);
1156	}
1157	sx_xunlock(&filelist_lock);
1158	if ((error = fdalloc(td, 0, &i))) {
1159		FILEDESC_UNLOCK(p->p_fd);
1160		fdrop(fp, td);
1161		return (error);
1162	}
1163	p->p_fd->fd_ofiles[i] = fp;
1164	FILEDESC_UNLOCK(p->p_fd);
1165	if (resultfp)
1166		*resultfp = fp;
1167	if (resultfd)
1168		*resultfd = i;
1169	return (0);
1170}
1171
1172/*
1173 * Free a file descriptor.
1174 */
1175void
1176ffree(fp)
1177	register struct file *fp;
1178{
1179
1180	KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
1181	sx_xlock(&filelist_lock);
1182	LIST_REMOVE(fp, f_list);
1183	nfiles--;
1184	sx_xunlock(&filelist_lock);
1185	crfree(fp->f_cred);
1186	uma_zfree(file_zone, fp);
1187}
1188
1189/*
1190 * Build a new filedesc structure.
1191 */
1192struct filedesc *
1193fdinit(td)
1194	struct thread *td;
1195{
1196	register struct filedesc0 *newfdp;
1197	register struct filedesc *fdp = td->td_proc->p_fd;
1198
1199	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1200	    M_FILEDESC, M_WAITOK | M_ZERO);
1201	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1202	FILEDESC_LOCK(&newfdp->fd_fd);
1203	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1204	if (newfdp->fd_fd.fd_cdir)
1205		VREF(newfdp->fd_fd.fd_cdir);
1206	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1207	if (newfdp->fd_fd.fd_rdir)
1208		VREF(newfdp->fd_fd.fd_rdir);
1209	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1210	if (newfdp->fd_fd.fd_jdir)
1211		VREF(newfdp->fd_fd.fd_jdir);
1212
1213	/* Create the file descriptor table. */
1214	newfdp->fd_fd.fd_refcnt = 1;
1215	newfdp->fd_fd.fd_cmask = cmask;
1216	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1217	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1218	newfdp->fd_fd.fd_nfiles = NDFILE;
1219	newfdp->fd_fd.fd_knlistsize = -1;
1220	FILEDESC_UNLOCK(&newfdp->fd_fd);
1221
1222	return (&newfdp->fd_fd);
1223}
1224
1225/*
1226 * Share a filedesc structure.
1227 */
1228struct filedesc *
1229fdshare(p)
1230	struct proc *p;
1231{
1232	FILEDESC_LOCK(p->p_fd);
1233	p->p_fd->fd_refcnt++;
1234	FILEDESC_UNLOCK(p->p_fd);
1235	return (p->p_fd);
1236}
1237
/*
 * Copy a filedesc structure.
 *
 * Builds a new descriptor table, with a reference count of one, that
 * duplicates the table of the calling thread's process: the directory
 * vnodes are referenced, the open-file array and its flags are copied,
 * and every copied file gets an additional hold.  kqueue descriptors are
 * not inherited; their slots are cleared in the copy.
 *
 * Locking: the source table's lock must be held on entry (asserted below).
 * It is released around every MALLOC/FREE call and retaken afterwards, and
 * it is held again when the new table is returned.
 *
 * Returns NULL when the process has no descriptor table.
 */
struct filedesc *
fdcopy(td)
	struct thread *td;
{
	register struct filedesc *newfdp, *fdp;
	register struct file **fpp;
	register int i, j;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (NULL);

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Allocate room for a full filedesc0 (the table plus its built-in
	 * arrays) without the lock, then snapshot the source under the lock.
	 */
	FILEDESC_UNLOCK(fdp);
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	FILEDESC_LOCK(fdp);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	FILEDESC_UNLOCK(fdp);
	/* The bcopy also copied the source's mutex; give the copy its own. */
	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	if (newfdp->fd_cdir)
		VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	if (newfdp->fd_jdir)
		VREF(newfdp->fd_jdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	FILEDESC_LOCK(fdp);
	/* Refresh the sizes: the source may have changed while unlocked. */
	newfdp->fd_lastfile = fdp->fd_lastfile;
	newfdp->fd_nfiles = fdp->fd_nfiles;
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
retry:
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		FILEDESC_UNLOCK(fdp);
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		FILEDESC_LOCK(fdp);
		/*
		 * Recompute the wanted size (j) from the source's current
		 * state and compare it with the size just allocated (i).
		 */
		newfdp->fd_lastfile = fdp->fd_lastfile;
		newfdp->fd_nfiles = fdp->fd_nfiles;
		j = newfdp->fd_nfiles;
		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
			j /= 2;
		if (i != j) {
			/*
			 * The size of the original table has changed.
			 * Go over once again.
			 */
			FILEDESC_UNLOCK(fdp);
			FREE(newfdp->fd_ofiles, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			newfdp->fd_lastfile = fdp->fd_lastfile;
			newfdp->fd_nfiles = fdp->fd_nfiles;
			goto retry;
		}
		/* The per-descriptor flag bytes follow the pointer array. */
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));

	/*
	 * kq descriptors cannot be copied.
	 */
	if (newfdp->fd_knlistsize != -1) {
		/*
		 * Walk from the highest descriptor down, clearing kqueue
		 * slots and pulling fd_lastfile down past trailing empty
		 * slots as they are encountered.
		 */
		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
				*fpp = NULL;
				if (i < newfdp->fd_freefile)
					newfdp->fd_freefile = i;
			}
			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
				newfdp->fd_lastfile--;
		}
		/* The copy starts without any knote bookkeeping. */
		newfdp->fd_knlist = NULL;
		newfdp->fd_knlistsize = -1;
		newfdp->fd_knhash = NULL;
		newfdp->fd_knhashmask = 0;
	}

	/* Take a hold on every file in slots 0..fd_lastfile of the copy. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp != NULL)
			fhold(*fpp);
	}
	return (newfdp);
}
1350
1351/*
1352 * Release a filedesc structure.
1353 */
1354void
1355fdfree(td)
1356	struct thread *td;
1357{
1358	register struct filedesc *fdp;
1359	struct file **fpp;
1360	register int i;
1361
1362	/* Certain daemons might not have file descriptors. */
1363	fdp = td->td_proc->p_fd;
1364	if (fdp == NULL)
1365		return;
1366
1367	FILEDESC_LOCK(fdp);
1368	if (--fdp->fd_refcnt > 0) {
1369		FILEDESC_UNLOCK(fdp);
1370		return;
1371	}
1372
1373	/*
1374	 * We are the last reference to the structure, so we can
1375	 * safely assume it will not change out from under us.
1376	 */
1377	FILEDESC_UNLOCK(fdp);
1378	fpp = fdp->fd_ofiles;
1379	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1380		if (*fpp)
1381			(void) closef(*fpp, td);
1382	}
1383	td->td_proc->p_fd = NULL;
1384	if (fdp->fd_nfiles > NDFILE)
1385		FREE(fdp->fd_ofiles, M_FILEDESC);
1386	if (fdp->fd_cdir)
1387		vrele(fdp->fd_cdir);
1388	if (fdp->fd_rdir)
1389		vrele(fdp->fd_rdir);
1390	if (fdp->fd_jdir)
1391		vrele(fdp->fd_jdir);
1392	if (fdp->fd_knlist)
1393		FREE(fdp->fd_knlist, M_KQUEUE);
1394	if (fdp->fd_knhash)
1395		FREE(fdp->fd_knhash, M_KQUEUE);
1396	mtx_destroy(&fdp->fd_mtx);
1397	FREE(fdp, M_FILEDESC);
1398}
1399
1400/*
1401 * For setugid programs, we don't want to people to use that setugidness
1402 * to generate error messages which write to a file which otherwise would
1403 * otherwise be off-limits to the process.  We check for filesystems where
1404 * the vnode can change out from under us after execve (like [lin]procfs).
1405 *
1406 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1407 * sufficient.  We also don't for check setugidness since we know we are.
1408 */
1409static int
1410is_unsafe(struct file *fp)
1411{
1412	if (fp->f_type == DTYPE_VNODE) {
1413		struct vnode *vp = (struct vnode *)fp->f_data;
1414
1415		if ((vp->v_vflag & VV_PROCDEP) != 0)
1416			return (1);
1417	}
1418	return (0);
1419}
1420
1421/*
1422 * Make this setguid thing safe, if at all possible.
1423 */
1424void
1425setugidsafety(td)
1426	struct thread *td;
1427{
1428	struct filedesc *fdp;
1429	register int i;
1430
1431	/* Certain daemons might not have file descriptors. */
1432	fdp = td->td_proc->p_fd;
1433	if (fdp == NULL)
1434		return;
1435
1436	/*
1437	 * Note: fdp->fd_ofiles may be reallocated out from under us while
1438	 * we are blocked in a close.  Be careful!
1439	 */
1440	FILEDESC_LOCK(fdp);
1441	for (i = 0; i <= fdp->fd_lastfile; i++) {
1442		if (i > 2)
1443			break;
1444		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1445			struct file *fp;
1446
1447#if 0
1448			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1449				(void) munmapfd(td, i);
1450#endif
1451			if (i < fdp->fd_knlistsize) {
1452				FILEDESC_UNLOCK(fdp);
1453				knote_fdclose(td, i);
1454				FILEDESC_LOCK(fdp);
1455			}
1456			/*
1457			 * NULL-out descriptor prior to close to avoid
1458			 * a race while close blocks.
1459			 */
1460			fp = fdp->fd_ofiles[i];
1461			fdp->fd_ofiles[i] = NULL;
1462			fdp->fd_ofileflags[i] = 0;
1463			if (i < fdp->fd_freefile)
1464				fdp->fd_freefile = i;
1465			FILEDESC_UNLOCK(fdp);
1466			(void) closef(fp, td);
1467			FILEDESC_LOCK(fdp);
1468		}
1469	}
1470	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1471		fdp->fd_lastfile--;
1472	FILEDESC_UNLOCK(fdp);
1473}
1474
1475/*
1476 * Close any files on exec?
1477 */
1478void
1479fdcloseexec(td)
1480	struct thread *td;
1481{
1482	struct filedesc *fdp;
1483	register int i;
1484
1485	/* Certain daemons might not have file descriptors. */
1486	fdp = td->td_proc->p_fd;
1487	if (fdp == NULL)
1488		return;
1489
1490	FILEDESC_LOCK(fdp);
1491
1492	/*
1493	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1494	 * may block and rip them out from under us.
1495	 */
1496	for (i = 0; i <= fdp->fd_lastfile; i++) {
1497		if (fdp->fd_ofiles[i] != NULL &&
1498		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1499			struct file *fp;
1500
1501#if 0
1502			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1503				(void) munmapfd(td, i);
1504#endif
1505			if (i < fdp->fd_knlistsize) {
1506				FILEDESC_UNLOCK(fdp);
1507				knote_fdclose(td, i);
1508				FILEDESC_LOCK(fdp);
1509			}
1510			/*
1511			 * NULL-out descriptor prior to close to avoid
1512			 * a race while close blocks.
1513			 */
1514			fp = fdp->fd_ofiles[i];
1515			fdp->fd_ofiles[i] = NULL;
1516			fdp->fd_ofileflags[i] = 0;
1517			if (i < fdp->fd_freefile)
1518				fdp->fd_freefile = i;
1519			FILEDESC_UNLOCK(fdp);
1520			(void) closef(fp, td);
1521			FILEDESC_LOCK(fdp);
1522		}
1523	}
1524	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1525		fdp->fd_lastfile--;
1526	FILEDESC_UNLOCK(fdp);
1527}
1528
1529/*
1530 * It is unsafe for set[ug]id processes to be started with file
1531 * descriptors 0..2 closed, as these descriptors are given implicit
1532 * significance in the Standard C library.  fdcheckstd() will create a
1533 * descriptor referencing /dev/null for each of stdin, stdout, and
1534 * stderr that is not already open.
1535 */
1536int
1537fdcheckstd(td)
1538	struct thread *td;
1539{
1540	struct nameidata nd;
1541	struct filedesc *fdp;
1542	struct file *fp;
1543	register_t retval;
1544	int fd, i, error, flags, devnull;
1545
1546	fdp = td->td_proc->p_fd;
1547	if (fdp == NULL)
1548		return (0);
1549	devnull = -1;
1550	error = 0;
1551	for (i = 0; i < 3; i++) {
1552		if (fdp->fd_ofiles[i] != NULL)
1553			continue;
1554		if (devnull < 0) {
1555			error = falloc(td, &fp, &fd);
1556			if (error != 0)
1557				break;
1558			KASSERT(fd == i, ("oof, we didn't get our fd"));
1559			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1560			    td);
1561			flags = FREAD | FWRITE;
1562			error = vn_open(&nd, &flags, 0);
1563			if (error != 0) {
1564				FILEDESC_LOCK(fdp);
1565				fdp->fd_ofiles[fd] = NULL;
1566				FILEDESC_UNLOCK(fdp);
1567				fdrop(fp, td);
1568				break;
1569			}
1570			NDFREE(&nd, NDF_ONLY_PNBUF);
1571			fp->f_data = nd.ni_vp;
1572			fp->f_flag = flags;
1573			fp->f_ops = &vnops;
1574			fp->f_type = DTYPE_VNODE;
1575			VOP_UNLOCK(nd.ni_vp, 0, td);
1576			devnull = fd;
1577		} else {
1578			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
1579			if (error != 0)
1580				break;
1581		}
1582	}
1583	return (error);
1584}
1585
1586/*
1587 * Internal form of close.
1588 * Decrement reference count on file structure.
1589 * Note: td may be NULL when closing a file
1590 * that was being passed in a message.
1591 */
1592int
1593closef(fp, td)
1594	register struct file *fp;
1595	register struct thread *td;
1596{
1597	struct vnode *vp;
1598	struct flock lf;
1599
1600	if (fp == NULL)
1601		return (0);
1602	/*
1603	 * POSIX record locking dictates that any close releases ALL
1604	 * locks owned by this process.  This is handled by setting
1605	 * a flag in the unlock to free ONLY locks obeying POSIX
1606	 * semantics, and not to free BSD-style file locks.
1607	 * If the descriptor was in a message, POSIX-style locks
1608	 * aren't passed with the descriptor.
1609	 */
1610	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1611	    fp->f_type == DTYPE_VNODE) {
1612		lf.l_whence = SEEK_SET;
1613		lf.l_start = 0;
1614		lf.l_len = 0;
1615		lf.l_type = F_UNLCK;
1616		vp = (struct vnode *)fp->f_data;
1617		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1618		    F_UNLCK, &lf, F_POSIX);
1619	}
1620	return (fdrop(fp, td));
1621}
1622
/*
 * Drop a reference on the struct file passed in.  The file is locked here
 * and handed to fdrop_locked(), which does the actual work and whose
 * result is returned.
 */
int
fdrop(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1636
1637/*
1638 * Extract the file pointer associated with the specified descriptor for
1639 * the current user process.
1640 *
1641 * If the descriptor doesn't exist, EBADF is returned.
1642 *
1643 * If the descriptor exists but doesn't match 'flags' then
1644 * return EBADF for read attempts and EINVAL for write attempts.
1645 *
1646 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1647 * It should be droped with fdrop().
1648 * If it is not set, then the refcount will not be bumped however the
1649 * thread's filedesc struct will be returned locked (for fgetsock).
1650 *
1651 * If an error occured the non-zero error is returned and *fpp is set to NULL.
1652 * Otherwise *fpp is set and zero is returned.
1653 */
1654static __inline int
1655_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1656{
1657	struct filedesc *fdp;
1658	struct file *fp;
1659
1660	*fpp = NULL;
1661	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1662		return (EBADF);
1663	FILEDESC_LOCK(fdp);
1664	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1665		FILEDESC_UNLOCK(fdp);
1666		return (EBADF);
1667	}
1668
1669	/*
1670	 * Note: FREAD failures returns EBADF to maintain backwards
1671	 * compatibility with what routines returned before.
1672	 *
1673	 * Only one flag, or 0, may be specified.
1674	 */
1675	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1676		FILEDESC_UNLOCK(fdp);
1677		return (EBADF);
1678	}
1679	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1680		FILEDESC_UNLOCK(fdp);
1681		return (EINVAL);
1682	}
1683	if (hold) {
1684		fhold(fp);
1685		FILEDESC_UNLOCK(fdp);
1686	}
1687	*fpp = fp;
1688	return (0);
1689}
1690
/*
 * Look up descriptor 'fd' with no access-mode requirement and return the
 * file held (see _fget()).
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0, 1);
	return (error);
}
1697
1698int
1699fget_read(struct thread *td, int fd, struct file **fpp)
1700{
1701
1702	return(_fget(td, fd, fpp, FREAD, 1));
1703}
1704
1705int
1706fget_write(struct thread *td, int fd, struct file **fpp)
1707{
1708
1709	return(_fget(td, fd, fpp, FWRITE, 1));
1710}
1711
1712/*
1713 * Like fget() but loads the underlying vnode, or returns an error if
1714 * the descriptor does not represent a vnode.  Note that pipes use vnodes
1715 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1716 * error).  The returned vnode will be vref()d.
1717 */
1718static __inline int
1719_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1720{
1721	struct file *fp;
1722	int error;
1723
1724	*vpp = NULL;
1725	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1726		return (error);
1727	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1728		error = EINVAL;
1729	} else {
1730		*vpp = (struct vnode *)fp->f_data;
1731		vref(*vpp);
1732	}
1733	FILEDESC_UNLOCK(td->td_proc->p_fd);
1734	return (error);
1735}
1736
/*
 * Return the referenced vnode behind descriptor 'fd', requesting no
 * particular access mode (see _fgetvp()).
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1743
1744int
1745fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1746{
1747
1748	return (_fgetvp(td, fd, vpp, FREAD));
1749}
1750
1751int
1752fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1753{
1754
1755	return (_fgetvp(td, fd, vpp, FWRITE));
1756}
1757
1758/*
1759 * Like fget() but loads the underlying socket, or returns an error if
1760 * the descriptor does not represent a socket.
1761 *
1762 * We bump the ref count on the returned socket.  XXX Also obtain the SX
1763 * lock in the future.
1764 */
1765int
1766fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1767{
1768	struct file *fp;
1769	int error;
1770
1771	*spp = NULL;
1772	if (fflagp != NULL)
1773		*fflagp = 0;
1774	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1775		return (error);
1776	if (fp->f_type != DTYPE_SOCKET) {
1777		error = ENOTSOCK;
1778	} else {
1779		*spp = (struct socket *)fp->f_data;
1780		if (fflagp)
1781			*fflagp = fp->f_flag;
1782		soref(*spp);
1783	}
1784	FILEDESC_UNLOCK(td->td_proc->p_fd);
1785	return (error);
1786}
1787
/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future.  The last reference closes the socket.
 *
 * Counterpart of fgetsock(), which takes the reference with soref().
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1798
1799/*
1800 * Drop reference on struct file passed in, may call closef if the
1801 * reference hits zero.
1802 * Expects struct file locked, and will unlock it.
1803 */
1804int
1805fdrop_locked(fp, td)
1806	struct file *fp;
1807	struct thread *td;
1808{
1809	struct flock lf;
1810	struct vnode *vp;
1811	int error;
1812
1813	FILE_LOCK_ASSERT(fp, MA_OWNED);
1814
1815	if (--fp->f_count > 0) {
1816		FILE_UNLOCK(fp);
1817		return (0);
1818	}
1819	mtx_lock(&Giant);
1820	if (fp->f_count < 0)
1821		panic("fdrop: count < 0");
1822	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1823		lf.l_whence = SEEK_SET;
1824		lf.l_start = 0;
1825		lf.l_len = 0;
1826		lf.l_type = F_UNLCK;
1827		vp = (struct vnode *)fp->f_data;
1828		FILE_UNLOCK(fp);
1829		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1830	} else
1831		FILE_UNLOCK(fp);
1832	if (fp->f_ops != &badfileops)
1833		error = fo_close(fp, td);
1834	else
1835		error = 0;
1836	ffree(fp);
1837	mtx_unlock(&Giant);
1838	return (error);
1839}
1840
1841/*
1842 * Apply an advisory lock on a file descriptor.
1843 *
1844 * Just attempt to get a record lock of the requested type on
1845 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1846 */
1847#ifndef _SYS_SYSPROTO_H_
1848struct flock_args {
1849	int	fd;
1850	int	how;
1851};
1852#endif
1853/*
1854 * MPSAFE
1855 */
1856/* ARGSUSED */
1857int
1858flock(td, uap)
1859	struct thread *td;
1860	register struct flock_args *uap;
1861{
1862	struct file *fp;
1863	struct vnode *vp;
1864	struct flock lf;
1865	int error;
1866
1867	if ((error = fget(td, uap->fd, &fp)) != 0)
1868		return (error);
1869	if (fp->f_type != DTYPE_VNODE) {
1870		fdrop(fp, td);
1871		return (EOPNOTSUPP);
1872	}
1873
1874	mtx_lock(&Giant);
1875	vp = (struct vnode *)fp->f_data;
1876	lf.l_whence = SEEK_SET;
1877	lf.l_start = 0;
1878	lf.l_len = 0;
1879	if (uap->how & LOCK_UN) {
1880		lf.l_type = F_UNLCK;
1881		FILE_LOCK(fp);
1882		fp->f_flag &= ~FHASLOCK;
1883		FILE_UNLOCK(fp);
1884		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1885		goto done2;
1886	}
1887	if (uap->how & LOCK_EX)
1888		lf.l_type = F_WRLCK;
1889	else if (uap->how & LOCK_SH)
1890		lf.l_type = F_RDLCK;
1891	else {
1892		error = EBADF;
1893		goto done2;
1894	}
1895	FILE_LOCK(fp);
1896	fp->f_flag |= FHASLOCK;
1897	FILE_UNLOCK(fp);
1898	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1899	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1900done2:
1901	fdrop(fp, td);
1902	mtx_unlock(&Giant);
1903	return (error);
1904}
1905
1906/*
1907 * File Descriptor pseudo-device driver (/dev/fd/).
1908 *
1909 * Opening minor device N dup()s the file (if any) connected to file
1910 * descriptor N belonging to the calling process.  Note that this driver
1911 * consists of only the ``open()'' routine, because all subsequent
1912 * references to this file will be direct to the other driver.
1913 */
1914/* ARGSUSED */
1915static int
1916fdopen(dev, mode, type, td)
1917	dev_t dev;
1918	int mode, type;
1919	struct thread *td;
1920{
1921
1922	/*
1923	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1924	 * the file descriptor being sought for duplication. The error
1925	 * return ensures that the vnode for this device will be released
1926	 * by vn_open. Open will detect this special error and take the
1927	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1928	 * will simply report the error.
1929	 */
1930	td->td_dupfd = dev2unit(dev);
1931	return (ENODEV);
1932}
1933
1934/*
1935 * Duplicate the specified descriptor to a free descriptor.
1936 */
1937int
1938dupfdopen(td, fdp, indx, dfd, mode, error)
1939	struct thread *td;
1940	struct filedesc *fdp;
1941	int indx, dfd;
1942	int mode;
1943	int error;
1944{
1945	register struct file *wfp;
1946	struct file *fp;
1947
1948	/*
1949	 * If the to-be-dup'd fd number is greater than the allowed number
1950	 * of file descriptors, or the fd to be dup'd has already been
1951	 * closed, then reject.
1952	 */
1953	FILEDESC_LOCK(fdp);
1954	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1955	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1956		FILEDESC_UNLOCK(fdp);
1957		return (EBADF);
1958	}
1959
1960	/*
1961	 * There are two cases of interest here.
1962	 *
1963	 * For ENODEV simply dup (dfd) to file descriptor
1964	 * (indx) and return.
1965	 *
1966	 * For ENXIO steal away the file structure from (dfd) and
1967	 * store it in (indx).  (dfd) is effectively closed by
1968	 * this operation.
1969	 *
1970	 * Any other error code is just returned.
1971	 */
1972	switch (error) {
1973	case ENODEV:
1974		/*
1975		 * Check that the mode the file is being opened for is a
1976		 * subset of the mode of the existing descriptor.
1977		 */
1978		FILE_LOCK(wfp);
1979		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1980			FILE_UNLOCK(wfp);
1981			FILEDESC_UNLOCK(fdp);
1982			return (EACCES);
1983		}
1984		fp = fdp->fd_ofiles[indx];
1985#if 0
1986		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1987			(void) munmapfd(td, indx);
1988#endif
1989		fdp->fd_ofiles[indx] = wfp;
1990		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1991		fhold_locked(wfp);
1992		FILE_UNLOCK(wfp);
1993		if (indx > fdp->fd_lastfile)
1994			fdp->fd_lastfile = indx;
1995		if (fp != NULL)
1996			FILE_LOCK(fp);
1997		FILEDESC_UNLOCK(fdp);
1998		/*
1999		 * We now own the reference to fp that the ofiles[] array
2000		 * used to own.  Release it.
2001		 */
2002		if (fp != NULL)
2003			fdrop_locked(fp, td);
2004		return (0);
2005
2006	case ENXIO:
2007		/*
2008		 * Steal away the file pointer from dfd and stuff it into indx.
2009		 */
2010		fp = fdp->fd_ofiles[indx];
2011#if 0
2012		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2013			(void) munmapfd(td, indx);
2014#endif
2015		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2016		fdp->fd_ofiles[dfd] = NULL;
2017		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2018		fdp->fd_ofileflags[dfd] = 0;
2019
2020		/*
2021		 * Complete the clean up of the filedesc structure by
2022		 * recomputing the various hints.
2023		 */
2024		if (indx > fdp->fd_lastfile) {
2025			fdp->fd_lastfile = indx;
2026		} else {
2027			while (fdp->fd_lastfile > 0 &&
2028			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2029				fdp->fd_lastfile--;
2030			}
2031			if (dfd < fdp->fd_freefile)
2032				fdp->fd_freefile = dfd;
2033		}
2034		if (fp != NULL)
2035			FILE_LOCK(fp);
2036		FILEDESC_UNLOCK(fdp);
2037
2038		/*
2039		 * we now own the reference to fp that the ofiles[] array
2040		 * used to own.  Release it.
2041		 */
2042		if (fp != NULL)
2043			fdrop_locked(fp, td);
2044		return (0);
2045
2046	default:
2047		FILEDESC_UNLOCK(fdp);
2048		return (error);
2049	}
2050	/* NOTREACHED */
2051}
2052
2053/*
2054 * Get file structures.
2055 */
2056static int
2057sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2058{
2059	struct xfile xf;
2060	struct filedesc *fdp;
2061	struct file *fp;
2062	struct proc *p;
2063	int error, n;
2064
2065	sysctl_wire_old_buffer(req, 0);
2066	if (req->oldptr == NULL) {
2067		n = 16;		/* A slight overestimate. */
2068		sx_slock(&filelist_lock);
2069		LIST_FOREACH(fp, &filehead, f_list) {
2070			/*
2071			 * We should grab the lock, but this is an
2072			 * estimate, so does it really matter?
2073			 */
2074			/* mtx_lock(fp->f_mtxp); */
2075			n += fp->f_count;
2076			/* mtx_unlock(f->f_mtxp); */
2077		}
2078		sx_sunlock(&filelist_lock);
2079		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
2080	}
2081	error = 0;
2082	bzero(&xf, sizeof(xf));
2083	xf.xf_size = sizeof(xf);
2084	sx_slock(&allproc_lock);
2085	LIST_FOREACH(p, &allproc, p_list) {
2086		PROC_LOCK(p);
2087		xf.xf_pid = p->p_pid;
2088		xf.xf_uid = p->p_ucred->cr_uid;
2089		if ((fdp = p->p_fd) == NULL) {
2090			PROC_UNLOCK(p);
2091			continue;
2092		}
2093		FILEDESC_LOCK(fdp);
2094		for (n = 0; n < fdp->fd_nfiles; ++n) {
2095			if ((fp = fdp->fd_ofiles[n]) == NULL)
2096				continue;
2097			xf.xf_fd = n;
2098			xf.xf_file = fp;
2099#define	XF_COPY(field) xf.xf_##field = fp->f_##field
2100			XF_COPY(type);
2101			XF_COPY(count);
2102			XF_COPY(msgcount);
2103			XF_COPY(offset);
2104			XF_COPY(data);
2105			XF_COPY(flag);
2106#undef XF_COPY
2107			error = SYSCTL_OUT(req, &xf, sizeof(xf));
2108			if (error)
2109				break;
2110		}
2111		FILEDESC_UNLOCK(fdp);
2112		PROC_UNLOCK(p);
2113		if (error)
2114			break;
2115	}
2116	sx_sunlock(&allproc_lock);
2117	return (error);
2118}
2119
/* kern.file: opaque, read-only node served by sysctl_kern_file(). */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

/* kern.maxfilesperproc: backed directly by the maxfilesperproc variable. */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

/* kern.maxfiles: backed directly by the maxfiles variable. */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

/* kern.openfiles: read-only view of the nfiles counter. */
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2131
2132static void
2133fildesc_drvinit(void *unused)
2134{
2135	dev_t dev;
2136
2137	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2138	make_dev_alias(dev, "stdin");
2139	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2140	make_dev_alias(dev, "stdout");
2141	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2142	make_dev_alias(dev, "stderr");
2143	if (!devfs_present) {
2144		int fd;
2145
2146		for (fd = 3; fd < NUMFDESC; fd++)
2147			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2148			    "fd/%d", fd);
2149	}
2150}
2151
/*
 * File operations vector made up entirely of the badfo_* stubs defined
 * below.  _fget() treats a file whose f_ops points here as a bad
 * descriptor (EBADF), and fdrop_locked() skips fo_close() for such a file.
 * The same stub, badfo_readwrite, fills the first two slots.
 */
struct fileops badfileops = {
	badfo_readwrite,
	badfo_readwrite,
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close
};
2161
/*
 * Read/write stub for badfileops: always fails with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EBADF);
}
2173
2174static int
2175badfo_ioctl(fp, com, data, active_cred, td)
2176	struct file *fp;
2177	u_long com;
2178	void *data;
2179	struct ucred *active_cred;
2180	struct thread *td;
2181{
2182
2183	return (EBADF);
2184}
2185
/*
 * poll stub for badfileops: always returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2196
/*
 * kqfilter stub for badfileops: always returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2205
/*
 * stat stub for badfileops: always fails with EBADF and leaves 'sb'
 * untouched.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EBADF);
}
2216
/*
 * close stub for badfileops: always fails with EBADF.
 */
static int
badfo_close(struct file *fp, struct thread *td)
{

	return (EBADF);
}
2225
/* Register fildesc_drvinit() to run at the SI_SUB_DRIVERS stage. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)

/*
 * Register filelistinit() at SI_SUB_LOCK, SI_ORDER_FIRST.
 * NOTE(review): the SYSINIT identifier is "select" although the routine
 * sets up the file list; presumably historical -- confirm before renaming.
 */
static void filelistinit(void *);
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2231
2232/* ARGSUSED*/
2233static void
2234filelistinit(dummy)
2235	void *dummy;
2236{
2237
2238	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2239	    NULL, NULL, UMA_ALIGN_PTR, 0);
2240	sx_init(&filelist_lock, "filelist lock");
2241	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2242}
2243