kern_descrip.c revision 102412
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 102412 2002-08-25 13:23:09Z charnier $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/lock.h>
47#include <sys/malloc.h>
48#include <sys/mutex.h>
49#include <sys/sysproto.h>
50#include <sys/conf.h>
51#include <sys/filedesc.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54#include <sys/vnode.h>
55#include <sys/proc.h>
56#include <sys/namei.h>
57#include <sys/file.h>
58#include <sys/stat.h>
59#include <sys/filio.h>
60#include <sys/fcntl.h>
61#include <sys/unistd.h>
62#include <sys/resourcevar.h>
63#include <sys/event.h>
64#include <sys/sx.h>
65#include <sys/socketvar.h>
66#include <sys/signalvar.h>
67
68#include <machine/limits.h>
69
70#include <vm/vm.h>
71#include <vm/vm_extern.h>
72#include <vm/uma.h>
73
/*
 * malloc types used in this file: M_FILEDESC for descriptor tables
 * (see fdinit/fdalloc) and M_SIGIO for sigio structures (see fsetown).
 */
74static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
75static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
76
/* UMA zone that falloc()/ffree() allocate and release struct file from. */
77uma_zone_t file_zone;
78
/* Open handler for the descriptor device; defined outside this section. */
79static	 d_open_t  fdopen;
/* NOTE(review): NUMFDESC is not referenced in this section -- confirm use. */
80#define NUMFDESC 64
81
/*
 * Character device switch for the "FD" device.  Only the open entry
 * point is provided; every other slot is filled with a no* stub.
 */
82#define CDEV_MAJOR 22
83static struct cdevsw fildesc_cdevsw = {
84	/* open */	fdopen,
85	/* close */	noclose,
86	/* read */	noread,
87	/* write */	nowrite,
88	/* ioctl */	noioctl,
89	/* poll */	nopoll,
90	/* mmap */	nommap,
91	/* strategy */	nostrategy,
92	/* name */	"FD",
93	/* maj */	CDEV_MAJOR,
94	/* dump */	nodump,
95	/* psize */	nopsize,
96	/* flags */	0,
97};
98
/*
 * Forward declarations.  do_dup() is the shared back end of dup(),
 * dup2() and fcntl(F_DUPFD); the badfo_* routines are defined outside
 * this section (presumably the methods behind badfileops, which
 * falloc() installs on new files -- confirm).
 */
99static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval,
100    struct thread *td);
101static int badfo_readwrite(struct file *fp, struct uio *uio,
102    struct ucred *active_cred, int flags, struct thread *td);
103static int badfo_ioctl(struct file *fp, u_long com, void *data,
104    struct ucred *active_cred, struct thread *td);
105static int badfo_poll(struct file *fp, int events,
106    struct ucred *active_cred, struct thread *td);
107static int badfo_kqfilter(struct file *fp, struct knote *kn);
108static int badfo_stat(struct file *fp, struct stat *sb,
109    struct ucred *active_cred, struct thread *td);
110static int badfo_close(struct file *fp, struct thread *td);
111
112/*
113 * Descriptor management.
114 */
/*
 * filehead and nfiles are updated by falloc()/ffree() with
 * filelist_lock held exclusively.  cmask seeds fd_cmask in fdinit().
 * sigio_lock is presumably the mutex behind SIGIO_LOCK() -- confirm.
 */
115struct filelist filehead;	/* head of list of open files */
116int nfiles;			/* actual number of open files */
117extern int cmask;
118struct sx filelist_lock;	/* sx to protect filelist */
119struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
120
121/*
122 * System calls on descriptors.
123 */
124#ifndef _SYS_SYSPROTO_H_
125struct getdtablesize_args {
126	int	dummy;
127};
128#endif
129/*
130 * MPSAFE
131 */
132/* ARGSUSED */
133int
134getdtablesize(td, uap)
135	struct thread *td;
136	struct getdtablesize_args *uap;
137{
138	struct proc *p = td->td_proc;
139
140	mtx_lock(&Giant);
141	td->td_retval[0] =
142	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
143	mtx_unlock(&Giant);
144	return (0);
145}
146
147/*
148 * Duplicate a file descriptor to a particular value.
149 *
150 * note: keep in mind that a potential race condition exists when closing
151 * descriptors from a shared descriptor table (via rfork).
152 */
153#ifndef _SYS_SYSPROTO_H_
154struct dup2_args {
155	u_int	from;
156	u_int	to;
157};
158#endif
159/*
160 * MPSAFE
161 */
162/* ARGSUSED */
163int
164dup2(td, uap)
165	struct thread *td;
166	struct dup2_args *uap;
167{
168	struct proc *p = td->td_proc;
169	register struct filedesc *fdp = td->td_proc->p_fd;
170	register u_int old = uap->from, new = uap->to;
171	int i, error;
172
173	FILEDESC_LOCK(fdp);
174retry:
175	if (old >= fdp->fd_nfiles ||
176	    fdp->fd_ofiles[old] == NULL ||
177	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
178	    new >= maxfilesperproc) {
179		FILEDESC_UNLOCK(fdp);
180		return (EBADF);
181	}
182	if (old == new) {
183		td->td_retval[0] = new;
184		FILEDESC_UNLOCK(fdp);
185		return (0);
186	}
187	if (new >= fdp->fd_nfiles) {
188		if ((error = fdalloc(td, new, &i))) {
189			FILEDESC_UNLOCK(fdp);
190			return (error);
191		}
192		/*
193		 * fdalloc() may block, retest everything.
194		 */
195		goto retry;
196	}
197	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
198	return(error);
199}
200
201/*
202 * Duplicate a file descriptor.
203 */
204#ifndef _SYS_SYSPROTO_H_
205struct dup_args {
206	u_int	fd;
207};
208#endif
209/*
210 * MPSAFE
211 */
212/* ARGSUSED */
213int
214dup(td, uap)
215	struct thread *td;
216	struct dup_args *uap;
217{
218	register struct filedesc *fdp;
219	u_int old;
220	int new, error;
221
222	old = uap->fd;
223	fdp = td->td_proc->p_fd;
224	FILEDESC_LOCK(fdp);
225	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
226		FILEDESC_UNLOCK(fdp);
227		return (EBADF);
228	}
229	if ((error = fdalloc(td, 0, &new))) {
230		FILEDESC_UNLOCK(fdp);
231		return (error);
232	}
233	error = do_dup(fdp, (int)old, new, td->td_retval, td);
234	return (error);
235}
236
237/*
238 * The file control system call.
239 */
240#ifndef _SYS_SYSPROTO_H_
241struct fcntl_args {
242	int	fd;
243	int	cmd;
244	long	arg;
245};
246#endif
247/*
248 * MPSAFE
249 */
250/* ARGSUSED */
251int
252fcntl(td, uap)
253	struct thread *td;
254	register struct fcntl_args *uap;
255{
256	register struct proc *p = td->td_proc;
257	register struct filedesc *fdp;
258	register struct file *fp;
259	register char *pop;
260	struct vnode *vp;
261	int i, tmp, error = 0, flg = F_POSIX;
262	struct flock fl;
263	u_int newmin;
264	struct proc *leaderp;
265
266	mtx_lock(&Giant);
267
268	fdp = p->p_fd;
269	FILEDESC_LOCK(fdp);
270	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
271	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
272		FILEDESC_UNLOCK(fdp);
273		error = EBADF;
274		goto done2;
275	}
276	pop = &fdp->fd_ofileflags[uap->fd];
277
278	switch (uap->cmd) {
279	case F_DUPFD:
280		newmin = uap->arg;
281		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
282		    newmin >= maxfilesperproc) {
283			FILEDESC_UNLOCK(fdp);
284			error = EINVAL;
285			break;
286		}
287		if ((error = fdalloc(td, newmin, &i))) {
288			FILEDESC_UNLOCK(fdp);
289			break;
290		}
291		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
292		break;
293
294	case F_GETFD:
295		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
296		FILEDESC_UNLOCK(fdp);
297		break;
298
299	case F_SETFD:
300		*pop = (*pop &~ UF_EXCLOSE) |
301		    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
302		FILEDESC_UNLOCK(fdp);
303		break;
304
305	case F_GETFL:
306		FILE_LOCK(fp);
307		FILEDESC_UNLOCK(fdp);
308		td->td_retval[0] = OFLAGS(fp->f_flag);
309		FILE_UNLOCK(fp);
310		break;
311
312	case F_SETFL:
313		fhold(fp);
314		FILEDESC_UNLOCK(fdp);
315		fp->f_flag &= ~FCNTLFLAGS;
316		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
317		tmp = fp->f_flag & FNONBLOCK;
318		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
319		if (error) {
320			fdrop(fp, td);
321			break;
322		}
323		tmp = fp->f_flag & FASYNC;
324		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
325		if (!error) {
326			fdrop(fp, td);
327			break;
328		}
329		fp->f_flag &= ~FNONBLOCK;
330		tmp = 0;
331		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
332		fdrop(fp, td);
333		break;
334
335	case F_GETOWN:
336		fhold(fp);
337		FILEDESC_UNLOCK(fdp);
338		error = fo_ioctl(fp, FIOGETOWN, (void *)td->td_retval,
339		    td->td_ucred, td);
340		fdrop(fp, td);
341		break;
342
343	case F_SETOWN:
344		fhold(fp);
345		FILEDESC_UNLOCK(fdp);
346		error = fo_ioctl(fp, FIOSETOWN, &uap->arg, td->td_ucred, td);
347		fdrop(fp, td);
348		break;
349
350	case F_SETLKW:
351		flg |= F_WAIT;
352		/* FALLTHROUGH F_SETLK */
353
354	case F_SETLK:
355		if (fp->f_type != DTYPE_VNODE) {
356			FILEDESC_UNLOCK(fdp);
357			error = EBADF;
358			break;
359		}
360		vp = (struct vnode *)fp->f_data;
361		/*
362		 * copyin/lockop may block
363		 */
364		fhold(fp);
365		FILEDESC_UNLOCK(fdp);
366		vp = (struct vnode *)fp->f_data;
367
368		/* Copy in the lock structure */
369		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
370		if (error) {
371			fdrop(fp, td);
372			break;
373		}
374		if (fl.l_whence == SEEK_CUR) {
375			if (fp->f_offset < 0 ||
376			    (fl.l_start > 0 &&
377			     fp->f_offset > OFF_MAX - fl.l_start)) {
378				fdrop(fp, td);
379				error = EOVERFLOW;
380				break;
381			}
382			fl.l_start += fp->f_offset;
383		}
384
385		switch (fl.l_type) {
386		case F_RDLCK:
387			if ((fp->f_flag & FREAD) == 0) {
388				error = EBADF;
389				break;
390			}
391			PROC_LOCK(p);
392			p->p_flag |= P_ADVLOCK;
393			leaderp = p->p_leader;
394			PROC_UNLOCK(p);
395			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
396			    &fl, flg);
397			break;
398		case F_WRLCK:
399			if ((fp->f_flag & FWRITE) == 0) {
400				error = EBADF;
401				break;
402			}
403			PROC_LOCK(p);
404			p->p_flag |= P_ADVLOCK;
405			leaderp = p->p_leader;
406			PROC_UNLOCK(p);
407			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
408			    &fl, flg);
409			break;
410		case F_UNLCK:
411			PROC_LOCK(p);
412			leaderp = p->p_leader;
413			PROC_UNLOCK(p);
414			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
415				&fl, F_POSIX);
416			break;
417		default:
418			error = EINVAL;
419			break;
420		}
421		fdrop(fp, td);
422		break;
423
424	case F_GETLK:
425		if (fp->f_type != DTYPE_VNODE) {
426			FILEDESC_UNLOCK(fdp);
427			error = EBADF;
428			break;
429		}
430		vp = (struct vnode *)fp->f_data;
431		/*
432		 * copyin/lockop may block
433		 */
434		fhold(fp);
435		FILEDESC_UNLOCK(fdp);
436		vp = (struct vnode *)fp->f_data;
437
438		/* Copy in the lock structure */
439		error = copyin((caddr_t)(intptr_t)uap->arg, &fl, sizeof(fl));
440		if (error) {
441			fdrop(fp, td);
442			break;
443		}
444		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
445		    fl.l_type != F_UNLCK) {
446			fdrop(fp, td);
447			error = EINVAL;
448			break;
449		}
450		if (fl.l_whence == SEEK_CUR) {
451			if ((fl.l_start > 0 &&
452			     fp->f_offset > OFF_MAX - fl.l_start) ||
453			    (fl.l_start < 0 &&
454			     fp->f_offset < OFF_MIN - fl.l_start)) {
455				fdrop(fp, td);
456				error = EOVERFLOW;
457				break;
458			}
459			fl.l_start += fp->f_offset;
460		}
461		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
462			    &fl, F_POSIX);
463		fdrop(fp, td);
464		if (error == 0) {
465			error = copyout(&fl, (caddr_t)(intptr_t)uap->arg,
466			    sizeof(fl));
467		}
468		break;
469	default:
470		FILEDESC_UNLOCK(fdp);
471		error = EINVAL;
472		break;
473	}
474done2:
475	mtx_unlock(&Giant);
476	return (error);
477}
478
479/*
480 * Common code for dup, dup2, and fcntl(F_DUPFD).
481 * filedesc must be locked, but will be unlocked as a side effect.
482 */
483static int
484do_dup(fdp, old, new, retval, td)
485	register struct filedesc *fdp;
486	register int old, new;
487	register_t *retval;
488	struct thread *td;
489{
490	struct file *fp;
491	struct file *delfp;
492
493	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
494
495	/*
496	 * Save info on the descriptor being overwritten.  We have
497	 * to do the unmap now, but we cannot close it without
498	 * introducing an ownership race for the slot.
499	 */
500	delfp = fdp->fd_ofiles[new];
501#if 0
502	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
503		(void) munmapfd(td, new);
504#endif
505
506	/*
507	 * Duplicate the source descriptor, update lastfile
508	 */
509	fp = fdp->fd_ofiles[old];
510	fdp->fd_ofiles[new] = fp;
511	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
512	fhold(fp);
513	if (new > fdp->fd_lastfile)
514		fdp->fd_lastfile = new;
515	*retval = new;
516
517	FILEDESC_UNLOCK(fdp);
518
519	/*
520	 * If we dup'd over a valid file, we now own the reference to it
521	 * and must dispose of it using closef() semantics (as if a
522	 * close() were performed on it).
523	 */
524	if (delfp) {
525		mtx_lock(&Giant);
526		(void) closef(delfp, td);
527		mtx_unlock(&Giant);
528	}
529	return (0);
530}
531
532/*
533 * If sigio is on the list associated with a process or process group,
534 * disable signalling from the device, remove sigio from the list and
535 * free sigio.
536 */
537void
538funsetown(sigiop)
539	struct sigio **sigiop;
540{
541	struct sigio *sigio;
542
543	SIGIO_LOCK();
544	sigio = *sigiop;
545	if (sigio == NULL) {
546		SIGIO_UNLOCK();
547		return;
548	}
549	*(sigio->sio_myref) = NULL;
550	if ((sigio)->sio_pgid < 0) {
551		struct pgrp *pg = (sigio)->sio_pgrp;
552		PGRP_LOCK(pg);
553		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
554			     sigio, sio_pgsigio);
555		PGRP_UNLOCK(pg);
556	} else {
557		struct proc *p = (sigio)->sio_proc;
558		PROC_LOCK(p);
559		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
560			     sigio, sio_pgsigio);
561		PROC_UNLOCK(p);
562	}
563	SIGIO_UNLOCK();
564	crfree(sigio->sio_ucred);
565	FREE(sigio, M_SIGIO);
566}
567
568/*
569 * Free a list of sigio structures.
570 * We only need to lock the SIGIO_LOCK because we have made ourselves
571 * inaccessable to callers of fsetown and therefore do not need to lock
572 * the proc or pgrp struct for the list manipulation.
573 */
574void
575funsetownlst(sigiolst)
576	struct sigiolst *sigiolst;
577{
578	struct sigio *sigio;
579	struct proc *p;
580	struct pgrp *pg;
581
582	sigio = SLIST_FIRST(sigiolst);
583	if (sigio == NULL)
584		return;
585
586	p = NULL;
587	pg = NULL;
588
589	/*
590	 * Every entry of the list should belong
591	 * to a single proc or pgrp.
592	 */
593	if (sigio->sio_pgid < 0) {
594		pg = sigio->sio_pgrp;
595		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
596	} else /* if (sigio->sio_pgid > 0) */ {
597		p = sigio->sio_proc;
598		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
599	}
600
601	SIGIO_LOCK();
602	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
603		*(sigio->sio_myref) = NULL;
604		if (pg != NULL) {
605			KASSERT(sigio->sio_pgid < 0,
606			    ("Proc sigio in pgrp sigio list"));
607			KASSERT(sigio->sio_pgrp == pg,
608			    ("Bogus pgrp in sigio list"));
609			PGRP_LOCK(pg);
610			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
611			    sio_pgsigio);
612			PGRP_UNLOCK(pg);
613		} else /* if (p != NULL) */ {
614			KASSERT(sigio->sio_pgid > 0,
615			    ("Pgrp sigio in proc sigio list"));
616			KASSERT(sigio->sio_proc == p,
617			    ("Bogus proc in sigio list"));
618			PROC_LOCK(p);
619			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
620			    sio_pgsigio);
621			PROC_UNLOCK(p);
622		}
623		SIGIO_UNLOCK();
624		crfree(sigio->sio_ucred);
625		FREE(sigio, M_SIGIO);
626		SIGIO_LOCK();
627	}
628	SIGIO_UNLOCK();
629}
630
631/*
632 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
633 *
634 * After permission checking, add a sigio structure to the sigio list for
635 * the process or process group.
636 */
637int
638fsetown(pgid, sigiop)
639	pid_t pgid;
640	struct sigio **sigiop;
641{
642	struct proc *proc;
643	struct pgrp *pgrp;
644	struct sigio *sigio;
645	int ret;
646
647	if (pgid == 0) {
648		funsetown(sigiop);
649		return (0);
650	}
651
652	ret = 0;
653
654	/* Allocate and fill in the new sigio out of locks. */
655	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
656	sigio->sio_pgid = pgid;
657	sigio->sio_ucred = crhold(curthread->td_ucred);
658	sigio->sio_myref = sigiop;
659
660	sx_slock(&proctree_lock);
661	if (pgid > 0) {
662		proc = pfind(pgid);
663		if (proc == NULL) {
664			ret = ESRCH;
665			goto fail;
666		}
667
668		/*
669		 * Policy - Don't allow a process to FSETOWN a process
670		 * in another session.
671		 *
672		 * Remove this test to allow maximum flexibility or
673		 * restrict FSETOWN to the current process or process
674		 * group for maximum safety.
675		 */
676		PROC_UNLOCK(proc);
677		if (proc->p_session != curthread->td_proc->p_session) {
678			ret = EPERM;
679			goto fail;
680		}
681
682		pgrp = NULL;
683	} else /* if (pgid < 0) */ {
684		pgrp = pgfind(-pgid);
685		if (pgrp == NULL) {
686			ret = ESRCH;
687			goto fail;
688		}
689		PGRP_UNLOCK(pgrp);
690
691		/*
692		 * Policy - Don't allow a process to FSETOWN a process
693		 * in another session.
694		 *
695		 * Remove this test to allow maximum flexibility or
696		 * restrict FSETOWN to the current process or process
697		 * group for maximum safety.
698		 */
699		if (pgrp->pg_session != curthread->td_proc->p_session) {
700			ret = EPERM;
701			goto fail;
702		}
703
704		proc = NULL;
705	}
706	funsetown(sigiop);
707	if (pgid > 0) {
708		PROC_LOCK(proc);
709		/*
710		 * since funsetownlst() is called without the proctree
711		 * locked we need to check for P_WEXIT.
712		 * XXX: is ESRCH correct?
713		 */
714		if ((proc->p_flag & P_WEXIT) != 0) {
715			PROC_UNLOCK(proc);
716			ret = ESRCH;
717			goto fail;
718		}
719		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
720		sigio->sio_proc = proc;
721		PROC_UNLOCK(proc);
722	} else {
723		PGRP_LOCK(pgrp);
724		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
725		sigio->sio_pgrp = pgrp;
726		PGRP_UNLOCK(pgrp);
727	}
728	sx_sunlock(&proctree_lock);
729	SIGIO_LOCK();
730	*sigiop = sigio;
731	SIGIO_UNLOCK();
732	return (0);
733
734fail:
735	sx_sunlock(&proctree_lock);
736	crfree(sigio->sio_ucred);
737	FREE(sigio, M_SIGIO);
738	return (ret);
739}
740
741/*
742 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
743 */
744pid_t
745fgetown(sigio)
746	struct sigio *sigio;
747{
748	return (sigio != NULL ? sigio->sio_pgid : 0);
749}
750
751/*
752 * Close a file descriptor.
753 */
754#ifndef _SYS_SYSPROTO_H_
755struct close_args {
756        int     fd;
757};
758#endif
759/*
760 * MPSAFE
761 */
762/* ARGSUSED */
763int
764close(td, uap)
765	struct thread *td;
766	struct close_args *uap;
767{
768	register struct filedesc *fdp;
769	register struct file *fp;
770	register int fd = uap->fd;
771	int error = 0;
772
773	mtx_lock(&Giant);
774	fdp = td->td_proc->p_fd;
775	FILEDESC_LOCK(fdp);
776	if ((unsigned)fd >= fdp->fd_nfiles ||
777	    (fp = fdp->fd_ofiles[fd]) == NULL) {
778		FILEDESC_UNLOCK(fdp);
779		error = EBADF;
780		goto done2;
781	}
782#if 0
783	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
784		(void) munmapfd(td, fd);
785#endif
786	fdp->fd_ofiles[fd] = NULL;
787	fdp->fd_ofileflags[fd] = 0;
788
789	/*
790	 * we now hold the fp reference that used to be owned by the descriptor
791	 * array.
792	 */
793	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
794		fdp->fd_lastfile--;
795	if (fd < fdp->fd_freefile)
796		fdp->fd_freefile = fd;
797	if (fd < fdp->fd_knlistsize) {
798		FILEDESC_UNLOCK(fdp);
799		knote_fdclose(td, fd);
800	} else
801		FILEDESC_UNLOCK(fdp);
802
803	error = closef(fp, td);
804done2:
805	mtx_unlock(&Giant);
806	return(error);
807}
808
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * `struct ostat' layout.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * Looks the descriptor up with fget(), stats it through fo_stat(),
 * converts with cvtstat() and copies the result out to uap->sb.
 * Returns the first error encountered, or 0.
 *
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(td, uap)
	struct thread *td;
	register struct ofstat_args *uap;
{
	struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error == 0) {
		error = fo_stat(fp, &sb, td->td_ucred, td);
		if (error == 0) {
			cvtstat(&sb, &osb);
			error = copyout(&osb, uap->sb, sizeof (osb));
		}
		fdrop(fp, td);
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
847
848/*
849 * Return status information about a file descriptor.
850 */
851#ifndef _SYS_SYSPROTO_H_
852struct fstat_args {
853	int	fd;
854	struct	stat *sb;
855};
856#endif
857/*
858 * MPSAFE
859 */
860/* ARGSUSED */
861int
862fstat(td, uap)
863	struct thread *td;
864	struct fstat_args *uap;
865{
866	struct file *fp;
867	struct stat ub;
868	int error;
869
870	mtx_lock(&Giant);
871	if ((error = fget(td, uap->fd, &fp)) != 0)
872		goto done2;
873	error = fo_stat(fp, &ub, td->td_ucred, td);
874	if (error == 0)
875		error = copyout(&ub, uap->sb, sizeof (ub));
876	fdrop(fp, td);
877done2:
878	mtx_unlock(&Giant);
879	return (error);
880}
881
882/*
883 * Return status information about a file descriptor.
884 */
885#ifndef _SYS_SYSPROTO_H_
886struct nfstat_args {
887	int	fd;
888	struct	nstat *sb;
889};
890#endif
891/*
892 * MPSAFE
893 */
894/* ARGSUSED */
895int
896nfstat(td, uap)
897	struct thread *td;
898	register struct nfstat_args *uap;
899{
900	struct file *fp;
901	struct stat ub;
902	struct nstat nub;
903	int error;
904
905	mtx_lock(&Giant);
906	if ((error = fget(td, uap->fd, &fp)) != 0)
907		goto done2;
908	error = fo_stat(fp, &ub, td->td_ucred, td);
909	if (error == 0) {
910		cvtnstat(&ub, &nub);
911		error = copyout(&nub, uap->sb, sizeof (nub));
912	}
913	fdrop(fp, td);
914done2:
915	mtx_unlock(&Giant);
916	return (error);
917}
918
919/*
920 * Return pathconf information about a file descriptor.
921 */
922#ifndef _SYS_SYSPROTO_H_
923struct fpathconf_args {
924	int	fd;
925	int	name;
926};
927#endif
928/*
929 * MPSAFE
930 */
931/* ARGSUSED */
932int
933fpathconf(td, uap)
934	struct thread *td;
935	register struct fpathconf_args *uap;
936{
937	struct file *fp;
938	struct vnode *vp;
939	int error;
940
941	if ((error = fget(td, uap->fd, &fp)) != 0)
942		return (error);
943
944	switch (fp->f_type) {
945	case DTYPE_PIPE:
946	case DTYPE_SOCKET:
947		if (uap->name != _PC_PIPE_BUF) {
948			error = EINVAL;
949		} else {
950			td->td_retval[0] = PIPE_BUF;
951			error = 0;
952		}
953		break;
954	case DTYPE_FIFO:
955	case DTYPE_VNODE:
956		vp = (struct vnode *)fp->f_data;
957		mtx_lock(&Giant);
958		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
959		mtx_unlock(&Giant);
960		break;
961	default:
962		error = EOPNOTSUPP;
963		break;
964	}
965	fdrop(fp, td);
966	return(error);
967}
968
969/*
970 * Allocate a file descriptor for the process.
971 */
972static int fdexpand;
973SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
974
975int
976fdalloc(td, want, result)
977	struct thread *td;
978	int want;
979	int *result;
980{
981	struct proc *p = td->td_proc;
982	register struct filedesc *fdp = td->td_proc->p_fd;
983	register int i;
984	int lim, last, nfiles;
985	struct file **newofile, **oldofile;
986	char *newofileflags;
987
988	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
989
990	/*
991	 * Search for a free descriptor starting at the higher
992	 * of want or fd_freefile.  If that fails, consider
993	 * expanding the ofile array.
994	 */
995	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
996	for (;;) {
997		last = min(fdp->fd_nfiles, lim);
998		if ((i = want) < fdp->fd_freefile)
999			i = fdp->fd_freefile;
1000		for (; i < last; i++) {
1001			if (fdp->fd_ofiles[i] == NULL) {
1002				fdp->fd_ofileflags[i] = 0;
1003				if (i > fdp->fd_lastfile)
1004					fdp->fd_lastfile = i;
1005				if (want <= fdp->fd_freefile)
1006					fdp->fd_freefile = i;
1007				*result = i;
1008				return (0);
1009			}
1010		}
1011
1012		/*
1013		 * No space in current array.  Expand?
1014		 */
1015		if (fdp->fd_nfiles >= lim)
1016			return (EMFILE);
1017		if (fdp->fd_nfiles < NDEXTENT)
1018			nfiles = NDEXTENT;
1019		else
1020			nfiles = 2 * fdp->fd_nfiles;
1021		FILEDESC_UNLOCK(fdp);
1022		mtx_lock(&Giant);
1023		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
1024		    M_FILEDESC, M_WAITOK);
1025		mtx_unlock(&Giant);
1026		FILEDESC_LOCK(fdp);
1027
1028		/*
1029		 * deal with file-table extend race that might have occured
1030		 * when malloc was blocked.
1031		 */
1032		if (fdp->fd_nfiles >= nfiles) {
1033			FILEDESC_UNLOCK(fdp);
1034			mtx_lock(&Giant);
1035			FREE(newofile, M_FILEDESC);
1036			mtx_unlock(&Giant);
1037			FILEDESC_LOCK(fdp);
1038			continue;
1039		}
1040		newofileflags = (char *) &newofile[nfiles];
1041		/*
1042		 * Copy the existing ofile and ofileflags arrays
1043		 * and zero the new portion of each array.
1044		 */
1045		bcopy(fdp->fd_ofiles, newofile,
1046			(i = sizeof(struct file *) * fdp->fd_nfiles));
1047		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1048		bcopy(fdp->fd_ofileflags, newofileflags,
1049			(i = sizeof(char) * fdp->fd_nfiles));
1050		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1051		if (fdp->fd_nfiles > NDFILE)
1052			oldofile = fdp->fd_ofiles;
1053		else
1054			oldofile = NULL;
1055		fdp->fd_ofiles = newofile;
1056		fdp->fd_ofileflags = newofileflags;
1057		fdp->fd_nfiles = nfiles;
1058		fdexpand++;
1059		if (oldofile != NULL) {
1060			FILEDESC_UNLOCK(fdp);
1061			mtx_lock(&Giant);
1062			FREE(oldofile, M_FILEDESC);
1063			mtx_unlock(&Giant);
1064			FILEDESC_LOCK(fdp);
1065		}
1066	}
1067	return (0);
1068}
1069
1070/*
1071 * Check to see whether n user file descriptors
1072 * are available to the process p.
1073 */
1074int
1075fdavail(td, n)
1076	struct thread *td;
1077	register int n;
1078{
1079	struct proc *p = td->td_proc;
1080	register struct filedesc *fdp = td->td_proc->p_fd;
1081	register struct file **fpp;
1082	register int i, lim, last;
1083
1084	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1085
1086	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1087	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1088		return (1);
1089
1090	last = min(fdp->fd_nfiles, lim);
1091	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1092	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1093		if (*fpp == NULL && --n <= 0)
1094			return (1);
1095	}
1096	return (0);
1097}
1098
1099/*
1100 * Create a new open file structure and allocate
1101 * a file decriptor for the process that refers to it.
1102 */
1103int
1104falloc(td, resultfp, resultfd)
1105	register struct thread *td;
1106	struct file **resultfp;
1107	int *resultfd;
1108{
1109	struct proc *p = td->td_proc;
1110	register struct file *fp, *fq;
1111	int error, i;
1112
1113	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1114	sx_xlock(&filelist_lock);
1115	if (nfiles >= maxfiles) {
1116		sx_xunlock(&filelist_lock);
1117		uma_zfree(file_zone, fp);
1118		tablefull("file");
1119		return (ENFILE);
1120	}
1121	nfiles++;
1122
1123	/*
1124	 * If the process has file descriptor zero open, add the new file
1125	 * descriptor to the list of open files at that point, otherwise
1126	 * put it at the front of the list of open files.
1127	 */
1128	FILEDESC_LOCK(p->p_fd);
1129	if ((error = fdalloc(td, 0, &i))) {
1130		FILEDESC_UNLOCK(p->p_fd);
1131		nfiles--;
1132		sx_xunlock(&filelist_lock);
1133		uma_zfree(file_zone, fp);
1134		return (error);
1135	}
1136	fp->f_mtxp = mtx_pool_alloc();
1137	fp->f_gcflag = 0;
1138	fp->f_count = 1;
1139	fp->f_cred = crhold(td->td_ucred);
1140	fp->f_ops = &badfileops;
1141	fp->f_seqcount = 1;
1142	if ((fq = p->p_fd->fd_ofiles[0])) {
1143		LIST_INSERT_AFTER(fq, fp, f_list);
1144	} else {
1145		LIST_INSERT_HEAD(&filehead, fp, f_list);
1146	}
1147	p->p_fd->fd_ofiles[i] = fp;
1148	FILEDESC_UNLOCK(p->p_fd);
1149	sx_xunlock(&filelist_lock);
1150	if (resultfp)
1151		*resultfp = fp;
1152	if (resultfd)
1153		*resultfd = i;
1154	return (0);
1155}
1156
1157/*
1158 * Free a file descriptor.
1159 */
1160void
1161ffree(fp)
1162	register struct file *fp;
1163{
1164
1165	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1166	sx_xlock(&filelist_lock);
1167	LIST_REMOVE(fp, f_list);
1168	nfiles--;
1169	sx_xunlock(&filelist_lock);
1170	crfree(fp->f_cred);
1171	uma_zfree(file_zone, fp);
1172}
1173
1174/*
1175 * Build a new filedesc structure.
1176 */
1177struct filedesc *
1178fdinit(td)
1179	struct thread *td;
1180{
1181	register struct filedesc0 *newfdp;
1182	register struct filedesc *fdp = td->td_proc->p_fd;
1183
1184	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1185	    M_FILEDESC, M_WAITOK | M_ZERO);
1186	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1187	FILEDESC_LOCK(&newfdp->fd_fd);
1188	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1189	if (newfdp->fd_fd.fd_cdir)
1190		VREF(newfdp->fd_fd.fd_cdir);
1191	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1192	if (newfdp->fd_fd.fd_rdir)
1193		VREF(newfdp->fd_fd.fd_rdir);
1194	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1195	if (newfdp->fd_fd.fd_jdir)
1196		VREF(newfdp->fd_fd.fd_jdir);
1197
1198	/* Create the file descriptor table. */
1199	newfdp->fd_fd.fd_refcnt = 1;
1200	newfdp->fd_fd.fd_cmask = cmask;
1201	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1202	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1203	newfdp->fd_fd.fd_nfiles = NDFILE;
1204	newfdp->fd_fd.fd_knlistsize = -1;
1205	FILEDESC_UNLOCK(&newfdp->fd_fd);
1206
1207	return (&newfdp->fd_fd);
1208}
1209
1210/*
1211 * Share a filedesc structure.
1212 */
1213struct filedesc *
1214fdshare(p)
1215	struct proc *p;
1216{
1217	FILEDESC_LOCK(p->p_fd);
1218	p->p_fd->fd_refcnt++;
1219	FILEDESC_UNLOCK(p->p_fd);
1220	return (p->p_fd);
1221}
1222
1223/*
1224 * Copy a filedesc structure.
1225 */
1226struct filedesc *
1227fdcopy(td)
1228	struct thread *td;
1229{
1230	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1231	register struct file **fpp;
1232	register int i, j;
1233
1234	/* Certain daemons might not have file descriptors. */
1235	if (fdp == NULL)
1236		return (NULL);
1237
1238	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1239
1240	FILEDESC_UNLOCK(fdp);
1241	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1242	    M_FILEDESC, M_WAITOK);
1243	FILEDESC_LOCK(fdp);
1244	bcopy(fdp, newfdp, sizeof(struct filedesc));
1245	FILEDESC_UNLOCK(fdp);
1246	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1247	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1248	if (newfdp->fd_cdir)
1249		VREF(newfdp->fd_cdir);
1250	if (newfdp->fd_rdir)
1251		VREF(newfdp->fd_rdir);
1252	if (newfdp->fd_jdir)
1253		VREF(newfdp->fd_jdir);
1254	newfdp->fd_refcnt = 1;
1255
1256	/*
1257	 * If the number of open files fits in the internal arrays
1258	 * of the open file structure, use them, otherwise allocate
1259	 * additional memory for the number of descriptors currently
1260	 * in use.
1261	 */
1262	FILEDESC_LOCK(fdp);
1263	newfdp->fd_lastfile = fdp->fd_lastfile;
1264	newfdp->fd_nfiles = fdp->fd_nfiles;
1265	if (newfdp->fd_lastfile < NDFILE) {
1266		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1267		newfdp->fd_ofileflags =
1268		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1269		i = NDFILE;
1270	} else {
1271		/*
1272		 * Compute the smallest multiple of NDEXTENT needed
1273		 * for the file descriptors currently in use,
1274		 * allowing the table to shrink.
1275		 */
1276retry:
1277		i = newfdp->fd_nfiles;
1278		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1279			i /= 2;
1280		FILEDESC_UNLOCK(fdp);
1281		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1282		    M_FILEDESC, M_WAITOK);
1283		FILEDESC_LOCK(fdp);
1284		newfdp->fd_lastfile = fdp->fd_lastfile;
1285		newfdp->fd_nfiles = fdp->fd_nfiles;
1286		j = newfdp->fd_nfiles;
1287		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1288			j /= 2;
1289		if (i != j) {
1290			/*
1291			 * The size of the original table has changed.
1292			 * Go over once again.
1293			 */
1294			FILEDESC_UNLOCK(fdp);
1295			FREE(newfdp->fd_ofiles, M_FILEDESC);
1296			FILEDESC_LOCK(fdp);
1297			newfdp->fd_lastfile = fdp->fd_lastfile;
1298			newfdp->fd_nfiles = fdp->fd_nfiles;
1299			goto retry;
1300		}
1301		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1302	}
1303	newfdp->fd_nfiles = i;
1304	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1305	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1306
1307	/*
1308	 * kq descriptors cannot be copied.
1309	 */
1310	if (newfdp->fd_knlistsize != -1) {
1311		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1312		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1313			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1314				*fpp = NULL;
1315				if (i < newfdp->fd_freefile)
1316					newfdp->fd_freefile = i;
1317			}
1318			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1319				newfdp->fd_lastfile--;
1320		}
1321		newfdp->fd_knlist = NULL;
1322		newfdp->fd_knlistsize = -1;
1323		newfdp->fd_knhash = NULL;
1324		newfdp->fd_knhashmask = 0;
1325	}
1326
1327	fpp = newfdp->fd_ofiles;
1328	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1329		if (*fpp != NULL) {
1330			fhold(*fpp);
1331		}
1332	}
1333	return (newfdp);
1334}
1335
1336/*
1337 * Release a filedesc structure.
1338 */
1339void
1340fdfree(td)
1341	struct thread *td;
1342{
1343	register struct filedesc *fdp;
1344	struct file **fpp;
1345	register int i;
1346
1347	fdp = td->td_proc->p_fd;
1348	/* Certain daemons might not have file descriptors. */
1349	if (fdp == NULL)
1350		return;
1351
1352	FILEDESC_LOCK(fdp);
1353	if (--fdp->fd_refcnt > 0) {
1354		FILEDESC_UNLOCK(fdp);
1355		return;
1356	}
1357	/*
1358	 * we are the last reference to the structure, we can
1359	 * safely assume it will not change out from under us.
1360	 */
1361	FILEDESC_UNLOCK(fdp);
1362	fpp = fdp->fd_ofiles;
1363	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1364		if (*fpp)
1365			(void) closef(*fpp, td);
1366	}
1367
1368	PROC_LOCK(td->td_proc);
1369	td->td_proc->p_fd = NULL;
1370	PROC_UNLOCK(td->td_proc);
1371
1372	if (fdp->fd_nfiles > NDFILE)
1373		FREE(fdp->fd_ofiles, M_FILEDESC);
1374	if (fdp->fd_cdir)
1375		vrele(fdp->fd_cdir);
1376	if (fdp->fd_rdir)
1377		vrele(fdp->fd_rdir);
1378	if (fdp->fd_jdir)
1379		vrele(fdp->fd_jdir);
1380	if (fdp->fd_knlist)
1381		FREE(fdp->fd_knlist, M_KQUEUE);
1382	if (fdp->fd_knhash)
1383		FREE(fdp->fd_knhash, M_KQUEUE);
1384	mtx_destroy(&fdp->fd_mtx);
1385	FREE(fdp, M_FILEDESC);
1386}
1387
1388/*
1389 * For setugid programs, we don't want to people to use that setugidness
1390 * to generate error messages which write to a file which otherwise would
1391 * otherwise be off-limits to the process.
1392 *
1393 * This is a gross hack to plug the hole.  A better solution would involve
1394 * a special vop or other form of generalized access control mechanism.  We
1395 * go ahead and just reject all procfs filesystems accesses as dangerous.
1396 *
1397 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1398 * sufficient.  We also don't for check setugidness since we know we are.
1399 */
1400static int
1401is_unsafe(struct file *fp)
1402{
1403	if (fp->f_type == DTYPE_VNODE &&
1404	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1405		return (1);
1406	return (0);
1407}
1408
1409/*
1410 * Make this setguid thing safe, if at all possible.
1411 */
1412void
1413setugidsafety(td)
1414	struct thread *td;
1415{
1416	struct filedesc *fdp = td->td_proc->p_fd;
1417	register int i;
1418
1419	/* Certain daemons might not have file descriptors. */
1420	if (fdp == NULL)
1421		return;
1422
1423	/*
1424	 * note: fdp->fd_ofiles may be reallocated out from under us while
1425	 * we are blocked in a close.  Be careful!
1426	 */
1427	FILEDESC_LOCK(fdp);
1428	for (i = 0; i <= fdp->fd_lastfile; i++) {
1429		if (i > 2)
1430			break;
1431		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1432			struct file *fp;
1433
1434#if 0
1435			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1436				(void) munmapfd(td, i);
1437#endif
1438			if (i < fdp->fd_knlistsize) {
1439				FILEDESC_UNLOCK(fdp);
1440				knote_fdclose(td, i);
1441				FILEDESC_LOCK(fdp);
1442			}
1443			/*
1444			 * NULL-out descriptor prior to close to avoid
1445			 * a race while close blocks.
1446			 */
1447			fp = fdp->fd_ofiles[i];
1448			fdp->fd_ofiles[i] = NULL;
1449			fdp->fd_ofileflags[i] = 0;
1450			if (i < fdp->fd_freefile)
1451				fdp->fd_freefile = i;
1452			FILEDESC_UNLOCK(fdp);
1453			(void) closef(fp, td);
1454			FILEDESC_LOCK(fdp);
1455		}
1456	}
1457	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1458		fdp->fd_lastfile--;
1459	FILEDESC_UNLOCK(fdp);
1460}
1461
1462/*
1463 * Close any files on exec?
1464 */
1465void
1466fdcloseexec(td)
1467	struct thread *td;
1468{
1469	struct filedesc *fdp = td->td_proc->p_fd;
1470	register int i;
1471
1472	/* Certain daemons might not have file descriptors. */
1473	if (fdp == NULL)
1474		return;
1475
1476	FILEDESC_LOCK(fdp);
1477
1478	/*
1479	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1480	 * may block and rip them out from under us.
1481	 */
1482	for (i = 0; i <= fdp->fd_lastfile; i++) {
1483		if (fdp->fd_ofiles[i] != NULL &&
1484		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1485			struct file *fp;
1486
1487#if 0
1488			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1489				(void) munmapfd(td, i);
1490#endif
1491			if (i < fdp->fd_knlistsize) {
1492				FILEDESC_UNLOCK(fdp);
1493				knote_fdclose(td, i);
1494				FILEDESC_LOCK(fdp);
1495			}
1496			/*
1497			 * NULL-out descriptor prior to close to avoid
1498			 * a race while close blocks.
1499			 */
1500			fp = fdp->fd_ofiles[i];
1501			fdp->fd_ofiles[i] = NULL;
1502			fdp->fd_ofileflags[i] = 0;
1503			if (i < fdp->fd_freefile)
1504				fdp->fd_freefile = i;
1505			FILEDESC_UNLOCK(fdp);
1506			(void) closef(fp, td);
1507			FILEDESC_LOCK(fdp);
1508		}
1509	}
1510	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1511		fdp->fd_lastfile--;
1512	FILEDESC_UNLOCK(fdp);
1513}
1514
1515/*
1516 * It is unsafe for set[ug]id processes to be started with file
1517 * descriptors 0..2 closed, as these descriptors are given implicit
1518 * significance in the Standard C library.  fdcheckstd() will create a
1519 * descriptor referencing /dev/null for each of stdin, stdout, and
1520 * stderr that is not already open.
1521 */
1522int
1523fdcheckstd(td)
1524	struct thread *td;
1525{
1526	struct nameidata nd;
1527	struct filedesc *fdp;
1528	struct file *fp;
1529	register_t retval;
1530	int fd, i, error, flags, devnull;
1531
1532	fdp = td->td_proc->p_fd;
1533	if (fdp == NULL)
1534		return (0);
1535	devnull = -1;
1536	error = 0;
1537	for (i = 0; i < 3; i++) {
1538		if (fdp->fd_ofiles[i] != NULL)
1539			continue;
1540		if (devnull < 0) {
1541			error = falloc(td, &fp, &fd);
1542			if (error != 0)
1543				break;
1544			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1545			    td);
1546			flags = FREAD | FWRITE;
1547			error = vn_open(&nd, &flags, 0);
1548			if (error != 0) {
1549				FILEDESC_LOCK(fdp);
1550				fdp->fd_ofiles[i] = NULL;
1551				FILEDESC_UNLOCK(fdp);
1552				fdrop(fp, td);
1553				break;
1554			}
1555			NDFREE(&nd, NDF_ONLY_PNBUF);
1556			fp->f_data = nd.ni_vp;
1557			fp->f_flag = flags;
1558			fp->f_ops = &vnops;
1559			fp->f_type = DTYPE_VNODE;
1560			VOP_UNLOCK(nd.ni_vp, 0, td);
1561			devnull = fd;
1562		} else {
1563			FILEDESC_LOCK(fdp);
1564			error = fdalloc(td, 0, &fd);
1565			if (error != 0) {
1566				FILEDESC_UNLOCK(fdp);
1567				break;
1568			}
1569			error = do_dup(fdp, devnull, fd, &retval, td);
1570			if (error != 0)
1571				break;
1572		}
1573	}
1574	return (error);
1575}
1576
1577/*
1578 * Internal form of close.
1579 * Decrement reference count on file structure.
1580 * Note: td may be NULL when closing a file
1581 * that was being passed in a message.
1582 */
1583int
1584closef(fp, td)
1585	register struct file *fp;
1586	register struct thread *td;
1587{
1588	struct vnode *vp;
1589	struct flock lf;
1590
1591	if (fp == NULL)
1592		return (0);
1593	/*
1594	 * POSIX record locking dictates that any close releases ALL
1595	 * locks owned by this process.  This is handled by setting
1596	 * a flag in the unlock to free ONLY locks obeying POSIX
1597	 * semantics, and not to free BSD-style file locks.
1598	 * If the descriptor was in a message, POSIX-style locks
1599	 * aren't passed with the descriptor.
1600	 */
1601	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1602	    fp->f_type == DTYPE_VNODE) {
1603		lf.l_whence = SEEK_SET;
1604		lf.l_start = 0;
1605		lf.l_len = 0;
1606		lf.l_type = F_UNLCK;
1607		vp = (struct vnode *)fp->f_data;
1608		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1609		    F_UNLCK, &lf, F_POSIX);
1610	}
1611	return (fdrop(fp, td));
1612}
1613
/*
 * Drop a reference on the struct file passed in.
 *
 * Takes the file's lock and hands the file to fdrop_locked(), whose
 * result is returned.
 */
int
fdrop(struct file *fp, struct thread *td)
{
	int error;

	FILE_LOCK(fp);
	error = fdrop_locked(fp, td);
	return (error);
}
1627
1628/*
1629 * Extract the file pointer associated with the specified descriptor for
1630 * the current user process.
1631 *
1632 * If the descriptor doesn't exist, EBADF is returned.
1633 *
1634 * If the descriptor exists but doesn't match 'flags' then
1635 * return EBADF for read attempts and EINVAL for write attempts.
1636 *
1637 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1638 * It should be droped with fdrop().
1639 * If it is not set, then the refcount will not be bumped however the
1640 * thread's filedesc struct will be returned locked (for fgetsock).
1641 *
1642 * If an error occured the non-zero error is returned and *fpp is set to NULL.
1643 * Otherwise *fpp is set and zero is returned.
1644 */
1645static __inline
1646int
1647_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1648{
1649	struct filedesc *fdp;
1650	struct file *fp;
1651
1652	*fpp = NULL;
1653	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1654		return(EBADF);
1655	FILEDESC_LOCK(fdp);
1656	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1657		FILEDESC_UNLOCK(fdp);
1658		return(EBADF);
1659	}
1660
1661	/*
1662	 * Note: FREAD failures returns EBADF to maintain backwards
1663	 * compatibility with what routines returned before.
1664	 *
1665	 * Only one flag, or 0, may be specified.
1666	 */
1667	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1668		FILEDESC_UNLOCK(fdp);
1669		return(EBADF);
1670	}
1671	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1672		FILEDESC_UNLOCK(fdp);
1673		return(EINVAL);
1674	}
1675	if (hold) {
1676		fhold(fp);
1677		FILEDESC_UNLOCK(fdp);
1678	}
1679	*fpp = fp;
1680	return(0);
1681}
1682
/*
 * Look up descriptor fd via _fget() with no access-mode flag and with
 * 'hold' set.
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return (_fget(td, fd, fpp, 0, 1));
}
1688
1689int
1690fget_read(struct thread *td, int fd, struct file **fpp)
1691{
1692    return(_fget(td, fd, fpp, FREAD, 1));
1693}
1694
1695int
1696fget_write(struct thread *td, int fd, struct file **fpp)
1697{
1698    return(_fget(td, fd, fpp, FWRITE, 1));
1699}
1700
1701/*
1702 * Like fget() but loads the underlying vnode, or returns an error if
1703 * the descriptor does not represent a vnode.  Note that pipes use vnodes
1704 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1705 * error).  The returned vnode will be vref()d.
1706 */
1707
1708static __inline
1709int
1710_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1711{
1712	struct file *fp;
1713	int error;
1714
1715	*vpp = NULL;
1716	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1717		return (error);
1718	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1719		error = EINVAL;
1720	} else {
1721		*vpp = (struct vnode *)fp->f_data;
1722		vref(*vpp);
1723	}
1724	FILEDESC_UNLOCK(td->td_proc->p_fd);
1725	return (error);
1726}
1727
/*
 * Fetch the vnode behind descriptor fd via _fgetvp(), passing 0 as the
 * access-mode flag.
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}
1733
1734int
1735fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1736{
1737	return(_fgetvp(td, fd, vpp, FREAD));
1738}
1739
1740int
1741fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1742{
1743	return(_fgetvp(td, fd, vpp, FWRITE));
1744}
1745
1746/*
1747 * Like fget() but loads the underlying socket, or returns an error if
1748 * the descriptor does not represent a socket.
1749 *
1750 * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1751 * the future.
1752 */
1753int
1754fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1755{
1756	struct file *fp;
1757	int error;
1758
1759	*spp = NULL;
1760	if (fflagp)
1761		*fflagp = 0;
1762	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1763		return (error);
1764	if (fp->f_type != DTYPE_SOCKET) {
1765		error = ENOTSOCK;
1766	} else {
1767		*spp = (struct socket *)fp->f_data;
1768		if (fflagp)
1769			*fflagp = fp->f_flag;
1770		soref(*spp);
1771	}
1772	FILEDESC_UNLOCK(td->td_proc->p_fd);
1773	return(error);
1774}
1775
/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future.  The last reference closes the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1785
1786/*
1787 * Drop reference on struct file passed in, may call closef if the
1788 * reference hits zero.
1789 * Expects struct file locked, and will unlock it.
1790 */
1791int
1792fdrop_locked(fp, td)
1793	struct file *fp;
1794	struct thread *td;
1795{
1796	struct flock lf;
1797	struct vnode *vp;
1798	int error;
1799
1800	FILE_LOCK_ASSERT(fp, MA_OWNED);
1801
1802	if (--fp->f_count > 0) {
1803		FILE_UNLOCK(fp);
1804		return (0);
1805	}
1806	mtx_lock(&Giant);
1807	if (fp->f_count < 0)
1808		panic("fdrop: count < 0");
1809	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1810		lf.l_whence = SEEK_SET;
1811		lf.l_start = 0;
1812		lf.l_len = 0;
1813		lf.l_type = F_UNLCK;
1814		vp = (struct vnode *)fp->f_data;
1815		FILE_UNLOCK(fp);
1816		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1817	} else
1818		FILE_UNLOCK(fp);
1819	if (fp->f_ops != &badfileops)
1820		error = fo_close(fp, td);
1821	else
1822		error = 0;
1823	ffree(fp);
1824	mtx_unlock(&Giant);
1825	return (error);
1826}
1827
1828/*
1829 * Apply an advisory lock on a file descriptor.
1830 *
1831 * Just attempt to get a record lock of the requested type on
1832 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1833 */
1834#ifndef _SYS_SYSPROTO_H_
1835struct flock_args {
1836	int	fd;
1837	int	how;
1838};
1839#endif
1840/*
1841 * MPSAFE
1842 */
1843/* ARGSUSED */
1844int
1845flock(td, uap)
1846	struct thread *td;
1847	register struct flock_args *uap;
1848{
1849	struct file *fp;
1850	struct vnode *vp;
1851	struct flock lf;
1852	int error;
1853
1854	if ((error = fget(td, uap->fd, &fp)) != 0)
1855		return (error);
1856	if (fp->f_type != DTYPE_VNODE) {
1857		fdrop(fp, td);
1858		return (EOPNOTSUPP);
1859	}
1860
1861	mtx_lock(&Giant);
1862	vp = (struct vnode *)fp->f_data;
1863	lf.l_whence = SEEK_SET;
1864	lf.l_start = 0;
1865	lf.l_len = 0;
1866	if (uap->how & LOCK_UN) {
1867		lf.l_type = F_UNLCK;
1868		FILE_LOCK(fp);
1869		fp->f_flag &= ~FHASLOCK;
1870		FILE_UNLOCK(fp);
1871		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1872		goto done2;
1873	}
1874	if (uap->how & LOCK_EX)
1875		lf.l_type = F_WRLCK;
1876	else if (uap->how & LOCK_SH)
1877		lf.l_type = F_RDLCK;
1878	else {
1879		error = EBADF;
1880		goto done2;
1881	}
1882	FILE_LOCK(fp);
1883	fp->f_flag |= FHASLOCK;
1884	FILE_UNLOCK(fp);
1885	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1886	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1887done2:
1888	fdrop(fp, td);
1889	mtx_unlock(&Giant);
1890	return (error);
1891}
1892
1893/*
1894 * File Descriptor pseudo-device driver (/dev/fd/).
1895 *
1896 * Opening minor device N dup()s the file (if any) connected to file
1897 * descriptor N belonging to the calling process.  Note that this driver
1898 * consists of only the ``open()'' routine, because all subsequent
1899 * references to this file will be direct to the other driver.
1900 */
1901/* ARGSUSED */
1902static int
1903fdopen(dev, mode, type, td)
1904	dev_t dev;
1905	int mode, type;
1906	struct thread *td;
1907{
1908
1909	/*
1910	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1911	 * the file descriptor being sought for duplication. The error
1912	 * return ensures that the vnode for this device will be released
1913	 * by vn_open. Open will detect this special error and take the
1914	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1915	 * will simply report the error.
1916	 */
1917	td->td_dupfd = dev2unit(dev);
1918	return (ENODEV);
1919}
1920
1921/*
1922 * Duplicate the specified descriptor to a free descriptor.
1923 */
1924int
1925dupfdopen(td, fdp, indx, dfd, mode, error)
1926	struct thread *td;
1927	struct filedesc *fdp;
1928	int indx, dfd;
1929	int mode;
1930	int error;
1931{
1932	register struct file *wfp;
1933	struct file *fp;
1934
1935	/*
1936	 * If the to-be-dup'd fd number is greater than the allowed number
1937	 * of file descriptors, or the fd to be dup'd has already been
1938	 * closed, then reject.
1939	 */
1940	FILEDESC_LOCK(fdp);
1941	if ((u_int)dfd >= fdp->fd_nfiles ||
1942	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1943		FILEDESC_UNLOCK(fdp);
1944		return (EBADF);
1945	}
1946
1947	/*
1948	 * There are two cases of interest here.
1949	 *
1950	 * For ENODEV simply dup (dfd) to file descriptor
1951	 * (indx) and return.
1952	 *
1953	 * For ENXIO steal away the file structure from (dfd) and
1954	 * store it in (indx).  (dfd) is effectively closed by
1955	 * this operation.
1956	 *
1957	 * Any other error code is just returned.
1958	 */
1959	switch (error) {
1960	case ENODEV:
1961		/*
1962		 * Check that the mode the file is being opened for is a
1963		 * subset of the mode of the existing descriptor.
1964		 */
1965		FILE_LOCK(wfp);
1966		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1967			FILE_UNLOCK(wfp);
1968			FILEDESC_UNLOCK(fdp);
1969			return (EACCES);
1970		}
1971		fp = fdp->fd_ofiles[indx];
1972#if 0
1973		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1974			(void) munmapfd(td, indx);
1975#endif
1976		fdp->fd_ofiles[indx] = wfp;
1977		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1978		fhold_locked(wfp);
1979		FILE_UNLOCK(wfp);
1980		if (indx > fdp->fd_lastfile)
1981			fdp->fd_lastfile = indx;
1982		if (fp != NULL)
1983			FILE_LOCK(fp);
1984		FILEDESC_UNLOCK(fdp);
1985		/*
1986		 * we now own the reference to fp that the ofiles[] array
1987		 * used to own.  Release it.
1988		 */
1989		if (fp != NULL)
1990			fdrop_locked(fp, td);
1991		return (0);
1992
1993	case ENXIO:
1994		/*
1995		 * Steal away the file pointer from dfd, and stuff it into indx.
1996		 */
1997		fp = fdp->fd_ofiles[indx];
1998#if 0
1999		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2000			(void) munmapfd(td, indx);
2001#endif
2002		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2003		fdp->fd_ofiles[dfd] = NULL;
2004		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2005		fdp->fd_ofileflags[dfd] = 0;
2006
2007		/*
2008		 * Complete the clean up of the filedesc structure by
2009		 * recomputing the various hints.
2010		 */
2011		if (indx > fdp->fd_lastfile) {
2012			fdp->fd_lastfile = indx;
2013		} else {
2014			while (fdp->fd_lastfile > 0 &&
2015			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2016				fdp->fd_lastfile--;
2017			}
2018			if (dfd < fdp->fd_freefile)
2019				fdp->fd_freefile = dfd;
2020		}
2021		if (fp != NULL)
2022			FILE_LOCK(fp);
2023		FILEDESC_UNLOCK(fdp);
2024
2025		/*
2026		 * we now own the reference to fp that the ofiles[] array
2027		 * used to own.  Release it.
2028		 */
2029		if (fp != NULL)
2030			fdrop_locked(fp, td);
2031		return (0);
2032
2033	default:
2034		FILEDESC_UNLOCK(fdp);
2035		return (error);
2036	}
2037	/* NOTREACHED */
2038}
2039
2040/*
2041 * Get file structures.
2042 */
2043static int
2044sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2045{
2046	struct proc *p;
2047	struct filedesc *fdp;
2048	struct file *fp;
2049	struct xfile xf;
2050	int error, n;
2051
2052	sysctl_wire_old_buffer(req, 0);
2053	if (!req->oldptr) {
2054		n = 16; /* slight overestimate */
2055		sx_slock(&filelist_lock);
2056		LIST_FOREACH(fp, &filehead, f_list) {
2057			/*
2058			 * We should grab the lock, but this is an
2059			 * estimate, so does it really matter?
2060			 */
2061			/* mtx_lock(fp->f_mtxp); */
2062			n += fp->f_count;
2063			/* mtx_unlock(f->f_mtxp); */
2064		}
2065		sx_sunlock(&filelist_lock);
2066		return (SYSCTL_OUT(req, 0, n * sizeof xf));
2067	}
2068
2069	error = 0;
2070	bzero(&xf, sizeof xf);
2071	xf.xf_size = sizeof xf;
2072	sx_slock(&allproc_lock);
2073	LIST_FOREACH(p, &allproc, p_list) {
2074		PROC_LOCK(p);
2075		xf.xf_pid = p->p_pid;
2076		xf.xf_uid = p->p_ucred->cr_uid;
2077		if ((fdp = p->p_fd) == NULL) {
2078			PROC_UNLOCK(p);
2079			continue;
2080		}
2081		FILEDESC_LOCK(fdp);
2082		for (n = 0; n < fdp->fd_nfiles; ++n) {
2083			if ((fp = fdp->fd_ofiles[n]) == NULL)
2084				continue;
2085			xf.xf_fd = n;
2086			xf.xf_file = fp;
2087#define XF_COPY(field) xf.xf_##field = fp->f_##field
2088			XF_COPY(type);
2089			XF_COPY(count);
2090			XF_COPY(msgcount);
2091			XF_COPY(offset);
2092			XF_COPY(data);
2093			XF_COPY(flag);
2094#undef XF_COPY
2095			error = SYSCTL_OUT(req, &xf, sizeof xf);
2096			if (error)
2097				break;
2098		}
2099		FILEDESC_UNLOCK(fdp);
2100		PROC_UNLOCK(p);
2101		if (error)
2102			break;
2103	}
2104	sx_sunlock(&allproc_lock);
2105	return (error);
2106}
2107
2108SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2109    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
2110
2111SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2112    &maxfilesperproc, 0, "Maximum files allowed open per process");
2113
2114SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2115    &maxfiles, 0, "Maximum number of files");
2116
2117SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2118    &nfiles, 0, "System-wide number of open files");
2119
2120static void
2121fildesc_drvinit(void *unused)
2122{
2123	dev_t dev;
2124
2125	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2126	make_dev_alias(dev, "stdin");
2127	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2128	make_dev_alias(dev, "stdout");
2129	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2130	make_dev_alias(dev, "stderr");
2131	if (!devfs_present) {
2132		int fd;
2133
2134		for (fd = 3; fd < NUMFDESC; fd++)
2135			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2136			    "fd/%d", fd);
2137	}
2138}
2139
2140struct fileops badfileops = {
2141	badfo_readwrite,
2142	badfo_readwrite,
2143	badfo_ioctl,
2144	badfo_poll,
2145	badfo_kqfilter,
2146	badfo_stat,
2147	badfo_close
2148};
2149
/*
 * Read/write stub used by badfileops: ignores its arguments and always
 * fails with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EBADF);
}
2161
2162static int
2163badfo_ioctl(fp, com, data, active_cred, td)
2164	struct file *fp;
2165	u_long com;
2166	void *data;
2167	struct ucred *active_cred;
2168	struct thread *td;
2169{
2170
2171	return (EBADF);
2172}
2173
/*
 * poll stub used by badfileops: ignores its arguments and always
 * returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2184
/*
 * kqfilter stub used by badfileops: ignores its arguments and always
 * returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2193
/*
 * stat stub used by badfileops: ignores its arguments and always fails
 * with EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EBADF);
}
2204
/*
 * close stub used by badfileops: ignores its arguments and always fails
 * with EBADF.
 */
static int
badfo_close(struct file *fp, struct thread *td)
{

	return (EBADF);
}
2213
2214SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2215					fildesc_drvinit,NULL)
2216
2217static void filelistinit(void *);
2218SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2219
2220/* ARGSUSED*/
2221static void
2222filelistinit(dummy)
2223	void *dummy;
2224{
2225	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2226	    NULL, NULL, UMA_ALIGN_PTR, 0);
2227
2228	sx_init(&filelist_lock, "filelist lock");
2229	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2230}
2231