kern_descrip.c revision 107819
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 107819 2002-12-13 09:59:40Z tjr $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/lock.h>
51#include <sys/kernel.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54#include <sys/sysctl.h>
55#include <sys/vnode.h>
56#include <sys/mount.h>
57#include <sys/proc.h>
58#include <sys/namei.h>
59#include <sys/file.h>
60#include <sys/stat.h>
61#include <sys/filio.h>
62#include <sys/fcntl.h>
63#include <sys/unistd.h>
64#include <sys/resourcevar.h>
65#include <sys/event.h>
66#include <sys/sx.h>
67#include <sys/socketvar.h>
68#include <sys/signalvar.h>
69
70#include <machine/limits.h>
71
72#include <vm/vm.h>
73#include <vm/vm_extern.h>
74#include <vm/uma.h>
75
76static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
77static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
78
79uma_zone_t file_zone;
80
81static	 d_open_t  fdopen;
82#define	NUMFDESC 64
83
84#define	CDEV_MAJOR 22
85static struct cdevsw fildesc_cdevsw = {
86	/* open */	fdopen,
87	/* close */	noclose,
88	/* read */	noread,
89	/* write */	nowrite,
90	/* ioctl */	noioctl,
91	/* poll */	nopoll,
92	/* mmap */	nommap,
93	/* strategy */	nostrategy,
94	/* name */	"FD",
95	/* maj */	CDEV_MAJOR,
96	/* dump */	nodump,
97	/* psize */	nopsize,
98	/* flags */	0,
99};
100
101/* How to treat 'new' parameter when allocating a fd for do_dup(). */
102enum dup_type { DUP_VARIABLE, DUP_FIXED };
103
104static int do_dup(struct thread *td, enum dup_type type, int old, int new,
105    register_t *retval);
106static int badfo_readwrite(struct file *fp, struct uio *uio,
107    struct ucred *active_cred, int flags, struct thread *td);
108static int badfo_ioctl(struct file *fp, u_long com, void *data,
109    struct ucred *active_cred, struct thread *td);
110static int badfo_poll(struct file *fp, int events,
111    struct ucred *active_cred, struct thread *td);
112static int badfo_kqfilter(struct file *fp, struct knote *kn);
113static int badfo_stat(struct file *fp, struct stat *sb,
114    struct ucred *active_cred, struct thread *td);
115static int badfo_close(struct file *fp, struct thread *td);
116
117/*
118 * Descriptor management.
119 */
120struct filelist filehead;	/* head of list of open files */
121int nfiles;			/* actual number of open files */
122extern int cmask;
123struct sx filelist_lock;	/* sx to protect filelist */
124struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
125
126/*
127 * System calls on descriptors.
128 */
129#ifndef _SYS_SYSPROTO_H_
130struct getdtablesize_args {
131	int	dummy;
132};
133#endif
134/*
135 * MPSAFE
136 */
137/* ARGSUSED */
138int
139getdtablesize(td, uap)
140	struct thread *td;
141	struct getdtablesize_args *uap;
142{
143	struct proc *p = td->td_proc;
144
145	mtx_lock(&Giant);
146	td->td_retval[0] =
147	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
148	mtx_unlock(&Giant);
149	return (0);
150}
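
/*
 * Illustrative userland counterpart (a sketch, not part of this file):
 * the same value can be derived with getrlimit(), except that this
 * syscall additionally clamps the soft limit to maxfilesperproc.
 *
 *	struct rlimit rl;
 *	getrlimit(RLIMIT_NOFILE, &rl);	-- rl.rlim_cur, before clamping
 */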
151
152/*
153 * Duplicate a file descriptor to a particular value.
154 *
155 * Note: a potential race condition exists when closing
156 * descriptors from a shared descriptor table (via rfork).
157 */
158#ifndef _SYS_SYSPROTO_H_
159struct dup2_args {
160	u_int	from;
161	u_int	to;
162};
163#endif
164/*
165 * MPSAFE
166 */
167/* ARGSUSED */
168int
169dup2(td, uap)
170	struct thread *td;
171	struct dup2_args *uap;
172{
173
174	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
175		    td->td_retval));
176}
177
178/*
179 * Duplicate a file descriptor.
180 */
181#ifndef _SYS_SYSPROTO_H_
182struct dup_args {
183	u_int	fd;
184};
185#endif
186/*
187 * MPSAFE
188 */
189/* ARGSUSED */
190int
191dup(td, uap)
192	struct thread *td;
193	struct dup_args *uap;
194{
195
196	return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
197}
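
/*
 * Illustrative userland usage of dup2()/dup() (a sketch, not part of
 * this file): redirect stdout to a log file; error checking omitted
 * and the file name is made up.
 *
 *	int logfd = open("app.log", O_WRONLY | O_CREAT | O_APPEND, 0644);
 *	dup2(logfd, STDOUT_FILENO);	-- fd 1 now refers to app.log
 *	close(logfd);			-- the file stays open through fd 1
 */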
198
199/*
200 * The file control system call.
201 */
202#ifndef _SYS_SYSPROTO_H_
203struct fcntl_args {
204	int	fd;
205	int	cmd;
206	long	arg;
207};
208#endif
209/*
210 * MPSAFE
211 */
212/* ARGSUSED */
213int
214fcntl(td, uap)
215	struct thread *td;
216	register struct fcntl_args *uap;
217{
218	struct flock fl;
219	intptr_t arg;
220	int error;
221
222	error = 0;
223	switch (uap->cmd) {
224	case F_GETLK:
225	case F_SETLK:
226	case F_SETLKW:
227		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
228		arg = (intptr_t)&fl;
229		break;
230	default:
231		arg = uap->arg;
232		break;
233	}
234	if (error)
235		return (error);
236	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
237	if (error)
238		return (error);
239	if (uap->cmd == F_GETLK)
240		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
241	return (error);
242}
243
244int
245kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
246{
247	register struct filedesc *fdp;
248	struct flock *flp;
249	register struct file *fp;
250	struct proc *p;
251	register char *pop;
252	struct vnode *vp;
253	u_int newmin;
254	int error, flg, tmp;
255
256	error = 0;
257	flg = F_POSIX;
258	p = td->td_proc;
259	fdp = p->p_fd;
260	mtx_lock(&Giant);
261	FILEDESC_LOCK(fdp);
262	if ((unsigned)fd >= fdp->fd_nfiles ||
263	    (fp = fdp->fd_ofiles[fd]) == NULL) {
264		FILEDESC_UNLOCK(fdp);
265		error = EBADF;
266		goto done2;
267	}
268	pop = &fdp->fd_ofileflags[fd];
269
270	switch (cmd) {
271	case F_DUPFD:
272		FILEDESC_UNLOCK(fdp);
273		newmin = arg;
274		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
275		    newmin >= maxfilesperproc) {
276			error = EINVAL;
277			break;
278		}
279		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
280		break;
281
282	case F_GETFD:
283		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
284		FILEDESC_UNLOCK(fdp);
285		break;
286
287	case F_SETFD:
288		*pop = (*pop &~ UF_EXCLOSE) |
289		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
290		FILEDESC_UNLOCK(fdp);
291		break;
292
293	case F_GETFL:
294		FILE_LOCK(fp);
295		FILEDESC_UNLOCK(fdp);
296		td->td_retval[0] = OFLAGS(fp->f_flag);
297		FILE_UNLOCK(fp);
298		break;
299
300	case F_SETFL:
301		fhold(fp);
302		FILEDESC_UNLOCK(fdp);
303		fp->f_flag &= ~FCNTLFLAGS;
304		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
305		tmp = fp->f_flag & FNONBLOCK;
306		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
307		if (error) {
308			fdrop(fp, td);
309			break;
310		}
311		tmp = fp->f_flag & FASYNC;
312		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
313		if (error == 0) {
314			fdrop(fp, td);
315			break;
316		}
317		fp->f_flag &= ~FNONBLOCK;
318		tmp = 0;
319		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
320		fdrop(fp, td);
321		break;
322
323	case F_GETOWN:
324		fhold(fp);
325		FILEDESC_UNLOCK(fdp);
326		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
327		if (error == 0)
328			td->td_retval[0] = tmp;
329		fdrop(fp, td);
330		break;
331
332	case F_SETOWN:
333		fhold(fp);
334		FILEDESC_UNLOCK(fdp);
335		tmp = arg;
336		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
337		fdrop(fp, td);
338		break;
339
340	case F_SETLKW:
341		flg |= F_WAIT;
342		/* FALLTHROUGH to F_SETLK */
343
344	case F_SETLK:
345		if (fp->f_type != DTYPE_VNODE) {
346			FILEDESC_UNLOCK(fdp);
347			error = EBADF;
348			break;
349		}
350
351		flp = (struct flock *)arg;
352		if (flp->l_whence == SEEK_CUR) {
353			if (fp->f_offset < 0 ||
354			    (flp->l_start > 0 &&
355			     fp->f_offset > OFF_MAX - flp->l_start)) {
356				FILEDESC_UNLOCK(fdp);
357				error = EOVERFLOW;
358				break;
359			}
360			flp->l_start += fp->f_offset;
361		}
362
363		/*
364		 * VOP_ADVLOCK() may block.
365		 */
366		fhold(fp);
367		FILEDESC_UNLOCK(fdp);
368		vp = (struct vnode *)fp->f_data;
369
370		switch (flp->l_type) {
371		case F_RDLCK:
372			if ((fp->f_flag & FREAD) == 0) {
373				error = EBADF;
374				break;
375			}
376			PROC_LOCK(p);
377			p->p_flag |= P_ADVLOCK;
378			PROC_UNLOCK(p);
379			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
380			    flp, flg);
381			break;
382		case F_WRLCK:
383			if ((fp->f_flag & FWRITE) == 0) {
384				error = EBADF;
385				break;
386			}
387			PROC_LOCK(p);
388			p->p_flag |= P_ADVLOCK;
389			PROC_UNLOCK(p);
390			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
391			    flp, flg);
392			break;
393		case F_UNLCK:
394			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
395			    flp, F_POSIX);
396			break;
397		default:
398			error = EINVAL;
399			break;
400		}
401		fdrop(fp, td);
402		break;
403
404	case F_GETLK:
405		if (fp->f_type != DTYPE_VNODE) {
406			FILEDESC_UNLOCK(fdp);
407			error = EBADF;
408			break;
409		}
410		flp = (struct flock *)arg;
411		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
412		    flp->l_type != F_UNLCK) {
413			FILEDESC_UNLOCK(fdp);
414			error = EINVAL;
415			break;
416		}
417		if (flp->l_whence == SEEK_CUR) {
418			if ((flp->l_start > 0 &&
419			    fp->f_offset > OFF_MAX - flp->l_start) ||
420			    (flp->l_start < 0 &&
421			     fp->f_offset < OFF_MIN - flp->l_start)) {
422				FILEDESC_UNLOCK(fdp);
423				error = EOVERFLOW;
424				break;
425			}
426			flp->l_start += fp->f_offset;
427		}
428		/*
429		 * VOP_ADVLOCK() may block.
430		 */
431		fhold(fp);
432		FILEDESC_UNLOCK(fdp);
433		vp = (struct vnode *)fp->f_data;
434		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
435		    F_POSIX);
436		fdrop(fp, td);
437		break;
438	default:
439		FILEDESC_UNLOCK(fdp);
440		error = EINVAL;
441		break;
442	}
443done2:
444	mtx_unlock(&Giant);
445	return (error);
446}
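
/*
 * Illustrative userland usage of the advisory-locking commands handled
 * above (a sketch, not part of this file): take a blocking POSIX write
 * lock on a whole file opened read/write, then release it.
 *
 *	struct flock fl;
 *	fl.l_type = F_WRLCK;
 *	fl.l_whence = SEEK_SET;
 *	fl.l_start = 0;
 *	fl.l_len = 0;			-- zero length means "to EOF"
 *	fcntl(fd, F_SETLKW, &fl);	-- ends up in VOP_ADVLOCK() above
 *	fl.l_type = F_UNLCK;
 *	fcntl(fd, F_SETLK, &fl);
 */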
447
448/*
449 * Common code for dup, dup2, and fcntl(F_DUPFD).
450 * The filedesc lock is acquired and released internally; callers must not hold it.
451 */
452static int
453do_dup(td, type, old, new, retval)
454	enum dup_type type;
455	int old, new;
456	register_t *retval;
457	struct thread *td;
458{
459	register struct filedesc *fdp;
460	struct proc *p;
461	struct file *fp;
462	struct file *delfp;
463	int error, newfd;
464
465	p = td->td_proc;
466	fdp = p->p_fd;
467
468	/*
469	 * Verify we have a valid descriptor to dup from and possibly to
470	 * dup to.
471	 */
472	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
473	    new >= maxfilesperproc)
474		return (EBADF);
475	FILEDESC_LOCK(fdp);
476	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
477		FILEDESC_UNLOCK(fdp);
478		return (EBADF);
479	}
480	if (type == DUP_FIXED && old == new) {
481		*retval = new;
482		FILEDESC_UNLOCK(fdp);
483		return (0);
484	}
485	fp = fdp->fd_ofiles[old];
486	fhold(fp);
487
488	/*
489	 * Expand the table for the new descriptor if needed.  This may
490	 * block and drop and reacquire the filedesc lock.
491	 */
492	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
493		error = fdalloc(td, new, &newfd);
494		if (error) {
495			FILEDESC_UNLOCK(fdp);
496			return (error);
497		}
498	}
499	if (type == DUP_VARIABLE)
500		new = newfd;
501
502	/*
503	 * If the old file changed out from under us then treat it as a
504	 * bad file descriptor.  Userland should do its own locking to
505	 * avoid this case.
506	 */
507	if (fdp->fd_ofiles[old] != fp) {
508		if (fdp->fd_ofiles[new] == NULL) {
509			if (new < fdp->fd_freefile)
510				fdp->fd_freefile = new;
511			while (fdp->fd_lastfile > 0 &&
512			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
513				fdp->fd_lastfile--;
514		}
515		FILEDESC_UNLOCK(fdp);
516		fdrop(fp, td);
517		return (EBADF);
518	}
519	KASSERT(old != new, ("new fd is same as old"));
520
521	/*
522	 * Save info on the descriptor being overwritten.  We have
523	 * to do the unmap now, but we cannot close it without
524	 * introducing an ownership race for the slot.
525	 */
526	delfp = fdp->fd_ofiles[new];
527	KASSERT(delfp == NULL || type == DUP_FIXED,
528	    ("dup() picked an open file"));
529#if 0
530	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
531		(void) munmapfd(td, new);
532#endif
533
534	/*
535	 * Duplicate the source descriptor, update lastfile
536	 */
537	fdp->fd_ofiles[new] = fp;
538 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
539	if (new > fdp->fd_lastfile)
540		fdp->fd_lastfile = new;
541	FILEDESC_UNLOCK(fdp);
542	*retval = new;
543
544	/*
545	 * If we dup'd over a valid file, we now own the reference to it
546	 * and must dispose of it using closef() semantics (as if a
547	 * close() were performed on it).
548	 */
549	if (delfp) {
550		mtx_lock(&Giant);
551		(void) closef(delfp, td);
552		mtx_unlock(&Giant);
553	}
554	return (0);
555}
556
557/*
558 * If sigio is on the list associated with a process or process group,
559 * disable signalling from the device, remove sigio from the list and
560 * free sigio.
561 */
562void
563funsetown(sigiop)
564	struct sigio **sigiop;
565{
566	struct sigio *sigio;
567
568	SIGIO_LOCK();
569	sigio = *sigiop;
570	if (sigio == NULL) {
571		SIGIO_UNLOCK();
572		return;
573	}
574	*(sigio->sio_myref) = NULL;
575	if ((sigio)->sio_pgid < 0) {
576		struct pgrp *pg = (sigio)->sio_pgrp;
577		PGRP_LOCK(pg);
578		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
579			     sigio, sio_pgsigio);
580		PGRP_UNLOCK(pg);
581	} else {
582		struct proc *p = (sigio)->sio_proc;
583		PROC_LOCK(p);
584		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
585			     sigio, sio_pgsigio);
586		PROC_UNLOCK(p);
587	}
588	SIGIO_UNLOCK();
589	crfree(sigio->sio_ucred);
590	FREE(sigio, M_SIGIO);
591}
592
593/*
594 * Free a list of sigio structures.
595 * We only need to hold the SIGIO lock because we have made ourselves
596 * inaccessible to callers of fsetown and therefore do not need to lock
597 * the proc or pgrp struct for the list manipulation.
598 */
599void
600funsetownlst(sigiolst)
601	struct sigiolst *sigiolst;
602{
603	struct proc *p;
604	struct pgrp *pg;
605	struct sigio *sigio;
606
607	sigio = SLIST_FIRST(sigiolst);
608	if (sigio == NULL)
609		return;
610	p = NULL;
611	pg = NULL;
612
613	/*
614	 * Every entry of the list should belong
615	 * to a single proc or pgrp.
616	 */
617	if (sigio->sio_pgid < 0) {
618		pg = sigio->sio_pgrp;
619		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
620	} else /* if (sigio->sio_pgid > 0) */ {
621		p = sigio->sio_proc;
622		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
623	}
624
625	SIGIO_LOCK();
626	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
627		*(sigio->sio_myref) = NULL;
628		if (pg != NULL) {
629			KASSERT(sigio->sio_pgid < 0,
630			    ("Proc sigio in pgrp sigio list"));
631			KASSERT(sigio->sio_pgrp == pg,
632			    ("Bogus pgrp in sigio list"));
633			PGRP_LOCK(pg);
634			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
635			    sio_pgsigio);
636			PGRP_UNLOCK(pg);
637		} else /* if (p != NULL) */ {
638			KASSERT(sigio->sio_pgid > 0,
639			    ("Pgrp sigio in proc sigio list"));
640			KASSERT(sigio->sio_proc == p,
641			    ("Bogus proc in sigio list"));
642			PROC_LOCK(p);
643			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
644			    sio_pgsigio);
645			PROC_UNLOCK(p);
646		}
647		SIGIO_UNLOCK();
648		crfree(sigio->sio_ucred);
649		FREE(sigio, M_SIGIO);
650		SIGIO_LOCK();
651	}
652	SIGIO_UNLOCK();
653}
654
655/*
656 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
657 *
658 * After permission checking, add a sigio structure to the sigio list for
659 * the process or process group.
660 */
661int
662fsetown(pgid, sigiop)
663	pid_t pgid;
664	struct sigio **sigiop;
665{
666	struct proc *proc;
667	struct pgrp *pgrp;
668	struct sigio *sigio;
669	int ret;
670
671	if (pgid == 0) {
672		funsetown(sigiop);
673		return (0);
674	}
675
676	ret = 0;
677
678	/* Allocate and fill in the new sigio out of locks. */
679	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
680	sigio->sio_pgid = pgid;
681	sigio->sio_ucred = crhold(curthread->td_ucred);
682	sigio->sio_myref = sigiop;
683
684	sx_slock(&proctree_lock);
685	if (pgid > 0) {
686		proc = pfind(pgid);
687		if (proc == NULL) {
688			ret = ESRCH;
689			goto fail;
690		}
691
692		/*
693		 * Policy - Don't allow a process to FSETOWN a process
694		 * in another session.
695		 *
696		 * Remove this test to allow maximum flexibility or
697		 * restrict FSETOWN to the current process or process
698		 * group for maximum safety.
699		 */
700		PROC_UNLOCK(proc);
701		if (proc->p_session != curthread->td_proc->p_session) {
702			ret = EPERM;
703			goto fail;
704		}
705
706		pgrp = NULL;
707	} else /* if (pgid < 0) */ {
708		pgrp = pgfind(-pgid);
709		if (pgrp == NULL) {
710			ret = ESRCH;
711			goto fail;
712		}
713		PGRP_UNLOCK(pgrp);
714
715		/*
716		 * Policy - Don't allow a process to FSETOWN a process
717		 * in another session.
718		 *
719		 * Remove this test to allow maximum flexibility or
720		 * restrict FSETOWN to the current process or process
721		 * group for maximum safety.
722		 */
723		if (pgrp->pg_session != curthread->td_proc->p_session) {
724			ret = EPERM;
725			goto fail;
726		}
727
728		proc = NULL;
729	}
730	funsetown(sigiop);
731	if (pgid > 0) {
732		PROC_LOCK(proc);
733		/*
734		 * Since funsetownlst() is called without the proctree
735		 * locked, we need to check for P_WEXIT.
736		 * XXX: is ESRCH correct?
737		 */
738		if ((proc->p_flag & P_WEXIT) != 0) {
739			PROC_UNLOCK(proc);
740			ret = ESRCH;
741			goto fail;
742		}
743		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
744		sigio->sio_proc = proc;
745		PROC_UNLOCK(proc);
746	} else {
747		PGRP_LOCK(pgrp);
748		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
749		sigio->sio_pgrp = pgrp;
750		PGRP_UNLOCK(pgrp);
751	}
752	sx_sunlock(&proctree_lock);
753	SIGIO_LOCK();
754	*sigiop = sigio;
755	SIGIO_UNLOCK();
756	return (0);
757
758fail:
759	sx_sunlock(&proctree_lock);
760	crfree(sigio->sio_ucred);
761	FREE(sigio, M_SIGIO);
762	return (ret);
763}
764
765/*
766 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
767 */
768pid_t
769fgetown(sigiop)
770	struct sigio **sigiop;
771{
772	pid_t pgid;
773
774	SIGIO_LOCK();
775	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
776	SIGIO_UNLOCK();
777	return (pgid);
778}
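
/*
 * Illustrative userland counterpart of fsetown()/fgetown() (a sketch,
 * not part of this file): request SIGIO on a descriptor by setting the
 * owner and enabling async mode; assumes a SIGIO handler is installed.
 *
 *	fcntl(fd, F_SETOWN, getpid());		-- reaches fsetown()
 *	int fl = fcntl(fd, F_GETFL, 0);
 *	fcntl(fd, F_SETFL, fl | O_ASYNC);	-- enable SIGIO delivery
 */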
779
780/*
781 * Close a file descriptor.
782 */
783#ifndef _SYS_SYSPROTO_H_
784struct close_args {
785	int	fd;
786};
787#endif
788/*
789 * MPSAFE
790 */
791/* ARGSUSED */
792int
793close(td, uap)
794	struct thread *td;
795	struct close_args *uap;
796{
797	register struct filedesc *fdp;
798	register struct file *fp;
799	int fd, error;
800
801	fd = uap->fd;
802	error = 0;
803	fdp = td->td_proc->p_fd;
804	mtx_lock(&Giant);
805	FILEDESC_LOCK(fdp);
806	if ((unsigned)fd >= fdp->fd_nfiles ||
807	    (fp = fdp->fd_ofiles[fd]) == NULL) {
808		FILEDESC_UNLOCK(fdp);
809		error = EBADF;
810		goto done2;
811	}
812#if 0
813	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
814		(void) munmapfd(td, fd);
815#endif
816	fdp->fd_ofiles[fd] = NULL;
817	fdp->fd_ofileflags[fd] = 0;
818
819	/*
820	 * we now hold the fp reference that used to be owned by the descriptor
821	 * array.
822	 */
823	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
824		fdp->fd_lastfile--;
825	if (fd < fdp->fd_freefile)
826		fdp->fd_freefile = fd;
827	if (fd < fdp->fd_knlistsize) {
828		FILEDESC_UNLOCK(fdp);
829		knote_fdclose(td, fd);
830	} else
831		FILEDESC_UNLOCK(fdp);
832
833	error = closef(fp, td);
834done2:
835	mtx_unlock(&Giant);
836	return (error);
837}
838
839#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
840/*
841 * Return status information about a file descriptor.
842 */
843#ifndef _SYS_SYSPROTO_H_
844struct ofstat_args {
845	int	fd;
846	struct	ostat *sb;
847};
848#endif
849/*
850 * MPSAFE
851 */
852/* ARGSUSED */
853int
854ofstat(td, uap)
855	struct thread *td;
856	register struct ofstat_args *uap;
857{
858	struct file *fp;
859	struct stat ub;
860	struct ostat oub;
861	int error;
862
863	mtx_lock(&Giant);
864	if ((error = fget(td, uap->fd, &fp)) != 0)
865		goto done2;
866	error = fo_stat(fp, &ub, td->td_ucred, td);
867	if (error == 0) {
868		cvtstat(&ub, &oub);
869		error = copyout(&oub, uap->sb, sizeof(oub));
870	}
871	fdrop(fp, td);
872done2:
873	mtx_unlock(&Giant);
874	return (error);
875}
876#endif /* COMPAT_43 || COMPAT_SUNOS */
877
878/*
879 * Return status information about a file descriptor.
880 */
881#ifndef _SYS_SYSPROTO_H_
882struct fstat_args {
883	int	fd;
884	struct	stat *sb;
885};
886#endif
887/*
888 * MPSAFE
889 */
890/* ARGSUSED */
891int
892fstat(td, uap)
893	struct thread *td;
894	struct fstat_args *uap;
895{
896	struct file *fp;
897	struct stat ub;
898	int error;
899
900	mtx_lock(&Giant);
901	if ((error = fget(td, uap->fd, &fp)) != 0)
902		goto done2;
903	error = fo_stat(fp, &ub, td->td_ucred, td);
904	if (error == 0)
905		error = copyout(&ub, uap->sb, sizeof(ub));
906	fdrop(fp, td);
907done2:
908	mtx_unlock(&Giant);
909	return (error);
910}
911
912/*
913 * Return status information about a file descriptor.
914 */
915#ifndef _SYS_SYSPROTO_H_
916struct nfstat_args {
917	int	fd;
918	struct	nstat *sb;
919};
920#endif
921/*
922 * MPSAFE
923 */
924/* ARGSUSED */
925int
926nfstat(td, uap)
927	struct thread *td;
928	register struct nfstat_args *uap;
929{
930	struct file *fp;
931	struct stat ub;
932	struct nstat nub;
933	int error;
934
935	mtx_lock(&Giant);
936	if ((error = fget(td, uap->fd, &fp)) != 0)
937		goto done2;
938	error = fo_stat(fp, &ub, td->td_ucred, td);
939	if (error == 0) {
940		cvtnstat(&ub, &nub);
941		error = copyout(&nub, uap->sb, sizeof(nub));
942	}
943	fdrop(fp, td);
944done2:
945	mtx_unlock(&Giant);
946	return (error);
947}
948
949/*
950 * Return pathconf information about a file descriptor.
951 */
952#ifndef _SYS_SYSPROTO_H_
953struct fpathconf_args {
954	int	fd;
955	int	name;
956};
957#endif
958/*
959 * MPSAFE
960 */
961/* ARGSUSED */
962int
963fpathconf(td, uap)
964	struct thread *td;
965	register struct fpathconf_args *uap;
966{
967	struct file *fp;
968	struct vnode *vp;
969	int error;
970
971	if ((error = fget(td, uap->fd, &fp)) != 0)
972		return (error);
973
974	/* If asynchronous I/O is available, it works for all descriptors. */
975	if (uap->name == _PC_ASYNC_IO) {
976		td->td_retval[0] = async_io_version;
977		goto out;
978	}
979	switch (fp->f_type) {
980	case DTYPE_PIPE:
981	case DTYPE_SOCKET:
982		if (uap->name != _PC_PIPE_BUF) {
983			error = EINVAL;
984		} else {
985			td->td_retval[0] = PIPE_BUF;
986			error = 0;
987		}
988		break;
989	case DTYPE_FIFO:
990	case DTYPE_VNODE:
991		vp = (struct vnode *)fp->f_data;
992		mtx_lock(&Giant);
993		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
994		mtx_unlock(&Giant);
995		break;
996	default:
997		error = EOPNOTSUPP;
998		break;
999	}
1000out:
1001	fdrop(fp, td);
1002	return (error);
1003}
1004
1005/*
1006 * Allocate a file descriptor for the process.
1007 */
1008static int fdexpand;
1009SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
1010
1011int
1012fdalloc(td, want, result)
1013	struct thread *td;
1014	int want;
1015	int *result;
1016{
1017	struct proc *p = td->td_proc;
1018	register struct filedesc *fdp = td->td_proc->p_fd;
1019	register int i;
1020	int lim, last, nfiles;
1021	struct file **newofile, **oldofile;
1022	char *newofileflags;
1023
1024	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1025
1026	/*
1027	 * Search for a free descriptor starting at the higher
1028	 * of want or fd_freefile.  If that fails, consider
1029	 * expanding the ofile array.
1030	 */
1031	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1032	for (;;) {
1033		last = min(fdp->fd_nfiles, lim);
1034		i = max(want, fdp->fd_freefile);
1035		for (; i < last; i++) {
1036			if (fdp->fd_ofiles[i] == NULL) {
1037				fdp->fd_ofileflags[i] = 0;
1038				if (i > fdp->fd_lastfile)
1039					fdp->fd_lastfile = i;
1040				if (want <= fdp->fd_freefile)
1041					fdp->fd_freefile = i;
1042				*result = i;
1043				return (0);
1044			}
1045		}
1046
1047		/*
1048		 * No space in current array.  Expand?
1049		 */
1050		if (i >= lim)
1051			return (EMFILE);
1052		if (fdp->fd_nfiles < NDEXTENT)
1053			nfiles = NDEXTENT;
1054		else
1055			nfiles = 2 * fdp->fd_nfiles;
1056		while (nfiles < want)
1057			nfiles <<= 1;
1058		FILEDESC_UNLOCK(fdp);
1059		/*
1060		 * XXX malloc() calls uma_large_malloc() for sizes larger
1061		 * than KMEM_ZMAX bytes. uma_large_malloc() requires Giant.
1062		 */
1063		mtx_lock(&Giant);
1064		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
1065		mtx_unlock(&Giant);
1066
1067		/*
1068		 * Deal with file-table extend race that might have
1069		 * occurred while filedesc was unlocked.
1070		 */
1071		FILEDESC_LOCK(fdp);
1072		if (fdp->fd_nfiles >= nfiles) {
1073			/* XXX uma_large_free() needs Giant. */
1074			FILEDESC_UNLOCK(fdp);
1075			mtx_lock(&Giant);
1076			free(newofile, M_FILEDESC);
1077			mtx_unlock(&Giant);
1078			FILEDESC_LOCK(fdp);
1079			continue;
1080		}
1081		newofileflags = (char *) &newofile[nfiles];
1082		/*
1083		 * Copy the existing ofile and ofileflags arrays
1084		 * and zero the new portion of each array.
1085		 */
1086		i = fdp->fd_nfiles * sizeof(struct file *);
1087		bcopy(fdp->fd_ofiles, newofile,	i);
1088		bzero((char *)newofile + i,
1089		    nfiles * sizeof(struct file *) - i);
1090		i = fdp->fd_nfiles * sizeof(char);
1091		bcopy(fdp->fd_ofileflags, newofileflags, i);
1092		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1093		if (fdp->fd_nfiles > NDFILE)
1094			oldofile = fdp->fd_ofiles;
1095		else
1096			oldofile = NULL;
1097		fdp->fd_ofiles = newofile;
1098		fdp->fd_ofileflags = newofileflags;
1099		fdp->fd_nfiles = nfiles;
1100		fdexpand++;
1101		if (oldofile != NULL) {
1102			/* XXX uma_large_free() needs Giant. */
1103			FILEDESC_UNLOCK(fdp);
1104			mtx_lock(&Giant);
1105			free(oldofile, M_FILEDESC);
1106			mtx_unlock(&Giant);
1107			FILEDESC_LOCK(fdp);
1108		}
1109	}
1110	return (0);
1111}
1112
1113/*
1114 * Check to see whether n user file descriptors
1115 * are available to the process p.
1116 */
1117int
1118fdavail(td, n)
1119	struct thread *td;
1120	register int n;
1121{
1122	struct proc *p = td->td_proc;
1123	register struct filedesc *fdp = td->td_proc->p_fd;
1124	register struct file **fpp;
1125	register int i, lim, last;
1126
1127	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1128
1129	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1130	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1131		return (1);
1132	last = min(fdp->fd_nfiles, lim);
1133	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1134	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1135		if (*fpp == NULL && --n <= 0)
1136			return (1);
1137	}
1138	return (0);
1139}
1140
1141/*
1142 * Create a new open file structure and allocate
1143 * a file descriptor for the process that refers to it.
1144 */
1145int
1146falloc(td, resultfp, resultfd)
1147	register struct thread *td;
1148	struct file **resultfp;
1149	int *resultfd;
1150{
1151	struct proc *p = td->td_proc;
1152	register struct file *fp, *fq;
1153	int error, i;
1154
1155	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1156	sx_xlock(&filelist_lock);
1157	if (nfiles >= maxfiles) {
1158		sx_xunlock(&filelist_lock);
1159		uma_zfree(file_zone, fp);
1160		tablefull("file");
1161		return (ENFILE);
1162	}
1163	nfiles++;
1164
1165	/*
1166	 * If the process has file descriptor zero open, add the new file
1167	 * descriptor to the list of open files at that point, otherwise
1168	 * put it at the front of the list of open files.
1169	 */
1170	fp->f_mtxp = mtx_pool_alloc();
1171	fp->f_gcflag = 0;
1172	fp->f_count = 1;
1173	fp->f_cred = crhold(td->td_ucred);
1174	fp->f_ops = &badfileops;
1175	fp->f_seqcount = 1;
1176	FILEDESC_LOCK(p->p_fd);
1177	if ((fq = p->p_fd->fd_ofiles[0])) {
1178		LIST_INSERT_AFTER(fq, fp, f_list);
1179	} else {
1180		LIST_INSERT_HEAD(&filehead, fp, f_list);
1181	}
1182	sx_xunlock(&filelist_lock);
1183	if ((error = fdalloc(td, 0, &i))) {
1184		FILEDESC_UNLOCK(p->p_fd);
1185		fdrop(fp, td);
1186		return (error);
1187	}
1188	p->p_fd->fd_ofiles[i] = fp;
1189	FILEDESC_UNLOCK(p->p_fd);
1190	if (resultfp)
1191		*resultfp = fp;
1192	if (resultfd)
1193		*resultfd = i;
1194	return (0);
1195}
1196
1197/*
1198 * Free a file descriptor.
1199 */
1200void
1201ffree(fp)
1202	register struct file *fp;
1203{
1204
1205	KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
1206	sx_xlock(&filelist_lock);
1207	LIST_REMOVE(fp, f_list);
1208	nfiles--;
1209	sx_xunlock(&filelist_lock);
1210	crfree(fp->f_cred);
1211	uma_zfree(file_zone, fp);
1212}
1213
1214/*
1215 * Build a new filedesc structure.
1216 */
1217struct filedesc *
1218fdinit(td)
1219	struct thread *td;
1220{
1221	register struct filedesc0 *newfdp;
1222	register struct filedesc *fdp = td->td_proc->p_fd;
1223
1224	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1225	    M_FILEDESC, M_WAITOK | M_ZERO);
1226	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1227	FILEDESC_LOCK(&newfdp->fd_fd);
1228	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1229	if (newfdp->fd_fd.fd_cdir)
1230		VREF(newfdp->fd_fd.fd_cdir);
1231	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1232	if (newfdp->fd_fd.fd_rdir)
1233		VREF(newfdp->fd_fd.fd_rdir);
1234	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1235	if (newfdp->fd_fd.fd_jdir)
1236		VREF(newfdp->fd_fd.fd_jdir);
1237
1238	/* Create the file descriptor table. */
1239	newfdp->fd_fd.fd_refcnt = 1;
1240	newfdp->fd_fd.fd_cmask = cmask;
1241	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1242	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1243	newfdp->fd_fd.fd_nfiles = NDFILE;
1244	newfdp->fd_fd.fd_knlistsize = -1;
1245	FILEDESC_UNLOCK(&newfdp->fd_fd);
1246
1247	return (&newfdp->fd_fd);
1248}
1249
1250/*
1251 * Share a filedesc structure.
1252 */
1253struct filedesc *
1254fdshare(p)
1255	struct proc *p;
1256{
1257	FILEDESC_LOCK(p->p_fd);
1258	p->p_fd->fd_refcnt++;
1259	FILEDESC_UNLOCK(p->p_fd);
1260	return (p->p_fd);
1261}
1262
1263/*
1264 * Copy a filedesc structure.
1265 */
1266struct filedesc *
1267fdcopy(td)
1268	struct thread *td;
1269{
1270	register struct filedesc *newfdp, *fdp;
1271	register struct file **fpp;
1272	register int i, j;
1273
1274	/* Certain daemons might not have file descriptors. */
1275	fdp = td->td_proc->p_fd;
1276	if (fdp == NULL)
1277		return (NULL);
1278
1279	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1280
1281	FILEDESC_UNLOCK(fdp);
1282	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1283	    M_FILEDESC, M_WAITOK);
1284	FILEDESC_LOCK(fdp);
1285	bcopy(fdp, newfdp, sizeof(struct filedesc));
1286	FILEDESC_UNLOCK(fdp);
1287	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1288	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1289	if (newfdp->fd_cdir)
1290		VREF(newfdp->fd_cdir);
1291	if (newfdp->fd_rdir)
1292		VREF(newfdp->fd_rdir);
1293	if (newfdp->fd_jdir)
1294		VREF(newfdp->fd_jdir);
1295	newfdp->fd_refcnt = 1;
1296
1297	/*
1298	 * If the number of open files fits in the internal arrays
1299	 * of the open file structure, use them, otherwise allocate
1300	 * additional memory for the number of descriptors currently
1301	 * in use.
1302	 */
1303	FILEDESC_LOCK(fdp);
1304	newfdp->fd_lastfile = fdp->fd_lastfile;
1305	newfdp->fd_nfiles = fdp->fd_nfiles;
1306	if (newfdp->fd_lastfile < NDFILE) {
1307		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1308		newfdp->fd_ofileflags =
1309		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1310		i = NDFILE;
1311	} else {
1312		/*
1313		 * Compute the smallest multiple of NDEXTENT needed
1314		 * for the file descriptors currently in use,
1315		 * allowing the table to shrink.
1316		 */
1317retry:
1318		i = newfdp->fd_nfiles;
1319		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1320			i /= 2;
1321		FILEDESC_UNLOCK(fdp);
1322		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1323		    M_FILEDESC, M_WAITOK);
1324		FILEDESC_LOCK(fdp);
1325		newfdp->fd_lastfile = fdp->fd_lastfile;
1326		newfdp->fd_nfiles = fdp->fd_nfiles;
1327		j = newfdp->fd_nfiles;
1328		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1329			j /= 2;
1330		if (i != j) {
1331			/*
1332			 * The size of the original table has changed.
1333			 * Go over once again.
1334			 */
1335			FILEDESC_UNLOCK(fdp);
1336			FREE(newfdp->fd_ofiles, M_FILEDESC);
1337			FILEDESC_LOCK(fdp);
1338			newfdp->fd_lastfile = fdp->fd_lastfile;
1339			newfdp->fd_nfiles = fdp->fd_nfiles;
1340			goto retry;
1341		}
1342		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1343	}
1344	newfdp->fd_nfiles = i;
1345	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1346	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1347
1348	/*
1349	 * kq descriptors cannot be copied.
1350	 */
1351	if (newfdp->fd_knlistsize != -1) {
1352		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1353		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1354			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1355				*fpp = NULL;
1356				if (i < newfdp->fd_freefile)
1357					newfdp->fd_freefile = i;
1358			}
1359			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1360				newfdp->fd_lastfile--;
1361		}
1362		newfdp->fd_knlist = NULL;
1363		newfdp->fd_knlistsize = -1;
1364		newfdp->fd_knhash = NULL;
1365		newfdp->fd_knhashmask = 0;
1366	}
1367
1368	fpp = newfdp->fd_ofiles;
1369	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1370		if (*fpp != NULL)
1371			fhold(*fpp);
1372	}
1373	return (newfdp);
1374}
1375
1376/*
1377 * Release a filedesc structure.
1378 */
1379void
1380fdfree(td)
1381	struct thread *td;
1382{
1383	register struct filedesc *fdp;
1384	struct file **fpp;
1385	register int i;
1386
1387	/* Certain daemons might not have file descriptors. */
1388	fdp = td->td_proc->p_fd;
1389	if (fdp == NULL)
1390		return;
1391
1392	FILEDESC_LOCK(fdp);
1393	if (--fdp->fd_refcnt > 0) {
1394		FILEDESC_UNLOCK(fdp);
1395		return;
1396	}
1397
1398	/*
1399	 * We are the last reference to the structure, so we can
1400	 * safely assume it will not change out from under us.
1401	 */
1402	FILEDESC_UNLOCK(fdp);
1403	fpp = fdp->fd_ofiles;
1404	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1405		if (*fpp)
1406			(void) closef(*fpp, td);
1407	}
1408	td->td_proc->p_fd = NULL;
1409	if (fdp->fd_nfiles > NDFILE)
1410		FREE(fdp->fd_ofiles, M_FILEDESC);
1411	if (fdp->fd_cdir)
1412		vrele(fdp->fd_cdir);
1413	if (fdp->fd_rdir)
1414		vrele(fdp->fd_rdir);
1415	if (fdp->fd_jdir)
1416		vrele(fdp->fd_jdir);
1417	if (fdp->fd_knlist)
1418		FREE(fdp->fd_knlist, M_KQUEUE);
1419	if (fdp->fd_knhash)
1420		FREE(fdp->fd_knhash, M_KQUEUE);
1421	mtx_destroy(&fdp->fd_mtx);
1422	FREE(fdp, M_FILEDESC);
1423}
1424
1425/*
1426 * For setugid programs, we don't want people to use that setugidness
1427 * to generate error messages which write to a file which would
1428 * otherwise be off-limits to the process.  We check for filesystems where
1429 * the vnode can change out from under us after execve (like [lin]procfs).
1430 *
1431 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1432 * sufficient.  We also don't check for setugidness since we know we are.
1433 */
1434static int
1435is_unsafe(struct file *fp)
1436{
1437	if (fp->f_type == DTYPE_VNODE) {
1438		struct vnode *vp = (struct vnode *)fp->f_data;
1439
1440		if ((vp->v_vflag & VV_PROCDEP) != 0)
1441			return (1);
1442	}
1443	return (0);
1444}
1445
1446/*
1447 * Make this setugid thing safe, if at all possible.
1448 */
1449void
1450setugidsafety(td)
1451	struct thread *td;
1452{
1453	struct filedesc *fdp;
1454	register int i;
1455
1456	/* Certain daemons might not have file descriptors. */
1457	fdp = td->td_proc->p_fd;
1458	if (fdp == NULL)
1459		return;
1460
1461	/*
1462	 * Note: fdp->fd_ofiles may be reallocated out from under us while
1463	 * we are blocked in a close.  Be careful!
1464	 */
1465	FILEDESC_LOCK(fdp);
1466	for (i = 0; i <= fdp->fd_lastfile; i++) {
1467		if (i > 2)
1468			break;
1469		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1470			struct file *fp;
1471
1472#if 0
1473			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1474				(void) munmapfd(td, i);
1475#endif
1476			if (i < fdp->fd_knlistsize) {
1477				FILEDESC_UNLOCK(fdp);
1478				knote_fdclose(td, i);
1479				FILEDESC_LOCK(fdp);
1480			}
1481			/*
1482			 * NULL-out descriptor prior to close to avoid
1483			 * a race while close blocks.
1484			 */
1485			fp = fdp->fd_ofiles[i];
1486			fdp->fd_ofiles[i] = NULL;
1487			fdp->fd_ofileflags[i] = 0;
1488			if (i < fdp->fd_freefile)
1489				fdp->fd_freefile = i;
1490			FILEDESC_UNLOCK(fdp);
1491			(void) closef(fp, td);
1492			FILEDESC_LOCK(fdp);
1493		}
1494	}
1495	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1496		fdp->fd_lastfile--;
1497	FILEDESC_UNLOCK(fdp);
1498}
1499
1500/*
1501 * Close any files on exec?
1502 */
1503void
1504fdcloseexec(td)
1505	struct thread *td;
1506{
1507	struct filedesc *fdp;
1508	register int i;
1509
1510	/* Certain daemons might not have file descriptors. */
1511	fdp = td->td_proc->p_fd;
1512	if (fdp == NULL)
1513		return;
1514
1515	FILEDESC_LOCK(fdp);
1516
1517	/*
1518	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1519	 * may block and rip them out from under us.
1520	 */
1521	for (i = 0; i <= fdp->fd_lastfile; i++) {
1522		if (fdp->fd_ofiles[i] != NULL &&
1523		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1524			struct file *fp;
1525
1526#if 0
1527			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1528				(void) munmapfd(td, i);
1529#endif
1530			if (i < fdp->fd_knlistsize) {
1531				FILEDESC_UNLOCK(fdp);
1532				knote_fdclose(td, i);
1533				FILEDESC_LOCK(fdp);
1534			}
1535			/*
1536			 * NULL-out descriptor prior to close to avoid
1537			 * a race while close blocks.
1538			 */
1539			fp = fdp->fd_ofiles[i];
1540			fdp->fd_ofiles[i] = NULL;
1541			fdp->fd_ofileflags[i] = 0;
1542			if (i < fdp->fd_freefile)
1543				fdp->fd_freefile = i;
1544			FILEDESC_UNLOCK(fdp);
1545			(void) closef(fp, td);
1546			FILEDESC_LOCK(fdp);
1547		}
1548	}
1549	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1550		fdp->fd_lastfile--;
1551	FILEDESC_UNLOCK(fdp);
1552}
1553
1554/*
1555 * It is unsafe for set[ug]id processes to be started with file
1556 * descriptors 0..2 closed, as these descriptors are given implicit
1557 * significance in the Standard C library.  fdcheckstd() will create a
1558 * descriptor referencing /dev/null for each of stdin, stdout, and
1559 * stderr that is not already open.
1560 */
1561int
1562fdcheckstd(td)
1563	struct thread *td;
1564{
1565	struct nameidata nd;
1566	struct filedesc *fdp;
1567	struct file *fp;
1568	register_t retval;
1569	int fd, i, error, flags, devnull;
1570
1571	fdp = td->td_proc->p_fd;
1572	if (fdp == NULL)
1573		return (0);
1574	devnull = -1;
1575	error = 0;
1576	for (i = 0; i < 3; i++) {
1577		if (fdp->fd_ofiles[i] != NULL)
1578			continue;
1579		if (devnull < 0) {
1580			error = falloc(td, &fp, &fd);
1581			if (error != 0)
1582				break;
1583			KASSERT(fd == i, ("oof, we didn't get our fd"));
1584			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1585			    td);
1586			flags = FREAD | FWRITE;
1587			error = vn_open(&nd, &flags, 0);
1588			if (error != 0) {
1589				FILEDESC_LOCK(fdp);
1590				fdp->fd_ofiles[fd] = NULL;
1591				FILEDESC_UNLOCK(fdp);
1592				fdrop(fp, td);
1593				break;
1594			}
1595			NDFREE(&nd, NDF_ONLY_PNBUF);
1596			fp->f_data = nd.ni_vp;
1597			fp->f_flag = flags;
1598			fp->f_ops = &vnops;
1599			fp->f_type = DTYPE_VNODE;
1600			VOP_UNLOCK(nd.ni_vp, 0, td);
1601			devnull = fd;
1602		} else {
1603			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
1604			if (error != 0)
1605				break;
1606		}
1607	}
1608	return (error);
1609}
1610
1611/*
1612 * Internal form of close.
1613 * Decrement reference count on file structure.
1614 * Note: td may be NULL when closing a file
1615 * that was being passed in a message.
1616 */
1617int
1618closef(fp, td)
1619	register struct file *fp;
1620	register struct thread *td;
1621{
1622	struct vnode *vp;
1623	struct flock lf;
1624
1625	if (fp == NULL)
1626		return (0);
1627	/*
1628	 * POSIX record locking dictates that any close releases ALL
1629	 * locks owned by this process.  This is handled by setting
1630	 * a flag in the unlock to free ONLY locks obeying POSIX
1631	 * semantics, and not to free BSD-style file locks.
1632	 * If the descriptor was in a message, POSIX-style locks
1633	 * aren't passed with the descriptor.
1634	 */
1635	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1636	    fp->f_type == DTYPE_VNODE) {
1637		lf.l_whence = SEEK_SET;
1638		lf.l_start = 0;
1639		lf.l_len = 0;
1640		lf.l_type = F_UNLCK;
1641		vp = (struct vnode *)fp->f_data;
1642		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1643		    F_UNLCK, &lf, F_POSIX);
1644	}
1645	return (fdrop(fp, td));
1646}
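
/*
 * Illustration of the POSIX semantics noted above (a sketch, not part
 * of this file): closing ANY descriptor referencing a file releases all
 * of the process's POSIX locks on it, even locks taken via another fd.
 *
 *	int a = open("db", O_RDWR);
 *	int b = open("db", O_RDWR);	-- same file, second descriptor
 *	fcntl(a, F_SETLK, &wrlock);	-- wrlock: an F_WRLCK struct flock
 *	close(b);			-- the lock taken via "a" is gone
 */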
1647
1648/*
1649 * Drop reference on struct file passed in, may call closef if the
1650 * reference hits zero.
1651 */
1652int
1653fdrop(fp, td)
1654	struct file *fp;
1655	struct thread *td;
1656{
1657
1658	FILE_LOCK(fp);
1659	return (fdrop_locked(fp, td));
1660}
1661
1662/*
1663 * Extract the file pointer associated with the specified descriptor for
1664 * the current user process.
1665 *
1666 * If the descriptor doesn't exist, EBADF is returned.
1667 *
1668 * If the descriptor exists but doesn't match 'flags' then
1669 * return EBADF for read attempts and EINVAL for write attempts.
1670 *
1671 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1672 * It should be dropped with fdrop().
1673 * If it is not set, the refcount will not be bumped; however, the
1674 * thread's filedesc struct will be returned locked (for fgetsock).
1675 *
1676 * If an error occurred, the non-zero error is returned and *fpp is set to NULL.
1677 * Otherwise *fpp is set and zero is returned.
1678 */
1679static __inline int
1680_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1681{
1682	struct filedesc *fdp;
1683	struct file *fp;
1684
1685	*fpp = NULL;
1686	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1687		return (EBADF);
1688	FILEDESC_LOCK(fdp);
1689	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1690		FILEDESC_UNLOCK(fdp);
1691		return (EBADF);
1692	}
1693
1694	/*
1695 * Note: FREAD failures return EBADF to maintain backwards
1696	 * compatibility with what routines returned before.
1697	 *
1698	 * Only one flag, or 0, may be specified.
1699	 */
1700	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1701		FILEDESC_UNLOCK(fdp);
1702		return (EBADF);
1703	}
1704	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1705		FILEDESC_UNLOCK(fdp);
1706		return (EINVAL);
1707	}
1708	if (hold) {
1709		fhold(fp);
1710		FILEDESC_UNLOCK(fdp);
1711	}
1712	*fpp = fp;
1713	return (0);
1714}
1715
1716int
1717fget(struct thread *td, int fd, struct file **fpp)
1718{
1719
1720	return(_fget(td, fd, fpp, 0, 1));
1721}
1722
1723int
1724fget_read(struct thread *td, int fd, struct file **fpp)
1725{
1726
1727	return(_fget(td, fd, fpp, FREAD, 1));
1728}
1729
1730int
1731fget_write(struct thread *td, int fd, struct file **fpp)
1732{
1733
1734	return(_fget(td, fd, fpp, FWRITE, 1));
1735}
1736
1737/*
1738 * Like fget() but loads the underlying vnode, or returns an error if
1739 * the descriptor does not represent a vnode.  Note that pipes use vnodes
1740 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1741 * error).  The returned vnode will be vref()d.
1742 */
1743static __inline int
1744_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1745{
1746	struct file *fp;
1747	int error;
1748
1749	*vpp = NULL;
1750	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1751		return (error);
1752	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1753		error = EINVAL;
1754	} else {
1755		*vpp = (struct vnode *)fp->f_data;
1756		vref(*vpp);
1757	}
1758	FILEDESC_UNLOCK(td->td_proc->p_fd);
1759	return (error);
1760}
1761
1762int
1763fgetvp(struct thread *td, int fd, struct vnode **vpp)
1764{
1765
1766	return (_fgetvp(td, fd, vpp, 0));
1767}
1768
1769int
1770fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1771{
1772
1773	return (_fgetvp(td, fd, vpp, FREAD));
1774}
1775
1776int
1777fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1778{
1779
1780	return (_fgetvp(td, fd, vpp, FWRITE));
1781}
1782
1783/*
1784 * Like fget() but loads the underlying socket, or returns an error if
1785 * the descriptor does not represent a socket.
1786 *
1787 * We bump the ref count on the returned socket.  XXX Also obtain the SX
1788 * lock in the future.
1789 */
1790int
1791fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1792{
1793	struct file *fp;
1794	int error;
1795
1796	*spp = NULL;
1797	if (fflagp != NULL)
1798		*fflagp = 0;
1799	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1800		return (error);
1801	if (fp->f_type != DTYPE_SOCKET) {
1802		error = ENOTSOCK;
1803	} else {
1804		*spp = (struct socket *)fp->f_data;
1805		if (fflagp)
1806			*fflagp = fp->f_flag;
1807		soref(*spp);
1808	}
1809	FILEDESC_UNLOCK(td->td_proc->p_fd);
1810	return (error);
1811}
1812
1813/*
1814 * Drop the reference count on the socket and XXX release the SX lock in
1815 * the future.  The last reference closes the socket.
1816 */
1817void
1818fputsock(struct socket *so)
1819{
1820
1821	sorele(so);
1822}
1823
1824/*
1825 * Drop reference on struct file passed in, may call closef if the
1826 * reference hits zero.
1827 * Expects struct file locked, and will unlock it.
1828 */
1829int
1830fdrop_locked(fp, td)
1831	struct file *fp;
1832	struct thread *td;
1833{
1834	struct flock lf;
1835	struct vnode *vp;
1836	int error;
1837
1838	FILE_LOCK_ASSERT(fp, MA_OWNED);
1839
1840	if (--fp->f_count > 0) {
1841		FILE_UNLOCK(fp);
1842		return (0);
1843	}
1844	mtx_lock(&Giant);
1845	if (fp->f_count < 0)
1846		panic("fdrop: count < 0");
1847	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1848		lf.l_whence = SEEK_SET;
1849		lf.l_start = 0;
1850		lf.l_len = 0;
1851		lf.l_type = F_UNLCK;
1852		vp = (struct vnode *)fp->f_data;
1853		FILE_UNLOCK(fp);
1854		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1855	} else
1856		FILE_UNLOCK(fp);
1857	if (fp->f_ops != &badfileops)
1858		error = fo_close(fp, td);
1859	else
1860		error = 0;
1861	ffree(fp);
1862	mtx_unlock(&Giant);
1863	return (error);
1864}
1865
1866/*
1867 * Apply an advisory lock on a file descriptor.
1868 *
1869 * Just attempt to get a record lock of the requested type on
1870 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1871 */
1872#ifndef _SYS_SYSPROTO_H_
1873struct flock_args {
1874	int	fd;
1875	int	how;
1876};
1877#endif
1878/*
1879 * MPSAFE
1880 */
1881/* ARGSUSED */
1882int
1883flock(td, uap)
1884	struct thread *td;
1885	register struct flock_args *uap;
1886{
1887	struct file *fp;
1888	struct vnode *vp;
1889	struct flock lf;
1890	int error;
1891
1892	if ((error = fget(td, uap->fd, &fp)) != 0)
1893		return (error);
1894	if (fp->f_type != DTYPE_VNODE) {
1895		fdrop(fp, td);
1896		return (EOPNOTSUPP);
1897	}
1898
1899	mtx_lock(&Giant);
1900	vp = (struct vnode *)fp->f_data;
1901	lf.l_whence = SEEK_SET;
1902	lf.l_start = 0;
1903	lf.l_len = 0;
1904	if (uap->how & LOCK_UN) {
1905		lf.l_type = F_UNLCK;
1906		FILE_LOCK(fp);
1907		fp->f_flag &= ~FHASLOCK;
1908		FILE_UNLOCK(fp);
1909		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1910		goto done2;
1911	}
1912	if (uap->how & LOCK_EX)
1913		lf.l_type = F_WRLCK;
1914	else if (uap->how & LOCK_SH)
1915		lf.l_type = F_RDLCK;
1916	else {
1917		error = EBADF;
1918		goto done2;
1919	}
1920	FILE_LOCK(fp);
1921	fp->f_flag |= FHASLOCK;
1922	FILE_UNLOCK(fp);
1923	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1924	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1925done2:
1926	fdrop(fp, td);
1927	mtx_unlock(&Giant);
1928	return (error);
1929}
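
/*
 * Illustrative userland usage (a sketch, not part of this file): take a
 * non-blocking exclusive BSD-style lock.  Unlike the POSIX locks above,
 * an flock() lock follows the open file object, not the process.
 *
 *	if (flock(fd, LOCK_EX | LOCK_NB) == -1 && errno == EWOULDBLOCK)
 *		;			-- someone else holds the lock
 *	flock(fd, LOCK_UN);
 */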
1930
1931/*
1932 * File Descriptor pseudo-device driver (/dev/fd/).
1933 *
1934 * Opening minor device N dup()s the file (if any) connected to file
1935 * descriptor N belonging to the calling process.  Note that this driver
1936 * consists of only the ``open()'' routine, because all subsequent
1937 * references to this file will be direct to the other driver.
1938 */
1939/* ARGSUSED */
1940static int
1941fdopen(dev, mode, type, td)
1942	dev_t dev;
1943	int mode, type;
1944	struct thread *td;
1945{
1946
1947	/*
1948	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1949	 * file descriptor being sought for duplication. The error
1950	 * return ensures that the vnode for this device will be released
1951	 * by vn_open. Open will detect this special error and take the
1952	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1953	 * will simply report the error.
1954	 */
1955	td->td_dupfd = dev2unit(dev);
1956	return (ENODEV);
1957}
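
/*
 * Illustrative effect from userland (a sketch, not part of this file):
 * opening /dev/fd/N yields a new descriptor referencing whatever
 * descriptor N already references, via the ENODEV/dupfdopen() path.
 *
 *	int nfd = open("/dev/fd/0", O_RDONLY);	-- roughly dup(0)
 */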
1958
1959/*
1960 * Duplicate the specified descriptor to a free descriptor.
1961 */
1962int
1963dupfdopen(td, fdp, indx, dfd, mode, error)
1964	struct thread *td;
1965	struct filedesc *fdp;
1966	int indx, dfd;
1967	int mode;
1968	int error;
1969{
1970	register struct file *wfp;
1971	struct file *fp;
1972
1973	/*
1974	 * If the to-be-dup'd fd number is greater than the allowed number
1975	 * of file descriptors, or the fd to be dup'd has already been
1976	 * closed, then reject.
1977	 */
1978	FILEDESC_LOCK(fdp);
1979	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1980	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1981		FILEDESC_UNLOCK(fdp);
1982		return (EBADF);
1983	}
1984
1985	/*
1986	 * There are two cases of interest here.
1987	 *
1988	 * For ENODEV simply dup (dfd) to file descriptor
1989	 * (indx) and return.
1990	 *
1991	 * For ENXIO steal away the file structure from (dfd) and
1992	 * store it in (indx).  (dfd) is effectively closed by
1993	 * this operation.
1994	 *
1995	 * Any other error code is just returned.
1996	 */
1997	switch (error) {
1998	case ENODEV:
1999		/*
2000		 * Check that the mode the file is being opened for is a
2001		 * subset of the mode of the existing descriptor.
2002		 */
2003		FILE_LOCK(wfp);
2004		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
2005			FILE_UNLOCK(wfp);
2006			FILEDESC_UNLOCK(fdp);
2007			return (EACCES);
2008		}
2009		fp = fdp->fd_ofiles[indx];
2010#if 0
2011		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2012			(void) munmapfd(td, indx);
2013#endif
2014		fdp->fd_ofiles[indx] = wfp;
2015		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2016		fhold_locked(wfp);
2017		FILE_UNLOCK(wfp);
2018		if (indx > fdp->fd_lastfile)
2019			fdp->fd_lastfile = indx;
2020		if (fp != NULL)
2021			FILE_LOCK(fp);
2022		FILEDESC_UNLOCK(fdp);
2023		/*
2024		 * We now own the reference to fp that the ofiles[] array
2025		 * used to own.  Release it.
2026		 */
2027		if (fp != NULL)
2028			fdrop_locked(fp, td);
2029		return (0);
2030
2031	case ENXIO:
2032		/*
2033		 * Steal away the file pointer from dfd and stuff it into indx.
2034		 */
2035		fp = fdp->fd_ofiles[indx];
2036#if 0
2037		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2038			(void) munmapfd(td, indx);
2039#endif
2040		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2041		fdp->fd_ofiles[dfd] = NULL;
2042		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2043		fdp->fd_ofileflags[dfd] = 0;
2044
2045		/*
2046		 * Complete the clean up of the filedesc structure by
2047		 * recomputing the various hints.
2048		 */
2049		if (indx > fdp->fd_lastfile) {
2050			fdp->fd_lastfile = indx;
2051		} else {
2052			while (fdp->fd_lastfile > 0 &&
2053			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2054				fdp->fd_lastfile--;
2055			}
2056			if (dfd < fdp->fd_freefile)
2057				fdp->fd_freefile = dfd;
2058		}
2059		if (fp != NULL)
2060			FILE_LOCK(fp);
2061		FILEDESC_UNLOCK(fdp);
2062
2063		/*
2064		 * we now own the reference to fp that the ofiles[] array
2065		 * used to own.  Release it.
2066		 */
2067		if (fp != NULL)
2068			fdrop_locked(fp, td);
2069		return (0);
2070
2071	default:
2072		FILEDESC_UNLOCK(fdp);
2073		return (error);
2074	}
2075	/* NOTREACHED */
2076}
2077
2078/*
2079 * Get file structures.
2080 */
2081static int
2082sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2083{
2084	struct xfile xf;
2085	struct filedesc *fdp;
2086	struct file *fp;
2087	struct proc *p;
2088	int error, n;
2089
2090	sysctl_wire_old_buffer(req, 0);
2091	if (req->oldptr == NULL) {
2092		n = 16;		/* A slight overestimate. */
2093		sx_slock(&filelist_lock);
2094		LIST_FOREACH(fp, &filehead, f_list) {
2095			/*
2096			 * We should grab the lock, but this is an
2097			 * estimate, so does it really matter?
2098			 */
2099			/* mtx_lock(fp->f_mtxp); */
2100			n += fp->f_count;
2101			/* mtx_unlock(fp->f_mtxp); */
2102		}
2103		sx_sunlock(&filelist_lock);
2104		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
2105	}
2106	error = 0;
2107	bzero(&xf, sizeof(xf));
2108	xf.xf_size = sizeof(xf);
2109	sx_slock(&allproc_lock);
2110	LIST_FOREACH(p, &allproc, p_list) {
2111		PROC_LOCK(p);
2112		xf.xf_pid = p->p_pid;
2113		xf.xf_uid = p->p_ucred->cr_uid;
2114		if ((fdp = p->p_fd) == NULL) {
2115			PROC_UNLOCK(p);
2116			continue;
2117		}
2118		FILEDESC_LOCK(fdp);
2119		for (n = 0; n < fdp->fd_nfiles; ++n) {
2120			if ((fp = fdp->fd_ofiles[n]) == NULL)
2121				continue;
2122			xf.xf_fd = n;
2123			xf.xf_file = fp;
2124#define	XF_COPY(field) xf.xf_##field = fp->f_##field
2125			XF_COPY(type);
2126			XF_COPY(count);
2127			XF_COPY(msgcount);
2128			XF_COPY(offset);
2129			XF_COPY(data);
2130			XF_COPY(flag);
2131#undef XF_COPY
2132			error = SYSCTL_OUT(req, &xf, sizeof(xf));
2133			if (error)
2134				break;
2135		}
2136		FILEDESC_UNLOCK(fdp);
2137		PROC_UNLOCK(p);
2138		if (error)
2139			break;
2140	}
2141	sx_sunlock(&allproc_lock);
2142	return (error);
2143}
2144
2145SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2146    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
2147
2148SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2149    &maxfilesperproc, 0, "Maximum files allowed open per process");
2150
2151SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2152    &maxfiles, 0, "Maximum number of files");
2153
2154SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2155    &nfiles, 0, "System-wide number of open files");
2156
2157static void
2158fildesc_drvinit(void *unused)
2159{
2160	dev_t dev;
2161
2162	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2163	make_dev_alias(dev, "stdin");
2164	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2165	make_dev_alias(dev, "stdout");
2166	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2167	make_dev_alias(dev, "stderr");
2168	if (!devfs_present) {
2169		int fd;
2170
2171		for (fd = 3; fd < NUMFDESC; fd++)
2172			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2173			    "fd/%d", fd);
2174	}
2175}
2176
2177struct fileops badfileops = {
2178	badfo_readwrite,
2179	badfo_readwrite,
2180	badfo_ioctl,
2181	badfo_poll,
2182	badfo_kqfilter,
2183	badfo_stat,
2184	badfo_close
2185};
2186
2187static int
2188badfo_readwrite(fp, uio, active_cred, flags, td)
2189	struct file *fp;
2190	struct uio *uio;
2191	struct ucred *active_cred;
2192	struct thread *td;
2193	int flags;
2194{
2195
2196	return (EBADF);
2197}
2198
2199static int
2200badfo_ioctl(fp, com, data, active_cred, td)
2201	struct file *fp;
2202	u_long com;
2203	void *data;
2204	struct ucred *active_cred;
2205	struct thread *td;
2206{
2207
2208	return (EBADF);
2209}
2210
2211static int
2212badfo_poll(fp, events, active_cred, td)
2213	struct file *fp;
2214	int events;
2215	struct ucred *active_cred;
2216	struct thread *td;
2217{
2218
2219	return (0);
2220}
2221
2222static int
2223badfo_kqfilter(fp, kn)
2224	struct file *fp;
2225	struct knote *kn;
2226{
2227
2228	return (0);
2229}
2230
2231static int
2232badfo_stat(fp, sb, active_cred, td)
2233	struct file *fp;
2234	struct stat *sb;
2235	struct ucred *active_cred;
2236	struct thread *td;
2237{
2238
2239	return (EBADF);
2240}
2241
2242static int
2243badfo_close(fp, td)
2244	struct file *fp;
2245	struct thread *td;
2246{
2247
2248	return (EBADF);
2249}
2250
2251SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR,
2252    fildesc_drvinit, NULL)
2253
2254static void filelistinit(void *);
2255SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2256
2257/* ARGSUSED*/
2258static void
2259filelistinit(dummy)
2260	void *dummy;
2261{
2262
2263	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2264	    NULL, NULL, UMA_ALIGN_PTR, 0);
2265	sx_init(&filelist_lock, "filelist lock");
2266	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2267}
2268