kern_descrip.c revision 56313
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 56313 2000-01-20 07:12:52Z imp $
40 */
41
42#include "opt_compat.h"
43#include "opt_dontuse.h"
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/sysproto.h>
47#include <sys/conf.h>
48#include <sys/filedesc.h>
49#include <sys/kernel.h>
50#include <sys/sysctl.h>
51#include <sys/vnode.h>
52#include <sys/proc.h>
53#include <sys/file.h>
54#include <sys/stat.h>
55#include <sys/filio.h>
56#include <sys/fcntl.h>
57#include <sys/malloc.h>
58#include <sys/unistd.h>
59#include <sys/resourcevar.h>
60
61#include <vm/vm.h>
62#include <vm/vm_extern.h>
63
64static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
65MALLOC_DEFINE(M_FILE, "file", "Open file structure");
66static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
67
68static	 d_open_t  fdopen;
69#define NUMFDESC 64
70
71#define CDEV_MAJOR 22
72static struct cdevsw fildesc_cdevsw = {
73	/* open */	fdopen,
74	/* close */	noclose,
75	/* read */	noread,
76	/* write */	nowrite,
77	/* ioctl */	noioctl,
78	/* poll */	nopoll,
79	/* mmap */	nommap,
80	/* strategy */	nostrategy,
81	/* name */	"FD",
82	/* maj */	CDEV_MAJOR,
83	/* dump */	nodump,
84	/* psize */	nopsize,
85	/* flags */	0,
86	/* bmaj */	-1
87};
88
89static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
90static int badfo_readwrite __P((struct file *fp, struct uio *uio,
91    struct ucred *cred, int flags, struct proc *p));
92static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
93    struct proc *p));
94static int badfo_poll __P((struct file *fp, int events,
95    struct ucred *cred, struct proc *p));
96static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
97static int badfo_close __P((struct file *fp, struct proc *p));
98
99/*
100 * Descriptor management.
101 */
102struct filelist filehead;	/* head of list of open files */
103int nfiles;			/* actual number of open files */
104extern int cmask;
105
106/*
107 * System calls on descriptors.
108 */
109#ifndef _SYS_SYSPROTO_H_
110struct getdtablesize_args {
111	int	dummy;
112};
113#endif
114/* ARGSUSED */
115int
116getdtablesize(p, uap)
117	struct proc *p;
118	struct getdtablesize_args *uap;
119{
120
121	p->p_retval[0] =
122	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
123	return (0);
124}
125
126/*
127 * Duplicate a file descriptor to a particular value.
128 */
129#ifndef _SYS_SYSPROTO_H_
130struct dup2_args {
131	u_int	from;
132	u_int	to;
133};
134#endif
135/* ARGSUSED */
136int
137dup2(p, uap)
138	struct proc *p;
139	struct dup2_args *uap;
140{
141	register struct filedesc *fdp = p->p_fd;
142	register u_int old = uap->from, new = uap->to;
143	int i, error;
144
145	if (old >= fdp->fd_nfiles ||
146	    fdp->fd_ofiles[old] == NULL ||
147	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
148	    new >= maxfilesperproc)
149		return (EBADF);
150	if (old == new) {
151		p->p_retval[0] = new;
152		return (0);
153	}
154	if (new >= fdp->fd_nfiles) {
155		if ((error = fdalloc(p, new, &i)))
156			return (error);
157		if (new != i)
158			panic("dup2: fdalloc");
159	} else if (fdp->fd_ofiles[new]) {
160		if (fdp->fd_ofileflags[new] & UF_MAPPED)
161			(void) munmapfd(p, new);
162		/*
163		 * dup2() must succeed even if the close has an error.
164		 */
165		(void) closef(fdp->fd_ofiles[new], p);
166	}
167	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
168}
169
170/*
171 * Duplicate a file descriptor.
172 */
173#ifndef _SYS_SYSPROTO_H_
174struct dup_args {
175	u_int	fd;
176};
177#endif
178/* ARGSUSED */
179int
180dup(p, uap)
181	struct proc *p;
182	struct dup_args *uap;
183{
184	register struct filedesc *fdp;
185	u_int old;
186	int new, error;
187
188	old = uap->fd;
189	fdp = p->p_fd;
190	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
191		return (EBADF);
192	if ((error = fdalloc(p, 0, &new)))
193		return (error);
194	return (finishdup(fdp, (int)old, new, p->p_retval));
195}
196
197/*
198 * The file control system call.
199 */
200#ifndef _SYS_SYSPROTO_H_
201struct fcntl_args {
202	int	fd;
203	int	cmd;
204	long	arg;
205};
206#endif
207/* ARGSUSED */
208int
209fcntl(p, uap)
210	struct proc *p;
211	register struct fcntl_args *uap;
212{
213	register struct filedesc *fdp = p->p_fd;
214	register struct file *fp;
215	register char *pop;
216	struct vnode *vp;
217	int i, tmp, error, flg = F_POSIX;
218	struct flock fl;
219	u_int newmin;
220
221	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
222	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
223		return (EBADF);
224	pop = &fdp->fd_ofileflags[uap->fd];
225	switch (uap->cmd) {
226
227	case F_DUPFD:
228		newmin = uap->arg;
229		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
230		    newmin >= maxfilesperproc)
231			return (EINVAL);
232		if ((error = fdalloc(p, newmin, &i)))
233			return (error);
234		return (finishdup(fdp, uap->fd, i, p->p_retval));
235
236	case F_GETFD:
237		p->p_retval[0] = *pop & 1;
238		return (0);
239
240	case F_SETFD:
241		*pop = (*pop &~ 1) | (uap->arg & 1);
242		return (0);
243
244	case F_GETFL:
245		p->p_retval[0] = OFLAGS(fp->f_flag);
246		return (0);
247
248	case F_SETFL:
249		fp->f_flag &= ~FCNTLFLAGS;
250		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
251		tmp = fp->f_flag & FNONBLOCK;
252		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
253		if (error)
254			return (error);
255		tmp = fp->f_flag & FASYNC;
256		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
257		if (!error)
258			return (0);
259		fp->f_flag &= ~FNONBLOCK;
260		tmp = 0;
261		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
262		return (error);
263
264	case F_GETOWN:
265		return (fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p));
266
267	case F_SETOWN:
268		return (fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p));
269
270	case F_SETLKW:
271		flg |= F_WAIT;
272		/* Fall into F_SETLK */
273
274	case F_SETLK:
275		if (fp->f_type != DTYPE_VNODE)
276			return (EBADF);
277		vp = (struct vnode *)fp->f_data;
278		/* Copy in the lock structure */
279		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
280		    sizeof(fl));
281		if (error)
282			return (error);
283		if (fl.l_whence == SEEK_CUR)
284			fl.l_start += fp->f_offset;
285		switch (fl.l_type) {
286
287		case F_RDLCK:
288			if ((fp->f_flag & FREAD) == 0)
289				return (EBADF);
290			p->p_flag |= P_ADVLOCK;
291			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg));
292
293		case F_WRLCK:
294			if ((fp->f_flag & FWRITE) == 0)
295				return (EBADF);
296			p->p_flag |= P_ADVLOCK;
297			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg));
298
299		case F_UNLCK:
300			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &fl,
301				F_POSIX));
302
303		default:
304			return (EINVAL);
305		}
306
307	case F_GETLK:
308		if (fp->f_type != DTYPE_VNODE)
309			return (EBADF);
310		vp = (struct vnode *)fp->f_data;
311		/* Copy in the lock structure */
312		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
313		    sizeof(fl));
314		if (error)
315			return (error);
316		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
317		    fl.l_type != F_UNLCK)
318			return (EINVAL);
319		if (fl.l_whence == SEEK_CUR)
320			fl.l_start += fp->f_offset;
321		if ((error = VOP_ADVLOCK(vp,(caddr_t)p->p_leader,F_GETLK,&fl,F_POSIX)))
322			return (error);
323		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
324		    sizeof(fl)));
325
326	default:
327		return (EINVAL);
328	}
329	/* NOTREACHED */
330}
331
332/*
333 * Common code for dup, dup2, and fcntl(F_DUPFD).
334 */
335static int
336finishdup(fdp, old, new, retval)
337	register struct filedesc *fdp;
338	register int old, new;
339	register_t *retval;
340{
341	register struct file *fp;
342
343	fp = fdp->fd_ofiles[old];
344	fdp->fd_ofiles[new] = fp;
345	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
346	fhold(fp);
347	if (new > fdp->fd_lastfile)
348		fdp->fd_lastfile = new;
349	*retval = new;
350	return (0);
351}
352
353/*
354 * If sigio is on the list associated with a process or process group,
355 * disable signalling from the device, remove sigio from the list and
356 * free sigio.
357 */
358void
359funsetown(sigio)
360	struct sigio *sigio;
361{
362	int s;
363
364	if (sigio == NULL)
365		return;
366	s = splhigh();
367	*(sigio->sio_myref) = NULL;
368	splx(s);
369	if (sigio->sio_pgid < 0) {
370		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
371			     sigio, sio_pgsigio);
372	} else /* if ((*sigiop)->sio_pgid > 0) */ {
373		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
374			     sigio, sio_pgsigio);
375	}
376	crfree(sigio->sio_ucred);
377	FREE(sigio, M_SIGIO);
378}
379
380/* Free a list of sigio structures. */
381void
382funsetownlst(sigiolst)
383	struct sigiolst *sigiolst;
384{
385	struct sigio *sigio;
386
387	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
388		funsetown(sigio);
389}
390
391/*
392 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
393 *
394 * After permission checking, add a sigio structure to the sigio list for
395 * the process or process group.
396 */
397int
398fsetown(pgid, sigiop)
399	pid_t pgid;
400	struct sigio **sigiop;
401{
402	struct proc *proc;
403	struct pgrp *pgrp;
404	struct sigio *sigio;
405	int s;
406
407	if (pgid == 0) {
408		funsetown(*sigiop);
409		return (0);
410	}
411	if (pgid > 0) {
412		proc = pfind(pgid);
413		if (proc == NULL)
414			return (ESRCH);
415
416		/*
417		 * Policy - Don't allow a process to FSETOWN a process
418		 * in another session.
419		 *
420		 * Remove this test to allow maximum flexibility or
421		 * restrict FSETOWN to the current process or process
422		 * group for maximum safety.
423		 */
424		if (proc->p_session != curproc->p_session)
425			return (EPERM);
426
427		pgrp = NULL;
428	} else /* if (pgid < 0) */ {
429		pgrp = pgfind(-pgid);
430		if (pgrp == NULL)
431			return (ESRCH);
432
433		/*
434		 * Policy - Don't allow a process to FSETOWN a process
435		 * in another session.
436		 *
437		 * Remove this test to allow maximum flexibility or
438		 * restrict FSETOWN to the current process or process
439		 * group for maximum safety.
440		 */
441		if (pgrp->pg_session != curproc->p_session)
442			return (EPERM);
443
444		proc = NULL;
445	}
446	funsetown(*sigiop);
447	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
448	if (pgid > 0) {
449		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
450		sigio->sio_proc = proc;
451	} else {
452		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
453		sigio->sio_pgrp = pgrp;
454	}
455	sigio->sio_pgid = pgid;
456	crhold(curproc->p_ucred);
457	sigio->sio_ucred = curproc->p_ucred;
458	/* It would be convenient if p_ruid was in ucred. */
459	sigio->sio_ruid = curproc->p_cred->p_ruid;
460	sigio->sio_myref = sigiop;
461	s = splhigh();
462	*sigiop = sigio;
463	splx(s);
464	return (0);
465}
466
467/*
468 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
469 */
470pid_t
471fgetown(sigio)
472	struct sigio *sigio;
473{
474	return (sigio != NULL ? sigio->sio_pgid : 0);
475}
476
477/*
478 * Close a file descriptor.
479 */
480#ifndef _SYS_SYSPROTO_H_
481struct close_args {
482        int     fd;
483};
484#endif
485/* ARGSUSED */
486int
487close(p, uap)
488	struct proc *p;
489	struct close_args *uap;
490{
491	register struct filedesc *fdp = p->p_fd;
492	register struct file *fp;
493	register int fd = uap->fd;
494	register u_char *pf;
495
496	if ((unsigned)fd >= fdp->fd_nfiles ||
497	    (fp = fdp->fd_ofiles[fd]) == NULL)
498		return (EBADF);
499	pf = (u_char *)&fdp->fd_ofileflags[fd];
500	if (*pf & UF_MAPPED)
501		(void) munmapfd(p, fd);
502	fdp->fd_ofiles[fd] = NULL;
503	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
504		fdp->fd_lastfile--;
505	if (fd < fdp->fd_freefile)
506		fdp->fd_freefile = fd;
507	*pf = 0;
508	return (closef(fp, p));
509}
510
511#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
512/*
513 * Return status information about a file descriptor.
514 */
515#ifndef _SYS_SYSPROTO_H_
516struct ofstat_args {
517	int	fd;
518	struct	ostat *sb;
519};
520#endif
521/* ARGSUSED */
522int
523ofstat(p, uap)
524	struct proc *p;
525	register struct ofstat_args *uap;
526{
527	register struct filedesc *fdp = p->p_fd;
528	register struct file *fp;
529	struct stat ub;
530	struct ostat oub;
531	int error;
532
533	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
534	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
535		return (EBADF);
536	error = fo_stat(fp, &ub, p);
537	if (error == 0) {
538		cvtstat(&ub, &oub);
539		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
540	}
541	return (error);
542}
543#endif /* COMPAT_43 || COMPAT_SUNOS */
544
545/*
546 * Return status information about a file descriptor.
547 */
548#ifndef _SYS_SYSPROTO_H_
549struct fstat_args {
550	int	fd;
551	struct	stat *sb;
552};
553#endif
554/* ARGSUSED */
555int
556fstat(p, uap)
557	struct proc *p;
558	register struct fstat_args *uap;
559{
560	register struct filedesc *fdp = p->p_fd;
561	register struct file *fp;
562	struct stat ub;
563	int error;
564
565	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
566	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
567		return (EBADF);
568	error = fo_stat(fp, &ub, p);
569	if (error == 0)
570		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
571	return (error);
572}
573
574/*
575 * Return status information about a file descriptor.
576 */
577#ifndef _SYS_SYSPROTO_H_
578struct nfstat_args {
579	int	fd;
580	struct	nstat *sb;
581};
582#endif
583/* ARGSUSED */
584int
585nfstat(p, uap)
586	struct proc *p;
587	register struct nfstat_args *uap;
588{
589	register struct filedesc *fdp = p->p_fd;
590	register struct file *fp;
591	struct stat ub;
592	struct nstat nub;
593	int error;
594
595	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
596	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
597		return (EBADF);
598	error = fo_stat(fp, &ub, p);
599	if (error == 0) {
600		cvtnstat(&ub, &nub);
601		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
602	}
603	return (error);
604}
605
606/*
607 * Return pathconf information about a file descriptor.
608 */
609#ifndef _SYS_SYSPROTO_H_
610struct fpathconf_args {
611	int	fd;
612	int	name;
613};
614#endif
615/* ARGSUSED */
616int
617fpathconf(p, uap)
618	struct proc *p;
619	register struct fpathconf_args *uap;
620{
621	struct filedesc *fdp = p->p_fd;
622	struct file *fp;
623	struct vnode *vp;
624
625	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
626	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
627		return (EBADF);
628	switch (fp->f_type) {
629
630	case DTYPE_PIPE:
631	case DTYPE_SOCKET:
632		if (uap->name != _PC_PIPE_BUF)
633			return (EINVAL);
634		p->p_retval[0] = PIPE_BUF;
635		return (0);
636
637	case DTYPE_FIFO:
638	case DTYPE_VNODE:
639		vp = (struct vnode *)fp->f_data;
640		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
641
642	default:
643		panic("fpathconf");
644	}
645	/*NOTREACHED*/
646}
647
648/*
649 * Allocate a file descriptor for the process.
650 */
651static int fdexpand;
652SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
653
654int
655fdalloc(p, want, result)
656	struct proc *p;
657	int want;
658	int *result;
659{
660	register struct filedesc *fdp = p->p_fd;
661	register int i;
662	int lim, last, nfiles;
663	struct file **newofile;
664	char *newofileflags;
665
666	/*
667	 * Search for a free descriptor starting at the higher
668	 * of want or fd_freefile.  If that fails, consider
669	 * expanding the ofile array.
670	 */
671	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
672	for (;;) {
673		last = min(fdp->fd_nfiles, lim);
674		if ((i = want) < fdp->fd_freefile)
675			i = fdp->fd_freefile;
676		for (; i < last; i++) {
677			if (fdp->fd_ofiles[i] == NULL) {
678				fdp->fd_ofileflags[i] = 0;
679				if (i > fdp->fd_lastfile)
680					fdp->fd_lastfile = i;
681				if (want <= fdp->fd_freefile)
682					fdp->fd_freefile = i;
683				*result = i;
684				return (0);
685			}
686		}
687
688		/*
689		 * No space in current array.  Expand?
690		 */
691		if (fdp->fd_nfiles >= lim)
692			return (EMFILE);
693		if (fdp->fd_nfiles < NDEXTENT)
694			nfiles = NDEXTENT;
695		else
696			nfiles = 2 * fdp->fd_nfiles;
697		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
698		    M_FILEDESC, M_WAITOK);
699		newofileflags = (char *) &newofile[nfiles];
700		/*
701		 * Copy the existing ofile and ofileflags arrays
702		 * and zero the new portion of each array.
703		 */
704		bcopy(fdp->fd_ofiles, newofile,
705			(i = sizeof(struct file *) * fdp->fd_nfiles));
706		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
707		bcopy(fdp->fd_ofileflags, newofileflags,
708			(i = sizeof(char) * fdp->fd_nfiles));
709		bzero(newofileflags + i, nfiles * sizeof(char) - i);
710		if (fdp->fd_nfiles > NDFILE)
711			FREE(fdp->fd_ofiles, M_FILEDESC);
712		fdp->fd_ofiles = newofile;
713		fdp->fd_ofileflags = newofileflags;
714		fdp->fd_nfiles = nfiles;
715		fdexpand++;
716	}
717	return (0);
718}
719
720/*
721 * Check to see whether n user file descriptors
722 * are available to the process p.
723 */
724int
725fdavail(p, n)
726	struct proc *p;
727	register int n;
728{
729	register struct filedesc *fdp = p->p_fd;
730	register struct file **fpp;
731	register int i, lim, last;
732
733	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
734	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
735		return (1);
736
737	last = min(fdp->fd_nfiles, lim);
738	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
739	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
740		if (*fpp == NULL && --n <= 0)
741			return (1);
742	return (0);
743}
744
745/*
746 * Create a new open file structure and allocate
747 * a file decriptor for the process that refers to it.
748 */
749int
750falloc(p, resultfp, resultfd)
751	register struct proc *p;
752	struct file **resultfp;
753	int *resultfd;
754{
755	register struct file *fp, *fq;
756	int error, i;
757
758	if ((error = fdalloc(p, 0, &i)))
759		return (error);
760	if (nfiles >= maxfiles) {
761		tablefull("file");
762		return (ENFILE);
763	}
764	/*
765	 * Allocate a new file descriptor.
766	 * If the process has file descriptor zero open, add to the list
767	 * of open files at that point, otherwise put it at the front of
768	 * the list of open files.
769	 */
770	nfiles++;
771	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
772	bzero(fp, sizeof(struct file));
773	fp->f_count = 1;
774	fp->f_cred = p->p_ucred;
775	fp->f_ops = &badfileops;
776	fp->f_seqcount = 1;
777	crhold(fp->f_cred);
778	if ((fq = p->p_fd->fd_ofiles[0])) {
779		LIST_INSERT_AFTER(fq, fp, f_list);
780	} else {
781		LIST_INSERT_HEAD(&filehead, fp, f_list);
782	}
783	p->p_fd->fd_ofiles[i] = fp;
784	if (resultfp)
785		*resultfp = fp;
786	if (resultfd)
787		*resultfd = i;
788	return (0);
789}
790
791/*
792 * Free a file descriptor.
793 */
794void
795ffree(fp)
796	register struct file *fp;
797{
798	LIST_REMOVE(fp, f_list);
799	crfree(fp->f_cred);
800#if defined(DIAGNOSTIC) || defined(INVARIANTS)
801	fp->f_count = 0;
802#endif
803	nfiles--;
804	FREE(fp, M_FILE);
805}
806
807/*
808 * Build a new filedesc structure.
809 */
810struct filedesc *
811fdinit(p)
812	struct proc *p;
813{
814	register struct filedesc0 *newfdp;
815	register struct filedesc *fdp = p->p_fd;
816
817	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
818	    M_FILEDESC, M_WAITOK);
819	bzero(newfdp, sizeof(struct filedesc0));
820	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
821	VREF(newfdp->fd_fd.fd_cdir);
822	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
823	VREF(newfdp->fd_fd.fd_rdir);
824	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
825	if (newfdp->fd_fd.fd_jdir)
826		VREF(newfdp->fd_fd.fd_jdir);
827
828	/* Create the file descriptor table. */
829	newfdp->fd_fd.fd_refcnt = 1;
830	newfdp->fd_fd.fd_cmask = cmask;
831	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
832	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
833	newfdp->fd_fd.fd_nfiles = NDFILE;
834
835	newfdp->fd_fd.fd_freefile = 0;
836	newfdp->fd_fd.fd_lastfile = 0;
837
838	return (&newfdp->fd_fd);
839}
840
841/*
842 * Share a filedesc structure.
843 */
844struct filedesc *
845fdshare(p)
846	struct proc *p;
847{
848	p->p_fd->fd_refcnt++;
849	return (p->p_fd);
850}
851
852/*
853 * Copy a filedesc structure.
854 */
855struct filedesc *
856fdcopy(p)
857	struct proc *p;
858{
859	register struct filedesc *newfdp, *fdp = p->p_fd;
860	register struct file **fpp;
861	register int i;
862
863	/* Certain daemons might not have file descriptors. */
864	if (fdp == NULL)
865		return (NULL);
866
867	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
868	    M_FILEDESC, M_WAITOK);
869	bcopy(fdp, newfdp, sizeof(struct filedesc));
870	VREF(newfdp->fd_cdir);
871	VREF(newfdp->fd_rdir);
872	if (newfdp->fd_jdir)
873		VREF(newfdp->fd_jdir);
874	newfdp->fd_refcnt = 1;
875
876	/*
877	 * If the number of open files fits in the internal arrays
878	 * of the open file structure, use them, otherwise allocate
879	 * additional memory for the number of descriptors currently
880	 * in use.
881	 */
882	if (newfdp->fd_lastfile < NDFILE) {
883		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
884		newfdp->fd_ofileflags =
885		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
886		i = NDFILE;
887	} else {
888		/*
889		 * Compute the smallest multiple of NDEXTENT needed
890		 * for the file descriptors currently in use,
891		 * allowing the table to shrink.
892		 */
893		i = newfdp->fd_nfiles;
894		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
895			i /= 2;
896		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
897		    M_FILEDESC, M_WAITOK);
898		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
899	}
900	newfdp->fd_nfiles = i;
901	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
902	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
903	fpp = newfdp->fd_ofiles;
904	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
905		if (*fpp != NULL)
906			fhold(*fpp);
907	return (newfdp);
908}
909
910/*
911 * Release a filedesc structure.
912 */
913void
914fdfree(p)
915	struct proc *p;
916{
917	register struct filedesc *fdp = p->p_fd;
918	struct file **fpp;
919	register int i;
920
921	/* Certain daemons might not have file descriptors. */
922	if (fdp == NULL)
923		return;
924
925	if (--fdp->fd_refcnt > 0)
926		return;
927	fpp = fdp->fd_ofiles;
928	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
929		if (*fpp)
930			(void) closef(*fpp, p);
931	if (fdp->fd_nfiles > NDFILE)
932		FREE(fdp->fd_ofiles, M_FILEDESC);
933	vrele(fdp->fd_cdir);
934	vrele(fdp->fd_rdir);
935	if (fdp->fd_jdir)
936		vrele(fdp->fd_jdir);
937	FREE(fdp, M_FILEDESC);
938}
939
/*
 * For setuid/setgid programs we don't want people to use that setuidness
 * to generate error messages that get written to a file which would
 * otherwise be off limits to the process.
 *
 * This is a gross hack to plug the hole.  A better solution would involve
 * a special vop or other form of generalized access control mechanism.  We
 * go ahead and just reject all procfs file system accesses as dangerous.
 *
 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.  We also don't check for setugidness since we know we are.
 *
 * Returns 1 when fp is a vnode tagged VT_PROCFS (PROCFS kernels only),
 * 0 otherwise.
 */
static int
is_unsafe(struct file *fp)
{
#if PROCFS
	struct vnode *vp;

	if (fp->f_type != DTYPE_VNODE)
		return (0);
	vp = (struct vnode *)fp->f_data;
	if (vp->v_tag == VT_PROCFS)
		return (1);
#endif
	return (0);
}
962
963/*
964 * Make this setguid thing safe, if at all possible.
965 */
966void
967setugidsafety(p)
968	struct proc *p;
969{
970	struct filedesc *fdp = p->p_fd;
971	struct file **fpp;
972	char *fdfp;
973	register int i;
974
975	/* Certain daemons might not have file descriptors. */
976	if (fdp == NULL)
977		return;
978
979	fpp = fdp->fd_ofiles;
980	fdfp = fdp->fd_ofileflags;
981	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) {
982		if (i > 2)
983			break;
984		if (*fpp != NULL && is_unsafe(*fpp)) {
985			if (*fdfp & UF_MAPPED)
986				(void) munmapfd(p, i);
987			(void) closef(*fpp, p);
988			*fpp = NULL;
989			*fdfp = 0;
990			if (i < fdp->fd_freefile)
991				fdp->fd_freefile = i;
992		}
993	}
994	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
995		fdp->fd_lastfile--;
996}
997
998/*
999 * Close any files on exec?
1000 */
1001void
1002fdcloseexec(p)
1003	struct proc *p;
1004{
1005	struct filedesc *fdp = p->p_fd;
1006	struct file **fpp;
1007	char *fdfp;
1008	register int i;
1009
1010	/* Certain daemons might not have file descriptors. */
1011	if (fdp == NULL)
1012		return;
1013
1014	fpp = fdp->fd_ofiles;
1015	fdfp = fdp->fd_ofileflags;
1016	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1017		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1018			if (*fdfp & UF_MAPPED)
1019				(void) munmapfd(p, i);
1020			(void) closef(*fpp, p);
1021			*fpp = NULL;
1022			*fdfp = 0;
1023			if (i < fdp->fd_freefile)
1024				fdp->fd_freefile = i;
1025		}
1026	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1027		fdp->fd_lastfile--;
1028}
1029
1030/*
1031 * Internal form of close.
1032 * Decrement reference count on file structure.
1033 * Note: p may be NULL when closing a file
1034 * that was being passed in a message.
1035 */
1036int
1037closef(fp, p)
1038	register struct file *fp;
1039	register struct proc *p;
1040{
1041	struct vnode *vp;
1042	struct flock lf;
1043
1044	if (fp == NULL)
1045		return (0);
1046	/*
1047	 * POSIX record locking dictates that any close releases ALL
1048	 * locks owned by this process.  This is handled by setting
1049	 * a flag in the unlock to free ONLY locks obeying POSIX
1050	 * semantics, and not to free BSD-style file locks.
1051	 * If the descriptor was in a message, POSIX-style locks
1052	 * aren't passed with the descriptor.
1053	 */
1054	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1055		lf.l_whence = SEEK_SET;
1056		lf.l_start = 0;
1057		lf.l_len = 0;
1058		lf.l_type = F_UNLCK;
1059		vp = (struct vnode *)fp->f_data;
1060		(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX);
1061	}
1062	return (fdrop(fp, p));
1063}
1064
1065int
1066fdrop(fp, p)
1067	struct file *fp;
1068	struct proc *p;
1069{
1070	struct flock lf;
1071	struct vnode *vp;
1072	int error;
1073
1074	if (--fp->f_count > 0)
1075		return (0);
1076	if (fp->f_count < 0)
1077		panic("fdrop: count < 0");
1078	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1079		lf.l_whence = SEEK_SET;
1080		lf.l_start = 0;
1081		lf.l_len = 0;
1082		lf.l_type = F_UNLCK;
1083		vp = (struct vnode *)fp->f_data;
1084		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1085	}
1086	if (fp->f_ops != &badfileops)
1087		error = fo_close(fp, p);
1088	else
1089		error = 0;
1090	ffree(fp);
1091	return (error);
1092}
1093
1094/*
1095 * Apply an advisory lock on a file descriptor.
1096 *
1097 * Just attempt to get a record lock of the requested type on
1098 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1099 */
1100#ifndef _SYS_SYSPROTO_H_
1101struct flock_args {
1102	int	fd;
1103	int	how;
1104};
1105#endif
1106/* ARGSUSED */
1107int
1108flock(p, uap)
1109	struct proc *p;
1110	register struct flock_args *uap;
1111{
1112	register struct filedesc *fdp = p->p_fd;
1113	register struct file *fp;
1114	struct vnode *vp;
1115	struct flock lf;
1116
1117	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1118	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1119		return (EBADF);
1120	if (fp->f_type != DTYPE_VNODE)
1121		return (EOPNOTSUPP);
1122	vp = (struct vnode *)fp->f_data;
1123	lf.l_whence = SEEK_SET;
1124	lf.l_start = 0;
1125	lf.l_len = 0;
1126	if (uap->how & LOCK_UN) {
1127		lf.l_type = F_UNLCK;
1128		fp->f_flag &= ~FHASLOCK;
1129		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1130	}
1131	if (uap->how & LOCK_EX)
1132		lf.l_type = F_WRLCK;
1133	else if (uap->how & LOCK_SH)
1134		lf.l_type = F_RDLCK;
1135	else
1136		return (EBADF);
1137	fp->f_flag |= FHASLOCK;
1138	if (uap->how & LOCK_NB)
1139		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1140	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1141}
1142
1143/*
1144 * File Descriptor pseudo-device driver (/dev/fd/).
1145 *
1146 * Opening minor device N dup()s the file (if any) connected to file
1147 * descriptor N belonging to the calling process.  Note that this driver
1148 * consists of only the ``open()'' routine, because all subsequent
1149 * references to this file will be direct to the other driver.
1150 */
1151/* ARGSUSED */
1152static int
1153fdopen(dev, mode, type, p)
1154	dev_t dev;
1155	int mode, type;
1156	struct proc *p;
1157{
1158
1159	/*
1160	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1161	 * the file descriptor being sought for duplication. The error
1162	 * return ensures that the vnode for this device will be released
1163	 * by vn_open. Open will detect this special error and take the
1164	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1165	 * will simply report the error.
1166	 */
1167	p->p_dupfd = minor(dev);
1168	return (ENODEV);
1169}
1170
1171/*
1172 * Duplicate the specified descriptor to a free descriptor.
1173 */
1174int
1175dupfdopen(fdp, indx, dfd, mode, error)
1176	register struct filedesc *fdp;
1177	register int indx, dfd;
1178	int mode;
1179	int error;
1180{
1181	register struct file *wfp;
1182	struct file *fp;
1183
1184	/*
1185	 * If the to-be-dup'd fd number is greater than the allowed number
1186	 * of file descriptors, or the fd to be dup'd has already been
1187	 * closed, reject.  Note, check for new == old is necessary as
1188	 * falloc could allocate an already closed to-be-dup'd descriptor
1189	 * as the new descriptor.
1190	 */
1191	fp = fdp->fd_ofiles[indx];
1192	if ((u_int)dfd >= fdp->fd_nfiles ||
1193	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1194		return (EBADF);
1195
1196	/*
1197	 * There are two cases of interest here.
1198	 *
1199	 * For ENODEV simply dup (dfd) to file descriptor
1200	 * (indx) and return.
1201	 *
1202	 * For ENXIO steal away the file structure from (dfd) and
1203	 * store it in (indx).  (dfd) is effectively closed by
1204	 * this operation.
1205	 *
1206	 * Any other error code is just returned.
1207	 */
1208	switch (error) {
1209	case ENODEV:
1210		/*
1211		 * Check that the mode the file is being opened for is a
1212		 * subset of the mode of the existing descriptor.
1213		 */
1214		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1215			return (EACCES);
1216		fdp->fd_ofiles[indx] = wfp;
1217		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1218		fhold(wfp);
1219		if (indx > fdp->fd_lastfile)
1220			fdp->fd_lastfile = indx;
1221		return (0);
1222
1223	case ENXIO:
1224		/*
1225		 * Steal away the file pointer from dfd, and stuff it into indx.
1226		 */
1227		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1228		fdp->fd_ofiles[dfd] = NULL;
1229		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1230		fdp->fd_ofileflags[dfd] = 0;
1231		/*
1232		 * Complete the clean up of the filedesc structure by
1233		 * recomputing the various hints.
1234		 */
1235		if (indx > fdp->fd_lastfile)
1236			fdp->fd_lastfile = indx;
1237		else
1238			while (fdp->fd_lastfile > 0 &&
1239			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1240				fdp->fd_lastfile--;
1241			if (dfd < fdp->fd_freefile)
1242				fdp->fd_freefile = dfd;
1243		return (0);
1244
1245	default:
1246		return (error);
1247	}
1248	/* NOTREACHED */
1249}
1250
1251/*
1252 * Get file structures.
1253 */
1254static int
1255sysctl_kern_file SYSCTL_HANDLER_ARGS
1256{
1257	int error;
1258	struct file *fp;
1259
1260	if (!req->oldptr) {
1261		/*
1262		 * overestimate by 10 files
1263		 */
1264		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1265				(nfiles + 10) * sizeof(struct file)));
1266	}
1267
1268	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1269	if (error)
1270		return (error);
1271
1272	/*
1273	 * followed by an array of file structures
1274	 */
1275	LIST_FOREACH(fp, &filehead, f_list) {
1276		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1277		if (error)
1278			return (error);
1279	}
1280	return (0);
1281}
1282
/*
 * "file" node under _kern (number KERN_FILE): an opaque, read-only
 * (CTLFLAG_RD) value whose contents are produced by sysctl_kern_file().
 */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,file", "Entire file table");

/*
 * "maxfilesperproc" node under _kern: integer backed directly by the
 * maxfilesperproc variable, registered read-write (CTLFLAG_RW).
 */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

/*
 * "maxfiles" node under _kern: integer backed directly by the maxfiles
 * variable, registered read-write (CTLFLAG_RW).
 */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");
1291
1292static void
1293fildesc_drvinit(void *unused)
1294{
1295	int fd;
1296
1297	for (fd = 0; fd < NUMFDESC; fd++)
1298		make_dev(&fildesc_cdevsw, fd,
1299		    UID_BIN, GID_BIN, 0666, "fd/%d", fd);
1300	make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
1301	make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
1302	make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
1303}
1304
/*
 * File operations vector made up entirely of the badfo_* stubs defined
 * in this file.  All of them return EBADF except badfo_poll, which
 * returns 0.
 *
 * The initializer is positional, so the order of the entries must follow
 * the member order of struct fileops.
 * NOTE(review): judging by the stub names the slots are read, write,
 * ioctl, poll, stat and close -- confirm against the declaration of
 * struct fileops.
 */
struct fileops badfileops = {
	badfo_readwrite,	/* shared by two slots, presumably read ... */
	badfo_readwrite,	/* ... and write */
	badfo_ioctl,
	badfo_poll,
	badfo_stat,
	badfo_close
};
1313
/*
 * Read/write stub used by badfileops: ignores every argument, transfers
 * nothing and always returns EBADF.
 */
/* ARGSUSED */
static int
badfo_readwrite(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	int flags;
	struct proc *p;
{

	return (EBADF);
}
1325
1326static int
1327badfo_ioctl(fp, com, data, p)
1328	struct file *fp;
1329	u_long com;
1330	caddr_t data;
1331	struct proc *p;
1332{
1333
1334	return (EBADF);
1335}
1336
/*
 * poll stub used by badfileops: ignores every argument and always
 * returns 0 (presumably meaning that none of the requested events is
 * pending).  Unlike the other badfo_* stubs it does not return EBADF.
 */
/* ARGSUSED */
static int
badfo_poll(fp, events, cred, p)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct proc *p;
{

	return (0);
}
1347
/*
 * stat stub used by badfileops: ignores every argument, leaves *sb
 * untouched and always returns EBADF.
 */
/* ARGSUSED */
static int
badfo_stat(fp, sb, p)
	struct file *fp;
	struct stat *sb;
	struct proc *p;
{

	return (EBADF);
}
1357
/*
 * close stub used by badfileops: ignores both arguments and always
 * returns EBADF.
 */
/* ARGSUSED */
static int
badfo_close(fp, p)
	struct file *fp;
	struct proc *p;
{

	return (EBADF);
}
1366
/*
 * Register fildesc_drvinit() under the name fildescdev with subsystem
 * SI_SUB_DRIVERS and order SI_ORDER_MIDDLE+CDEV_MAJOR; it is passed NULL
 * as its argument.
 */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)
1369