kern_descrip.c revision 47625
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.61 1999/05/11 19:54:28 phk Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_devfs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/kernel.h>
51#include <sys/sysctl.h>
52#include <sys/vnode.h>
53#include <sys/proc.h>
54#include <sys/file.h>
55#include <sys/socketvar.h>
56#include <sys/stat.h>
57#include <sys/filio.h>
58#include <sys/ttycom.h>
59#include <sys/fcntl.h>
60#include <sys/malloc.h>
61#include <sys/unistd.h>
62#include <sys/resourcevar.h>
63#include <sys/pipe.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67
68#ifdef DEVFS
69#include <sys/devfsext.h>
70#endif /*DEVFS*/
71
72static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
73MALLOC_DEFINE(M_FILE, "file", "Open file structure");
74static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
75
76
77static	 d_open_t  fdopen;
78#define NUMFDESC 64
79
80#define CDEV_MAJOR 22
81static struct cdevsw fildesc_cdevsw = {
82	/* open */	fdopen,
83	/* close */	noclose,
84	/* read */	noread,
85	/* write */	nowrite,
86	/* ioctl */	noioctl,
87	/* stop */	nostop,
88	/* reset */	noreset,
89	/* devtotty */	nodevtotty,
90	/* poll */	nopoll,
91	/* mmap */	nommap,
92	/* strategy */	nostrategy,
93	/* name */	"FD",
94	/* parms */	noparms,
95	/* maj */	CDEV_MAJOR,
96	/* dump */	nodump,
97	/* psize */	nopsize,
98	/* flags */	0,
99	/* maxio */	0,
100	/* bmaj */	-1
101};
102
103static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
104/*
105 * Descriptor management.
106 */
107struct filelist filehead;	/* head of list of open files */
108int nfiles;			/* actual number of open files */
109extern int cmask;
110
111/*
112 * System calls on descriptors.
113 */
114#ifndef _SYS_SYSPROTO_H_
115struct getdtablesize_args {
116	int	dummy;
117};
118#endif
119/* ARGSUSED */
120int
121getdtablesize(p, uap)
122	struct proc *p;
123	struct getdtablesize_args *uap;
124{
125
126	p->p_retval[0] =
127	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
128	return (0);
129}
130
131/*
132 * Duplicate a file descriptor to a particular value.
133 */
134#ifndef _SYS_SYSPROTO_H_
135struct dup2_args {
136	u_int	from;
137	u_int	to;
138};
139#endif
140/* ARGSUSED */
141int
142dup2(p, uap)
143	struct proc *p;
144	struct dup2_args *uap;
145{
146	register struct filedesc *fdp = p->p_fd;
147	register u_int old = uap->from, new = uap->to;
148	int i, error;
149
150	if (old >= fdp->fd_nfiles ||
151	    fdp->fd_ofiles[old] == NULL ||
152	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
153	    new >= maxfilesperproc)
154		return (EBADF);
155	if (old == new) {
156		p->p_retval[0] = new;
157		return (0);
158	}
159	if (new >= fdp->fd_nfiles) {
160		if ((error = fdalloc(p, new, &i)))
161			return (error);
162		if (new != i)
163			panic("dup2: fdalloc");
164	} else if (fdp->fd_ofiles[new]) {
165		if (fdp->fd_ofileflags[new] & UF_MAPPED)
166			(void) munmapfd(p, new);
167		/*
168		 * dup2() must succeed even if the close has an error.
169		 */
170		(void) closef(fdp->fd_ofiles[new], p);
171	}
172	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
173}
174
175/*
176 * Duplicate a file descriptor.
177 */
178#ifndef _SYS_SYSPROTO_H_
179struct dup_args {
180	u_int	fd;
181};
182#endif
183/* ARGSUSED */
184int
185dup(p, uap)
186	struct proc *p;
187	struct dup_args *uap;
188{
189	register struct filedesc *fdp;
190	u_int old;
191	int new, error;
192
193	old = uap->fd;
194
195#if 0
196	/*
197	 * XXX Compatibility
198	 */
199	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
200#endif
201
202	fdp = p->p_fd;
203	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
204		return (EBADF);
205	if ((error = fdalloc(p, 0, &new)))
206		return (error);
207	return (finishdup(fdp, (int)old, new, p->p_retval));
208}
209
210/*
211 * The file control system call.
212 */
213#ifndef _SYS_SYSPROTO_H_
214struct fcntl_args {
215	int	fd;
216	int	cmd;
217	long	arg;
218};
219#endif
220/* ARGSUSED */
221int
222fcntl(p, uap)
223	struct proc *p;
224	register struct fcntl_args *uap;
225{
226	register struct filedesc *fdp = p->p_fd;
227	register struct file *fp;
228	register char *pop;
229	struct vnode *vp;
230	int i, tmp, error, flg = F_POSIX;
231	struct flock fl;
232	u_int newmin;
233
234	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
235	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
236		return (EBADF);
237	pop = &fdp->fd_ofileflags[uap->fd];
238	switch (uap->cmd) {
239
240	case F_DUPFD:
241		newmin = uap->arg;
242		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
243		    newmin >= maxfilesperproc)
244			return (EINVAL);
245		if ((error = fdalloc(p, newmin, &i)))
246			return (error);
247		return (finishdup(fdp, uap->fd, i, p->p_retval));
248
249	case F_GETFD:
250		p->p_retval[0] = *pop & 1;
251		return (0);
252
253	case F_SETFD:
254		*pop = (*pop &~ 1) | (uap->arg & 1);
255		return (0);
256
257	case F_GETFL:
258		p->p_retval[0] = OFLAGS(fp->f_flag);
259		return (0);
260
261	case F_SETFL:
262		fp->f_flag &= ~FCNTLFLAGS;
263		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
264		tmp = fp->f_flag & FNONBLOCK;
265		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
266		if (error)
267			return (error);
268		tmp = fp->f_flag & FASYNC;
269		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
270		if (!error)
271			return (0);
272		fp->f_flag &= ~FNONBLOCK;
273		tmp = 0;
274		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
275		return (error);
276
277	case F_GETOWN:
278		error = (*fp->f_ops->fo_ioctl)
279			(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
280		return (error);
281
282	case F_SETOWN:
283		return ((*fp->f_ops->fo_ioctl)
284			(fp, FIOSETOWN, (caddr_t)&uap->arg, p));
285
286	case F_SETLKW:
287		flg |= F_WAIT;
288		/* Fall into F_SETLK */
289
290	case F_SETLK:
291		if (fp->f_type != DTYPE_VNODE)
292			return (EBADF);
293		vp = (struct vnode *)fp->f_data;
294		/* Copy in the lock structure */
295		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
296		    sizeof(fl));
297		if (error)
298			return (error);
299		if (fl.l_whence == SEEK_CUR)
300			fl.l_start += fp->f_offset;
301		switch (fl.l_type) {
302
303		case F_RDLCK:
304			if ((fp->f_flag & FREAD) == 0)
305				return (EBADF);
306			p->p_flag |= P_ADVLOCK;
307			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
308
309		case F_WRLCK:
310			if ((fp->f_flag & FWRITE) == 0)
311				return (EBADF);
312			p->p_flag |= P_ADVLOCK;
313			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
314
315		case F_UNLCK:
316			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
317				F_POSIX));
318
319		default:
320			return (EINVAL);
321		}
322
323	case F_GETLK:
324		if (fp->f_type != DTYPE_VNODE)
325			return (EBADF);
326		vp = (struct vnode *)fp->f_data;
327		/* Copy in the lock structure */
328		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
329		    sizeof(fl));
330		if (error)
331			return (error);
332		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
333		    fl.l_type != F_UNLCK)
334			return (EINVAL);
335		if (fl.l_whence == SEEK_CUR)
336			fl.l_start += fp->f_offset;
337		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
338			return (error);
339		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
340		    sizeof(fl)));
341
342	default:
343		return (EINVAL);
344	}
345	/* NOTREACHED */
346}
347
348/*
349 * Common code for dup, dup2, and fcntl(F_DUPFD).
350 */
351static int
352finishdup(fdp, old, new, retval)
353	register struct filedesc *fdp;
354	register int old, new;
355	register_t *retval;
356{
357	register struct file *fp;
358
359	fp = fdp->fd_ofiles[old];
360	fdp->fd_ofiles[new] = fp;
361	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
362	fp->f_count++;
363	if (new > fdp->fd_lastfile)
364		fdp->fd_lastfile = new;
365	*retval = new;
366	return (0);
367}
368
369/*
370 * If sigio is on the list associated with a process or process group,
371 * disable signalling from the device, remove sigio from the list and
372 * free sigio.
373 */
374void
375funsetown(sigio)
376	struct sigio *sigio;
377{
378	int s;
379
380	if (sigio == NULL)
381		return;
382	s = splhigh();
383	*(sigio->sio_myref) = NULL;
384	splx(s);
385	if (sigio->sio_pgid < 0) {
386		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
387			     sigio, sio_pgsigio);
388	} else /* if ((*sigiop)->sio_pgid > 0) */ {
389		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
390			     sigio, sio_pgsigio);
391	}
392	crfree(sigio->sio_ucred);
393	FREE(sigio, M_SIGIO);
394}
395
396/* Free a list of sigio structures. */
397void
398funsetownlst(sigiolst)
399	struct sigiolst *sigiolst;
400{
401	struct sigio *sigio;
402
403	while ((sigio = sigiolst->slh_first) != NULL)
404		funsetown(sigio);
405}
406
407/*
408 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
409 *
410 * After permission checking, add a sigio structure to the sigio list for
411 * the process or process group.
412 */
413int
414fsetown(pgid, sigiop)
415	pid_t pgid;
416	struct sigio **sigiop;
417{
418	struct proc *proc;
419	struct pgrp *pgrp;
420	struct sigio *sigio;
421	int s;
422
423	if (pgid == 0) {
424		funsetown(*sigiop);
425		return (0);
426	}
427	if (pgid > 0) {
428		proc = pfind(pgid);
429		if (proc == NULL)
430			return (ESRCH);
431		/*
432		 * Policy - Don't allow a process to FSETOWN a process
433		 * in another session.
434		 *
435		 * Remove this test to allow maximum flexibility or
436		 * restrict FSETOWN to the current process or process
437		 * group for maximum safety.
438		 */
439		else if (proc->p_session != curproc->p_session)
440			return (EPERM);
441		pgrp = NULL;
442	} else /* if (pgid < 0) */ {
443		pgrp = pgfind(-pgid);
444		if (pgrp == NULL)
445			return (ESRCH);
446		/*
447		 * Policy - Don't allow a process to FSETOWN a process
448		 * in another session.
449		 *
450		 * Remove this test to allow maximum flexibility or
451		 * restrict FSETOWN to the current process or process
452		 * group for maximum safety.
453		 */
454		else if (pgrp->pg_session != curproc->p_session)
455			return (EPERM);
456		proc = NULL;
457	}
458	funsetown(*sigiop);
459	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO,
460	       M_WAITOK);
461	if (pgid > 0) {
462		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
463		sigio->sio_proc = proc;
464	} else {
465		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
466		sigio->sio_pgrp = pgrp;
467	}
468	sigio->sio_pgid = pgid;
469	crhold(curproc->p_ucred);
470	sigio->sio_ucred = curproc->p_ucred;
471	/* It would be convenient if p_ruid was in ucred. */
472	sigio->sio_ruid = curproc->p_cred->p_ruid;
473	sigio->sio_myref = sigiop;
474	s = splhigh();
475	*sigiop = sigio;
476	splx(s);
477	return (0);
478}
479
480/*
481 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
482 */
483pid_t
484fgetown(sigio)
485	struct sigio *sigio;
486{
487	return (sigio != NULL ? sigio->sio_pgid : 0);
488}
489
490/*
491 * Close a file descriptor.
492 */
493#ifndef _SYS_SYSPROTO_H_
494struct close_args {
495        int     fd;
496};
497#endif
498/* ARGSUSED */
499int
500close(p, uap)
501	struct proc *p;
502	struct close_args *uap;
503{
504	register struct filedesc *fdp = p->p_fd;
505	register struct file *fp;
506	register int fd = uap->fd;
507	register u_char *pf;
508
509	if ((unsigned)fd >= fdp->fd_nfiles ||
510	    (fp = fdp->fd_ofiles[fd]) == NULL)
511		return (EBADF);
512	pf = (u_char *)&fdp->fd_ofileflags[fd];
513	if (*pf & UF_MAPPED)
514		(void) munmapfd(p, fd);
515	fdp->fd_ofiles[fd] = NULL;
516	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
517		fdp->fd_lastfile--;
518	if (fd < fdp->fd_freefile)
519		fdp->fd_freefile = fd;
520	*pf = 0;
521	return (closef(fp, p));
522}
523
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * ofstat: old-format fstat.
 *
 * Obtains a struct stat for the object behind uap->fd from the routine
 * matching its f_type, converts it with cvtstat() and copies the
 * resulting struct ostat out to uap->sb.
 *
 * Returns EBADF if uap->fd is not an open descriptor, otherwise the
 * error from the stat routine or from copyout().  Panics on an unknown
 * f_type.  The conversion is done only after a successful stat, so
 * cvtstat() never reads an uninitialised struct stat.
 */
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	if (error == 0) {
		/* Only convert a stat buffer that was actually filled in. */
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
574
575/*
576 * Return status information about a file descriptor.
577 */
578#ifndef _SYS_SYSPROTO_H_
579struct fstat_args {
580	int	fd;
581	struct	stat *sb;
582};
583#endif
584/* ARGSUSED */
585int
586fstat(p, uap)
587	struct proc *p;
588	register struct fstat_args *uap;
589{
590	register struct filedesc *fdp = p->p_fd;
591	register struct file *fp;
592	struct stat ub;
593	int error;
594
595	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
596	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
597		return (EBADF);
598	switch (fp->f_type) {
599
600	case DTYPE_FIFO:
601	case DTYPE_VNODE:
602		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
603		break;
604
605	case DTYPE_SOCKET:
606		error = soo_stat((struct socket *)fp->f_data, &ub);
607		break;
608
609	case DTYPE_PIPE:
610		error = pipe_stat((struct pipe *)fp->f_data, &ub);
611		break;
612
613	default:
614		panic("fstat");
615		/*NOTREACHED*/
616	}
617	if (error == 0)
618		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
619	return (error);
620}
621
622/*
623 * Return status information about a file descriptor.
624 */
625#ifndef _SYS_SYSPROTO_H_
626struct nfstat_args {
627	int	fd;
628	struct	nstat *sb;
629};
630#endif
631/* ARGSUSED */
632int
633nfstat(p, uap)
634	struct proc *p;
635	register struct nfstat_args *uap;
636{
637	register struct filedesc *fdp = p->p_fd;
638	register struct file *fp;
639	struct stat ub;
640	struct nstat nub;
641	int error;
642
643	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
644	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
645		return (EBADF);
646	switch (fp->f_type) {
647
648	case DTYPE_FIFO:
649	case DTYPE_VNODE:
650		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
651		break;
652
653	case DTYPE_SOCKET:
654		error = soo_stat((struct socket *)fp->f_data, &ub);
655		break;
656
657	case DTYPE_PIPE:
658		error = pipe_stat((struct pipe *)fp->f_data, &ub);
659		break;
660
661	default:
662		panic("fstat");
663		/*NOTREACHED*/
664	}
665	if (error == 0) {
666		cvtnstat(&ub, &nub);
667		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
668	}
669	return (error);
670}
671
672/*
673 * Return pathconf information about a file descriptor.
674 */
675#ifndef _SYS_SYSPROTO_H_
676struct fpathconf_args {
677	int	fd;
678	int	name;
679};
680#endif
681/* ARGSUSED */
682int
683fpathconf(p, uap)
684	struct proc *p;
685	register struct fpathconf_args *uap;
686{
687	struct filedesc *fdp = p->p_fd;
688	struct file *fp;
689	struct vnode *vp;
690
691	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
692	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
693		return (EBADF);
694	switch (fp->f_type) {
695
696	case DTYPE_PIPE:
697	case DTYPE_SOCKET:
698		if (uap->name != _PC_PIPE_BUF)
699			return (EINVAL);
700		p->p_retval[0] = PIPE_BUF;
701		return (0);
702
703	case DTYPE_FIFO:
704	case DTYPE_VNODE:
705		vp = (struct vnode *)fp->f_data;
706		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
707
708	default:
709		panic("fpathconf");
710	}
711	/*NOTREACHED*/
712}
713
714/*
715 * Allocate a file descriptor for the process.
716 */
717static int fdexpand;
718SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
719
720int
721fdalloc(p, want, result)
722	struct proc *p;
723	int want;
724	int *result;
725{
726	register struct filedesc *fdp = p->p_fd;
727	register int i;
728	int lim, last, nfiles;
729	struct file **newofile;
730	char *newofileflags;
731
732	/*
733	 * Search for a free descriptor starting at the higher
734	 * of want or fd_freefile.  If that fails, consider
735	 * expanding the ofile array.
736	 */
737	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
738	for (;;) {
739		last = min(fdp->fd_nfiles, lim);
740		if ((i = want) < fdp->fd_freefile)
741			i = fdp->fd_freefile;
742		for (; i < last; i++) {
743			if (fdp->fd_ofiles[i] == NULL) {
744				fdp->fd_ofileflags[i] = 0;
745				if (i > fdp->fd_lastfile)
746					fdp->fd_lastfile = i;
747				if (want <= fdp->fd_freefile)
748					fdp->fd_freefile = i;
749				*result = i;
750				return (0);
751			}
752		}
753
754		/*
755		 * No space in current array.  Expand?
756		 */
757		if (fdp->fd_nfiles >= lim)
758			return (EMFILE);
759		if (fdp->fd_nfiles < NDEXTENT)
760			nfiles = NDEXTENT;
761		else
762			nfiles = 2 * fdp->fd_nfiles;
763		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
764		    M_FILEDESC, M_WAITOK);
765		newofileflags = (char *) &newofile[nfiles];
766		/*
767		 * Copy the existing ofile and ofileflags arrays
768		 * and zero the new portion of each array.
769		 */
770		bcopy(fdp->fd_ofiles, newofile,
771			(i = sizeof(struct file *) * fdp->fd_nfiles));
772		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
773		bcopy(fdp->fd_ofileflags, newofileflags,
774			(i = sizeof(char) * fdp->fd_nfiles));
775		bzero(newofileflags + i, nfiles * sizeof(char) - i);
776		if (fdp->fd_nfiles > NDFILE)
777			FREE(fdp->fd_ofiles, M_FILEDESC);
778		fdp->fd_ofiles = newofile;
779		fdp->fd_ofileflags = newofileflags;
780		fdp->fd_nfiles = nfiles;
781		fdexpand++;
782	}
783	return (0);
784}
785
786/*
787 * Check to see whether n user file descriptors
788 * are available to the process p.
789 */
790int
791fdavail(p, n)
792	struct proc *p;
793	register int n;
794{
795	register struct filedesc *fdp = p->p_fd;
796	register struct file **fpp;
797	register int i, lim, last;
798
799	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
800	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
801		return (1);
802
803	last = min(fdp->fd_nfiles, lim);
804	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
805	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
806		if (*fpp == NULL && --n <= 0)
807			return (1);
808	return (0);
809}
810
811/*
812 * Create a new open file structure and allocate
813 * a file decriptor for the process that refers to it.
814 */
815int
816falloc(p, resultfp, resultfd)
817	register struct proc *p;
818	struct file **resultfp;
819	int *resultfd;
820{
821	register struct file *fp, *fq;
822	int error, i;
823
824	if ((error = fdalloc(p, 0, &i)))
825		return (error);
826	if (nfiles >= maxfiles) {
827		tablefull("file");
828		return (ENFILE);
829	}
830	/*
831	 * Allocate a new file descriptor.
832	 * If the process has file descriptor zero open, add to the list
833	 * of open files at that point, otherwise put it at the front of
834	 * the list of open files.
835	 */
836	nfiles++;
837	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
838	bzero(fp, sizeof(struct file));
839	if ((fq = p->p_fd->fd_ofiles[0])) {
840		LIST_INSERT_AFTER(fq, fp, f_list);
841	} else {
842		LIST_INSERT_HEAD(&filehead, fp, f_list);
843	}
844	p->p_fd->fd_ofiles[i] = fp;
845	fp->f_count = 1;
846	fp->f_cred = p->p_ucred;
847	fp->f_seqcount = 1;
848	crhold(fp->f_cred);
849	if (resultfp)
850		*resultfp = fp;
851	if (resultfd)
852		*resultfd = i;
853	return (0);
854}
855
856/*
857 * Free a file descriptor.
858 */
859void
860ffree(fp)
861	register struct file *fp;
862{
863	LIST_REMOVE(fp, f_list);
864	crfree(fp->f_cred);
865#if defined(DIAGNOSTIC) || defined(INVARIANTS)
866	fp->f_count = 0;
867#endif
868	nfiles--;
869	FREE(fp, M_FILE);
870}
871
872/*
873 * Build a new filedesc structure.
874 */
875struct filedesc *
876fdinit(p)
877	struct proc *p;
878{
879	register struct filedesc0 *newfdp;
880	register struct filedesc *fdp = p->p_fd;
881
882	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
883	    M_FILEDESC, M_WAITOK);
884	bzero(newfdp, sizeof(struct filedesc0));
885	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
886	VREF(newfdp->fd_fd.fd_cdir);
887	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
888	VREF(newfdp->fd_fd.fd_rdir);
889
890	/* Create the file descriptor table. */
891	newfdp->fd_fd.fd_refcnt = 1;
892	newfdp->fd_fd.fd_cmask = cmask;
893	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
894	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
895	newfdp->fd_fd.fd_nfiles = NDFILE;
896
897	newfdp->fd_fd.fd_freefile = 0;
898	newfdp->fd_fd.fd_lastfile = 0;
899
900	return (&newfdp->fd_fd);
901}
902
903/*
904 * Share a filedesc structure.
905 */
906struct filedesc *
907fdshare(p)
908	struct proc *p;
909{
910	p->p_fd->fd_refcnt++;
911	return (p->p_fd);
912}
913
914/*
915 * Copy a filedesc structure.
916 */
917struct filedesc *
918fdcopy(p)
919	struct proc *p;
920{
921	register struct filedesc *newfdp, *fdp = p->p_fd;
922	register struct file **fpp;
923	register int i;
924
925/*
926 * Certain daemons might not have file descriptors
927 */
928	if (fdp == NULL)
929		return NULL;
930
931	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
932	    M_FILEDESC, M_WAITOK);
933	bcopy(fdp, newfdp, sizeof(struct filedesc));
934	VREF(newfdp->fd_cdir);
935	VREF(newfdp->fd_rdir);
936	newfdp->fd_refcnt = 1;
937
938	/*
939	 * If the number of open files fits in the internal arrays
940	 * of the open file structure, use them, otherwise allocate
941	 * additional memory for the number of descriptors currently
942	 * in use.
943	 */
944	if (newfdp->fd_lastfile < NDFILE) {
945		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
946		newfdp->fd_ofileflags =
947		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
948		i = NDFILE;
949	} else {
950		/*
951		 * Compute the smallest multiple of NDEXTENT needed
952		 * for the file descriptors currently in use,
953		 * allowing the table to shrink.
954		 */
955		i = newfdp->fd_nfiles;
956		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
957			i /= 2;
958		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
959		    M_FILEDESC, M_WAITOK);
960		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
961	}
962	newfdp->fd_nfiles = i;
963	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
964	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
965	fpp = newfdp->fd_ofiles;
966	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
967		if (*fpp != NULL)
968			(*fpp)->f_count++;
969	return (newfdp);
970}
971
972/*
973 * Release a filedesc structure.
974 */
975void
976fdfree(p)
977	struct proc *p;
978{
979	register struct filedesc *fdp = p->p_fd;
980	struct file **fpp;
981	register int i;
982
983/*
984 * Certain daemons might not have file descriptors
985 */
986	if (fdp == NULL)
987		return;
988
989	if (--fdp->fd_refcnt > 0)
990		return;
991	fpp = fdp->fd_ofiles;
992	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
993		if (*fpp)
994			(void) closef(*fpp, p);
995	if (fdp->fd_nfiles > NDFILE)
996		FREE(fdp->fd_ofiles, M_FILEDESC);
997	vrele(fdp->fd_cdir);
998	vrele(fdp->fd_rdir);
999	FREE(fdp, M_FILEDESC);
1000}
1001
1002/*
1003 * Close any files on exec?
1004 */
1005void
1006fdcloseexec(p)
1007	struct proc *p;
1008{
1009	struct filedesc *fdp = p->p_fd;
1010	struct file **fpp;
1011	char *fdfp;
1012	register int i;
1013
1014/*
1015 * Certain daemons might not have file descriptors
1016 */
1017	if (fdp == NULL)
1018		return;
1019
1020	fpp = fdp->fd_ofiles;
1021	fdfp = fdp->fd_ofileflags;
1022	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1023		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1024			if (*fdfp & UF_MAPPED)
1025				(void) munmapfd(p, i);
1026			(void) closef(*fpp, p);
1027			*fpp = NULL;
1028			*fdfp = 0;
1029			if (i < fdp->fd_freefile)
1030				fdp->fd_freefile = i;
1031		}
1032	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1033		fdp->fd_lastfile--;
1034}
1035
1036/*
1037 * Internal form of close.
1038 * Decrement reference count on file structure.
1039 * Note: p may be NULL when closing a file
1040 * that was being passed in a message.
1041 */
1042int
1043closef(fp, p)
1044	register struct file *fp;
1045	register struct proc *p;
1046{
1047	struct vnode *vp;
1048	struct flock lf;
1049	int error;
1050
1051	if (fp == NULL)
1052		return (0);
1053	/*
1054	 * POSIX record locking dictates that any close releases ALL
1055	 * locks owned by this process.  This is handled by setting
1056	 * a flag in the unlock to free ONLY locks obeying POSIX
1057	 * semantics, and not to free BSD-style file locks.
1058	 * If the descriptor was in a message, POSIX-style locks
1059	 * aren't passed with the descriptor.
1060	 */
1061	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1062		lf.l_whence = SEEK_SET;
1063		lf.l_start = 0;
1064		lf.l_len = 0;
1065		lf.l_type = F_UNLCK;
1066		vp = (struct vnode *)fp->f_data;
1067		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1068	}
1069	if (--fp->f_count > 0)
1070		return (0);
1071	if (fp->f_count < 0)
1072		panic("closef: count < 0");
1073	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1074		lf.l_whence = SEEK_SET;
1075		lf.l_start = 0;
1076		lf.l_len = 0;
1077		lf.l_type = F_UNLCK;
1078		vp = (struct vnode *)fp->f_data;
1079		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1080	}
1081	if (fp->f_ops)
1082		error = (*fp->f_ops->fo_close)(fp, p);
1083	else
1084		error = 0;
1085	ffree(fp);
1086	return (error);
1087}
1088
1089/*
1090 * Apply an advisory lock on a file descriptor.
1091 *
1092 * Just attempt to get a record lock of the requested type on
1093 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1094 */
1095#ifndef _SYS_SYSPROTO_H_
1096struct flock_args {
1097	int	fd;
1098	int	how;
1099};
1100#endif
1101/* ARGSUSED */
1102int
1103flock(p, uap)
1104	struct proc *p;
1105	register struct flock_args *uap;
1106{
1107	register struct filedesc *fdp = p->p_fd;
1108	register struct file *fp;
1109	struct vnode *vp;
1110	struct flock lf;
1111
1112	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1113	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1114		return (EBADF);
1115	if (fp->f_type != DTYPE_VNODE)
1116		return (EOPNOTSUPP);
1117	vp = (struct vnode *)fp->f_data;
1118	lf.l_whence = SEEK_SET;
1119	lf.l_start = 0;
1120	lf.l_len = 0;
1121	if (uap->how & LOCK_UN) {
1122		lf.l_type = F_UNLCK;
1123		fp->f_flag &= ~FHASLOCK;
1124		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1125	}
1126	if (uap->how & LOCK_EX)
1127		lf.l_type = F_WRLCK;
1128	else if (uap->how & LOCK_SH)
1129		lf.l_type = F_RDLCK;
1130	else
1131		return (EBADF);
1132	fp->f_flag |= FHASLOCK;
1133	if (uap->how & LOCK_NB)
1134		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1135	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1136}
1137
1138/*
1139 * File Descriptor pseudo-device driver (/dev/fd/).
1140 *
1141 * Opening minor device N dup()s the file (if any) connected to file
1142 * descriptor N belonging to the calling process.  Note that this driver
1143 * consists of only the ``open()'' routine, because all subsequent
1144 * references to this file will be direct to the other driver.
1145 */
1146/* ARGSUSED */
1147static int
1148fdopen(dev, mode, type, p)
1149	dev_t dev;
1150	int mode, type;
1151	struct proc *p;
1152{
1153
1154	/*
1155	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1156	 * the file descriptor being sought for duplication. The error
1157	 * return ensures that the vnode for this device will be released
1158	 * by vn_open. Open will detect this special error and take the
1159	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1160	 * will simply report the error.
1161	 */
1162	p->p_dupfd = minor(dev);
1163	return (ENODEV);
1164}
1165
1166/*
1167 * Duplicate the specified descriptor to a free descriptor.
1168 */
1169int
1170dupfdopen(fdp, indx, dfd, mode, error)
1171	register struct filedesc *fdp;
1172	register int indx, dfd;
1173	int mode;
1174	int error;
1175{
1176	register struct file *wfp;
1177	struct file *fp;
1178
1179	/*
1180	 * If the to-be-dup'd fd number is greater than the allowed number
1181	 * of file descriptors, or the fd to be dup'd has already been
1182	 * closed, reject.  Note, check for new == old is necessary as
1183	 * falloc could allocate an already closed to-be-dup'd descriptor
1184	 * as the new descriptor.
1185	 */
1186	fp = fdp->fd_ofiles[indx];
1187	if ((u_int)dfd >= fdp->fd_nfiles ||
1188	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1189		return (EBADF);
1190
1191	/*
1192	 * There are two cases of interest here.
1193	 *
1194	 * For ENODEV simply dup (dfd) to file descriptor
1195	 * (indx) and return.
1196	 *
1197	 * For ENXIO steal away the file structure from (dfd) and
1198	 * store it in (indx).  (dfd) is effectively closed by
1199	 * this operation.
1200	 *
1201	 * Any other error code is just returned.
1202	 */
1203	switch (error) {
1204	case ENODEV:
1205		/*
1206		 * Check that the mode the file is being opened for is a
1207		 * subset of the mode of the existing descriptor.
1208		 */
1209		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1210			return (EACCES);
1211		fdp->fd_ofiles[indx] = wfp;
1212		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1213		wfp->f_count++;
1214		if (indx > fdp->fd_lastfile)
1215			fdp->fd_lastfile = indx;
1216		return (0);
1217
1218	case ENXIO:
1219		/*
1220		 * Steal away the file pointer from dfd, and stuff it into indx.
1221		 */
1222		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1223		fdp->fd_ofiles[dfd] = NULL;
1224		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1225		fdp->fd_ofileflags[dfd] = 0;
1226		/*
1227		 * Complete the clean up of the filedesc structure by
1228		 * recomputing the various hints.
1229		 */
1230		if (indx > fdp->fd_lastfile)
1231			fdp->fd_lastfile = indx;
1232		else
1233			while (fdp->fd_lastfile > 0 &&
1234			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1235				fdp->fd_lastfile--;
1236			if (dfd < fdp->fd_freefile)
1237				fdp->fd_freefile = dfd;
1238		return (0);
1239
1240	default:
1241		return (error);
1242	}
1243	/* NOTREACHED */
1244}
1245
1246/*
1247 * Get file structures.
1248 */
1249static int
1250sysctl_kern_file SYSCTL_HANDLER_ARGS
1251{
1252	int error;
1253	struct file *fp;
1254
1255	if (!req->oldptr) {
1256		/*
1257		 * overestimate by 10 files
1258		 */
1259		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1260				(nfiles + 10) * sizeof(struct file)));
1261	}
1262
1263	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1264	if (error)
1265		return (error);
1266
1267	/*
1268	 * followed by an array of file structures
1269	 */
1270	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1271		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1272		if (error)
1273			return (error);
1274	}
1275	return (0);
1276}
1277
1278SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1279    0, 0, sysctl_kern_file, "S,file", "Entire file table");
1280
1281SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1282    &maxfilesperproc, 0, "Maximum files allowed open per process");
1283
1284SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1285    &maxfiles, 0, "Maximum number of files");
1286
1287static int fildesc_devsw_installed;
1288#ifdef DEVFS
1289static	void *devfs_token_stdin;
1290static	void *devfs_token_stdout;
1291static	void *devfs_token_stderr;
1292static	void *devfs_token_fildesc[NUMFDESC];
1293#endif
1294
1295static void 	fildesc_drvinit(void *unused)
1296{
1297	dev_t dev;
1298#ifdef DEVFS
1299	int fd;
1300#endif
1301
1302	if( ! fildesc_devsw_installed ) {
1303		dev = makedev(CDEV_MAJOR,0);
1304		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1305		fildesc_devsw_installed = 1;
1306#ifdef DEVFS
1307		for (fd = 0; fd < NUMFDESC; fd++)
1308			devfs_token_fildesc[fd] =
1309				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1310						 UID_BIN, GID_BIN, 0666,
1311						 "fd/%d", fd);
1312		devfs_token_stdin =
1313			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1314					 UID_ROOT, GID_WHEEL, 0666,
1315					 "stdin");
1316		devfs_token_stdout =
1317			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1318					 UID_ROOT, GID_WHEEL, 0666,
1319					 "stdout");
1320		devfs_token_stderr =
1321			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1322					 UID_ROOT, GID_WHEEL, 0666,
1323					 "stderr");
1324#endif
1325    	}
1326}
1327
1328SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1329					fildesc_drvinit,NULL)
1330
1331
1332