kern_descrip.c revision 46381
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.59 1999/04/28 10:53:22 dt Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_devfs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/kernel.h>
51#include <sys/sysctl.h>
52#include <sys/vnode.h>
53#include <sys/proc.h>
54#include <sys/file.h>
55#include <sys/socketvar.h>
56#include <sys/stat.h>
57#include <sys/filio.h>
58#include <sys/ttycom.h>
59#include <sys/fcntl.h>
60#include <sys/malloc.h>
61#include <sys/unistd.h>
62#include <sys/resourcevar.h>
63#include <sys/pipe.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67
68#ifdef DEVFS
69#include <sys/devfsext.h>
70#endif /*DEVFS*/
71
72static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
73MALLOC_DEFINE(M_FILE, "file", "Open file structure");
74static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
75
76
77static	 d_open_t  fdopen;
78#define NUMFDESC 64
79
80#define CDEV_MAJOR 22
81static struct cdevsw fildesc_cdevsw =
82	{ fdopen,	noclose,	noread,		nowrite,
83	  noioc,	nostop,		nullreset,	nodevtotty,
84	  seltrue,	nommap,		nostrat };
85
86static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
87/*
88 * Descriptor management.
89 */
90struct filelist filehead;	/* head of list of open files */
91int nfiles;			/* actual number of open files */
92extern int cmask;
93
94/*
95 * System calls on descriptors.
96 */
97#ifndef _SYS_SYSPROTO_H_
98struct getdtablesize_args {
99	int	dummy;
100};
101#endif
102/* ARGSUSED */
103int
104getdtablesize(p, uap)
105	struct proc *p;
106	struct getdtablesize_args *uap;
107{
108
109	p->p_retval[0] =
110	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
111	return (0);
112}
113
114/*
115 * Duplicate a file descriptor to a particular value.
116 */
117#ifndef _SYS_SYSPROTO_H_
118struct dup2_args {
119	u_int	from;
120	u_int	to;
121};
122#endif
123/* ARGSUSED */
124int
125dup2(p, uap)
126	struct proc *p;
127	struct dup2_args *uap;
128{
129	register struct filedesc *fdp = p->p_fd;
130	register u_int old = uap->from, new = uap->to;
131	int i, error;
132
133	if (old >= fdp->fd_nfiles ||
134	    fdp->fd_ofiles[old] == NULL ||
135	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
136	    new >= maxfilesperproc)
137		return (EBADF);
138	if (old == new) {
139		p->p_retval[0] = new;
140		return (0);
141	}
142	if (new >= fdp->fd_nfiles) {
143		if ((error = fdalloc(p, new, &i)))
144			return (error);
145		if (new != i)
146			panic("dup2: fdalloc");
147	} else if (fdp->fd_ofiles[new]) {
148		if (fdp->fd_ofileflags[new] & UF_MAPPED)
149			(void) munmapfd(p, new);
150		/*
151		 * dup2() must succeed even if the close has an error.
152		 */
153		(void) closef(fdp->fd_ofiles[new], p);
154	}
155	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
156}
157
158/*
159 * Duplicate a file descriptor.
160 */
161#ifndef _SYS_SYSPROTO_H_
162struct dup_args {
163	u_int	fd;
164};
165#endif
166/* ARGSUSED */
167int
168dup(p, uap)
169	struct proc *p;
170	struct dup_args *uap;
171{
172	register struct filedesc *fdp;
173	u_int old;
174	int new, error;
175
176	old = uap->fd;
177
178#if 0
179	/*
180	 * XXX Compatibility
181	 */
182	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
183#endif
184
185	fdp = p->p_fd;
186	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
187		return (EBADF);
188	if ((error = fdalloc(p, 0, &new)))
189		return (error);
190	return (finishdup(fdp, (int)old, new, p->p_retval));
191}
192
193/*
194 * The file control system call.
195 */
196#ifndef _SYS_SYSPROTO_H_
197struct fcntl_args {
198	int	fd;
199	int	cmd;
200	long	arg;
201};
202#endif
203/* ARGSUSED */
204int
205fcntl(p, uap)
206	struct proc *p;
207	register struct fcntl_args *uap;
208{
209	register struct filedesc *fdp = p->p_fd;
210	register struct file *fp;
211	register char *pop;
212	struct vnode *vp;
213	int i, tmp, error, flg = F_POSIX;
214	struct flock fl;
215	u_int newmin;
216
217	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
218	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
219		return (EBADF);
220	pop = &fdp->fd_ofileflags[uap->fd];
221	switch (uap->cmd) {
222
223	case F_DUPFD:
224		newmin = uap->arg;
225		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
226		    newmin >= maxfilesperproc)
227			return (EINVAL);
228		if ((error = fdalloc(p, newmin, &i)))
229			return (error);
230		return (finishdup(fdp, uap->fd, i, p->p_retval));
231
232	case F_GETFD:
233		p->p_retval[0] = *pop & 1;
234		return (0);
235
236	case F_SETFD:
237		*pop = (*pop &~ 1) | (uap->arg & 1);
238		return (0);
239
240	case F_GETFL:
241		p->p_retval[0] = OFLAGS(fp->f_flag);
242		return (0);
243
244	case F_SETFL:
245		fp->f_flag &= ~FCNTLFLAGS;
246		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
247		tmp = fp->f_flag & FNONBLOCK;
248		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
249		if (error)
250			return (error);
251		tmp = fp->f_flag & FASYNC;
252		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
253		if (!error)
254			return (0);
255		fp->f_flag &= ~FNONBLOCK;
256		tmp = 0;
257		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
258		return (error);
259
260	case F_GETOWN:
261		error = (*fp->f_ops->fo_ioctl)
262			(fp, FIOGETOWN, (caddr_t)p->p_retval, p);
263		return (error);
264
265	case F_SETOWN:
266		return ((*fp->f_ops->fo_ioctl)
267			(fp, FIOSETOWN, (caddr_t)&uap->arg, p));
268
269	case F_SETLKW:
270		flg |= F_WAIT;
271		/* Fall into F_SETLK */
272
273	case F_SETLK:
274		if (fp->f_type != DTYPE_VNODE)
275			return (EBADF);
276		vp = (struct vnode *)fp->f_data;
277		/* Copy in the lock structure */
278		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
279		    sizeof(fl));
280		if (error)
281			return (error);
282		if (fl.l_whence == SEEK_CUR)
283			fl.l_start += fp->f_offset;
284		switch (fl.l_type) {
285
286		case F_RDLCK:
287			if ((fp->f_flag & FREAD) == 0)
288				return (EBADF);
289			p->p_flag |= P_ADVLOCK;
290			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
291
292		case F_WRLCK:
293			if ((fp->f_flag & FWRITE) == 0)
294				return (EBADF);
295			p->p_flag |= P_ADVLOCK;
296			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
297
298		case F_UNLCK:
299			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
300				F_POSIX));
301
302		default:
303			return (EINVAL);
304		}
305
306	case F_GETLK:
307		if (fp->f_type != DTYPE_VNODE)
308			return (EBADF);
309		vp = (struct vnode *)fp->f_data;
310		/* Copy in the lock structure */
311		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
312		    sizeof(fl));
313		if (error)
314			return (error);
315		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
316		    fl.l_type != F_UNLCK)
317			return (EINVAL);
318		if (fl.l_whence == SEEK_CUR)
319			fl.l_start += fp->f_offset;
320		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
321			return (error);
322		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
323		    sizeof(fl)));
324
325	default:
326		return (EINVAL);
327	}
328	/* NOTREACHED */
329}
330
331/*
332 * Common code for dup, dup2, and fcntl(F_DUPFD).
333 */
334static int
335finishdup(fdp, old, new, retval)
336	register struct filedesc *fdp;
337	register int old, new;
338	register_t *retval;
339{
340	register struct file *fp;
341
342	fp = fdp->fd_ofiles[old];
343	fdp->fd_ofiles[new] = fp;
344	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
345	fp->f_count++;
346	if (new > fdp->fd_lastfile)
347		fdp->fd_lastfile = new;
348	*retval = new;
349	return (0);
350}
351
352/*
353 * If sigio is on the list associated with a process or process group,
354 * disable signalling from the device, remove sigio from the list and
355 * free sigio.
356 */
357void
358funsetown(sigio)
359	struct sigio *sigio;
360{
361	int s;
362
363	if (sigio == NULL)
364		return;
365	s = splhigh();
366	*(sigio->sio_myref) = NULL;
367	splx(s);
368	if (sigio->sio_pgid < 0) {
369		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
370			     sigio, sio_pgsigio);
371	} else /* if ((*sigiop)->sio_pgid > 0) */ {
372		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
373			     sigio, sio_pgsigio);
374	}
375	crfree(sigio->sio_ucred);
376	FREE(sigio, M_SIGIO);
377}
378
379/* Free a list of sigio structures. */
380void
381funsetownlst(sigiolst)
382	struct sigiolst *sigiolst;
383{
384	struct sigio *sigio;
385
386	while ((sigio = sigiolst->slh_first) != NULL)
387		funsetown(sigio);
388}
389
390/*
391 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
392 *
393 * After permission checking, add a sigio structure to the sigio list for
394 * the process or process group.
395 */
396int
397fsetown(pgid, sigiop)
398	pid_t pgid;
399	struct sigio **sigiop;
400{
401	struct proc *proc;
402	struct pgrp *pgrp;
403	struct sigio *sigio;
404	int s;
405
406	if (pgid == 0) {
407		funsetown(*sigiop);
408		return (0);
409	}
410	if (pgid > 0) {
411		proc = pfind(pgid);
412		if (proc == NULL)
413			return (ESRCH);
414		/*
415		 * Policy - Don't allow a process to FSETOWN a process
416		 * in another session.
417		 *
418		 * Remove this test to allow maximum flexibility or
419		 * restrict FSETOWN to the current process or process
420		 * group for maximum safety.
421		 */
422		else if (proc->p_session != curproc->p_session)
423			return (EPERM);
424		pgrp = NULL;
425	} else /* if (pgid < 0) */ {
426		pgrp = pgfind(-pgid);
427		if (pgrp == NULL)
428			return (ESRCH);
429		/*
430		 * Policy - Don't allow a process to FSETOWN a process
431		 * in another session.
432		 *
433		 * Remove this test to allow maximum flexibility or
434		 * restrict FSETOWN to the current process or process
435		 * group for maximum safety.
436		 */
437		else if (pgrp->pg_session != curproc->p_session)
438			return (EPERM);
439		proc = NULL;
440	}
441	funsetown(*sigiop);
442	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO,
443	       M_WAITOK);
444	if (pgid > 0) {
445		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
446		sigio->sio_proc = proc;
447	} else {
448		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
449		sigio->sio_pgrp = pgrp;
450	}
451	sigio->sio_pgid = pgid;
452	crhold(curproc->p_ucred);
453	sigio->sio_ucred = curproc->p_ucred;
454	/* It would be convenient if p_ruid was in ucred. */
455	sigio->sio_ruid = curproc->p_cred->p_ruid;
456	sigio->sio_myref = sigiop;
457	s = splhigh();
458	*sigiop = sigio;
459	splx(s);
460	return (0);
461}
462
463/*
464 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
465 */
466pid_t
467fgetown(sigio)
468	struct sigio *sigio;
469{
470	return (sigio != NULL ? sigio->sio_pgid : 0);
471}
472
473/*
474 * Close a file descriptor.
475 */
476#ifndef _SYS_SYSPROTO_H_
477struct close_args {
478        int     fd;
479};
480#endif
481/* ARGSUSED */
482int
483close(p, uap)
484	struct proc *p;
485	struct close_args *uap;
486{
487	register struct filedesc *fdp = p->p_fd;
488	register struct file *fp;
489	register int fd = uap->fd;
490	register u_char *pf;
491
492	if ((unsigned)fd >= fdp->fd_nfiles ||
493	    (fp = fdp->fd_ofiles[fd]) == NULL)
494		return (EBADF);
495	pf = (u_char *)&fdp->fd_ofileflags[fd];
496	if (*pf & UF_MAPPED)
497		(void) munmapfd(p, fd);
498	fdp->fd_ofiles[fd] = NULL;
499	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
500		fdp->fd_lastfile--;
501	if (fd < fdp->fd_freefile)
502		fdp->fd_freefile = fd;
503	*pf = 0;
504	return (closef(fp, p));
505}
506
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the
 * struct ostat layout.
 *
 * Returns EBADF when uap->fd is not open.  The status is obtained from
 * the routine matching the file type, converted with cvtstat() and
 * copied out to uap->sb.  An unknown file type panics.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	/*
	 * Convert only after a successful stat, as nfstat() does; on
	 * failure `ub' may not have been filled in.
	 */
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
557
558/*
559 * Return status information about a file descriptor.
560 */
561#ifndef _SYS_SYSPROTO_H_
562struct fstat_args {
563	int	fd;
564	struct	stat *sb;
565};
566#endif
567/* ARGSUSED */
568int
569fstat(p, uap)
570	struct proc *p;
571	register struct fstat_args *uap;
572{
573	register struct filedesc *fdp = p->p_fd;
574	register struct file *fp;
575	struct stat ub;
576	int error;
577
578	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
579	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
580		return (EBADF);
581	switch (fp->f_type) {
582
583	case DTYPE_FIFO:
584	case DTYPE_VNODE:
585		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
586		break;
587
588	case DTYPE_SOCKET:
589		error = soo_stat((struct socket *)fp->f_data, &ub);
590		break;
591
592	case DTYPE_PIPE:
593		error = pipe_stat((struct pipe *)fp->f_data, &ub);
594		break;
595
596	default:
597		panic("fstat");
598		/*NOTREACHED*/
599	}
600	if (error == 0)
601		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
602	return (error);
603}
604
605/*
606 * Return status information about a file descriptor.
607 */
608#ifndef _SYS_SYSPROTO_H_
609struct nfstat_args {
610	int	fd;
611	struct	nstat *sb;
612};
613#endif
614/* ARGSUSED */
615int
616nfstat(p, uap)
617	struct proc *p;
618	register struct nfstat_args *uap;
619{
620	register struct filedesc *fdp = p->p_fd;
621	register struct file *fp;
622	struct stat ub;
623	struct nstat nub;
624	int error;
625
626	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
627	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
628		return (EBADF);
629	switch (fp->f_type) {
630
631	case DTYPE_FIFO:
632	case DTYPE_VNODE:
633		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
634		break;
635
636	case DTYPE_SOCKET:
637		error = soo_stat((struct socket *)fp->f_data, &ub);
638		break;
639
640	case DTYPE_PIPE:
641		error = pipe_stat((struct pipe *)fp->f_data, &ub);
642		break;
643
644	default:
645		panic("fstat");
646		/*NOTREACHED*/
647	}
648	if (error == 0) {
649		cvtnstat(&ub, &nub);
650		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
651	}
652	return (error);
653}
654
655/*
656 * Return pathconf information about a file descriptor.
657 */
658#ifndef _SYS_SYSPROTO_H_
659struct fpathconf_args {
660	int	fd;
661	int	name;
662};
663#endif
664/* ARGSUSED */
665int
666fpathconf(p, uap)
667	struct proc *p;
668	register struct fpathconf_args *uap;
669{
670	struct filedesc *fdp = p->p_fd;
671	struct file *fp;
672	struct vnode *vp;
673
674	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
675	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
676		return (EBADF);
677	switch (fp->f_type) {
678
679	case DTYPE_PIPE:
680	case DTYPE_SOCKET:
681		if (uap->name != _PC_PIPE_BUF)
682			return (EINVAL);
683		p->p_retval[0] = PIPE_BUF;
684		return (0);
685
686	case DTYPE_FIFO:
687	case DTYPE_VNODE:
688		vp = (struct vnode *)fp->f_data;
689		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
690
691	default:
692		panic("fpathconf");
693	}
694	/*NOTREACHED*/
695}
696
697/*
698 * Allocate a file descriptor for the process.
699 */
700static int fdexpand;
701SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
702
703int
704fdalloc(p, want, result)
705	struct proc *p;
706	int want;
707	int *result;
708{
709	register struct filedesc *fdp = p->p_fd;
710	register int i;
711	int lim, last, nfiles;
712	struct file **newofile;
713	char *newofileflags;
714
715	/*
716	 * Search for a free descriptor starting at the higher
717	 * of want or fd_freefile.  If that fails, consider
718	 * expanding the ofile array.
719	 */
720	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
721	for (;;) {
722		last = min(fdp->fd_nfiles, lim);
723		if ((i = want) < fdp->fd_freefile)
724			i = fdp->fd_freefile;
725		for (; i < last; i++) {
726			if (fdp->fd_ofiles[i] == NULL) {
727				fdp->fd_ofileflags[i] = 0;
728				if (i > fdp->fd_lastfile)
729					fdp->fd_lastfile = i;
730				if (want <= fdp->fd_freefile)
731					fdp->fd_freefile = i;
732				*result = i;
733				return (0);
734			}
735		}
736
737		/*
738		 * No space in current array.  Expand?
739		 */
740		if (fdp->fd_nfiles >= lim)
741			return (EMFILE);
742		if (fdp->fd_nfiles < NDEXTENT)
743			nfiles = NDEXTENT;
744		else
745			nfiles = 2 * fdp->fd_nfiles;
746		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
747		    M_FILEDESC, M_WAITOK);
748		newofileflags = (char *) &newofile[nfiles];
749		/*
750		 * Copy the existing ofile and ofileflags arrays
751		 * and zero the new portion of each array.
752		 */
753		bcopy(fdp->fd_ofiles, newofile,
754			(i = sizeof(struct file *) * fdp->fd_nfiles));
755		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
756		bcopy(fdp->fd_ofileflags, newofileflags,
757			(i = sizeof(char) * fdp->fd_nfiles));
758		bzero(newofileflags + i, nfiles * sizeof(char) - i);
759		if (fdp->fd_nfiles > NDFILE)
760			FREE(fdp->fd_ofiles, M_FILEDESC);
761		fdp->fd_ofiles = newofile;
762		fdp->fd_ofileflags = newofileflags;
763		fdp->fd_nfiles = nfiles;
764		fdexpand++;
765	}
766	return (0);
767}
768
769/*
770 * Check to see whether n user file descriptors
771 * are available to the process p.
772 */
773int
774fdavail(p, n)
775	struct proc *p;
776	register int n;
777{
778	register struct filedesc *fdp = p->p_fd;
779	register struct file **fpp;
780	register int i, lim, last;
781
782	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
783	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
784		return (1);
785
786	last = min(fdp->fd_nfiles, lim);
787	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
788	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
789		if (*fpp == NULL && --n <= 0)
790			return (1);
791	return (0);
792}
793
794/*
795 * Create a new open file structure and allocate
796 * a file decriptor for the process that refers to it.
797 */
798int
799falloc(p, resultfp, resultfd)
800	register struct proc *p;
801	struct file **resultfp;
802	int *resultfd;
803{
804	register struct file *fp, *fq;
805	int error, i;
806
807	if ((error = fdalloc(p, 0, &i)))
808		return (error);
809	if (nfiles >= maxfiles) {
810		tablefull("file");
811		return (ENFILE);
812	}
813	/*
814	 * Allocate a new file descriptor.
815	 * If the process has file descriptor zero open, add to the list
816	 * of open files at that point, otherwise put it at the front of
817	 * the list of open files.
818	 */
819	nfiles++;
820	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
821	bzero(fp, sizeof(struct file));
822	if ((fq = p->p_fd->fd_ofiles[0])) {
823		LIST_INSERT_AFTER(fq, fp, f_list);
824	} else {
825		LIST_INSERT_HEAD(&filehead, fp, f_list);
826	}
827	p->p_fd->fd_ofiles[i] = fp;
828	fp->f_count = 1;
829	fp->f_cred = p->p_ucred;
830	fp->f_seqcount = 1;
831	crhold(fp->f_cred);
832	if (resultfp)
833		*resultfp = fp;
834	if (resultfd)
835		*resultfd = i;
836	return (0);
837}
838
839/*
840 * Free a file descriptor.
841 */
842void
843ffree(fp)
844	register struct file *fp;
845{
846	LIST_REMOVE(fp, f_list);
847	crfree(fp->f_cred);
848#if defined(DIAGNOSTIC) || defined(INVARIANTS)
849	fp->f_count = 0;
850#endif
851	nfiles--;
852	FREE(fp, M_FILE);
853}
854
855/*
856 * Build a new filedesc structure.
857 */
858struct filedesc *
859fdinit(p)
860	struct proc *p;
861{
862	register struct filedesc0 *newfdp;
863	register struct filedesc *fdp = p->p_fd;
864
865	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
866	    M_FILEDESC, M_WAITOK);
867	bzero(newfdp, sizeof(struct filedesc0));
868	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
869	VREF(newfdp->fd_fd.fd_cdir);
870	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
871	VREF(newfdp->fd_fd.fd_rdir);
872
873	/* Create the file descriptor table. */
874	newfdp->fd_fd.fd_refcnt = 1;
875	newfdp->fd_fd.fd_cmask = cmask;
876	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
877	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
878	newfdp->fd_fd.fd_nfiles = NDFILE;
879
880	newfdp->fd_fd.fd_freefile = 0;
881	newfdp->fd_fd.fd_lastfile = 0;
882
883	return (&newfdp->fd_fd);
884}
885
886/*
887 * Share a filedesc structure.
888 */
889struct filedesc *
890fdshare(p)
891	struct proc *p;
892{
893	p->p_fd->fd_refcnt++;
894	return (p->p_fd);
895}
896
897/*
898 * Copy a filedesc structure.
899 */
900struct filedesc *
901fdcopy(p)
902	struct proc *p;
903{
904	register struct filedesc *newfdp, *fdp = p->p_fd;
905	register struct file **fpp;
906	register int i;
907
908/*
909 * Certain daemons might not have file descriptors
910 */
911	if (fdp == NULL)
912		return NULL;
913
914	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
915	    M_FILEDESC, M_WAITOK);
916	bcopy(fdp, newfdp, sizeof(struct filedesc));
917	VREF(newfdp->fd_cdir);
918	VREF(newfdp->fd_rdir);
919	newfdp->fd_refcnt = 1;
920
921	/*
922	 * If the number of open files fits in the internal arrays
923	 * of the open file structure, use them, otherwise allocate
924	 * additional memory for the number of descriptors currently
925	 * in use.
926	 */
927	if (newfdp->fd_lastfile < NDFILE) {
928		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
929		newfdp->fd_ofileflags =
930		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
931		i = NDFILE;
932	} else {
933		/*
934		 * Compute the smallest multiple of NDEXTENT needed
935		 * for the file descriptors currently in use,
936		 * allowing the table to shrink.
937		 */
938		i = newfdp->fd_nfiles;
939		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
940			i /= 2;
941		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
942		    M_FILEDESC, M_WAITOK);
943		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
944	}
945	newfdp->fd_nfiles = i;
946	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
947	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
948	fpp = newfdp->fd_ofiles;
949	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
950		if (*fpp != NULL)
951			(*fpp)->f_count++;
952	return (newfdp);
953}
954
955/*
956 * Release a filedesc structure.
957 */
958void
959fdfree(p)
960	struct proc *p;
961{
962	register struct filedesc *fdp = p->p_fd;
963	struct file **fpp;
964	register int i;
965
966/*
967 * Certain daemons might not have file descriptors
968 */
969	if (fdp == NULL)
970		return;
971
972	if (--fdp->fd_refcnt > 0)
973		return;
974	fpp = fdp->fd_ofiles;
975	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
976		if (*fpp)
977			(void) closef(*fpp, p);
978	if (fdp->fd_nfiles > NDFILE)
979		FREE(fdp->fd_ofiles, M_FILEDESC);
980	vrele(fdp->fd_cdir);
981	vrele(fdp->fd_rdir);
982	FREE(fdp, M_FILEDESC);
983}
984
985/*
986 * Close any files on exec?
987 */
988void
989fdcloseexec(p)
990	struct proc *p;
991{
992	struct filedesc *fdp = p->p_fd;
993	struct file **fpp;
994	char *fdfp;
995	register int i;
996
997/*
998 * Certain daemons might not have file descriptors
999 */
1000	if (fdp == NULL)
1001		return;
1002
1003	fpp = fdp->fd_ofiles;
1004	fdfp = fdp->fd_ofileflags;
1005	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1006		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1007			if (*fdfp & UF_MAPPED)
1008				(void) munmapfd(p, i);
1009			(void) closef(*fpp, p);
1010			*fpp = NULL;
1011			*fdfp = 0;
1012			if (i < fdp->fd_freefile)
1013				fdp->fd_freefile = i;
1014		}
1015	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1016		fdp->fd_lastfile--;
1017}
1018
1019/*
1020 * Internal form of close.
1021 * Decrement reference count on file structure.
1022 * Note: p may be NULL when closing a file
1023 * that was being passed in a message.
1024 */
1025int
1026closef(fp, p)
1027	register struct file *fp;
1028	register struct proc *p;
1029{
1030	struct vnode *vp;
1031	struct flock lf;
1032	int error;
1033
1034	if (fp == NULL)
1035		return (0);
1036	/*
1037	 * POSIX record locking dictates that any close releases ALL
1038	 * locks owned by this process.  This is handled by setting
1039	 * a flag in the unlock to free ONLY locks obeying POSIX
1040	 * semantics, and not to free BSD-style file locks.
1041	 * If the descriptor was in a message, POSIX-style locks
1042	 * aren't passed with the descriptor.
1043	 */
1044	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1045		lf.l_whence = SEEK_SET;
1046		lf.l_start = 0;
1047		lf.l_len = 0;
1048		lf.l_type = F_UNLCK;
1049		vp = (struct vnode *)fp->f_data;
1050		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1051	}
1052	if (--fp->f_count > 0)
1053		return (0);
1054	if (fp->f_count < 0)
1055		panic("closef: count < 0");
1056	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1057		lf.l_whence = SEEK_SET;
1058		lf.l_start = 0;
1059		lf.l_len = 0;
1060		lf.l_type = F_UNLCK;
1061		vp = (struct vnode *)fp->f_data;
1062		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1063	}
1064	if (fp->f_ops)
1065		error = (*fp->f_ops->fo_close)(fp, p);
1066	else
1067		error = 0;
1068	ffree(fp);
1069	return (error);
1070}
1071
1072/*
1073 * Apply an advisory lock on a file descriptor.
1074 *
1075 * Just attempt to get a record lock of the requested type on
1076 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1077 */
1078#ifndef _SYS_SYSPROTO_H_
1079struct flock_args {
1080	int	fd;
1081	int	how;
1082};
1083#endif
1084/* ARGSUSED */
1085int
1086flock(p, uap)
1087	struct proc *p;
1088	register struct flock_args *uap;
1089{
1090	register struct filedesc *fdp = p->p_fd;
1091	register struct file *fp;
1092	struct vnode *vp;
1093	struct flock lf;
1094
1095	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1096	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1097		return (EBADF);
1098	if (fp->f_type != DTYPE_VNODE)
1099		return (EOPNOTSUPP);
1100	vp = (struct vnode *)fp->f_data;
1101	lf.l_whence = SEEK_SET;
1102	lf.l_start = 0;
1103	lf.l_len = 0;
1104	if (uap->how & LOCK_UN) {
1105		lf.l_type = F_UNLCK;
1106		fp->f_flag &= ~FHASLOCK;
1107		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1108	}
1109	if (uap->how & LOCK_EX)
1110		lf.l_type = F_WRLCK;
1111	else if (uap->how & LOCK_SH)
1112		lf.l_type = F_RDLCK;
1113	else
1114		return (EBADF);
1115	fp->f_flag |= FHASLOCK;
1116	if (uap->how & LOCK_NB)
1117		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1118	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1119}
1120
1121/*
1122 * File Descriptor pseudo-device driver (/dev/fd/).
1123 *
1124 * Opening minor device N dup()s the file (if any) connected to file
1125 * descriptor N belonging to the calling process.  Note that this driver
1126 * consists of only the ``open()'' routine, because all subsequent
1127 * references to this file will be direct to the other driver.
1128 */
1129/* ARGSUSED */
1130static int
1131fdopen(dev, mode, type, p)
1132	dev_t dev;
1133	int mode, type;
1134	struct proc *p;
1135{
1136
1137	/*
1138	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1139	 * the file descriptor being sought for duplication. The error
1140	 * return ensures that the vnode for this device will be released
1141	 * by vn_open. Open will detect this special error and take the
1142	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1143	 * will simply report the error.
1144	 */
1145	p->p_dupfd = minor(dev);
1146	return (ENODEV);
1147}
1148
1149/*
1150 * Duplicate the specified descriptor to a free descriptor.
1151 */
1152int
1153dupfdopen(fdp, indx, dfd, mode, error)
1154	register struct filedesc *fdp;
1155	register int indx, dfd;
1156	int mode;
1157	int error;
1158{
1159	register struct file *wfp;
1160	struct file *fp;
1161
1162	/*
1163	 * If the to-be-dup'd fd number is greater than the allowed number
1164	 * of file descriptors, or the fd to be dup'd has already been
1165	 * closed, reject.  Note, check for new == old is necessary as
1166	 * falloc could allocate an already closed to-be-dup'd descriptor
1167	 * as the new descriptor.
1168	 */
1169	fp = fdp->fd_ofiles[indx];
1170	if ((u_int)dfd >= fdp->fd_nfiles ||
1171	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1172		return (EBADF);
1173
1174	/*
1175	 * There are two cases of interest here.
1176	 *
1177	 * For ENODEV simply dup (dfd) to file descriptor
1178	 * (indx) and return.
1179	 *
1180	 * For ENXIO steal away the file structure from (dfd) and
1181	 * store it in (indx).  (dfd) is effectively closed by
1182	 * this operation.
1183	 *
1184	 * Any other error code is just returned.
1185	 */
1186	switch (error) {
1187	case ENODEV:
1188		/*
1189		 * Check that the mode the file is being opened for is a
1190		 * subset of the mode of the existing descriptor.
1191		 */
1192		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1193			return (EACCES);
1194		fdp->fd_ofiles[indx] = wfp;
1195		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1196		wfp->f_count++;
1197		if (indx > fdp->fd_lastfile)
1198			fdp->fd_lastfile = indx;
1199		return (0);
1200
1201	case ENXIO:
1202		/*
1203		 * Steal away the file pointer from dfd, and stuff it into indx.
1204		 */
1205		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1206		fdp->fd_ofiles[dfd] = NULL;
1207		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1208		fdp->fd_ofileflags[dfd] = 0;
1209		/*
1210		 * Complete the clean up of the filedesc structure by
1211		 * recomputing the various hints.
1212		 */
1213		if (indx > fdp->fd_lastfile)
1214			fdp->fd_lastfile = indx;
1215		else
1216			while (fdp->fd_lastfile > 0 &&
1217			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1218				fdp->fd_lastfile--;
1219			if (dfd < fdp->fd_freefile)
1220				fdp->fd_freefile = dfd;
1221		return (0);
1222
1223	default:
1224		return (error);
1225	}
1226	/* NOTREACHED */
1227}
1228
1229/*
1230 * Get file structures.
1231 */
1232static int
1233sysctl_kern_file SYSCTL_HANDLER_ARGS
1234{
1235	int error;
1236	struct file *fp;
1237
1238	if (!req->oldptr) {
1239		/*
1240		 * overestimate by 10 files
1241		 */
1242		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1243				(nfiles + 10) * sizeof(struct file)));
1244	}
1245
1246	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1247	if (error)
1248		return (error);
1249
1250	/*
1251	 * followed by an array of file structures
1252	 */
1253	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1254		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1255		if (error)
1256			return (error);
1257	}
1258	return (0);
1259}
1260
1261SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1262    0, 0, sysctl_kern_file, "S,file", "Entire file table");
1263
1264SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1265    &maxfilesperproc, 0, "Maximum files allowed open per process");
1266
1267SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1268    &maxfiles, 0, "Maximum number of files");
1269
1270static int fildesc_devsw_installed;
1271#ifdef DEVFS
1272static	void *devfs_token_stdin;
1273static	void *devfs_token_stdout;
1274static	void *devfs_token_stderr;
1275static	void *devfs_token_fildesc[NUMFDESC];
1276#endif
1277
1278static void 	fildesc_drvinit(void *unused)
1279{
1280	dev_t dev;
1281#ifdef DEVFS
1282	int fd;
1283#endif
1284
1285	if( ! fildesc_devsw_installed ) {
1286		dev = makedev(CDEV_MAJOR,0);
1287		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1288		fildesc_devsw_installed = 1;
1289#ifdef DEVFS
1290		for (fd = 0; fd < NUMFDESC; fd++)
1291			devfs_token_fildesc[fd] =
1292				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1293						 UID_BIN, GID_BIN, 0666,
1294						 "fd/%d", fd);
1295		devfs_token_stdin =
1296			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1297					 UID_ROOT, GID_WHEEL, 0666,
1298					 "stdin");
1299		devfs_token_stdout =
1300			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1301					 UID_ROOT, GID_WHEEL, 0666,
1302					 "stdout");
1303		devfs_token_stderr =
1304			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1305					 UID_ROOT, GID_WHEEL, 0666,
1306					 "stderr");
1307#endif
1308    	}
1309}
1310
1311SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1312					fildesc_drvinit,NULL)
1313
1314
1315