kern_descrip.c revision 33360
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.50 1998/02/06 12:13:22 eivind Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_devfs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/kernel.h>
51#include <sys/sysctl.h>
52#include <sys/vnode.h>
53#include <sys/proc.h>
54#include <sys/file.h>
55#include <sys/socketvar.h>
56#include <sys/stat.h>
57#include <sys/filio.h>
58#include <sys/ttycom.h>
59#include <sys/fcntl.h>
60#include <sys/malloc.h>
61#include <sys/unistd.h>
62#include <sys/resourcevar.h>
63#include <sys/pipe.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67
68#ifdef DEVFS
69#include <sys/devfsext.h>
70#endif /*DEVFS*/
71
/* Malloc type used in this file for per-process descriptor tables. */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
/* Malloc type used by falloc()/ffree() for struct file. */
MALLOC_DEFINE(M_FILE, "file", "Open file structure");


/* Open entry of the file-descriptor pseudo-device (defined below). */
static	 d_open_t  fdopen;
/* Number of "fd/N" nodes fildesc_drvinit() registers under DEVFS. */
#define NUMFDESC 64

/* Character-device major number handed to makedev() in fildesc_drvinit(). */
#define CDEV_MAJOR 22
/*
 * Device switch for the pseudo-device.  fdopen is the only routine
 * supplied by this file; the remaining slots are filled with shared
 * no*/null* entries (presumably generic stubs -- defined elsewhere).
 */
static struct cdevsw fildesc_cdevsw =
	{ fdopen,	noclose,	noread,		nowrite,
	  noioc,	nostop,		nullreset,	nodevtotty,
	  seltrue,	nommap,		nostrat };

/* Shared tail of dup(), dup2() and fcntl(F_DUPFD). */
static int finishdup __P((struct filedesc *fdp, int old, int new, int *retval));
/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
/* Copied into fd_cmask of every table built by fdinit(); defined elsewhere. */
extern int cmask;
92
93/*
94 * System calls on descriptors.
95 */
96#ifndef _SYS_SYSPROTO_H_
97struct getdtablesize_args {
98	int	dummy;
99};
100#endif
101/* ARGSUSED */
102int
103getdtablesize(p, uap)
104	struct proc *p;
105	struct getdtablesize_args *uap;
106{
107
108	p->p_retval[0] =
109	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
110	return (0);
111}
112
113/*
114 * Duplicate a file descriptor to a particular value.
115 */
116#ifndef _SYS_SYSPROTO_H_
117struct dup2_args {
118	u_int	from;
119	u_int	to;
120};
121#endif
122/* ARGSUSED */
123int
124dup2(p, uap)
125	struct proc *p;
126	struct dup2_args *uap;
127{
128	register struct filedesc *fdp = p->p_fd;
129	register u_int old = uap->from, new = uap->to;
130	int i, error;
131
132	if (old >= fdp->fd_nfiles ||
133	    fdp->fd_ofiles[old] == NULL ||
134	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
135	    new >= maxfilesperproc)
136		return (EBADF);
137	if (old == new) {
138		p->p_retval[0] = new;
139		return (0);
140	}
141	if (new >= fdp->fd_nfiles) {
142		if ((error = fdalloc(p, new, &i)))
143			return (error);
144		if (new != i)
145			panic("dup2: fdalloc");
146	} else if (fdp->fd_ofiles[new]) {
147		if (fdp->fd_ofileflags[new] & UF_MAPPED)
148			(void) munmapfd(p, new);
149		/*
150		 * dup2() must succeed even if the close has an error.
151		 */
152		(void) closef(fdp->fd_ofiles[new], p);
153	}
154	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
155}
156
157/*
158 * Duplicate a file descriptor.
159 */
160#ifndef _SYS_SYSPROTO_H_
161struct dup_args {
162	u_int	fd;
163};
164#endif
165/* ARGSUSED */
166int
167dup(p, uap)
168	struct proc *p;
169	struct dup_args *uap;
170{
171	register struct filedesc *fdp;
172	u_int old;
173	int new, error;
174
175	old = uap->fd;
176
177#if 0
178	/*
179	 * XXX Compatibility
180	 */
181	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
182#endif
183
184	fdp = p->p_fd;
185	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
186		return (EBADF);
187	if ((error = fdalloc(p, 0, &new)))
188		return (error);
189	return (finishdup(fdp, (int)old, new, p->p_retval));
190}
191
192/*
193 * The file control system call.
194 */
195#ifndef _SYS_SYSPROTO_H_
196struct fcntl_args {
197	int	fd;
198	int	cmd;
199	int	arg;
200};
201#endif
202/* ARGSUSED */
203int
204fcntl(p, uap)
205	struct proc *p;
206	register struct fcntl_args *uap;
207{
208	register struct filedesc *fdp = p->p_fd;
209	register struct file *fp;
210	register char *pop;
211	struct vnode *vp;
212	int i, tmp, error, flg = F_POSIX;
213	struct flock fl;
214	u_int newmin;
215
216	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
217	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
218		return (EBADF);
219	pop = &fdp->fd_ofileflags[uap->fd];
220	switch (uap->cmd) {
221
222	case F_DUPFD:
223		newmin = uap->arg;
224		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
225		    newmin >= maxfilesperproc)
226			return (EINVAL);
227		if ((error = fdalloc(p, newmin, &i)))
228			return (error);
229		return (finishdup(fdp, uap->fd, i, p->p_retval));
230
231	case F_GETFD:
232		p->p_retval[0] = *pop & 1;
233		return (0);
234
235	case F_SETFD:
236		*pop = (*pop &~ 1) | (uap->arg & 1);
237		return (0);
238
239	case F_GETFL:
240		p->p_retval[0] = OFLAGS(fp->f_flag);
241		return (0);
242
243	case F_SETFL:
244		fp->f_flag &= ~FCNTLFLAGS;
245		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
246		tmp = fp->f_flag & FNONBLOCK;
247		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
248		if (error)
249			return (error);
250		tmp = fp->f_flag & FASYNC;
251		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
252		if (!error)
253			return (0);
254		fp->f_flag &= ~FNONBLOCK;
255		tmp = 0;
256		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
257		return (error);
258
259	case F_GETOWN:
260		if (fp->f_type == DTYPE_SOCKET) {
261			p->p_retval[0] = ((struct socket *)fp->f_data)->so_pgid;
262			return (0);
263		}
264		error = (*fp->f_ops->fo_ioctl)
265			(fp, TIOCGPGRP, (caddr_t)p->p_retval, p);
266		p->p_retval[0] = - p->p_retval[0];
267		return (error);
268
269	case F_SETOWN:
270		if (fp->f_type == DTYPE_SOCKET) {
271			((struct socket *)fp->f_data)->so_pgid = uap->arg;
272			return (0);
273		}
274		if (uap->arg <= 0) {
275			uap->arg = -uap->arg;
276		} else {
277			struct proc *p1 = pfind(uap->arg);
278			if (p1 == 0)
279				return (ESRCH);
280			uap->arg = p1->p_pgrp->pg_id;
281		}
282		return ((*fp->f_ops->fo_ioctl)
283			(fp, TIOCSPGRP, (caddr_t)&uap->arg, p));
284
285	case F_SETLKW:
286		flg |= F_WAIT;
287		/* Fall into F_SETLK */
288
289	case F_SETLK:
290		if (fp->f_type != DTYPE_VNODE)
291			return (EBADF);
292		vp = (struct vnode *)fp->f_data;
293		/* Copy in the lock structure */
294		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
295		if (error)
296			return (error);
297		if (fl.l_whence == SEEK_CUR)
298			fl.l_start += fp->f_offset;
299		switch (fl.l_type) {
300
301		case F_RDLCK:
302			if ((fp->f_flag & FREAD) == 0)
303				return (EBADF);
304			p->p_flag |= P_ADVLOCK;
305			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
306
307		case F_WRLCK:
308			if ((fp->f_flag & FWRITE) == 0)
309				return (EBADF);
310			p->p_flag |= P_ADVLOCK;
311			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
312
313		case F_UNLCK:
314			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
315				F_POSIX));
316
317		default:
318			return (EINVAL);
319		}
320
321	case F_GETLK:
322		if (fp->f_type != DTYPE_VNODE)
323			return (EBADF);
324		vp = (struct vnode *)fp->f_data;
325		/* Copy in the lock structure */
326		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
327		if (error)
328			return (error);
329		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
330		    fl.l_type != F_UNLCK)
331			return (EINVAL);
332		if (fl.l_whence == SEEK_CUR)
333			fl.l_start += fp->f_offset;
334		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
335			return (error);
336		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
337
338	default:
339		return (EINVAL);
340	}
341	/* NOTREACHED */
342}
343
344/*
345 * Common code for dup, dup2, and fcntl(F_DUPFD).
346 */
347static int
348finishdup(fdp, old, new, retval)
349	register struct filedesc *fdp;
350	register int old, new, *retval;
351{
352	register struct file *fp;
353
354	fp = fdp->fd_ofiles[old];
355	fdp->fd_ofiles[new] = fp;
356	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
357	fp->f_count++;
358	if (new > fdp->fd_lastfile)
359		fdp->fd_lastfile = new;
360	*retval = new;
361	return (0);
362}
363
364/*
365 * Close a file descriptor.
366 */
367#ifndef _SYS_SYSPROTO_H_
368struct close_args {
369        int     fd;
370};
371#endif
372/* ARGSUSED */
373int
374close(p, uap)
375	struct proc *p;
376	struct close_args *uap;
377{
378	register struct filedesc *fdp = p->p_fd;
379	register struct file *fp;
380	register int fd = uap->fd;
381	register u_char *pf;
382
383	if ((unsigned)fd >= fdp->fd_nfiles ||
384	    (fp = fdp->fd_ofiles[fd]) == NULL)
385		return (EBADF);
386	pf = (u_char *)&fdp->fd_ofileflags[fd];
387	if (*pf & UF_MAPPED)
388		(void) munmapfd(p, fd);
389	fdp->fd_ofiles[fd] = NULL;
390	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
391		fdp->fd_lastfile--;
392	if (fd < fdp->fd_freefile)
393		fdp->fd_freefile = fd;
394	*pf = 0;
395	return (closef(fp, p));
396}
397
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor
 * (old `struct ostat' layout).
 *
 * Returns EBADF if uap->fd is not open.  The status is gathered by the
 * routine matching the file type, converted with cvtstat() and copied
 * out to uap->sb.  An unknown file type panics.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	/*
	 * Convert only after a successful stat: on failure `ub' may not
	 * have been filled in and must not be read.
	 */
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
448
449/*
450 * Return status information about a file descriptor.
451 */
452#ifndef _SYS_SYSPROTO_H_
453struct fstat_args {
454	int	fd;
455	struct	stat *sb;
456};
457#endif
458/* ARGSUSED */
459int
460fstat(p, uap)
461	struct proc *p;
462	register struct fstat_args *uap;
463{
464	register struct filedesc *fdp = p->p_fd;
465	register struct file *fp;
466	struct stat ub;
467	int error;
468
469	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
470	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
471		return (EBADF);
472	switch (fp->f_type) {
473
474	case DTYPE_FIFO:
475	case DTYPE_VNODE:
476		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
477		break;
478
479	case DTYPE_SOCKET:
480		error = soo_stat((struct socket *)fp->f_data, &ub);
481		break;
482
483	case DTYPE_PIPE:
484		error = pipe_stat((struct pipe *)fp->f_data, &ub);
485		break;
486
487	default:
488		panic("fstat");
489		/*NOTREACHED*/
490	}
491	if (error == 0)
492		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
493	return (error);
494}
495
496/*
497 * Return pathconf information about a file descriptor.
498 */
499#ifndef _SYS_SYSPROTO_H_
500struct fpathconf_args {
501	int	fd;
502	int	name;
503};
504#endif
505/* ARGSUSED */
506int
507fpathconf(p, uap)
508	struct proc *p;
509	register struct fpathconf_args *uap;
510{
511	struct filedesc *fdp = p->p_fd;
512	struct file *fp;
513	struct vnode *vp;
514
515	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
516	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
517		return (EBADF);
518	switch (fp->f_type) {
519
520	case DTYPE_PIPE:
521	case DTYPE_SOCKET:
522		if (uap->name != _PC_PIPE_BUF)
523			return (EINVAL);
524		p->p_retval[0] = PIPE_BUF;
525		return (0);
526
527	case DTYPE_FIFO:
528	case DTYPE_VNODE:
529		vp = (struct vnode *)fp->f_data;
530		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
531
532	default:
533		panic("fpathconf");
534	}
535	/*NOTREACHED*/
536}
537
538/*
539 * Allocate a file descriptor for the process.
540 */
541static int fdexpand;
542SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
543
544int
545fdalloc(p, want, result)
546	struct proc *p;
547	int want;
548	int *result;
549{
550	register struct filedesc *fdp = p->p_fd;
551	register int i;
552	int lim, last, nfiles;
553	struct file **newofile;
554	char *newofileflags;
555
556	/*
557	 * Search for a free descriptor starting at the higher
558	 * of want or fd_freefile.  If that fails, consider
559	 * expanding the ofile array.
560	 */
561	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
562	for (;;) {
563		last = min(fdp->fd_nfiles, lim);
564		if ((i = want) < fdp->fd_freefile)
565			i = fdp->fd_freefile;
566		for (; i < last; i++) {
567			if (fdp->fd_ofiles[i] == NULL) {
568				fdp->fd_ofileflags[i] = 0;
569				if (i > fdp->fd_lastfile)
570					fdp->fd_lastfile = i;
571				if (want <= fdp->fd_freefile)
572					fdp->fd_freefile = i;
573				*result = i;
574				return (0);
575			}
576		}
577
578		/*
579		 * No space in current array.  Expand?
580		 */
581		if (fdp->fd_nfiles >= lim)
582			return (EMFILE);
583		if (fdp->fd_nfiles < NDEXTENT)
584			nfiles = NDEXTENT;
585		else
586			nfiles = 2 * fdp->fd_nfiles;
587		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
588		    M_FILEDESC, M_WAITOK);
589		newofileflags = (char *) &newofile[nfiles];
590		/*
591		 * Copy the existing ofile and ofileflags arrays
592		 * and zero the new portion of each array.
593		 */
594		bcopy(fdp->fd_ofiles, newofile,
595			(i = sizeof(struct file *) * fdp->fd_nfiles));
596		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
597		bcopy(fdp->fd_ofileflags, newofileflags,
598			(i = sizeof(char) * fdp->fd_nfiles));
599		bzero(newofileflags + i, nfiles * sizeof(char) - i);
600		if (fdp->fd_nfiles > NDFILE)
601			FREE(fdp->fd_ofiles, M_FILEDESC);
602		fdp->fd_ofiles = newofile;
603		fdp->fd_ofileflags = newofileflags;
604		fdp->fd_nfiles = nfiles;
605		fdexpand++;
606	}
607	return (0);
608}
609
610/*
611 * Check to see whether n user file descriptors
612 * are available to the process p.
613 */
614int
615fdavail(p, n)
616	struct proc *p;
617	register int n;
618{
619	register struct filedesc *fdp = p->p_fd;
620	register struct file **fpp;
621	register int i, lim, last;
622
623	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
624	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
625		return (1);
626
627	last = min(fdp->fd_nfiles, lim);
628	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
629	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
630		if (*fpp == NULL && --n <= 0)
631			return (1);
632	return (0);
633}
634
635/*
636 * Create a new open file structure and allocate
637 * a file decriptor for the process that refers to it.
638 */
639int
640falloc(p, resultfp, resultfd)
641	register struct proc *p;
642	struct file **resultfp;
643	int *resultfd;
644{
645	register struct file *fp, *fq;
646	int error, i;
647
648	if ((error = fdalloc(p, 0, &i)))
649		return (error);
650	if (nfiles >= maxfiles) {
651		tablefull("file");
652		return (ENFILE);
653	}
654	/*
655	 * Allocate a new file descriptor.
656	 * If the process has file descriptor zero open, add to the list
657	 * of open files at that point, otherwise put it at the front of
658	 * the list of open files.
659	 */
660	nfiles++;
661	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
662	bzero(fp, sizeof(struct file));
663	if ((fq = p->p_fd->fd_ofiles[0])) {
664		LIST_INSERT_AFTER(fq, fp, f_list);
665	} else {
666		LIST_INSERT_HEAD(&filehead, fp, f_list);
667	}
668	p->p_fd->fd_ofiles[i] = fp;
669	fp->f_count = 1;
670	fp->f_cred = p->p_ucred;
671	fp->f_seqcount = 1;
672	crhold(fp->f_cred);
673	if (resultfp)
674		*resultfp = fp;
675	if (resultfd)
676		*resultfd = i;
677	return (0);
678}
679
680/*
681 * Free a file descriptor.
682 */
683void
684ffree(fp)
685	register struct file *fp;
686{
687	LIST_REMOVE(fp, f_list);
688	crfree(fp->f_cred);
689#ifdef DIAGNOSTIC
690	fp->f_count = 0;
691#endif
692	nfiles--;
693	FREE(fp, M_FILE);
694}
695
696/*
697 * Build a new filedesc structure.
698 */
699struct filedesc *
700fdinit(p)
701	struct proc *p;
702{
703	register struct filedesc0 *newfdp;
704	register struct filedesc *fdp = p->p_fd;
705
706	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
707	    M_FILEDESC, M_WAITOK);
708	bzero(newfdp, sizeof(struct filedesc0));
709	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
710	VREF(newfdp->fd_fd.fd_cdir);
711	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
712	VREF(newfdp->fd_fd.fd_rdir);
713
714	/* Create the file descriptor table. */
715	newfdp->fd_fd.fd_refcnt = 1;
716	newfdp->fd_fd.fd_cmask = cmask;
717	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
718	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
719	newfdp->fd_fd.fd_nfiles = NDFILE;
720
721	newfdp->fd_fd.fd_freefile = 0;
722	newfdp->fd_fd.fd_lastfile = 0;
723
724	return (&newfdp->fd_fd);
725}
726
727/*
728 * Share a filedesc structure.
729 */
730struct filedesc *
731fdshare(p)
732	struct proc *p;
733{
734	p->p_fd->fd_refcnt++;
735	return (p->p_fd);
736}
737
738/*
739 * Copy a filedesc structure.
740 */
741struct filedesc *
742fdcopy(p)
743	struct proc *p;
744{
745	register struct filedesc *newfdp, *fdp = p->p_fd;
746	register struct file **fpp;
747	register int i;
748
749/*
750 * Certain daemons might not have file descriptors
751 */
752	if (fdp == NULL)
753		return NULL;
754
755	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
756	    M_FILEDESC, M_WAITOK);
757	bcopy(fdp, newfdp, sizeof(struct filedesc));
758	VREF(newfdp->fd_cdir);
759	VREF(newfdp->fd_rdir);
760	newfdp->fd_refcnt = 1;
761
762	/*
763	 * If the number of open files fits in the internal arrays
764	 * of the open file structure, use them, otherwise allocate
765	 * additional memory for the number of descriptors currently
766	 * in use.
767	 */
768	if (newfdp->fd_lastfile < NDFILE) {
769		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
770		newfdp->fd_ofileflags =
771		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
772		i = NDFILE;
773	} else {
774		/*
775		 * Compute the smallest multiple of NDEXTENT needed
776		 * for the file descriptors currently in use,
777		 * allowing the table to shrink.
778		 */
779		i = newfdp->fd_nfiles;
780		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
781			i /= 2;
782		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
783		    M_FILEDESC, M_WAITOK);
784		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
785	}
786	newfdp->fd_nfiles = i;
787	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
788	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
789	fpp = newfdp->fd_ofiles;
790	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
791		if (*fpp != NULL)
792			(*fpp)->f_count++;
793	return (newfdp);
794}
795
796/*
797 * Release a filedesc structure.
798 */
799void
800fdfree(p)
801	struct proc *p;
802{
803	register struct filedesc *fdp = p->p_fd;
804	struct file **fpp;
805	register int i;
806
807/*
808 * Certain daemons might not have file descriptors
809 */
810	if (fdp == NULL)
811		return;
812
813	if (--fdp->fd_refcnt > 0)
814		return;
815	fpp = fdp->fd_ofiles;
816	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
817		if (*fpp)
818			(void) closef(*fpp, p);
819	if (fdp->fd_nfiles > NDFILE)
820		FREE(fdp->fd_ofiles, M_FILEDESC);
821	vrele(fdp->fd_cdir);
822	vrele(fdp->fd_rdir);
823	FREE(fdp, M_FILEDESC);
824}
825
826/*
827 * Close any files on exec?
828 */
829void
830fdcloseexec(p)
831	struct proc *p;
832{
833	struct filedesc *fdp = p->p_fd;
834	struct file **fpp;
835	char *fdfp;
836	register int i;
837
838/*
839 * Certain daemons might not have file descriptors
840 */
841	if (fdp == NULL)
842		return;
843
844	fpp = fdp->fd_ofiles;
845	fdfp = fdp->fd_ofileflags;
846	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
847		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
848			if (*fdfp & UF_MAPPED)
849				(void) munmapfd(p, i);
850			(void) closef(*fpp, p);
851			*fpp = NULL;
852			*fdfp = 0;
853			if (i < fdp->fd_freefile)
854				fdp->fd_freefile = i;
855		}
856	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
857		fdp->fd_lastfile--;
858}
859
860/*
861 * Internal form of close.
862 * Decrement reference count on file structure.
863 * Note: p may be NULL when closing a file
864 * that was being passed in a message.
865 */
866int
867closef(fp, p)
868	register struct file *fp;
869	register struct proc *p;
870{
871	struct vnode *vp;
872	struct flock lf;
873	int error;
874
875	if (fp == NULL)
876		return (0);
877	/*
878	 * POSIX record locking dictates that any close releases ALL
879	 * locks owned by this process.  This is handled by setting
880	 * a flag in the unlock to free ONLY locks obeying POSIX
881	 * semantics, and not to free BSD-style file locks.
882	 * If the descriptor was in a message, POSIX-style locks
883	 * aren't passed with the descriptor.
884	 */
885	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
886		lf.l_whence = SEEK_SET;
887		lf.l_start = 0;
888		lf.l_len = 0;
889		lf.l_type = F_UNLCK;
890		vp = (struct vnode *)fp->f_data;
891		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
892	}
893	if (--fp->f_count > 0)
894		return (0);
895	if (fp->f_count < 0)
896		panic("closef: count < 0");
897	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
898		lf.l_whence = SEEK_SET;
899		lf.l_start = 0;
900		lf.l_len = 0;
901		lf.l_type = F_UNLCK;
902		vp = (struct vnode *)fp->f_data;
903		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
904	}
905	if (fp->f_ops)
906		error = (*fp->f_ops->fo_close)(fp, p);
907	else
908		error = 0;
909	ffree(fp);
910	return (error);
911}
912
913/*
914 * Apply an advisory lock on a file descriptor.
915 *
916 * Just attempt to get a record lock of the requested type on
917 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
918 */
919#ifndef _SYS_SYSPROTO_H_
920struct flock_args {
921	int	fd;
922	int	how;
923};
924#endif
925/* ARGSUSED */
926int
927flock(p, uap)
928	struct proc *p;
929	register struct flock_args *uap;
930{
931	register struct filedesc *fdp = p->p_fd;
932	register struct file *fp;
933	struct vnode *vp;
934	struct flock lf;
935
936	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
937	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
938		return (EBADF);
939	if (fp->f_type != DTYPE_VNODE)
940		return (EOPNOTSUPP);
941	vp = (struct vnode *)fp->f_data;
942	lf.l_whence = SEEK_SET;
943	lf.l_start = 0;
944	lf.l_len = 0;
945	if (uap->how & LOCK_UN) {
946		lf.l_type = F_UNLCK;
947		fp->f_flag &= ~FHASLOCK;
948		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
949	}
950	if (uap->how & LOCK_EX)
951		lf.l_type = F_WRLCK;
952	else if (uap->how & LOCK_SH)
953		lf.l_type = F_RDLCK;
954	else
955		return (EBADF);
956	fp->f_flag |= FHASLOCK;
957	if (uap->how & LOCK_NB)
958		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
959	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
960}
961
962/*
963 * File Descriptor pseudo-device driver (/dev/fd/).
964 *
965 * Opening minor device N dup()s the file (if any) connected to file
966 * descriptor N belonging to the calling process.  Note that this driver
967 * consists of only the ``open()'' routine, because all subsequent
968 * references to this file will be direct to the other driver.
969 */
970/* ARGSUSED */
971static int
972fdopen(dev, mode, type, p)
973	dev_t dev;
974	int mode, type;
975	struct proc *p;
976{
977
978	/*
979	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
980	 * the file descriptor being sought for duplication. The error
981	 * return ensures that the vnode for this device will be released
982	 * by vn_open. Open will detect this special error and take the
983	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
984	 * will simply report the error.
985	 */
986	p->p_dupfd = minor(dev);
987	return (ENODEV);
988}
989
990/*
991 * Duplicate the specified descriptor to a free descriptor.
992 */
993int
994dupfdopen(fdp, indx, dfd, mode, error)
995	register struct filedesc *fdp;
996	register int indx, dfd;
997	int mode;
998	int error;
999{
1000	register struct file *wfp;
1001	struct file *fp;
1002
1003	/*
1004	 * If the to-be-dup'd fd number is greater than the allowed number
1005	 * of file descriptors, or the fd to be dup'd has already been
1006	 * closed, reject.  Note, check for new == old is necessary as
1007	 * falloc could allocate an already closed to-be-dup'd descriptor
1008	 * as the new descriptor.
1009	 */
1010	fp = fdp->fd_ofiles[indx];
1011	if ((u_int)dfd >= fdp->fd_nfiles ||
1012	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1013		return (EBADF);
1014
1015	/*
1016	 * There are two cases of interest here.
1017	 *
1018	 * For ENODEV simply dup (dfd) to file descriptor
1019	 * (indx) and return.
1020	 *
1021	 * For ENXIO steal away the file structure from (dfd) and
1022	 * store it in (indx).  (dfd) is effectively closed by
1023	 * this operation.
1024	 *
1025	 * Any other error code is just returned.
1026	 */
1027	switch (error) {
1028	case ENODEV:
1029		/*
1030		 * Check that the mode the file is being opened for is a
1031		 * subset of the mode of the existing descriptor.
1032		 */
1033		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1034			return (EACCES);
1035		fdp->fd_ofiles[indx] = wfp;
1036		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1037		wfp->f_count++;
1038		if (indx > fdp->fd_lastfile)
1039			fdp->fd_lastfile = indx;
1040		return (0);
1041
1042	case ENXIO:
1043		/*
1044		 * Steal away the file pointer from dfd, and stuff it into indx.
1045		 */
1046		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1047		fdp->fd_ofiles[dfd] = NULL;
1048		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1049		fdp->fd_ofileflags[dfd] = 0;
1050		/*
1051		 * Complete the clean up of the filedesc structure by
1052		 * recomputing the various hints.
1053		 */
1054		if (indx > fdp->fd_lastfile)
1055			fdp->fd_lastfile = indx;
1056		else
1057			while (fdp->fd_lastfile > 0 &&
1058			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1059				fdp->fd_lastfile--;
1060			if (dfd < fdp->fd_freefile)
1061				fdp->fd_freefile = dfd;
1062		return (0);
1063
1064	default:
1065		return (error);
1066	}
1067	/* NOTREACHED */
1068}
1069
1070/*
1071 * Get file structures.
1072 */
1073static int
1074sysctl_kern_file SYSCTL_HANDLER_ARGS
1075{
1076	int error;
1077	struct file *fp;
1078
1079	if (!req->oldptr) {
1080		/*
1081		 * overestimate by 10 files
1082		 */
1083		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1084				(nfiles + 10) * sizeof(struct file)));
1085	}
1086
1087	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1088	if (error)
1089		return (error);
1090
1091	/*
1092	 * followed by an array of file structures
1093	 */
1094	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1095		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1096		if (error)
1097			return (error);
1098	}
1099	return (0);
1100}
1101
1102SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1103	0, 0, sysctl_kern_file, "S,file", "");
1104
1105SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1106	CTLFLAG_RW, &maxfilesperproc, 0, "");
1107
1108SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1109
1110static fildesc_devsw_installed = 0;
1111#ifdef DEVFS
1112static	void *devfs_token_stdin;
1113static	void *devfs_token_stdout;
1114static	void *devfs_token_stderr;
1115static	void *devfs_token_fildesc[NUMFDESC];
1116#endif
1117
1118static void 	fildesc_drvinit(void *unused)
1119{
1120	dev_t dev;
1121#ifdef DEVFS
1122	int fd;
1123#endif
1124
1125	if( ! fildesc_devsw_installed ) {
1126		dev = makedev(CDEV_MAJOR,0);
1127		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1128		fildesc_devsw_installed = 1;
1129#ifdef DEVFS
1130		for (fd = 0; fd < NUMFDESC; fd++)
1131			devfs_token_fildesc[fd] =
1132				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1133						 UID_BIN, GID_BIN, 0666,
1134						 "fd/%d", fd);
1135		devfs_token_stdin =
1136			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1137					 UID_ROOT, GID_WHEEL, 0666,
1138					 "stdin", fd);
1139		devfs_token_stdout =
1140			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1141					 UID_ROOT, GID_WHEEL, 0666,
1142					 "stdout", fd);
1143		devfs_token_stderr =
1144			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1145					 UID_ROOT, GID_WHEEL, 0666,
1146					 "stderr", fd);
1147#endif
1148    	}
1149}
1150
1151SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1152					fildesc_drvinit,NULL)
1153
1154
1155