kern_descrip.c revision 30309
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.40 1997/09/14 02:52:13 peter Exp $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/sysproto.h>
45#include <sys/conf.h>
46#include <sys/filedesc.h>
47#include <sys/kernel.h>
48#include <sys/sysctl.h>
49#include <sys/vnode.h>
50#include <sys/proc.h>
51#include <sys/file.h>
52#include <sys/socketvar.h>
53#include <sys/stat.h>
54#include <sys/filio.h>
55#include <sys/ttycom.h>
56#include <sys/fcntl.h>
57#include <sys/malloc.h>
58#include <sys/unistd.h>
59#include <sys/resourcevar.h>
60#include <sys/pipe.h>
61
62#include <vm/vm.h>
63#include <vm/vm_extern.h>
64
65#ifdef DEVFS
66#include <sys/devfsext.h>
67#endif /*DEVFS*/
68
69MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
70
71static	 d_open_t  fdopen;
72#define NUMFDESC 64
73
74#define CDEV_MAJOR 22
75static struct cdevsw fildesc_cdevsw =
76	{ fdopen,	noclose,	noread,		nowrite,
77	  noioc,	nostop,		nullreset,	nodevtotty,
78	  seltrue,	nommap,		nostrat };
79
80static int finishdup __P((struct filedesc *fdp, int old, int new, int *retval));
81/*
82 * Descriptor management.
83 */
84struct filelist filehead;	/* head of list of open files */
85int nfiles;			/* actual number of open files */
86extern int cmask;
87
88/*
89 * System calls on descriptors.
90 */
91#ifndef _SYS_SYSPROTO_H_
92struct getdtablesize_args {
93	int	dummy;
94};
95#endif
96/* ARGSUSED */
97int
98getdtablesize(p, uap, retval)
99	struct proc *p;
100	struct getdtablesize_args *uap;
101	int *retval;
102{
103
104	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
105	return (0);
106}
107
108/*
109 * Duplicate a file descriptor to a particular value.
110 */
111#ifndef _SYS_SYSPROTO_H_
112struct dup2_args {
113	u_int	from;
114	u_int	to;
115};
116#endif
117/* ARGSUSED */
118int
119dup2(p, uap, retval)
120	struct proc *p;
121	struct dup2_args *uap;
122	int *retval;
123{
124	register struct filedesc *fdp = p->p_fd;
125	register u_int old = uap->from, new = uap->to;
126	int i, error;
127
128	if (old >= fdp->fd_nfiles ||
129	    fdp->fd_ofiles[old] == NULL ||
130	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
131	    new >= maxfilesperproc)
132		return (EBADF);
133	if (old == new) {
134		*retval = new;
135		return (0);
136	}
137	if (new >= fdp->fd_nfiles) {
138		if ((error = fdalloc(p, new, &i)))
139			return (error);
140		if (new != i)
141			panic("dup2: fdalloc");
142	} else if (fdp->fd_ofiles[new]) {
143		if (fdp->fd_ofileflags[new] & UF_MAPPED)
144			(void) munmapfd(p, new);
145		/*
146		 * dup2() must succeed even if the close has an error.
147		 */
148		(void) closef(fdp->fd_ofiles[new], p);
149	}
150	return (finishdup(fdp, (int)old, (int)new, retval));
151}
152
153/*
154 * Duplicate a file descriptor.
155 */
156#ifndef _SYS_SYSPROTO_H_
157struct dup_args {
158	u_int	fd;
159};
160#endif
161/* ARGSUSED */
162int
163dup(p, uap, retval)
164	struct proc *p;
165	struct dup_args *uap;
166	int *retval;
167{
168	register struct filedesc *fdp;
169	u_int old;
170	int new, error;
171
172	old = uap->fd;
173
174#if 0
175	/*
176	 * XXX Compatibility
177	 */
178	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); }
179#endif
180
181	fdp = p->p_fd;
182	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
183		return (EBADF);
184	if ((error = fdalloc(p, 0, &new)))
185		return (error);
186	return (finishdup(fdp, (int)old, new, retval));
187}
188
189/*
190 * The file control system call.
191 */
192#ifndef _SYS_SYSPROTO_H_
193struct fcntl_args {
194	int	fd;
195	int	cmd;
196	int	arg;
197};
198#endif
199/* ARGSUSED */
200int
201fcntl(p, uap, retval)
202	struct proc *p;
203	register struct fcntl_args *uap;
204	int *retval;
205{
206	register struct filedesc *fdp = p->p_fd;
207	register struct file *fp;
208	register char *pop;
209	struct vnode *vp;
210	int i, tmp, error, flg = F_POSIX;
211	struct flock fl;
212	u_int newmin;
213
214	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
215	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
216		return (EBADF);
217	pop = &fdp->fd_ofileflags[uap->fd];
218	switch (uap->cmd) {
219
220	case F_DUPFD:
221		newmin = uap->arg;
222		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
223		    newmin >= maxfilesperproc)
224			return (EINVAL);
225		if ((error = fdalloc(p, newmin, &i)))
226			return (error);
227		return (finishdup(fdp, uap->fd, i, retval));
228
229	case F_GETFD:
230		*retval = *pop & 1;
231		return (0);
232
233	case F_SETFD:
234		*pop = (*pop &~ 1) | (uap->arg & 1);
235		return (0);
236
237	case F_GETFL:
238		*retval = OFLAGS(fp->f_flag);
239		return (0);
240
241	case F_SETFL:
242		fp->f_flag &= ~FCNTLFLAGS;
243		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
244		tmp = fp->f_flag & FNONBLOCK;
245		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
246		if (error)
247			return (error);
248		tmp = fp->f_flag & FASYNC;
249		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
250		if (!error)
251			return (0);
252		fp->f_flag &= ~FNONBLOCK;
253		tmp = 0;
254		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
255		return (error);
256
257	case F_GETOWN:
258		if (fp->f_type == DTYPE_SOCKET) {
259			*retval = ((struct socket *)fp->f_data)->so_pgid;
260			return (0);
261		}
262		error = (*fp->f_ops->fo_ioctl)
263			(fp, TIOCGPGRP, (caddr_t)retval, p);
264		*retval = -*retval;
265		return (error);
266
267	case F_SETOWN:
268		if (fp->f_type == DTYPE_SOCKET) {
269			((struct socket *)fp->f_data)->so_pgid = uap->arg;
270			return (0);
271		}
272		if (uap->arg <= 0) {
273			uap->arg = -uap->arg;
274		} else {
275			struct proc *p1 = pfind(uap->arg);
276			if (p1 == 0)
277				return (ESRCH);
278			uap->arg = p1->p_pgrp->pg_id;
279		}
280		return ((*fp->f_ops->fo_ioctl)
281			(fp, TIOCSPGRP, (caddr_t)&uap->arg, p));
282
283	case F_SETLKW:
284		flg |= F_WAIT;
285		/* Fall into F_SETLK */
286
287	case F_SETLK:
288		if (fp->f_type != DTYPE_VNODE)
289			return (EBADF);
290		vp = (struct vnode *)fp->f_data;
291		/* Copy in the lock structure */
292		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
293		if (error)
294			return (error);
295		if (fl.l_whence == SEEK_CUR)
296			fl.l_start += fp->f_offset;
297		switch (fl.l_type) {
298
299		case F_RDLCK:
300			if ((fp->f_flag & FREAD) == 0)
301				return (EBADF);
302			p->p_flag |= P_ADVLOCK;
303			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
304
305		case F_WRLCK:
306			if ((fp->f_flag & FWRITE) == 0)
307				return (EBADF);
308			p->p_flag |= P_ADVLOCK;
309			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
310
311		case F_UNLCK:
312			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
313				F_POSIX));
314
315		default:
316			return (EINVAL);
317		}
318
319	case F_GETLK:
320		if (fp->f_type != DTYPE_VNODE)
321			return (EBADF);
322		vp = (struct vnode *)fp->f_data;
323		/* Copy in the lock structure */
324		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
325		if (error)
326			return (error);
327		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
328		    fl.l_type != F_UNLCK)
329			return (EINVAL);
330		if (fl.l_whence == SEEK_CUR)
331			fl.l_start += fp->f_offset;
332		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
333			return (error);
334		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
335
336	default:
337		return (EINVAL);
338	}
339	/* NOTREACHED */
340}
341
342/*
343 * Common code for dup, dup2, and fcntl(F_DUPFD).
344 */
345static int
346finishdup(fdp, old, new, retval)
347	register struct filedesc *fdp;
348	register int old, new, *retval;
349{
350	register struct file *fp;
351
352	fp = fdp->fd_ofiles[old];
353	fdp->fd_ofiles[new] = fp;
354	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
355	fp->f_count++;
356	if (new > fdp->fd_lastfile)
357		fdp->fd_lastfile = new;
358	*retval = new;
359	return (0);
360}
361
362/*
363 * Close a file descriptor.
364 */
365#ifndef _SYS_SYSPROTO_H_
366struct close_args {
367        int     fd;
368};
369#endif
370/* ARGSUSED */
371int
372close(p, uap, retval)
373	struct proc *p;
374	struct close_args *uap;
375	int *retval;
376{
377	register struct filedesc *fdp = p->p_fd;
378	register struct file *fp;
379	register int fd = uap->fd;
380	register u_char *pf;
381
382	if ((unsigned)fd >= fdp->fd_nfiles ||
383	    (fp = fdp->fd_ofiles[fd]) == NULL)
384		return (EBADF);
385	pf = (u_char *)&fdp->fd_ofileflags[fd];
386	if (*pf & UF_MAPPED)
387		(void) munmapfd(p, fd);
388	fdp->fd_ofiles[fd] = NULL;
389	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
390		fdp->fd_lastfile--;
391	if (fd < fdp->fd_freefile)
392		fdp->fd_freefile = fd;
393	*pf = 0;
394	return (closef(fp, p));
395}
396
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * `struct ostat' layout.
 *
 * Returns EBADF if uap->fd is not open, the error from the per-type
 * stat routine if that fails, or the result of copying the converted
 * structure out to uap->sb.  Panics on an unknown file type.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap, retval)
	struct proc *p;
	register struct ofstat_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	if (error == 0) {
		/*
		 * Convert only after a successful stat: on failure `ub'
		 * may not have been filled in, so it must not be read.
		 */
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
448
449/*
450 * Return status information about a file descriptor.
451 */
452#ifndef _SYS_SYSPROTO_H_
453struct fstat_args {
454	int	fd;
455	struct	stat *sb;
456};
457#endif
458/* ARGSUSED */
459int
460fstat(p, uap, retval)
461	struct proc *p;
462	register struct fstat_args *uap;
463	int *retval;
464{
465	register struct filedesc *fdp = p->p_fd;
466	register struct file *fp;
467	struct stat ub;
468	int error;
469
470	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
471	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
472		return (EBADF);
473	switch (fp->f_type) {
474
475	case DTYPE_FIFO:
476	case DTYPE_VNODE:
477		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
478		break;
479
480	case DTYPE_SOCKET:
481		error = soo_stat((struct socket *)fp->f_data, &ub);
482		break;
483
484	case DTYPE_PIPE:
485		error = pipe_stat((struct pipe *)fp->f_data, &ub);
486		break;
487
488	default:
489		panic("fstat");
490		/*NOTREACHED*/
491	}
492	if (error == 0)
493		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
494	return (error);
495}
496
497/*
498 * Return pathconf information about a file descriptor.
499 */
500#ifndef _SYS_SYSPROTO_H_
501struct fpathconf_args {
502	int	fd;
503	int	name;
504};
505#endif
506/* ARGSUSED */
507int
508fpathconf(p, uap, retval)
509	struct proc *p;
510	register struct fpathconf_args *uap;
511	int *retval;
512{
513	struct filedesc *fdp = p->p_fd;
514	struct file *fp;
515	struct vnode *vp;
516
517	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
518	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
519		return (EBADF);
520	switch (fp->f_type) {
521
522	case DTYPE_PIPE:
523	case DTYPE_SOCKET:
524		if (uap->name != _PC_PIPE_BUF)
525			return (EINVAL);
526		*retval = PIPE_BUF;
527		return (0);
528
529	case DTYPE_FIFO:
530	case DTYPE_VNODE:
531		vp = (struct vnode *)fp->f_data;
532		return (VOP_PATHCONF(vp, uap->name, retval));
533
534	default:
535		panic("fpathconf");
536	}
537	/*NOTREACHED*/
538}
539
540/*
541 * Allocate a file descriptor for the process.
542 */
543static int fdexpand;
544SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
545
546int
547fdalloc(p, want, result)
548	struct proc *p;
549	int want;
550	int *result;
551{
552	register struct filedesc *fdp = p->p_fd;
553	register int i;
554	int lim, last, nfiles;
555	struct file **newofile;
556	char *newofileflags;
557
558	/*
559	 * Search for a free descriptor starting at the higher
560	 * of want or fd_freefile.  If that fails, consider
561	 * expanding the ofile array.
562	 */
563	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
564	for (;;) {
565		last = min(fdp->fd_nfiles, lim);
566		if ((i = want) < fdp->fd_freefile)
567			i = fdp->fd_freefile;
568		for (; i < last; i++) {
569			if (fdp->fd_ofiles[i] == NULL) {
570				fdp->fd_ofileflags[i] = 0;
571				if (i > fdp->fd_lastfile)
572					fdp->fd_lastfile = i;
573				if (want <= fdp->fd_freefile)
574					fdp->fd_freefile = i;
575				*result = i;
576				return (0);
577			}
578		}
579
580		/*
581		 * No space in current array.  Expand?
582		 */
583		if (fdp->fd_nfiles >= lim)
584			return (EMFILE);
585		if (fdp->fd_nfiles < NDEXTENT)
586			nfiles = NDEXTENT;
587		else
588			nfiles = 2 * fdp->fd_nfiles;
589		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
590		    M_FILEDESC, M_WAITOK);
591		newofileflags = (char *) &newofile[nfiles];
592		/*
593		 * Copy the existing ofile and ofileflags arrays
594		 * and zero the new portion of each array.
595		 */
596		bcopy(fdp->fd_ofiles, newofile,
597			(i = sizeof(struct file *) * fdp->fd_nfiles));
598		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
599		bcopy(fdp->fd_ofileflags, newofileflags,
600			(i = sizeof(char) * fdp->fd_nfiles));
601		bzero(newofileflags + i, nfiles * sizeof(char) - i);
602		if (fdp->fd_nfiles > NDFILE)
603			FREE(fdp->fd_ofiles, M_FILEDESC);
604		fdp->fd_ofiles = newofile;
605		fdp->fd_ofileflags = newofileflags;
606		fdp->fd_nfiles = nfiles;
607		fdexpand++;
608	}
609	return (0);
610}
611
612/*
613 * Check to see whether n user file descriptors
614 * are available to the process p.
615 */
616int
617fdavail(p, n)
618	struct proc *p;
619	register int n;
620{
621	register struct filedesc *fdp = p->p_fd;
622	register struct file **fpp;
623	register int i, lim, last;
624
625	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
626	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
627		return (1);
628
629	last = min(fdp->fd_nfiles, lim);
630	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
631	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
632		if (*fpp == NULL && --n <= 0)
633			return (1);
634	return (0);
635}
636
637/*
638 * Create a new open file structure and allocate
639 * a file decriptor for the process that refers to it.
640 */
641int
642falloc(p, resultfp, resultfd)
643	register struct proc *p;
644	struct file **resultfp;
645	int *resultfd;
646{
647	register struct file *fp, *fq;
648	int error, i;
649
650	if ((error = fdalloc(p, 0, &i)))
651		return (error);
652	if (nfiles >= maxfiles) {
653		tablefull("file");
654		return (ENFILE);
655	}
656	/*
657	 * Allocate a new file descriptor.
658	 * If the process has file descriptor zero open, add to the list
659	 * of open files at that point, otherwise put it at the front of
660	 * the list of open files.
661	 */
662	nfiles++;
663	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
664	bzero(fp, sizeof(struct file));
665	if ((fq = p->p_fd->fd_ofiles[0])) {
666		LIST_INSERT_AFTER(fq, fp, f_list);
667	} else {
668		LIST_INSERT_HEAD(&filehead, fp, f_list);
669	}
670	p->p_fd->fd_ofiles[i] = fp;
671	fp->f_count = 1;
672	fp->f_cred = p->p_ucred;
673	fp->f_seqcount = 1;
674	crhold(fp->f_cred);
675	if (resultfp)
676		*resultfp = fp;
677	if (resultfd)
678		*resultfd = i;
679	return (0);
680}
681
682/*
683 * Free a file descriptor.
684 */
685void
686ffree(fp)
687	register struct file *fp;
688{
689	LIST_REMOVE(fp, f_list);
690	crfree(fp->f_cred);
691#ifdef DIAGNOSTIC
692	fp->f_count = 0;
693#endif
694	nfiles--;
695	FREE(fp, M_FILE);
696}
697
698/*
699 * Build a new filedesc structure.
700 */
701struct filedesc *
702fdinit(p)
703	struct proc *p;
704{
705	register struct filedesc0 *newfdp;
706	register struct filedesc *fdp = p->p_fd;
707
708	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
709	    M_FILEDESC, M_WAITOK);
710	bzero(newfdp, sizeof(struct filedesc0));
711	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
712	VREF(newfdp->fd_fd.fd_cdir);
713	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
714	if (newfdp->fd_fd.fd_rdir)
715		VREF(newfdp->fd_fd.fd_rdir);
716
717	/* Create the file descriptor table. */
718	newfdp->fd_fd.fd_refcnt = 1;
719	newfdp->fd_fd.fd_cmask = cmask;
720	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
721	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
722	newfdp->fd_fd.fd_nfiles = NDFILE;
723
724	newfdp->fd_fd.fd_freefile = 0;
725	newfdp->fd_fd.fd_lastfile = 0;
726
727	return (&newfdp->fd_fd);
728}
729
730/*
731 * Share a filedesc structure.
732 */
733struct filedesc *
734fdshare(p)
735	struct proc *p;
736{
737	p->p_fd->fd_refcnt++;
738	return (p->p_fd);
739}
740
741/*
742 * Copy a filedesc structure.
743 */
744struct filedesc *
745fdcopy(p)
746	struct proc *p;
747{
748	register struct filedesc *newfdp, *fdp = p->p_fd;
749	register struct file **fpp;
750	register int i;
751
752	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
753	    M_FILEDESC, M_WAITOK);
754	bcopy(fdp, newfdp, sizeof(struct filedesc));
755	VREF(newfdp->fd_cdir);
756	if (newfdp->fd_rdir)
757		VREF(newfdp->fd_rdir);
758	newfdp->fd_refcnt = 1;
759
760	/*
761	 * If the number of open files fits in the internal arrays
762	 * of the open file structure, use them, otherwise allocate
763	 * additional memory for the number of descriptors currently
764	 * in use.
765	 */
766	if (newfdp->fd_lastfile < NDFILE) {
767		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
768		newfdp->fd_ofileflags =
769		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
770		i = NDFILE;
771	} else {
772		/*
773		 * Compute the smallest multiple of NDEXTENT needed
774		 * for the file descriptors currently in use,
775		 * allowing the table to shrink.
776		 */
777		i = newfdp->fd_nfiles;
778		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
779			i /= 2;
780		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
781		    M_FILEDESC, M_WAITOK);
782		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
783	}
784	newfdp->fd_nfiles = i;
785	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
786	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
787	fpp = newfdp->fd_ofiles;
788	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
789		if (*fpp != NULL)
790			(*fpp)->f_count++;
791	return (newfdp);
792}
793
794/*
795 * Release a filedesc structure.
796 */
797void
798fdfree(p)
799	struct proc *p;
800{
801	register struct filedesc *fdp = p->p_fd;
802	struct file **fpp;
803	register int i;
804
805	if (--fdp->fd_refcnt > 0)
806		return;
807	fpp = fdp->fd_ofiles;
808	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
809		if (*fpp)
810			(void) closef(*fpp, p);
811	if (fdp->fd_nfiles > NDFILE)
812		FREE(fdp->fd_ofiles, M_FILEDESC);
813	vrele(fdp->fd_cdir);
814	if (fdp->fd_rdir)
815		vrele(fdp->fd_rdir);
816	FREE(fdp, M_FILEDESC);
817}
818
819/*
820 * Close any files on exec?
821 */
822void
823fdcloseexec(p)
824	struct proc *p;
825{
826	struct filedesc *fdp = p->p_fd;
827	struct file **fpp;
828	char *fdfp;
829	register int i;
830
831	fpp = fdp->fd_ofiles;
832	fdfp = fdp->fd_ofileflags;
833	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
834		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
835			if (*fdfp & UF_MAPPED)
836				(void) munmapfd(p, i);
837			(void) closef(*fpp, p);
838			*fpp = NULL;
839			*fdfp = 0;
840			if (i < fdp->fd_freefile)
841				fdp->fd_freefile = i;
842		}
843	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
844		fdp->fd_lastfile--;
845}
846
847/*
848 * Internal form of close.
849 * Decrement reference count on file structure.
850 * Note: p may be NULL when closing a file
851 * that was being passed in a message.
852 */
853int
854closef(fp, p)
855	register struct file *fp;
856	register struct proc *p;
857{
858	struct vnode *vp;
859	struct flock lf;
860	int error;
861
862	if (fp == NULL)
863		return (0);
864	/*
865	 * POSIX record locking dictates that any close releases ALL
866	 * locks owned by this process.  This is handled by setting
867	 * a flag in the unlock to free ONLY locks obeying POSIX
868	 * semantics, and not to free BSD-style file locks.
869	 * If the descriptor was in a message, POSIX-style locks
870	 * aren't passed with the descriptor.
871	 */
872	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
873		lf.l_whence = SEEK_SET;
874		lf.l_start = 0;
875		lf.l_len = 0;
876		lf.l_type = F_UNLCK;
877		vp = (struct vnode *)fp->f_data;
878		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
879	}
880	if (--fp->f_count > 0)
881		return (0);
882	if (fp->f_count < 0)
883		panic("closef: count < 0");
884	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
885		lf.l_whence = SEEK_SET;
886		lf.l_start = 0;
887		lf.l_len = 0;
888		lf.l_type = F_UNLCK;
889		vp = (struct vnode *)fp->f_data;
890		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
891	}
892	if (fp->f_ops)
893		error = (*fp->f_ops->fo_close)(fp, p);
894	else
895		error = 0;
896	ffree(fp);
897	return (error);
898}
899
900/*
901 * Apply an advisory lock on a file descriptor.
902 *
903 * Just attempt to get a record lock of the requested type on
904 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
905 */
906#ifndef _SYS_SYSPROTO_H_
907struct flock_args {
908	int	fd;
909	int	how;
910};
911#endif
912/* ARGSUSED */
913int
914flock(p, uap, retval)
915	struct proc *p;
916	register struct flock_args *uap;
917	int *retval;
918{
919	register struct filedesc *fdp = p->p_fd;
920	register struct file *fp;
921	struct vnode *vp;
922	struct flock lf;
923
924	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
925	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
926		return (EBADF);
927	if (fp->f_type != DTYPE_VNODE)
928		return (EOPNOTSUPP);
929	vp = (struct vnode *)fp->f_data;
930	lf.l_whence = SEEK_SET;
931	lf.l_start = 0;
932	lf.l_len = 0;
933	if (uap->how & LOCK_UN) {
934		lf.l_type = F_UNLCK;
935		fp->f_flag &= ~FHASLOCK;
936		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
937	}
938	if (uap->how & LOCK_EX)
939		lf.l_type = F_WRLCK;
940	else if (uap->how & LOCK_SH)
941		lf.l_type = F_RDLCK;
942	else
943		return (EBADF);
944	fp->f_flag |= FHASLOCK;
945	if (uap->how & LOCK_NB)
946		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
947	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
948}
949
950/*
951 * File Descriptor pseudo-device driver (/dev/fd/).
952 *
953 * Opening minor device N dup()s the file (if any) connected to file
954 * descriptor N belonging to the calling process.  Note that this driver
955 * consists of only the ``open()'' routine, because all subsequent
956 * references to this file will be direct to the other driver.
957 */
958/* ARGSUSED */
959static int
960fdopen(dev, mode, type, p)
961	dev_t dev;
962	int mode, type;
963	struct proc *p;
964{
965
966	/*
967	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
968	 * the file descriptor being sought for duplication. The error
969	 * return ensures that the vnode for this device will be released
970	 * by vn_open. Open will detect this special error and take the
971	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
972	 * will simply report the error.
973	 */
974	p->p_dupfd = minor(dev);
975	return (ENODEV);
976}
977
978/*
979 * Duplicate the specified descriptor to a free descriptor.
980 */
981int
982dupfdopen(fdp, indx, dfd, mode, error)
983	register struct filedesc *fdp;
984	register int indx, dfd;
985	int mode;
986	int error;
987{
988	register struct file *wfp;
989	struct file *fp;
990
991	/*
992	 * If the to-be-dup'd fd number is greater than the allowed number
993	 * of file descriptors, or the fd to be dup'd has already been
994	 * closed, reject.  Note, check for new == old is necessary as
995	 * falloc could allocate an already closed to-be-dup'd descriptor
996	 * as the new descriptor.
997	 */
998	fp = fdp->fd_ofiles[indx];
999	if ((u_int)dfd >= fdp->fd_nfiles ||
1000	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1001		return (EBADF);
1002
1003	/*
1004	 * There are two cases of interest here.
1005	 *
1006	 * For ENODEV simply dup (dfd) to file descriptor
1007	 * (indx) and return.
1008	 *
1009	 * For ENXIO steal away the file structure from (dfd) and
1010	 * store it in (indx).  (dfd) is effectively closed by
1011	 * this operation.
1012	 *
1013	 * Any other error code is just returned.
1014	 */
1015	switch (error) {
1016	case ENODEV:
1017		/*
1018		 * Check that the mode the file is being opened for is a
1019		 * subset of the mode of the existing descriptor.
1020		 */
1021		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1022			return (EACCES);
1023		fdp->fd_ofiles[indx] = wfp;
1024		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1025		wfp->f_count++;
1026		if (indx > fdp->fd_lastfile)
1027			fdp->fd_lastfile = indx;
1028		return (0);
1029
1030	case ENXIO:
1031		/*
1032		 * Steal away the file pointer from dfd, and stuff it into indx.
1033		 */
1034		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1035		fdp->fd_ofiles[dfd] = NULL;
1036		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1037		fdp->fd_ofileflags[dfd] = 0;
1038		/*
1039		 * Complete the clean up of the filedesc structure by
1040		 * recomputing the various hints.
1041		 */
1042		if (indx > fdp->fd_lastfile)
1043			fdp->fd_lastfile = indx;
1044		else
1045			while (fdp->fd_lastfile > 0 &&
1046			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1047				fdp->fd_lastfile--;
1048			if (dfd < fdp->fd_freefile)
1049				fdp->fd_freefile = dfd;
1050		return (0);
1051
1052	default:
1053		return (error);
1054	}
1055	/* NOTREACHED */
1056}
1057
1058/*
1059 * Get file structures.
1060 */
1061static int
1062sysctl_kern_file SYSCTL_HANDLER_ARGS
1063{
1064	int error;
1065	struct file *fp;
1066
1067	if (!req->oldptr) {
1068		/*
1069		 * overestimate by 10 files
1070		 */
1071		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1072				(nfiles + 10) * sizeof(struct file)));
1073	}
1074
1075	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1076	if (error)
1077		return (error);
1078
1079	/*
1080	 * followed by an array of file structures
1081	 */
1082	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1083		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1084		if (error)
1085			return (error);
1086	}
1087	return (0);
1088}
1089
1090SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1091	0, 0, sysctl_kern_file, "S,file", "");
1092
1093SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1094	CTLFLAG_RW, &maxfilesperproc, 0, "");
1095
1096SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1097
1098static fildesc_devsw_installed = 0;
1099#ifdef DEVFS
1100static	void *devfs_token_stdin;
1101static	void *devfs_token_stdout;
1102static	void *devfs_token_stderr;
1103static	void *devfs_token_fildesc[NUMFDESC];
1104#endif
1105
1106static void 	fildesc_drvinit(void *unused)
1107{
1108	dev_t dev;
1109#ifdef DEVFS
1110	int fd;
1111#endif
1112
1113	if( ! fildesc_devsw_installed ) {
1114		dev = makedev(CDEV_MAJOR,0);
1115		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1116		fildesc_devsw_installed = 1;
1117#ifdef DEVFS
1118		for (fd = 0; fd < NUMFDESC; fd++)
1119			devfs_token_fildesc[fd] =
1120				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1121						 UID_BIN, GID_BIN, 0666,
1122						 "fd/%d", fd);
1123		devfs_token_stdin =
1124			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1125					 UID_ROOT, GID_WHEEL, 0666,
1126					 "stdin", fd);
1127		devfs_token_stdout =
1128			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1129					 UID_ROOT, GID_WHEEL, 0666,
1130					 "stdout", fd);
1131		devfs_token_stderr =
1132			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1133					 UID_ROOT, GID_WHEEL, 0666,
1134					 "stderr", fd);
1135#endif
1136    	}
1137}
1138
1139SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1140					fildesc_drvinit,NULL)
1141
1142
1143