kern_descrip.c revision 36844
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.52 1998/05/11 03:55:24 dyson Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_devfs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/kernel.h>
51#include <sys/sysctl.h>
52#include <sys/vnode.h>
53#include <sys/proc.h>
54#include <sys/file.h>
55#include <sys/socketvar.h>
56#include <sys/stat.h>
57#include <sys/filio.h>
58#include <sys/ttycom.h>
59#include <sys/fcntl.h>
60#include <sys/malloc.h>
61#include <sys/unistd.h>
62#include <sys/resourcevar.h>
63#include <sys/pipe.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67
68#ifdef DEVFS
69#include <sys/devfsext.h>
70#endif /*DEVFS*/
71
/*
 * Malloc types used in this file: M_FILEDESC tags descriptor-table
 * allocations, M_FILE tags individual open file structures.
 */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
MALLOC_DEFINE(M_FILE, "file", "Open file structure");


/* Open routine of the file descriptor pseudo-device (defined below). */
static	 d_open_t  fdopen;
/* Number of "fd/N" nodes registered with DEVFS by fildesc_drvinit(). */
#define NUMFDESC 64

/* Character device major number handed to makedev() in fildesc_drvinit(). */
#define CDEV_MAJOR 22
/*
 * Device switch for the descriptor pseudo-device.  fdopen is the only
 * entry implemented in this file; the other slots use generic handlers.
 */
static struct cdevsw fildesc_cdevsw =
	{ fdopen,	noclose,	noread,		nowrite,
	  noioc,	nostop,		nullreset,	nodevtotty,
	  seltrue,	nommap,		nostrat };

/* Shared tail of dup(), dup2() and fcntl(F_DUPFD). */
static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
/* Creation mask copied into each new descriptor table by fdinit(). */
extern int cmask;
92
93/*
94 * System calls on descriptors.
95 */
96#ifndef _SYS_SYSPROTO_H_
97struct getdtablesize_args {
98	int	dummy;
99};
100#endif
101/* ARGSUSED */
102int
103getdtablesize(p, uap)
104	struct proc *p;
105	struct getdtablesize_args *uap;
106{
107
108	p->p_retval[0] =
109	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
110	return (0);
111}
112
113/*
114 * Duplicate a file descriptor to a particular value.
115 */
116#ifndef _SYS_SYSPROTO_H_
117struct dup2_args {
118	u_int	from;
119	u_int	to;
120};
121#endif
122/* ARGSUSED */
123int
124dup2(p, uap)
125	struct proc *p;
126	struct dup2_args *uap;
127{
128	register struct filedesc *fdp = p->p_fd;
129	register u_int old = uap->from, new = uap->to;
130	int i, error;
131
132	if (old >= fdp->fd_nfiles ||
133	    fdp->fd_ofiles[old] == NULL ||
134	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
135	    new >= maxfilesperproc)
136		return (EBADF);
137	if (old == new) {
138		p->p_retval[0] = new;
139		return (0);
140	}
141	if (new >= fdp->fd_nfiles) {
142		if ((error = fdalloc(p, new, &i)))
143			return (error);
144		if (new != i)
145			panic("dup2: fdalloc");
146	} else if (fdp->fd_ofiles[new]) {
147		if (fdp->fd_ofileflags[new] & UF_MAPPED)
148			(void) munmapfd(p, new);
149		/*
150		 * dup2() must succeed even if the close has an error.
151		 */
152		(void) closef(fdp->fd_ofiles[new], p);
153	}
154	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
155}
156
157/*
158 * Duplicate a file descriptor.
159 */
160#ifndef _SYS_SYSPROTO_H_
161struct dup_args {
162	u_int	fd;
163};
164#endif
165/* ARGSUSED */
166int
167dup(p, uap)
168	struct proc *p;
169	struct dup_args *uap;
170{
171	register struct filedesc *fdp;
172	u_int old;
173	int new, error;
174
175	old = uap->fd;
176
177#if 0
178	/*
179	 * XXX Compatibility
180	 */
181	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
182#endif
183
184	fdp = p->p_fd;
185	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
186		return (EBADF);
187	if ((error = fdalloc(p, 0, &new)))
188		return (error);
189	return (finishdup(fdp, (int)old, new, p->p_retval));
190}
191
/*
 * The file control system call.
 *
 * Dispatches on uap->cmd for the open descriptor uap->fd.  Returns
 * EBADF when uap->fd is out of range or not open, and EINVAL for an
 * unknown command.  Commands that produce a value store it in
 * p->p_retval[0].
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	int	arg;
};
#endif
/* ARGSUSED */
int
fcntl(p, uap)
	struct proc *p;
	register struct fcntl_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	register char *pop;
	struct vnode *vp;
	int i, tmp, error, flg = F_POSIX;
	struct flock fl;
	u_int newmin;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	/* pop points at the per-descriptor flag byte for uap->fd. */
	pop = &fdp->fd_ofileflags[uap->fd];
	switch (uap->cmd) {

	case F_DUPFD:
		/*
		 * Duplicate onto the lowest free descriptor >= arg.  The
		 * unsigned newmin makes a negative arg fail the range test.
		 */
		newmin = uap->arg;
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc)
			return (EINVAL);
		if ((error = fdalloc(p, newmin, &i)))
			return (error);
		return (finishdup(fdp, uap->fd, i, p->p_retval));

	case F_GETFD:
		/* Report bit 0 of the per-descriptor flags. */
		p->p_retval[0] = *pop & 1;
		return (0);

	case F_SETFD:
		/* Replace bit 0 of the per-descriptor flags; keep the rest. */
		*pop = (*pop &~ 1) | (uap->arg & 1);
		return (0);

	case F_GETFL:
		p->p_retval[0] = OFLAGS(fp->f_flag);
		return (0);

	case F_SETFL:
		/*
		 * Replace the FCNTLFLAGS bits, then push the non-blocking
		 * and async settings down to the underlying object.
		 */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
		tmp = fp->f_flag & FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		if (error)
			return (error);
		tmp = fp->f_flag & FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
		if (!error)
			return (0);
		/*
		 * FIOASYNC failed: clear FNONBLOCK again, both in f_flag
		 * and in the object, and report the FIOASYNC error.
		 */
		fp->f_flag &= ~FNONBLOCK;
		tmp = 0;
		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
		return (error);

	case F_GETOWN:
		/* Sockets keep the owner in so_pgid. */
		if (fp->f_type == DTYPE_SOCKET) {
			p->p_retval[0] = ((struct socket *)fp->f_data)->so_pgid;
			return (0);
		}
		/* Other objects are asked via TIOCGPGRP; the result is negated. */
		error = (*fp->f_ops->fo_ioctl)
			(fp, TIOCGPGRP, (caddr_t)p->p_retval, p);
		/* NOTE(review): the negation also runs when the ioctl failed. */
		p->p_retval[0] = - p->p_retval[0];
		return (error);

	case F_SETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			((struct socket *)fp->f_data)->so_pgid = uap->arg;
			return (0);
		}
		/*
		 * A non-positive arg is negated and passed on as is; a
		 * positive arg is looked up with pfind() and replaced by
		 * that process' group id before TIOCSPGRP is issued.
		 */
		if (uap->arg <= 0) {
			uap->arg = -uap->arg;
		} else {
			struct proc *p1 = pfind(uap->arg);
			if (p1 == 0)
				return (ESRCH);
			uap->arg = p1->p_pgrp->pg_id;
		}
		return ((*fp->f_ops->fo_ioctl)
			(fp, TIOCSPGRP, (caddr_t)&uap->arg, p));

	case F_SETLKW:
		/* Same as F_SETLK, but the lock request may wait. */
		flg |= F_WAIT;
		/* Fall into F_SETLK */

	case F_SETLK:
		/* Record locks are only supported on vnodes. */
		if (fp->f_type != DTYPE_VNODE)
			return (EBADF);
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
		if (error)
			return (error);
		/* Make a SEEK_CUR start relative to the current file offset. */
		if (fl.l_whence == SEEK_CUR)
			fl.l_start += fp->f_offset;
		switch (fl.l_type) {

		case F_RDLCK:
			/* A read lock needs the file open for reading. */
			if ((fp->f_flag & FREAD) == 0)
				return (EBADF);
			/* Remember that this process took a lock; closef() checks it. */
			p->p_flag |= P_ADVLOCK;
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));

		case F_WRLCK:
			/* A write lock needs the file open for writing. */
			if ((fp->f_flag & FWRITE) == 0)
				return (EBADF);
			p->p_flag |= P_ADVLOCK;
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));

		case F_UNLCK:
			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
				F_POSIX));

		default:
			return (EINVAL);
		}

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE)
			return (EBADF);
		vp = (struct vnode *)fp->f_data;
		/* Copy in the lock structure */
		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
		if (error)
			return (error);
		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
		    fl.l_type != F_UNLCK)
			return (EINVAL);
		if (fl.l_whence == SEEK_CUR)
			fl.l_start += fp->f_offset;
		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
			return (error);
		/* Hand the (possibly updated) lock description back to the caller. */
		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));

	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}
343
344/*
345 * Common code for dup, dup2, and fcntl(F_DUPFD).
346 */
347static int
348finishdup(fdp, old, new, retval)
349	register struct filedesc *fdp;
350	register int old, new;
351	register_t *retval;
352{
353	register struct file *fp;
354
355	fp = fdp->fd_ofiles[old];
356	fdp->fd_ofiles[new] = fp;
357	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
358	fp->f_count++;
359	if (new > fdp->fd_lastfile)
360		fdp->fd_lastfile = new;
361	*retval = new;
362	return (0);
363}
364
365/*
366 * Close a file descriptor.
367 */
368#ifndef _SYS_SYSPROTO_H_
369struct close_args {
370        int     fd;
371};
372#endif
373/* ARGSUSED */
374int
375close(p, uap)
376	struct proc *p;
377	struct close_args *uap;
378{
379	register struct filedesc *fdp = p->p_fd;
380	register struct file *fp;
381	register int fd = uap->fd;
382	register u_char *pf;
383
384	if ((unsigned)fd >= fdp->fd_nfiles ||
385	    (fp = fdp->fd_ofiles[fd]) == NULL)
386		return (EBADF);
387	pf = (u_char *)&fdp->fd_ofileflags[fd];
388	if (*pf & UF_MAPPED)
389		(void) munmapfd(p, fd);
390	fdp->fd_ofiles[fd] = NULL;
391	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
392		fdp->fd_lastfile--;
393	if (fd < fdp->fd_freefile)
394		fdp->fd_freefile = fd;
395	*pf = 0;
396	return (closef(fp, p));
397}
398
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * (struct ostat) layout.
 *
 * Returns EBADF when uap->fd is out of range or not open, otherwise
 * the error from the type-specific stat routine or from copyout().
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	/*
	 * Convert only after a successful stat; on failure ub may not
	 * have been filled in and must not be read.
	 */
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
449
450/*
451 * Return status information about a file descriptor.
452 */
453#ifndef _SYS_SYSPROTO_H_
454struct fstat_args {
455	int	fd;
456	struct	stat *sb;
457};
458#endif
459/* ARGSUSED */
460int
461fstat(p, uap)
462	struct proc *p;
463	register struct fstat_args *uap;
464{
465	register struct filedesc *fdp = p->p_fd;
466	register struct file *fp;
467	struct stat ub;
468	int error;
469
470	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
471	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
472		return (EBADF);
473	switch (fp->f_type) {
474
475	case DTYPE_FIFO:
476	case DTYPE_VNODE:
477		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
478		break;
479
480	case DTYPE_SOCKET:
481		error = soo_stat((struct socket *)fp->f_data, &ub);
482		break;
483
484	case DTYPE_PIPE:
485		error = pipe_stat((struct pipe *)fp->f_data, &ub);
486		break;
487
488	default:
489		panic("fstat");
490		/*NOTREACHED*/
491	}
492	if (error == 0)
493		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
494	return (error);
495}
496
497/*
498 * Return status information about a file descriptor.
499 */
500#ifndef _SYS_SYSPROTO_H_
501struct nfstat_args {
502	int	fd;
503	struct	nstat *sb;
504};
505#endif
506/* ARGSUSED */
507int
508nfstat(p, uap)
509	struct proc *p;
510	register struct nfstat_args *uap;
511{
512	register struct filedesc *fdp = p->p_fd;
513	register struct file *fp;
514	struct stat ub;
515	struct nstat nub;
516	int error;
517
518	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
519	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
520		return (EBADF);
521	switch (fp->f_type) {
522
523	case DTYPE_FIFO:
524	case DTYPE_VNODE:
525		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
526		break;
527
528	case DTYPE_SOCKET:
529		error = soo_stat((struct socket *)fp->f_data, &ub);
530		break;
531
532	case DTYPE_PIPE:
533		error = pipe_stat((struct pipe *)fp->f_data, &ub);
534		break;
535
536	default:
537		panic("fstat");
538		/*NOTREACHED*/
539	}
540	if (error == 0) {
541		cvtnstat(&ub, &nub);
542		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
543	}
544	return (error);
545}
546
547/*
548 * Return pathconf information about a file descriptor.
549 */
550#ifndef _SYS_SYSPROTO_H_
551struct fpathconf_args {
552	int	fd;
553	int	name;
554};
555#endif
556/* ARGSUSED */
557int
558fpathconf(p, uap)
559	struct proc *p;
560	register struct fpathconf_args *uap;
561{
562	struct filedesc *fdp = p->p_fd;
563	struct file *fp;
564	struct vnode *vp;
565
566	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
567	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
568		return (EBADF);
569	switch (fp->f_type) {
570
571	case DTYPE_PIPE:
572	case DTYPE_SOCKET:
573		if (uap->name != _PC_PIPE_BUF)
574			return (EINVAL);
575		p->p_retval[0] = PIPE_BUF;
576		return (0);
577
578	case DTYPE_FIFO:
579	case DTYPE_VNODE:
580		vp = (struct vnode *)fp->f_data;
581		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
582
583	default:
584		panic("fpathconf");
585	}
586	/*NOTREACHED*/
587}
588
/*
 * Allocate a file descriptor for the process.
 *
 * Finds the lowest free slot that is >= want, growing the descriptor
 * table when necessary, and stores its index in *result.  Returns 0 on
 * success or EMFILE when the table already covers the process limit.
 * The slot's fd_ofiles entry is left NULL for the caller to fill.
 */
/* Count of table expansions, exported read-only as debug.fdexpand. */
static int fdexpand;
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");

int
fdalloc(p, want, result)
	struct proc *p;
	int want;
	int *result;
{
	register struct filedesc *fdp = p->p_fd;
	register int i;
	/* NOTE: the local nfiles shadows the file-scope open-file counter. */
	int lim, last, nfiles;
	struct file **newofile;
	char *newofileflags;

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		/* Never hand out a slot at or beyond the limit. */
		last = min(fdp->fd_nfiles, lim);
		if ((i = want) < fdp->fd_freefile)
			i = fdp->fd_freefile;
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				/*
				 * Move the free-slot hint only when the search
				 * was not started above it.
				 */
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (fdp->fd_nfiles >= lim)
			return (EMFILE);
		/* Grow to NDEXTENT first, then double on each further pass. */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		/* The flag bytes live directly behind the pointer array. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		bcopy(fdp->fd_ofiles, newofile,
			(i = sizeof(struct file *) * fdp->fd_nfiles));
		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
		bcopy(fdp->fd_ofileflags, newofileflags,
			(i = sizeof(char) * fdp->fd_nfiles));
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/*
		 * A table of NDFILE entries is the array embedded in
		 * struct filedesc0 (see fdinit) and must not be freed.
		 */
		if (fdp->fd_nfiles > NDFILE)
			FREE(fdp->fd_ofiles, M_FILEDESC);
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
	}
	return (0);
}
660
661/*
662 * Check to see whether n user file descriptors
663 * are available to the process p.
664 */
665int
666fdavail(p, n)
667	struct proc *p;
668	register int n;
669{
670	register struct filedesc *fdp = p->p_fd;
671	register struct file **fpp;
672	register int i, lim, last;
673
674	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
675	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
676		return (1);
677
678	last = min(fdp->fd_nfiles, lim);
679	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
680	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
681		if (*fpp == NULL && --n <= 0)
682			return (1);
683	return (0);
684}
685
686/*
687 * Create a new open file structure and allocate
688 * a file decriptor for the process that refers to it.
689 */
690int
691falloc(p, resultfp, resultfd)
692	register struct proc *p;
693	struct file **resultfp;
694	int *resultfd;
695{
696	register struct file *fp, *fq;
697	int error, i;
698
699	if ((error = fdalloc(p, 0, &i)))
700		return (error);
701	if (nfiles >= maxfiles) {
702		tablefull("file");
703		return (ENFILE);
704	}
705	/*
706	 * Allocate a new file descriptor.
707	 * If the process has file descriptor zero open, add to the list
708	 * of open files at that point, otherwise put it at the front of
709	 * the list of open files.
710	 */
711	nfiles++;
712	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
713	bzero(fp, sizeof(struct file));
714	if ((fq = p->p_fd->fd_ofiles[0])) {
715		LIST_INSERT_AFTER(fq, fp, f_list);
716	} else {
717		LIST_INSERT_HEAD(&filehead, fp, f_list);
718	}
719	p->p_fd->fd_ofiles[i] = fp;
720	fp->f_count = 1;
721	fp->f_cred = p->p_ucred;
722	fp->f_seqcount = 1;
723	crhold(fp->f_cred);
724	if (resultfp)
725		*resultfp = fp;
726	if (resultfd)
727		*resultfd = i;
728	return (0);
729}
730
731/*
732 * Free a file descriptor.
733 */
734void
735ffree(fp)
736	register struct file *fp;
737{
738	LIST_REMOVE(fp, f_list);
739	crfree(fp->f_cred);
740#ifdef DIAGNOSTIC
741	fp->f_count = 0;
742#endif
743	nfiles--;
744	FREE(fp, M_FILE);
745}
746
747/*
748 * Build a new filedesc structure.
749 */
750struct filedesc *
751fdinit(p)
752	struct proc *p;
753{
754	register struct filedesc0 *newfdp;
755	register struct filedesc *fdp = p->p_fd;
756
757	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
758	    M_FILEDESC, M_WAITOK);
759	bzero(newfdp, sizeof(struct filedesc0));
760	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
761	VREF(newfdp->fd_fd.fd_cdir);
762	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
763	VREF(newfdp->fd_fd.fd_rdir);
764
765	/* Create the file descriptor table. */
766	newfdp->fd_fd.fd_refcnt = 1;
767	newfdp->fd_fd.fd_cmask = cmask;
768	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
769	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
770	newfdp->fd_fd.fd_nfiles = NDFILE;
771
772	newfdp->fd_fd.fd_freefile = 0;
773	newfdp->fd_fd.fd_lastfile = 0;
774
775	return (&newfdp->fd_fd);
776}
777
778/*
779 * Share a filedesc structure.
780 */
781struct filedesc *
782fdshare(p)
783	struct proc *p;
784{
785	p->p_fd->fd_refcnt++;
786	return (p->p_fd);
787}
788
/*
 * Copy a filedesc structure.
 *
 * Returns a private copy of p's descriptor table with a reference
 * count of 1, or NULL when p has no table.  Every open file in the
 * copy gains a reference, as do the current and root directories.
 */
struct filedesc *
fdcopy(p)
	struct proc *p;
{
	register struct filedesc *newfdp, *fdp = p->p_fd;
	register struct file **fpp;
	register int i;

/*
 * Certain daemons might not have file descriptors
 */
	if (fdp == NULL)
		return NULL;

	/*
	 * Allocate a full filedesc0 so the embedded arrays are available,
	 * but copy only the leading struct filedesc part.
	 */
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	VREF(newfdp->fd_cdir);
	VREF(newfdp->fd_rdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 *
		 * NOTE(review): the loop below halves the source table
		 * size while it stays above 2 * NDEXTENT and above twice
		 * fd_lastfile; the result is not necessarily a multiple
		 * of NDEXTENT.
		 */
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		/* The flag bytes live directly behind the pointer array. */
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	/*
	 * NOTE(review): the element size is spelled sizeof(struct file **)
	 * although the array holds struct file *; presumably both are the
	 * same size on supported platforms.
	 */
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
	/* Take a reference on every file in slots 0 .. fd_lastfile. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
		if (*fpp != NULL)
			(*fpp)->f_count++;
	return (newfdp);
}
846
847/*
848 * Release a filedesc structure.
849 */
850void
851fdfree(p)
852	struct proc *p;
853{
854	register struct filedesc *fdp = p->p_fd;
855	struct file **fpp;
856	register int i;
857
858/*
859 * Certain daemons might not have file descriptors
860 */
861	if (fdp == NULL)
862		return;
863
864	if (--fdp->fd_refcnt > 0)
865		return;
866	fpp = fdp->fd_ofiles;
867	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
868		if (*fpp)
869			(void) closef(*fpp, p);
870	if (fdp->fd_nfiles > NDFILE)
871		FREE(fdp->fd_ofiles, M_FILEDESC);
872	vrele(fdp->fd_cdir);
873	vrele(fdp->fd_rdir);
874	FREE(fdp, M_FILEDESC);
875}
876
877/*
878 * Close any files on exec?
879 */
880void
881fdcloseexec(p)
882	struct proc *p;
883{
884	struct filedesc *fdp = p->p_fd;
885	struct file **fpp;
886	char *fdfp;
887	register int i;
888
889/*
890 * Certain daemons might not have file descriptors
891 */
892	if (fdp == NULL)
893		return;
894
895	fpp = fdp->fd_ofiles;
896	fdfp = fdp->fd_ofileflags;
897	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
898		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
899			if (*fdfp & UF_MAPPED)
900				(void) munmapfd(p, i);
901			(void) closef(*fpp, p);
902			*fpp = NULL;
903			*fdfp = 0;
904			if (i < fdp->fd_freefile)
905				fdp->fd_freefile = i;
906		}
907	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
908		fdp->fd_lastfile--;
909}
910
911/*
912 * Internal form of close.
913 * Decrement reference count on file structure.
914 * Note: p may be NULL when closing a file
915 * that was being passed in a message.
916 */
917int
918closef(fp, p)
919	register struct file *fp;
920	register struct proc *p;
921{
922	struct vnode *vp;
923	struct flock lf;
924	int error;
925
926	if (fp == NULL)
927		return (0);
928	/*
929	 * POSIX record locking dictates that any close releases ALL
930	 * locks owned by this process.  This is handled by setting
931	 * a flag in the unlock to free ONLY locks obeying POSIX
932	 * semantics, and not to free BSD-style file locks.
933	 * If the descriptor was in a message, POSIX-style locks
934	 * aren't passed with the descriptor.
935	 */
936	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
937		lf.l_whence = SEEK_SET;
938		lf.l_start = 0;
939		lf.l_len = 0;
940		lf.l_type = F_UNLCK;
941		vp = (struct vnode *)fp->f_data;
942		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
943	}
944	if (--fp->f_count > 0)
945		return (0);
946	if (fp->f_count < 0)
947		panic("closef: count < 0");
948	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
949		lf.l_whence = SEEK_SET;
950		lf.l_start = 0;
951		lf.l_len = 0;
952		lf.l_type = F_UNLCK;
953		vp = (struct vnode *)fp->f_data;
954		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
955	}
956	if (fp->f_ops)
957		error = (*fp->f_ops->fo_close)(fp, p);
958	else
959		error = 0;
960	ffree(fp);
961	return (error);
962}
963
964/*
965 * Apply an advisory lock on a file descriptor.
966 *
967 * Just attempt to get a record lock of the requested type on
968 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
969 */
970#ifndef _SYS_SYSPROTO_H_
971struct flock_args {
972	int	fd;
973	int	how;
974};
975#endif
976/* ARGSUSED */
977int
978flock(p, uap)
979	struct proc *p;
980	register struct flock_args *uap;
981{
982	register struct filedesc *fdp = p->p_fd;
983	register struct file *fp;
984	struct vnode *vp;
985	struct flock lf;
986
987	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
988	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
989		return (EBADF);
990	if (fp->f_type != DTYPE_VNODE)
991		return (EOPNOTSUPP);
992	vp = (struct vnode *)fp->f_data;
993	lf.l_whence = SEEK_SET;
994	lf.l_start = 0;
995	lf.l_len = 0;
996	if (uap->how & LOCK_UN) {
997		lf.l_type = F_UNLCK;
998		fp->f_flag &= ~FHASLOCK;
999		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1000	}
1001	if (uap->how & LOCK_EX)
1002		lf.l_type = F_WRLCK;
1003	else if (uap->how & LOCK_SH)
1004		lf.l_type = F_RDLCK;
1005	else
1006		return (EBADF);
1007	fp->f_flag |= FHASLOCK;
1008	if (uap->how & LOCK_NB)
1009		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1010	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1011}
1012
1013/*
1014 * File Descriptor pseudo-device driver (/dev/fd/).
1015 *
1016 * Opening minor device N dup()s the file (if any) connected to file
1017 * descriptor N belonging to the calling process.  Note that this driver
1018 * consists of only the ``open()'' routine, because all subsequent
1019 * references to this file will be direct to the other driver.
1020 */
1021/* ARGSUSED */
1022static int
1023fdopen(dev, mode, type, p)
1024	dev_t dev;
1025	int mode, type;
1026	struct proc *p;
1027{
1028
1029	/*
1030	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1031	 * the file descriptor being sought for duplication. The error
1032	 * return ensures that the vnode for this device will be released
1033	 * by vn_open. Open will detect this special error and take the
1034	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1035	 * will simply report the error.
1036	 */
1037	p->p_dupfd = minor(dev);
1038	return (ENODEV);
1039}
1040
1041/*
1042 * Duplicate the specified descriptor to a free descriptor.
1043 */
1044int
1045dupfdopen(fdp, indx, dfd, mode, error)
1046	register struct filedesc *fdp;
1047	register int indx, dfd;
1048	int mode;
1049	int error;
1050{
1051	register struct file *wfp;
1052	struct file *fp;
1053
1054	/*
1055	 * If the to-be-dup'd fd number is greater than the allowed number
1056	 * of file descriptors, or the fd to be dup'd has already been
1057	 * closed, reject.  Note, check for new == old is necessary as
1058	 * falloc could allocate an already closed to-be-dup'd descriptor
1059	 * as the new descriptor.
1060	 */
1061	fp = fdp->fd_ofiles[indx];
1062	if ((u_int)dfd >= fdp->fd_nfiles ||
1063	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1064		return (EBADF);
1065
1066	/*
1067	 * There are two cases of interest here.
1068	 *
1069	 * For ENODEV simply dup (dfd) to file descriptor
1070	 * (indx) and return.
1071	 *
1072	 * For ENXIO steal away the file structure from (dfd) and
1073	 * store it in (indx).  (dfd) is effectively closed by
1074	 * this operation.
1075	 *
1076	 * Any other error code is just returned.
1077	 */
1078	switch (error) {
1079	case ENODEV:
1080		/*
1081		 * Check that the mode the file is being opened for is a
1082		 * subset of the mode of the existing descriptor.
1083		 */
1084		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1085			return (EACCES);
1086		fdp->fd_ofiles[indx] = wfp;
1087		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1088		wfp->f_count++;
1089		if (indx > fdp->fd_lastfile)
1090			fdp->fd_lastfile = indx;
1091		return (0);
1092
1093	case ENXIO:
1094		/*
1095		 * Steal away the file pointer from dfd, and stuff it into indx.
1096		 */
1097		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1098		fdp->fd_ofiles[dfd] = NULL;
1099		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1100		fdp->fd_ofileflags[dfd] = 0;
1101		/*
1102		 * Complete the clean up of the filedesc structure by
1103		 * recomputing the various hints.
1104		 */
1105		if (indx > fdp->fd_lastfile)
1106			fdp->fd_lastfile = indx;
1107		else
1108			while (fdp->fd_lastfile > 0 &&
1109			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1110				fdp->fd_lastfile--;
1111			if (dfd < fdp->fd_freefile)
1112				fdp->fd_freefile = dfd;
1113		return (0);
1114
1115	default:
1116		return (error);
1117	}
1118	/* NOTREACHED */
1119}
1120
1121/*
1122 * Get file structures.
1123 */
1124static int
1125sysctl_kern_file SYSCTL_HANDLER_ARGS
1126{
1127	int error;
1128	struct file *fp;
1129
1130	if (!req->oldptr) {
1131		/*
1132		 * overestimate by 10 files
1133		 */
1134		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1135				(nfiles + 10) * sizeof(struct file)));
1136	}
1137
1138	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1139	if (error)
1140		return (error);
1141
1142	/*
1143	 * followed by an array of file structures
1144	 */
1145	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1146		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1147		if (error)
1148			return (error);
1149	}
1150	return (0);
1151}
1152
1153SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1154	0, 0, sysctl_kern_file, "S,file", "");
1155
1156SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1157	CTLFLAG_RW, &maxfilesperproc, 0, "");
1158
1159SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1160
1161static fildesc_devsw_installed = 0;
1162#ifdef DEVFS
1163static	void *devfs_token_stdin;
1164static	void *devfs_token_stdout;
1165static	void *devfs_token_stderr;
1166static	void *devfs_token_fildesc[NUMFDESC];
1167#endif
1168
1169static void 	fildesc_drvinit(void *unused)
1170{
1171	dev_t dev;
1172#ifdef DEVFS
1173	int fd;
1174#endif
1175
1176	if( ! fildesc_devsw_installed ) {
1177		dev = makedev(CDEV_MAJOR,0);
1178		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1179		fildesc_devsw_installed = 1;
1180#ifdef DEVFS
1181		for (fd = 0; fd < NUMFDESC; fd++)
1182			devfs_token_fildesc[fd] =
1183				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1184						 UID_BIN, GID_BIN, 0666,
1185						 "fd/%d", fd);
1186		devfs_token_stdin =
1187			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1188					 UID_ROOT, GID_WHEEL, 0666,
1189					 "stdin", fd);
1190		devfs_token_stdout =
1191			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1192					 UID_ROOT, GID_WHEEL, 0666,
1193					 "stdout", fd);
1194		devfs_token_stderr =
1195			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1196					 UID_ROOT, GID_WHEEL, 0666,
1197					 "stderr", fd);
1198#endif
1199    	}
1200}
1201
1202SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1203					fildesc_drvinit,NULL)
1204
1205
1206