kern_descrip.c revision 21002
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.34 1996/12/19 19:59:51 bde Exp $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/sysproto.h>
45#include <sys/conf.h>
46#include <sys/filedesc.h>
47#include <sys/kernel.h>
48#include <sys/sysctl.h>
49#include <sys/vnode.h>
50#include <sys/proc.h>
51#include <sys/file.h>
52#include <sys/socketvar.h>
53#include <sys/stat.h>
54#include <sys/filio.h>
55#include <sys/ttycom.h>
56#include <sys/fcntl.h>
57#include <sys/malloc.h>
58#include <sys/unistd.h>
59#include <sys/resourcevar.h>
60#include <sys/pipe.h>
61
62#include <vm/vm.h>
63#include <vm/vm_extern.h>
64
65#ifdef DEVFS
66#include <sys/devfsext.h>
67#endif /*DEVFS*/
68
69static	 d_open_t  fdopen;
70#define NUMFDESC 64
71
72#define CDEV_MAJOR 22
73static struct cdevsw fildesc_cdevsw =
74	{ fdopen,	noclose,	noread,		nowrite,	/*22*/
75	  noioc,	nostop,		nullreset,	nodevtotty,/*fd(!=Fd)*/
76	  noselect,	nommap,		nostrat };
77
78static int finishdup(struct filedesc *fdp, int old, int new, int *retval);
79/*
80 * Descriptor management.
81 */
82struct filelist filehead;	/* head of list of open files */
83int nfiles;			/* actual number of open files */
84extern int cmask;
85
86/*
87 * System calls on descriptors.
88 */
89#ifndef _SYS_SYSPROTO_H_
90struct getdtablesize_args {
91	int	dummy;
92};
93#endif
94/* ARGSUSED */
95int
96getdtablesize(p, uap, retval)
97	struct proc *p;
98	struct getdtablesize_args *uap;
99	int *retval;
100{
101
102	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
103	return (0);
104}
105
106/*
107 * Duplicate a file descriptor to a particular value.
108 */
109#ifndef _SYS_SYSPROTO_H_
110struct dup2_args {
111	u_int	from;
112	u_int	to;
113};
114#endif
115/* ARGSUSED */
116int
117dup2(p, uap, retval)
118	struct proc *p;
119	struct dup2_args *uap;
120	int *retval;
121{
122	register struct filedesc *fdp = p->p_fd;
123	register u_int old = uap->from, new = uap->to;
124	int i, error;
125
126	if (old >= fdp->fd_nfiles ||
127	    fdp->fd_ofiles[old] == NULL ||
128	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
129	    new >= maxfilesperproc)
130		return (EBADF);
131	if (old == new) {
132		*retval = new;
133		return (0);
134	}
135	if (new >= fdp->fd_nfiles) {
136		if ((error = fdalloc(p, new, &i)))
137			return (error);
138		if (new != i)
139			panic("dup2: fdalloc");
140	} else if (fdp->fd_ofiles[new]) {
141		if (fdp->fd_ofileflags[new] & UF_MAPPED)
142			(void) munmapfd(p, new);
143		/*
144		 * dup2() must succeed even if the close has an error.
145		 */
146		(void) closef(fdp->fd_ofiles[new], p);
147	}
148	return (finishdup(fdp, (int)old, (int)new, retval));
149}
150
151/*
152 * Duplicate a file descriptor.
153 */
154#ifndef _SYS_SYSPROTO_H_
155struct dup_args {
156	u_int	fd;
157};
158#endif
159/* ARGSUSED */
160int
161dup(p, uap, retval)
162	struct proc *p;
163	struct dup_args *uap;
164	int *retval;
165{
166	register struct filedesc *fdp;
167	u_int old;
168	int new, error;
169
170	old = uap->fd;
171
172#if 0
173	/*
174	 * XXX Compatibility
175	 */
176	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); }
177#endif
178
179	fdp = p->p_fd;
180	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
181		return (EBADF);
182	if ((error = fdalloc(p, 0, &new)))
183		return (error);
184	return (finishdup(fdp, (int)old, new, retval));
185}
186
187/*
188 * The file control system call.
189 */
190#ifndef _SYS_SYSPROTO_H_
191struct fcntl_args {
192	int	fd;
193	int	cmd;
194	int	arg;
195};
196#endif
197/* ARGSUSED */
198int
199fcntl(p, uap, retval)
200	struct proc *p;
201	register struct fcntl_args *uap;
202	int *retval;
203{
204	register struct filedesc *fdp = p->p_fd;
205	register struct file *fp;
206	register char *pop;
207	struct vnode *vp;
208	int i, tmp, error, flg = F_POSIX;
209	struct flock fl;
210	u_int newmin;
211
212	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
213	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
214		return (EBADF);
215	pop = &fdp->fd_ofileflags[uap->fd];
216	switch (uap->cmd) {
217
218	case F_DUPFD:
219		newmin = uap->arg;
220		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
221		    newmin >= maxfilesperproc)
222			return (EINVAL);
223		if ((error = fdalloc(p, newmin, &i)))
224			return (error);
225		return (finishdup(fdp, uap->fd, i, retval));
226
227	case F_GETFD:
228		*retval = *pop & 1;
229		return (0);
230
231	case F_SETFD:
232		*pop = (*pop &~ 1) | (uap->arg & 1);
233		return (0);
234
235	case F_GETFL:
236		*retval = OFLAGS(fp->f_flag);
237		return (0);
238
239	case F_SETFL:
240		fp->f_flag &= ~FCNTLFLAGS;
241		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
242		tmp = fp->f_flag & FNONBLOCK;
243		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
244		if (error)
245			return (error);
246		tmp = fp->f_flag & FASYNC;
247		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
248		if (!error)
249			return (0);
250		fp->f_flag &= ~FNONBLOCK;
251		tmp = 0;
252		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
253		return (error);
254
255	case F_GETOWN:
256		if (fp->f_type == DTYPE_SOCKET) {
257			*retval = ((struct socket *)fp->f_data)->so_pgid;
258			return (0);
259		}
260		error = (*fp->f_ops->fo_ioctl)
261			(fp, TIOCGPGRP, (caddr_t)retval, p);
262		*retval = -*retval;
263		return (error);
264
265	case F_SETOWN:
266		if (fp->f_type == DTYPE_SOCKET) {
267			((struct socket *)fp->f_data)->so_pgid = uap->arg;
268			return (0);
269		}
270		if (uap->arg <= 0) {
271			uap->arg = -uap->arg;
272		} else {
273			struct proc *p1 = pfind(uap->arg);
274			if (p1 == 0)
275				return (ESRCH);
276			uap->arg = p1->p_pgrp->pg_id;
277		}
278		return ((*fp->f_ops->fo_ioctl)
279			(fp, TIOCSPGRP, (caddr_t)&uap->arg, p));
280
281	case F_SETLKW:
282		flg |= F_WAIT;
283		/* Fall into F_SETLK */
284
285	case F_SETLK:
286		if (fp->f_type != DTYPE_VNODE)
287			return (EBADF);
288		vp = (struct vnode *)fp->f_data;
289		/* Copy in the lock structure */
290		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
291		if (error)
292			return (error);
293		if (fl.l_whence == SEEK_CUR)
294			fl.l_start += fp->f_offset;
295		switch (fl.l_type) {
296
297		case F_RDLCK:
298			if ((fp->f_flag & FREAD) == 0)
299				return (EBADF);
300			p->p_flag |= P_ADVLOCK;
301			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
302
303		case F_WRLCK:
304			if ((fp->f_flag & FWRITE) == 0)
305				return (EBADF);
306			p->p_flag |= P_ADVLOCK;
307			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
308
309		case F_UNLCK:
310			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
311				F_POSIX));
312
313		default:
314			return (EINVAL);
315		}
316
317	case F_GETLK:
318		if (fp->f_type != DTYPE_VNODE)
319			return (EBADF);
320		vp = (struct vnode *)fp->f_data;
321		/* Copy in the lock structure */
322		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
323		if (error)
324			return (error);
325		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
326		    fl.l_type != F_UNLCK)
327			return (EINVAL);
328		if (fl.l_whence == SEEK_CUR)
329			fl.l_start += fp->f_offset;
330		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
331			return (error);
332		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
333
334	default:
335		return (EINVAL);
336	}
337	/* NOTREACHED */
338}
339
340/*
341 * Common code for dup, dup2, and fcntl(F_DUPFD).
342 */
343static int
344finishdup(fdp, old, new, retval)
345	register struct filedesc *fdp;
346	register int old, new, *retval;
347{
348	register struct file *fp;
349
350	fp = fdp->fd_ofiles[old];
351	fdp->fd_ofiles[new] = fp;
352	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
353	fp->f_count++;
354	if (new > fdp->fd_lastfile)
355		fdp->fd_lastfile = new;
356	*retval = new;
357	return (0);
358}
359
360/*
361 * Close a file descriptor.
362 */
363#ifndef _SYS_SYSPROTO_H_
364struct close_args {
365        int     fd;
366};
367#endif
368/* ARGSUSED */
369int
370close(p, uap, retval)
371	struct proc *p;
372	struct close_args *uap;
373	int *retval;
374{
375	register struct filedesc *fdp = p->p_fd;
376	register struct file *fp;
377	register int fd = uap->fd;
378	register u_char *pf;
379
380	if ((unsigned)fd >= fdp->fd_nfiles ||
381	    (fp = fdp->fd_ofiles[fd]) == NULL)
382		return (EBADF);
383	pf = (u_char *)&fdp->fd_ofileflags[fd];
384	if (*pf & UF_MAPPED)
385		(void) munmapfd(p, fd);
386	fdp->fd_ofiles[fd] = NULL;
387	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
388		fdp->fd_lastfile--;
389	if (fd < fdp->fd_freefile)
390		fdp->fd_freefile = fd;
391	*pf = 0;
392	return (closef(fp, p));
393}
394
395#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
396/*
397 * Return status information about a file descriptor.
398 */
399#ifndef _SYS_SYSPROTO_H_
400struct ofstat_args {
401	int	fd;
402	struct	ostat *sb;
403};
404#endif
405/* ARGSUSED */
406int
407ofstat(p, uap, retval)
408	struct proc *p;
409	register struct ofstat_args *uap;
410	int *retval;
411{
412	register struct filedesc *fdp = p->p_fd;
413	register struct file *fp;
414	struct stat ub;
415	struct ostat oub;
416	int error;
417
418	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
419	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
420		return (EBADF);
421	switch (fp->f_type) {
422
423	case DTYPE_FIFO:
424	case DTYPE_VNODE:
425		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
426		break;
427
428	case DTYPE_SOCKET:
429		error = soo_stat((struct socket *)fp->f_data, &ub);
430		break;
431
432#ifndef OLD_PIPE
433	case DTYPE_PIPE:
434		error = pipe_stat((struct pipe *)fp->f_data, &ub);
435		break;
436#endif
437
438	default:
439		panic("ofstat");
440		/*NOTREACHED*/
441	}
442	cvtstat(&ub, &oub);
443	if (error == 0)
444		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
445	return (error);
446}
447#endif /* COMPAT_43 || COMPAT_SUNOS */
448
449/*
450 * Return status information about a file descriptor.
451 */
452#ifndef _SYS_SYSPROTO_H_
453struct fstat_args {
454	int	fd;
455	struct	stat *sb;
456};
457#endif
458/* ARGSUSED */
459int
460fstat(p, uap, retval)
461	struct proc *p;
462	register struct fstat_args *uap;
463	int *retval;
464{
465	register struct filedesc *fdp = p->p_fd;
466	register struct file *fp;
467	struct stat ub;
468	int error;
469
470	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
471	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
472		return (EBADF);
473	switch (fp->f_type) {
474
475	case DTYPE_FIFO:
476	case DTYPE_VNODE:
477		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
478		break;
479
480	case DTYPE_SOCKET:
481		error = soo_stat((struct socket *)fp->f_data, &ub);
482		break;
483
484#ifndef OLD_PIPE
485	case DTYPE_PIPE:
486		error = pipe_stat((struct pipe *)fp->f_data, &ub);
487		break;
488#endif
489
490	default:
491		panic("fstat");
492		/*NOTREACHED*/
493	}
494	if (error == 0)
495		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
496	return (error);
497}
498
499/*
500 * Return pathconf information about a file descriptor.
501 */
502#ifndef _SYS_SYSPROTO_H_
503struct fpathconf_args {
504	int	fd;
505	int	name;
506};
507#endif
508/* ARGSUSED */
509int
510fpathconf(p, uap, retval)
511	struct proc *p;
512	register struct fpathconf_args *uap;
513	int *retval;
514{
515	struct filedesc *fdp = p->p_fd;
516	struct file *fp;
517	struct vnode *vp;
518
519	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
520	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
521		return (EBADF);
522	switch (fp->f_type) {
523
524#ifndef OLD_PIPE
525	case DTYPE_PIPE:
526#endif
527	case DTYPE_SOCKET:
528		if (uap->name != _PC_PIPE_BUF)
529			return (EINVAL);
530		*retval = PIPE_BUF;
531		return (0);
532
533	case DTYPE_FIFO:
534	case DTYPE_VNODE:
535		vp = (struct vnode *)fp->f_data;
536		return (VOP_PATHCONF(vp, uap->name, retval));
537
538	default:
539		panic("fpathconf");
540	}
541	/*NOTREACHED*/
542}
543
544/*
545 * Allocate a file descriptor for the process.
546 */
547static int fdexpand;
548SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
549
550int
551fdalloc(p, want, result)
552	struct proc *p;
553	int want;
554	int *result;
555{
556	register struct filedesc *fdp = p->p_fd;
557	register int i;
558	int lim, last, nfiles;
559	struct file **newofile;
560	char *newofileflags;
561
562	/*
563	 * Search for a free descriptor starting at the higher
564	 * of want or fd_freefile.  If that fails, consider
565	 * expanding the ofile array.
566	 */
567	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
568	for (;;) {
569		last = min(fdp->fd_nfiles, lim);
570		if ((i = want) < fdp->fd_freefile)
571			i = fdp->fd_freefile;
572		for (; i < last; i++) {
573			if (fdp->fd_ofiles[i] == NULL) {
574				fdp->fd_ofileflags[i] = 0;
575				if (i > fdp->fd_lastfile)
576					fdp->fd_lastfile = i;
577				if (want <= fdp->fd_freefile)
578					fdp->fd_freefile = i;
579				*result = i;
580				return (0);
581			}
582		}
583
584		/*
585		 * No space in current array.  Expand?
586		 */
587		if (fdp->fd_nfiles >= lim)
588			return (EMFILE);
589		if (fdp->fd_nfiles < NDEXTENT)
590			nfiles = NDEXTENT;
591		else
592			nfiles = 2 * fdp->fd_nfiles;
593		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
594		    M_FILEDESC, M_WAITOK);
595		newofileflags = (char *) &newofile[nfiles];
596		/*
597		 * Copy the existing ofile and ofileflags arrays
598		 * and zero the new portion of each array.
599		 */
600		bcopy(fdp->fd_ofiles, newofile,
601			(i = sizeof(struct file *) * fdp->fd_nfiles));
602		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
603		bcopy(fdp->fd_ofileflags, newofileflags,
604			(i = sizeof(char) * fdp->fd_nfiles));
605		bzero(newofileflags + i, nfiles * sizeof(char) - i);
606		if (fdp->fd_nfiles > NDFILE)
607			FREE(fdp->fd_ofiles, M_FILEDESC);
608		fdp->fd_ofiles = newofile;
609		fdp->fd_ofileflags = newofileflags;
610		fdp->fd_nfiles = nfiles;
611		fdexpand++;
612	}
613	return (0);
614}
615
616/*
617 * Check to see whether n user file descriptors
618 * are available to the process p.
619 */
620int
621fdavail(p, n)
622	struct proc *p;
623	register int n;
624{
625	register struct filedesc *fdp = p->p_fd;
626	register struct file **fpp;
627	register int i, lim, last;
628
629	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
630	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
631		return (1);
632
633	last = min(fdp->fd_nfiles, lim);
634	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
635	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
636		if (*fpp == NULL && --n <= 0)
637			return (1);
638	return (0);
639}
640
641/*
642 * Create a new open file structure and allocate
643 * a file decriptor for the process that refers to it.
644 */
645int
646falloc(p, resultfp, resultfd)
647	register struct proc *p;
648	struct file **resultfp;
649	int *resultfd;
650{
651	register struct file *fp, *fq;
652	int error, i;
653
654	if ((error = fdalloc(p, 0, &i)))
655		return (error);
656	if (nfiles >= maxfiles) {
657		tablefull("file");
658		return (ENFILE);
659	}
660	/*
661	 * Allocate a new file descriptor.
662	 * If the process has file descriptor zero open, add to the list
663	 * of open files at that point, otherwise put it at the front of
664	 * the list of open files.
665	 */
666	nfiles++;
667	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
668	bzero(fp, sizeof(struct file));
669	if ((fq = p->p_fd->fd_ofiles[0])) {
670		LIST_INSERT_AFTER(fq, fp, f_list);
671	} else {
672		LIST_INSERT_HEAD(&filehead, fp, f_list);
673	}
674	p->p_fd->fd_ofiles[i] = fp;
675	fp->f_count = 1;
676	fp->f_cred = p->p_ucred;
677	fp->f_seqcount = 1;
678	crhold(fp->f_cred);
679	if (resultfp)
680		*resultfp = fp;
681	if (resultfd)
682		*resultfd = i;
683	return (0);
684}
685
686/*
687 * Free a file descriptor.
688 */
689void
690ffree(fp)
691	register struct file *fp;
692{
693	LIST_REMOVE(fp, f_list);
694	crfree(fp->f_cred);
695#ifdef DIAGNOSTIC
696	fp->f_count = 0;
697#endif
698	nfiles--;
699	FREE(fp, M_FILE);
700}
701
702/*
703 * Build a new filedesc structure.
704 */
705struct filedesc *
706fdinit(p)
707	struct proc *p;
708{
709	register struct filedesc0 *newfdp;
710	register struct filedesc *fdp = p->p_fd;
711
712	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
713	    M_FILEDESC, M_WAITOK);
714	bzero(newfdp, sizeof(struct filedesc0));
715	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
716	VREF(newfdp->fd_fd.fd_cdir);
717	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
718	if (newfdp->fd_fd.fd_rdir)
719		VREF(newfdp->fd_fd.fd_rdir);
720
721	/* Create the file descriptor table. */
722	newfdp->fd_fd.fd_refcnt = 1;
723	newfdp->fd_fd.fd_cmask = cmask;
724	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
725	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
726	newfdp->fd_fd.fd_nfiles = NDFILE;
727
728	newfdp->fd_fd.fd_freefile = 0;
729	newfdp->fd_fd.fd_lastfile = 0;
730
731	return (&newfdp->fd_fd);
732}
733
734/*
735 * Share a filedesc structure.
736 */
737struct filedesc *
738fdshare(p)
739	struct proc *p;
740{
741	p->p_fd->fd_refcnt++;
742	return (p->p_fd);
743}
744
745/*
746 * Copy a filedesc structure.
747 */
748struct filedesc *
749fdcopy(p)
750	struct proc *p;
751{
752	register struct filedesc *newfdp, *fdp = p->p_fd;
753	register struct file **fpp;
754	register int i;
755
756	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
757	    M_FILEDESC, M_WAITOK);
758	bcopy(fdp, newfdp, sizeof(struct filedesc));
759	VREF(newfdp->fd_cdir);
760	if (newfdp->fd_rdir)
761		VREF(newfdp->fd_rdir);
762	newfdp->fd_refcnt = 1;
763
764	/*
765	 * If the number of open files fits in the internal arrays
766	 * of the open file structure, use them, otherwise allocate
767	 * additional memory for the number of descriptors currently
768	 * in use.
769	 */
770	if (newfdp->fd_lastfile < NDFILE) {
771		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
772		newfdp->fd_ofileflags =
773		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
774		i = NDFILE;
775	} else {
776		/*
777		 * Compute the smallest multiple of NDEXTENT needed
778		 * for the file descriptors currently in use,
779		 * allowing the table to shrink.
780		 */
781		i = newfdp->fd_nfiles;
782		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
783			i /= 2;
784		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
785		    M_FILEDESC, M_WAITOK);
786		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
787	}
788	newfdp->fd_nfiles = i;
789	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
790	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
791	fpp = newfdp->fd_ofiles;
792	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
793		if (*fpp != NULL)
794			(*fpp)->f_count++;
795	return (newfdp);
796}
797
798/*
799 * Release a filedesc structure.
800 */
801void
802fdfree(p)
803	struct proc *p;
804{
805	register struct filedesc *fdp = p->p_fd;
806	struct file **fpp;
807	register int i;
808
809	if (--fdp->fd_refcnt > 0)
810		return;
811	fpp = fdp->fd_ofiles;
812	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
813		if (*fpp)
814			(void) closef(*fpp, p);
815	if (fdp->fd_nfiles > NDFILE)
816		FREE(fdp->fd_ofiles, M_FILEDESC);
817	vrele(fdp->fd_cdir);
818	if (fdp->fd_rdir)
819		vrele(fdp->fd_rdir);
820	FREE(fdp, M_FILEDESC);
821}
822
823/*
824 * Close any files on exec?
825 */
826void
827fdcloseexec(p)
828	struct proc *p;
829{
830	struct filedesc *fdp = p->p_fd;
831	struct file **fpp;
832	char *fdfp;
833	register int i;
834
835	fpp = fdp->fd_ofiles;
836	fdfp = fdp->fd_ofileflags;
837	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
838		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
839			if (*fdfp & UF_MAPPED)
840				(void) munmapfd(p, i);
841			(void) closef(*fpp, p);
842			*fpp = NULL;
843			*fdfp = 0;
844			if (i < fdp->fd_freefile)
845				fdp->fd_freefile = i;
846		}
847	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
848		fdp->fd_lastfile--;
849}
850
851/*
852 * Internal form of close.
853 * Decrement reference count on file structure.
854 * Note: p may be NULL when closing a file
855 * that was being passed in a message.
856 */
857int
858closef(fp, p)
859	register struct file *fp;
860	register struct proc *p;
861{
862	struct vnode *vp;
863	struct flock lf;
864	int error;
865
866	if (fp == NULL)
867		return (0);
868	/*
869	 * POSIX record locking dictates that any close releases ALL
870	 * locks owned by this process.  This is handled by setting
871	 * a flag in the unlock to free ONLY locks obeying POSIX
872	 * semantics, and not to free BSD-style file locks.
873	 * If the descriptor was in a message, POSIX-style locks
874	 * aren't passed with the descriptor.
875	 */
876	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
877		lf.l_whence = SEEK_SET;
878		lf.l_start = 0;
879		lf.l_len = 0;
880		lf.l_type = F_UNLCK;
881		vp = (struct vnode *)fp->f_data;
882		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
883	}
884	if (--fp->f_count > 0)
885		return (0);
886	if (fp->f_count < 0)
887		panic("closef: count < 0");
888	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
889		lf.l_whence = SEEK_SET;
890		lf.l_start = 0;
891		lf.l_len = 0;
892		lf.l_type = F_UNLCK;
893		vp = (struct vnode *)fp->f_data;
894		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
895	}
896	if (fp->f_ops)
897		error = (*fp->f_ops->fo_close)(fp, p);
898	else
899		error = 0;
900	ffree(fp);
901	return (error);
902}
903
904/*
905 * Apply an advisory lock on a file descriptor.
906 *
907 * Just attempt to get a record lock of the requested type on
908 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
909 */
910#ifndef _SYS_SYSPROTO_H_
911struct flock_args {
912	int	fd;
913	int	how;
914};
915#endif
916/* ARGSUSED */
917int
918flock(p, uap, retval)
919	struct proc *p;
920	register struct flock_args *uap;
921	int *retval;
922{
923	register struct filedesc *fdp = p->p_fd;
924	register struct file *fp;
925	struct vnode *vp;
926	struct flock lf;
927
928	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
929	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
930		return (EBADF);
931	if (fp->f_type != DTYPE_VNODE)
932		return (EOPNOTSUPP);
933	vp = (struct vnode *)fp->f_data;
934	lf.l_whence = SEEK_SET;
935	lf.l_start = 0;
936	lf.l_len = 0;
937	if (uap->how & LOCK_UN) {
938		lf.l_type = F_UNLCK;
939		fp->f_flag &= ~FHASLOCK;
940		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
941	}
942	if (uap->how & LOCK_EX)
943		lf.l_type = F_WRLCK;
944	else if (uap->how & LOCK_SH)
945		lf.l_type = F_RDLCK;
946	else
947		return (EBADF);
948	fp->f_flag |= FHASLOCK;
949	if (uap->how & LOCK_NB)
950		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
951	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
952}
953
954/*
955 * File Descriptor pseudo-device driver (/dev/fd/).
956 *
957 * Opening minor device N dup()s the file (if any) connected to file
958 * descriptor N belonging to the calling process.  Note that this driver
959 * consists of only the ``open()'' routine, because all subsequent
960 * references to this file will be direct to the other driver.
961 */
962/* ARGSUSED */
963static int
964fdopen(dev, mode, type, p)
965	dev_t dev;
966	int mode, type;
967	struct proc *p;
968{
969
970	/*
971	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
972	 * the file descriptor being sought for duplication. The error
973	 * return ensures that the vnode for this device will be released
974	 * by vn_open. Open will detect this special error and take the
975	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
976	 * will simply report the error.
977	 */
978	p->p_dupfd = minor(dev);
979	return (ENODEV);
980}
981
982/*
983 * Duplicate the specified descriptor to a free descriptor.
984 */
985int
986dupfdopen(fdp, indx, dfd, mode, error)
987	register struct filedesc *fdp;
988	register int indx, dfd;
989	int mode;
990	int error;
991{
992	register struct file *wfp;
993	struct file *fp;
994
995	/*
996	 * If the to-be-dup'd fd number is greater than the allowed number
997	 * of file descriptors, or the fd to be dup'd has already been
998	 * closed, reject.  Note, check for new == old is necessary as
999	 * falloc could allocate an already closed to-be-dup'd descriptor
1000	 * as the new descriptor.
1001	 */
1002	fp = fdp->fd_ofiles[indx];
1003	if ((u_int)dfd >= fdp->fd_nfiles ||
1004	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1005		return (EBADF);
1006
1007	/*
1008	 * There are two cases of interest here.
1009	 *
1010	 * For ENODEV simply dup (dfd) to file descriptor
1011	 * (indx) and return.
1012	 *
1013	 * For ENXIO steal away the file structure from (dfd) and
1014	 * store it in (indx).  (dfd) is effectively closed by
1015	 * this operation.
1016	 *
1017	 * Any other error code is just returned.
1018	 */
1019	switch (error) {
1020	case ENODEV:
1021		/*
1022		 * Check that the mode the file is being opened for is a
1023		 * subset of the mode of the existing descriptor.
1024		 */
1025		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1026			return (EACCES);
1027		fdp->fd_ofiles[indx] = wfp;
1028		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1029		wfp->f_count++;
1030		if (indx > fdp->fd_lastfile)
1031			fdp->fd_lastfile = indx;
1032		return (0);
1033
1034	case ENXIO:
1035		/*
1036		 * Steal away the file pointer from dfd, and stuff it into indx.
1037		 */
1038		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1039		fdp->fd_ofiles[dfd] = NULL;
1040		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1041		fdp->fd_ofileflags[dfd] = 0;
1042		/*
1043		 * Complete the clean up of the filedesc structure by
1044		 * recomputing the various hints.
1045		 */
1046		if (indx > fdp->fd_lastfile)
1047			fdp->fd_lastfile = indx;
1048		else
1049			while (fdp->fd_lastfile > 0 &&
1050			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1051				fdp->fd_lastfile--;
1052			if (dfd < fdp->fd_freefile)
1053				fdp->fd_freefile = dfd;
1054		return (0);
1055
1056	default:
1057		return (error);
1058	}
1059	/* NOTREACHED */
1060}
1061
1062/*
1063 * Get file structures.
1064 */
1065static int
1066sysctl_kern_file SYSCTL_HANDLER_ARGS
1067{
1068	int error;
1069	struct file *fp;
1070
1071	if (!req->oldptr) {
1072		/*
1073		 * overestimate by 10 files
1074		 */
1075		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1076				(nfiles + 10) * sizeof(struct file)));
1077	}
1078
1079	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1080	if (error)
1081		return (error);
1082
1083	/*
1084	 * followed by an array of file structures
1085	 */
1086	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1087		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1088		if (error)
1089			return (error);
1090	}
1091	return (0);
1092}
1093
1094SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1095	0, 0, sysctl_kern_file, "S,file", "");
1096
1097SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1098	CTLFLAG_RW, &maxfilesperproc, 0, "");
1099
1100SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1101
/* Set once fildesc_drvinit() has registered the device switch. */
static int fildesc_devsw_installed = 0;
#ifdef DEVFS
/* Values returned by devfs_add_devswf() for the nodes created at init. */
static	void *devfs_token_stdin;
static	void *devfs_token_stdout;
static	void *devfs_token_stderr;
static	void *devfs_token_fildesc[NUMFDESC];
#endif
1109
1110static void 	fildesc_drvinit(void *unused)
1111{
1112	dev_t dev;
1113#ifdef DEVFS
1114	int fd;
1115#endif
1116
1117	if( ! fildesc_devsw_installed ) {
1118		dev = makedev(CDEV_MAJOR,0);
1119		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1120		fildesc_devsw_installed = 1;
1121#ifdef DEVFS
1122		for (fd = 0; fd < NUMFDESC; fd++)
1123			devfs_token_fildesc[fd] =
1124				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1125						 UID_BIN, GID_BIN, 0666,
1126						 "fd/%d", fd);
1127		devfs_token_stdin =
1128			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1129					 UID_ROOT, GID_WHEEL, 0666,
1130					 "stdin", fd);
1131		devfs_token_stdout =
1132			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1133					 UID_ROOT, GID_WHEEL, 0666,
1134					 "stdout", fd);
1135		devfs_token_stderr =
1136			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1137					 UID_ROOT, GID_WHEEL, 0666,
1138					 "stderr", fd);
1139#endif
1140    	}
1141}
1142
1143SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1144					fildesc_drvinit,NULL)
1145
1146
1147