kern_descrip.c revision 29361
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.39 1997/08/26 00:09:44 bde Exp $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/sysproto.h>
45#include <sys/conf.h>
46#include <sys/filedesc.h>
47#include <sys/kernel.h>
48#include <sys/sysctl.h>
49#include <sys/vnode.h>
50#include <sys/proc.h>
51#include <sys/file.h>
52#include <sys/socketvar.h>
53#include <sys/stat.h>
54#include <sys/filio.h>
55#include <sys/ttycom.h>
56#include <sys/fcntl.h>
57#include <sys/malloc.h>
58#include <sys/unistd.h>
59#include <sys/resourcevar.h>
60#include <sys/pipe.h>
61
62#include <vm/vm.h>
63#include <vm/vm_extern.h>
64
65#ifdef DEVFS
66#include <sys/devfsext.h>
67#endif /*DEVFS*/
68
/* Open routine of the /dev/fd pseudo-device; defined near the end of the file. */
static	 d_open_t  fdopen;
/* Number of "fd/N" nodes that fildesc_drvinit() registers under DEVFS. */
#define NUMFDESC 64

/* Character-device major number used when registering fildesc_cdevsw. */
#define CDEV_MAJOR 22
/*
 * Device switch for the file-descriptor pseudo-device.  Only the open
 * entry (fdopen) is specific to this driver; every other slot is filled
 * with one of the generic no*/null* handlers.
 */
static struct cdevsw fildesc_cdevsw =
	{ fdopen,	noclose,	noread,		nowrite,
	  noioc,	nostop,		nullreset,	nodevtotty,
	  seltrue,	nommap,		nostrat };

/* Shared tail of dup(), dup2() and fcntl(F_DUPFD). */
static int finishdup __P((struct filedesc *fdp, int old, int new, int *retval));
/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;		/* copied into fd_cmask of new tables by fdinit() */
85
86/*
87 * System calls on descriptors.
88 */
89#ifndef _SYS_SYSPROTO_H_
90struct getdtablesize_args {
91	int	dummy;
92};
93#endif
94/* ARGSUSED */
95int
96getdtablesize(p, uap, retval)
97	struct proc *p;
98	struct getdtablesize_args *uap;
99	int *retval;
100{
101
102	*retval = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
103	return (0);
104}
105
106/*
107 * Duplicate a file descriptor to a particular value.
108 */
109#ifndef _SYS_SYSPROTO_H_
110struct dup2_args {
111	u_int	from;
112	u_int	to;
113};
114#endif
115/* ARGSUSED */
116int
117dup2(p, uap, retval)
118	struct proc *p;
119	struct dup2_args *uap;
120	int *retval;
121{
122	register struct filedesc *fdp = p->p_fd;
123	register u_int old = uap->from, new = uap->to;
124	int i, error;
125
126	if (old >= fdp->fd_nfiles ||
127	    fdp->fd_ofiles[old] == NULL ||
128	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
129	    new >= maxfilesperproc)
130		return (EBADF);
131	if (old == new) {
132		*retval = new;
133		return (0);
134	}
135	if (new >= fdp->fd_nfiles) {
136		if ((error = fdalloc(p, new, &i)))
137			return (error);
138		if (new != i)
139			panic("dup2: fdalloc");
140	} else if (fdp->fd_ofiles[new]) {
141		if (fdp->fd_ofileflags[new] & UF_MAPPED)
142			(void) munmapfd(p, new);
143		/*
144		 * dup2() must succeed even if the close has an error.
145		 */
146		(void) closef(fdp->fd_ofiles[new], p);
147	}
148	return (finishdup(fdp, (int)old, (int)new, retval));
149}
150
151/*
152 * Duplicate a file descriptor.
153 */
154#ifndef _SYS_SYSPROTO_H_
155struct dup_args {
156	u_int	fd;
157};
158#endif
159/* ARGSUSED */
160int
161dup(p, uap, retval)
162	struct proc *p;
163	struct dup_args *uap;
164	int *retval;
165{
166	register struct filedesc *fdp;
167	u_int old;
168	int new, error;
169
170	old = uap->fd;
171
172#if 0
173	/*
174	 * XXX Compatibility
175	 */
176	if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, retval)); }
177#endif
178
179	fdp = p->p_fd;
180	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
181		return (EBADF);
182	if ((error = fdalloc(p, 0, &new)))
183		return (error);
184	return (finishdup(fdp, (int)old, new, retval));
185}
186
187/*
188 * The file control system call.
189 */
190#ifndef _SYS_SYSPROTO_H_
191struct fcntl_args {
192	int	fd;
193	int	cmd;
194	int	arg;
195};
196#endif
197/* ARGSUSED */
198int
199fcntl(p, uap, retval)
200	struct proc *p;
201	register struct fcntl_args *uap;
202	int *retval;
203{
204	register struct filedesc *fdp = p->p_fd;
205	register struct file *fp;
206	register char *pop;
207	struct vnode *vp;
208	int i, tmp, error, flg = F_POSIX;
209	struct flock fl;
210	u_int newmin;
211
212	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
213	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
214		return (EBADF);
215	pop = &fdp->fd_ofileflags[uap->fd];
216	switch (uap->cmd) {
217
218	case F_DUPFD:
219		newmin = uap->arg;
220		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
221		    newmin >= maxfilesperproc)
222			return (EINVAL);
223		if ((error = fdalloc(p, newmin, &i)))
224			return (error);
225		return (finishdup(fdp, uap->fd, i, retval));
226
227	case F_GETFD:
228		*retval = *pop & 1;
229		return (0);
230
231	case F_SETFD:
232		*pop = (*pop &~ 1) | (uap->arg & 1);
233		return (0);
234
235	case F_GETFL:
236		*retval = OFLAGS(fp->f_flag);
237		return (0);
238
239	case F_SETFL:
240		fp->f_flag &= ~FCNTLFLAGS;
241		fp->f_flag |= FFLAGS(uap->arg) & FCNTLFLAGS;
242		tmp = fp->f_flag & FNONBLOCK;
243		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
244		if (error)
245			return (error);
246		tmp = fp->f_flag & FASYNC;
247		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
248		if (!error)
249			return (0);
250		fp->f_flag &= ~FNONBLOCK;
251		tmp = 0;
252		(void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
253		return (error);
254
255	case F_GETOWN:
256		if (fp->f_type == DTYPE_SOCKET) {
257			*retval = ((struct socket *)fp->f_data)->so_pgid;
258			return (0);
259		}
260		error = (*fp->f_ops->fo_ioctl)
261			(fp, TIOCGPGRP, (caddr_t)retval, p);
262		*retval = -*retval;
263		return (error);
264
265	case F_SETOWN:
266		if (fp->f_type == DTYPE_SOCKET) {
267			((struct socket *)fp->f_data)->so_pgid = uap->arg;
268			return (0);
269		}
270		if (uap->arg <= 0) {
271			uap->arg = -uap->arg;
272		} else {
273			struct proc *p1 = pfind(uap->arg);
274			if (p1 == 0)
275				return (ESRCH);
276			uap->arg = p1->p_pgrp->pg_id;
277		}
278		return ((*fp->f_ops->fo_ioctl)
279			(fp, TIOCSPGRP, (caddr_t)&uap->arg, p));
280
281	case F_SETLKW:
282		flg |= F_WAIT;
283		/* Fall into F_SETLK */
284
285	case F_SETLK:
286		if (fp->f_type != DTYPE_VNODE)
287			return (EBADF);
288		vp = (struct vnode *)fp->f_data;
289		/* Copy in the lock structure */
290		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
291		if (error)
292			return (error);
293		if (fl.l_whence == SEEK_CUR)
294			fl.l_start += fp->f_offset;
295		switch (fl.l_type) {
296
297		case F_RDLCK:
298			if ((fp->f_flag & FREAD) == 0)
299				return (EBADF);
300			p->p_flag |= P_ADVLOCK;
301			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
302
303		case F_WRLCK:
304			if ((fp->f_flag & FWRITE) == 0)
305				return (EBADF);
306			p->p_flag |= P_ADVLOCK;
307			return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
308
309		case F_UNLCK:
310			return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
311				F_POSIX));
312
313		default:
314			return (EINVAL);
315		}
316
317	case F_GETLK:
318		if (fp->f_type != DTYPE_VNODE)
319			return (EBADF);
320		vp = (struct vnode *)fp->f_data;
321		/* Copy in the lock structure */
322		error = copyin((caddr_t)uap->arg, (caddr_t)&fl, sizeof (fl));
323		if (error)
324			return (error);
325		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
326		    fl.l_type != F_UNLCK)
327			return (EINVAL);
328		if (fl.l_whence == SEEK_CUR)
329			fl.l_start += fp->f_offset;
330		if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
331			return (error);
332		return (copyout((caddr_t)&fl, (caddr_t)uap->arg, sizeof (fl)));
333
334	default:
335		return (EINVAL);
336	}
337	/* NOTREACHED */
338}
339
340/*
341 * Common code for dup, dup2, and fcntl(F_DUPFD).
342 */
343static int
344finishdup(fdp, old, new, retval)
345	register struct filedesc *fdp;
346	register int old, new, *retval;
347{
348	register struct file *fp;
349
350	fp = fdp->fd_ofiles[old];
351	fdp->fd_ofiles[new] = fp;
352	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
353	fp->f_count++;
354	if (new > fdp->fd_lastfile)
355		fdp->fd_lastfile = new;
356	*retval = new;
357	return (0);
358}
359
360/*
361 * Close a file descriptor.
362 */
363#ifndef _SYS_SYSPROTO_H_
364struct close_args {
365        int     fd;
366};
367#endif
368/* ARGSUSED */
369int
370close(p, uap, retval)
371	struct proc *p;
372	struct close_args *uap;
373	int *retval;
374{
375	register struct filedesc *fdp = p->p_fd;
376	register struct file *fp;
377	register int fd = uap->fd;
378	register u_char *pf;
379
380	if ((unsigned)fd >= fdp->fd_nfiles ||
381	    (fp = fdp->fd_ofiles[fd]) == NULL)
382		return (EBADF);
383	pf = (u_char *)&fdp->fd_ofileflags[fd];
384	if (*pf & UF_MAPPED)
385		(void) munmapfd(p, fd);
386	fdp->fd_ofiles[fd] = NULL;
387	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
388		fdp->fd_lastfile--;
389	if (fd < fdp->fd_freefile)
390		fdp->fd_freefile = fd;
391	*pf = 0;
392	return (closef(fp, p));
393}
394
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * "struct ostat" layout.  Compiled only under COMPAT_43 or COMPAT_SUNOS.
 *
 * Returns EBADF if uap->fd is not open, the error from the per-type
 * stat routine if that fails, or the result of copying the converted
 * structure out to uap->sb.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap, retval)
	struct proc *p;
	register struct ofstat_args *uap;
	int *retval;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	/*
	 * Convert and copy out only when the stat succeeded; on failure
	 * "ub" has not been filled in and must not be read.
	 */
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
446
447/*
448 * Return status information about a file descriptor.
449 */
450#ifndef _SYS_SYSPROTO_H_
451struct fstat_args {
452	int	fd;
453	struct	stat *sb;
454};
455#endif
456/* ARGSUSED */
457int
458fstat(p, uap, retval)
459	struct proc *p;
460	register struct fstat_args *uap;
461	int *retval;
462{
463	register struct filedesc *fdp = p->p_fd;
464	register struct file *fp;
465	struct stat ub;
466	int error;
467
468	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
469	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
470		return (EBADF);
471	switch (fp->f_type) {
472
473	case DTYPE_FIFO:
474	case DTYPE_VNODE:
475		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
476		break;
477
478	case DTYPE_SOCKET:
479		error = soo_stat((struct socket *)fp->f_data, &ub);
480		break;
481
482	case DTYPE_PIPE:
483		error = pipe_stat((struct pipe *)fp->f_data, &ub);
484		break;
485
486	default:
487		panic("fstat");
488		/*NOTREACHED*/
489	}
490	if (error == 0)
491		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
492	return (error);
493}
494
495/*
496 * Return pathconf information about a file descriptor.
497 */
498#ifndef _SYS_SYSPROTO_H_
499struct fpathconf_args {
500	int	fd;
501	int	name;
502};
503#endif
504/* ARGSUSED */
505int
506fpathconf(p, uap, retval)
507	struct proc *p;
508	register struct fpathconf_args *uap;
509	int *retval;
510{
511	struct filedesc *fdp = p->p_fd;
512	struct file *fp;
513	struct vnode *vp;
514
515	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
516	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
517		return (EBADF);
518	switch (fp->f_type) {
519
520	case DTYPE_PIPE:
521	case DTYPE_SOCKET:
522		if (uap->name != _PC_PIPE_BUF)
523			return (EINVAL);
524		*retval = PIPE_BUF;
525		return (0);
526
527	case DTYPE_FIFO:
528	case DTYPE_VNODE:
529		vp = (struct vnode *)fp->f_data;
530		return (VOP_PATHCONF(vp, uap->name, retval));
531
532	default:
533		panic("fpathconf");
534	}
535	/*NOTREACHED*/
536}
537
538/*
539 * Allocate a file descriptor for the process.
540 */
541static int fdexpand;
542SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
543
544int
545fdalloc(p, want, result)
546	struct proc *p;
547	int want;
548	int *result;
549{
550	register struct filedesc *fdp = p->p_fd;
551	register int i;
552	int lim, last, nfiles;
553	struct file **newofile;
554	char *newofileflags;
555
556	/*
557	 * Search for a free descriptor starting at the higher
558	 * of want or fd_freefile.  If that fails, consider
559	 * expanding the ofile array.
560	 */
561	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
562	for (;;) {
563		last = min(fdp->fd_nfiles, lim);
564		if ((i = want) < fdp->fd_freefile)
565			i = fdp->fd_freefile;
566		for (; i < last; i++) {
567			if (fdp->fd_ofiles[i] == NULL) {
568				fdp->fd_ofileflags[i] = 0;
569				if (i > fdp->fd_lastfile)
570					fdp->fd_lastfile = i;
571				if (want <= fdp->fd_freefile)
572					fdp->fd_freefile = i;
573				*result = i;
574				return (0);
575			}
576		}
577
578		/*
579		 * No space in current array.  Expand?
580		 */
581		if (fdp->fd_nfiles >= lim)
582			return (EMFILE);
583		if (fdp->fd_nfiles < NDEXTENT)
584			nfiles = NDEXTENT;
585		else
586			nfiles = 2 * fdp->fd_nfiles;
587		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
588		    M_FILEDESC, M_WAITOK);
589		newofileflags = (char *) &newofile[nfiles];
590		/*
591		 * Copy the existing ofile and ofileflags arrays
592		 * and zero the new portion of each array.
593		 */
594		bcopy(fdp->fd_ofiles, newofile,
595			(i = sizeof(struct file *) * fdp->fd_nfiles));
596		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
597		bcopy(fdp->fd_ofileflags, newofileflags,
598			(i = sizeof(char) * fdp->fd_nfiles));
599		bzero(newofileflags + i, nfiles * sizeof(char) - i);
600		if (fdp->fd_nfiles > NDFILE)
601			FREE(fdp->fd_ofiles, M_FILEDESC);
602		fdp->fd_ofiles = newofile;
603		fdp->fd_ofileflags = newofileflags;
604		fdp->fd_nfiles = nfiles;
605		fdexpand++;
606	}
607	return (0);
608}
609
610/*
611 * Check to see whether n user file descriptors
612 * are available to the process p.
613 */
614int
615fdavail(p, n)
616	struct proc *p;
617	register int n;
618{
619	register struct filedesc *fdp = p->p_fd;
620	register struct file **fpp;
621	register int i, lim, last;
622
623	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
624	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
625		return (1);
626
627	last = min(fdp->fd_nfiles, lim);
628	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
629	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
630		if (*fpp == NULL && --n <= 0)
631			return (1);
632	return (0);
633}
634
635/*
636 * Create a new open file structure and allocate
637 * a file decriptor for the process that refers to it.
638 */
639int
640falloc(p, resultfp, resultfd)
641	register struct proc *p;
642	struct file **resultfp;
643	int *resultfd;
644{
645	register struct file *fp, *fq;
646	int error, i;
647
648	if ((error = fdalloc(p, 0, &i)))
649		return (error);
650	if (nfiles >= maxfiles) {
651		tablefull("file");
652		return (ENFILE);
653	}
654	/*
655	 * Allocate a new file descriptor.
656	 * If the process has file descriptor zero open, add to the list
657	 * of open files at that point, otherwise put it at the front of
658	 * the list of open files.
659	 */
660	nfiles++;
661	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
662	bzero(fp, sizeof(struct file));
663	if ((fq = p->p_fd->fd_ofiles[0])) {
664		LIST_INSERT_AFTER(fq, fp, f_list);
665	} else {
666		LIST_INSERT_HEAD(&filehead, fp, f_list);
667	}
668	p->p_fd->fd_ofiles[i] = fp;
669	fp->f_count = 1;
670	fp->f_cred = p->p_ucred;
671	fp->f_seqcount = 1;
672	crhold(fp->f_cred);
673	if (resultfp)
674		*resultfp = fp;
675	if (resultfd)
676		*resultfd = i;
677	return (0);
678}
679
680/*
681 * Free a file descriptor.
682 */
683void
684ffree(fp)
685	register struct file *fp;
686{
687	LIST_REMOVE(fp, f_list);
688	crfree(fp->f_cred);
689#ifdef DIAGNOSTIC
690	fp->f_count = 0;
691#endif
692	nfiles--;
693	FREE(fp, M_FILE);
694}
695
696/*
697 * Build a new filedesc structure.
698 */
699struct filedesc *
700fdinit(p)
701	struct proc *p;
702{
703	register struct filedesc0 *newfdp;
704	register struct filedesc *fdp = p->p_fd;
705
706	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
707	    M_FILEDESC, M_WAITOK);
708	bzero(newfdp, sizeof(struct filedesc0));
709	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
710	VREF(newfdp->fd_fd.fd_cdir);
711	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
712	if (newfdp->fd_fd.fd_rdir)
713		VREF(newfdp->fd_fd.fd_rdir);
714
715	/* Create the file descriptor table. */
716	newfdp->fd_fd.fd_refcnt = 1;
717	newfdp->fd_fd.fd_cmask = cmask;
718	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
719	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
720	newfdp->fd_fd.fd_nfiles = NDFILE;
721
722	newfdp->fd_fd.fd_freefile = 0;
723	newfdp->fd_fd.fd_lastfile = 0;
724
725	return (&newfdp->fd_fd);
726}
727
728/*
729 * Share a filedesc structure.
730 */
731struct filedesc *
732fdshare(p)
733	struct proc *p;
734{
735	p->p_fd->fd_refcnt++;
736	return (p->p_fd);
737}
738
739/*
740 * Copy a filedesc structure.
741 */
742struct filedesc *
743fdcopy(p)
744	struct proc *p;
745{
746	register struct filedesc *newfdp, *fdp = p->p_fd;
747	register struct file **fpp;
748	register int i;
749
750	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
751	    M_FILEDESC, M_WAITOK);
752	bcopy(fdp, newfdp, sizeof(struct filedesc));
753	VREF(newfdp->fd_cdir);
754	if (newfdp->fd_rdir)
755		VREF(newfdp->fd_rdir);
756	newfdp->fd_refcnt = 1;
757
758	/*
759	 * If the number of open files fits in the internal arrays
760	 * of the open file structure, use them, otherwise allocate
761	 * additional memory for the number of descriptors currently
762	 * in use.
763	 */
764	if (newfdp->fd_lastfile < NDFILE) {
765		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
766		newfdp->fd_ofileflags =
767		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
768		i = NDFILE;
769	} else {
770		/*
771		 * Compute the smallest multiple of NDEXTENT needed
772		 * for the file descriptors currently in use,
773		 * allowing the table to shrink.
774		 */
775		i = newfdp->fd_nfiles;
776		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
777			i /= 2;
778		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
779		    M_FILEDESC, M_WAITOK);
780		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
781	}
782	newfdp->fd_nfiles = i;
783	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
784	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
785	fpp = newfdp->fd_ofiles;
786	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
787		if (*fpp != NULL)
788			(*fpp)->f_count++;
789	return (newfdp);
790}
791
792/*
793 * Release a filedesc structure.
794 */
795void
796fdfree(p)
797	struct proc *p;
798{
799	register struct filedesc *fdp = p->p_fd;
800	struct file **fpp;
801	register int i;
802
803	if (--fdp->fd_refcnt > 0)
804		return;
805	fpp = fdp->fd_ofiles;
806	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
807		if (*fpp)
808			(void) closef(*fpp, p);
809	if (fdp->fd_nfiles > NDFILE)
810		FREE(fdp->fd_ofiles, M_FILEDESC);
811	vrele(fdp->fd_cdir);
812	if (fdp->fd_rdir)
813		vrele(fdp->fd_rdir);
814	FREE(fdp, M_FILEDESC);
815}
816
817/*
818 * Close any files on exec?
819 */
820void
821fdcloseexec(p)
822	struct proc *p;
823{
824	struct filedesc *fdp = p->p_fd;
825	struct file **fpp;
826	char *fdfp;
827	register int i;
828
829	fpp = fdp->fd_ofiles;
830	fdfp = fdp->fd_ofileflags;
831	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
832		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
833			if (*fdfp & UF_MAPPED)
834				(void) munmapfd(p, i);
835			(void) closef(*fpp, p);
836			*fpp = NULL;
837			*fdfp = 0;
838			if (i < fdp->fd_freefile)
839				fdp->fd_freefile = i;
840		}
841	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
842		fdp->fd_lastfile--;
843}
844
845/*
846 * Internal form of close.
847 * Decrement reference count on file structure.
848 * Note: p may be NULL when closing a file
849 * that was being passed in a message.
850 */
851int
852closef(fp, p)
853	register struct file *fp;
854	register struct proc *p;
855{
856	struct vnode *vp;
857	struct flock lf;
858	int error;
859
860	if (fp == NULL)
861		return (0);
862	/*
863	 * POSIX record locking dictates that any close releases ALL
864	 * locks owned by this process.  This is handled by setting
865	 * a flag in the unlock to free ONLY locks obeying POSIX
866	 * semantics, and not to free BSD-style file locks.
867	 * If the descriptor was in a message, POSIX-style locks
868	 * aren't passed with the descriptor.
869	 */
870	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
871		lf.l_whence = SEEK_SET;
872		lf.l_start = 0;
873		lf.l_len = 0;
874		lf.l_type = F_UNLCK;
875		vp = (struct vnode *)fp->f_data;
876		(void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
877	}
878	if (--fp->f_count > 0)
879		return (0);
880	if (fp->f_count < 0)
881		panic("closef: count < 0");
882	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
883		lf.l_whence = SEEK_SET;
884		lf.l_start = 0;
885		lf.l_len = 0;
886		lf.l_type = F_UNLCK;
887		vp = (struct vnode *)fp->f_data;
888		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
889	}
890	if (fp->f_ops)
891		error = (*fp->f_ops->fo_close)(fp, p);
892	else
893		error = 0;
894	ffree(fp);
895	return (error);
896}
897
898/*
899 * Apply an advisory lock on a file descriptor.
900 *
901 * Just attempt to get a record lock of the requested type on
902 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
903 */
904#ifndef _SYS_SYSPROTO_H_
905struct flock_args {
906	int	fd;
907	int	how;
908};
909#endif
910/* ARGSUSED */
911int
912flock(p, uap, retval)
913	struct proc *p;
914	register struct flock_args *uap;
915	int *retval;
916{
917	register struct filedesc *fdp = p->p_fd;
918	register struct file *fp;
919	struct vnode *vp;
920	struct flock lf;
921
922	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
923	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
924		return (EBADF);
925	if (fp->f_type != DTYPE_VNODE)
926		return (EOPNOTSUPP);
927	vp = (struct vnode *)fp->f_data;
928	lf.l_whence = SEEK_SET;
929	lf.l_start = 0;
930	lf.l_len = 0;
931	if (uap->how & LOCK_UN) {
932		lf.l_type = F_UNLCK;
933		fp->f_flag &= ~FHASLOCK;
934		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
935	}
936	if (uap->how & LOCK_EX)
937		lf.l_type = F_WRLCK;
938	else if (uap->how & LOCK_SH)
939		lf.l_type = F_RDLCK;
940	else
941		return (EBADF);
942	fp->f_flag |= FHASLOCK;
943	if (uap->how & LOCK_NB)
944		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
945	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
946}
947
/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 *
 * This routine never succeeds: it records the minor number of dev in
 * p->p_dupfd and always returns ENODEV.
 */
/* ARGSUSED */
static int
fdopen(dev, mode, type, p)
	dev_t dev;
	int mode, type;
	struct proc *p;
{

	/*
	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
	 * file descriptor being sought for duplication. The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open. Open will detect this special error and take the
	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	p->p_dupfd = minor(dev);
	return (ENODEV);
}
975
976/*
977 * Duplicate the specified descriptor to a free descriptor.
978 */
979int
980dupfdopen(fdp, indx, dfd, mode, error)
981	register struct filedesc *fdp;
982	register int indx, dfd;
983	int mode;
984	int error;
985{
986	register struct file *wfp;
987	struct file *fp;
988
989	/*
990	 * If the to-be-dup'd fd number is greater than the allowed number
991	 * of file descriptors, or the fd to be dup'd has already been
992	 * closed, reject.  Note, check for new == old is necessary as
993	 * falloc could allocate an already closed to-be-dup'd descriptor
994	 * as the new descriptor.
995	 */
996	fp = fdp->fd_ofiles[indx];
997	if ((u_int)dfd >= fdp->fd_nfiles ||
998	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
999		return (EBADF);
1000
1001	/*
1002	 * There are two cases of interest here.
1003	 *
1004	 * For ENODEV simply dup (dfd) to file descriptor
1005	 * (indx) and return.
1006	 *
1007	 * For ENXIO steal away the file structure from (dfd) and
1008	 * store it in (indx).  (dfd) is effectively closed by
1009	 * this operation.
1010	 *
1011	 * Any other error code is just returned.
1012	 */
1013	switch (error) {
1014	case ENODEV:
1015		/*
1016		 * Check that the mode the file is being opened for is a
1017		 * subset of the mode of the existing descriptor.
1018		 */
1019		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1020			return (EACCES);
1021		fdp->fd_ofiles[indx] = wfp;
1022		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1023		wfp->f_count++;
1024		if (indx > fdp->fd_lastfile)
1025			fdp->fd_lastfile = indx;
1026		return (0);
1027
1028	case ENXIO:
1029		/*
1030		 * Steal away the file pointer from dfd, and stuff it into indx.
1031		 */
1032		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1033		fdp->fd_ofiles[dfd] = NULL;
1034		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1035		fdp->fd_ofileflags[dfd] = 0;
1036		/*
1037		 * Complete the clean up of the filedesc structure by
1038		 * recomputing the various hints.
1039		 */
1040		if (indx > fdp->fd_lastfile)
1041			fdp->fd_lastfile = indx;
1042		else
1043			while (fdp->fd_lastfile > 0 &&
1044			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1045				fdp->fd_lastfile--;
1046			if (dfd < fdp->fd_freefile)
1047				fdp->fd_freefile = dfd;
1048		return (0);
1049
1050	default:
1051		return (error);
1052	}
1053	/* NOTREACHED */
1054}
1055
1056/*
1057 * Get file structures.
1058 */
1059static int
1060sysctl_kern_file SYSCTL_HANDLER_ARGS
1061{
1062	int error;
1063	struct file *fp;
1064
1065	if (!req->oldptr) {
1066		/*
1067		 * overestimate by 10 files
1068		 */
1069		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1070				(nfiles + 10) * sizeof(struct file)));
1071	}
1072
1073	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1074	if (error)
1075		return (error);
1076
1077	/*
1078	 * followed by an array of file structures
1079	 */
1080	for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1081		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1082		if (error)
1083			return (error);
1084	}
1085	return (0);
1086}
1087
1088SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1089	0, 0, sysctl_kern_file, "S,file", "");
1090
1091SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1092	CTLFLAG_RW, &maxfilesperproc, 0, "");
1093
1094SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1095
1096static fildesc_devsw_installed = 0;
1097#ifdef DEVFS
1098static	void *devfs_token_stdin;
1099static	void *devfs_token_stdout;
1100static	void *devfs_token_stderr;
1101static	void *devfs_token_fildesc[NUMFDESC];
1102#endif
1103
1104static void 	fildesc_drvinit(void *unused)
1105{
1106	dev_t dev;
1107#ifdef DEVFS
1108	int fd;
1109#endif
1110
1111	if( ! fildesc_devsw_installed ) {
1112		dev = makedev(CDEV_MAJOR,0);
1113		cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1114		fildesc_devsw_installed = 1;
1115#ifdef DEVFS
1116		for (fd = 0; fd < NUMFDESC; fd++)
1117			devfs_token_fildesc[fd] =
1118				devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1119						 UID_BIN, GID_BIN, 0666,
1120						 "fd/%d", fd);
1121		devfs_token_stdin =
1122			devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1123					 UID_ROOT, GID_WHEEL, 0666,
1124					 "stdin", fd);
1125		devfs_token_stdout =
1126			devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1127					 UID_ROOT, GID_WHEEL, 0666,
1128					 "stdout", fd);
1129		devfs_token_stderr =
1130			devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1131					 UID_ROOT, GID_WHEEL, 0666,
1132					 "stderr", fd);
1133#endif
1134    	}
1135}
1136
1137SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1138					fildesc_drvinit,NULL)
1139
1140
1141