kern_descrip.c revision 64529
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 64529 2000-08-11 09:05:12Z peter $
40 */
41
42#include "opt_compat.h"
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/sysproto.h>
46#include <sys/conf.h>
47#include <sys/filedesc.h>
48#include <sys/kernel.h>
49#include <sys/sysctl.h>
50#include <sys/vnode.h>
51#include <sys/proc.h>
52#include <sys/file.h>
53#include <sys/stat.h>
54#include <sys/filio.h>
55#include <sys/fcntl.h>
56#include <sys/malloc.h>
57#include <sys/unistd.h>
58#include <sys/resourcevar.h>
59#include <sys/event.h>
60
61#include <vm/vm.h>
62#include <vm/vm_extern.h>
63
/*
 * Malloc type tags for the allocations made in this file: descriptor
 * tables (M_FILEDESC), open file structures (M_FILE) and sigio
 * structures (M_SIGIO).  M_FILE is the only one not declared static.
 */
static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
MALLOC_DEFINE(M_FILE, "file", "Open file structure");
static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
67
/* Open routine of the descriptor pseudo-device; defined later in this file. */
static	 d_open_t  fdopen;
#define NUMFDESC 64

#define CDEV_MAJOR 22
/*
 * Character device switch for the descriptor pseudo-device.  Only the
 * open entry point is supplied by this file (fdopen); every other slot
 * is filled with the corresponding "no*" stub.
 */
static struct cdevsw fildesc_cdevsw = {
	/* open */	fdopen,
	/* close */	noclose,
	/* read */	noread,
	/* write */	nowrite,
	/* ioctl */	noioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	nostrategy,
	/* name */	"FD",
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
	/* bmaj */	-1
};
88
/*
 * Forward declarations of file-local helpers.  finishdup() is the common
 * tail of dup(), dup2() and fcntl(F_DUPFD).  The badfo_* routines are not
 * defined in the part of the file visible here; presumably they back the
 * badfileops table that falloc() installs -- confirm at their definitions.
 */
static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
static int badfo_readwrite __P((struct file *fp, struct uio *uio,
    struct ucred *cred, int flags, struct proc *p));
static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
    struct proc *p));
static int badfo_poll __P((struct file *fp, int events,
    struct ucred *cred, struct proc *p));
static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
static int badfo_close __P((struct file *fp, struct proc *p));
98
/*
 * Descriptor management.
 *
 * filehead and nfiles are maintained by falloc() and ffree(); cmask is
 * defined elsewhere and is copied into fd_cmask by fdinit().
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;
105
106/*
107 * System calls on descriptors.
108 */
109#ifndef _SYS_SYSPROTO_H_
110struct getdtablesize_args {
111	int	dummy;
112};
113#endif
114/* ARGSUSED */
115int
116getdtablesize(p, uap)
117	struct proc *p;
118	struct getdtablesize_args *uap;
119{
120
121	p->p_retval[0] =
122	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
123	return (0);
124}
125
126/*
127 * Duplicate a file descriptor to a particular value.
128 */
129#ifndef _SYS_SYSPROTO_H_
130struct dup2_args {
131	u_int	from;
132	u_int	to;
133};
134#endif
135/* ARGSUSED */
136int
137dup2(p, uap)
138	struct proc *p;
139	struct dup2_args *uap;
140{
141	register struct filedesc *fdp = p->p_fd;
142	register u_int old = uap->from, new = uap->to;
143	int i, error;
144
145	if (old >= fdp->fd_nfiles ||
146	    fdp->fd_ofiles[old] == NULL ||
147	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
148	    new >= maxfilesperproc)
149		return (EBADF);
150	if (old == new) {
151		p->p_retval[0] = new;
152		return (0);
153	}
154	if (new >= fdp->fd_nfiles) {
155		if ((error = fdalloc(p, new, &i)))
156			return (error);
157		if (new != i)
158			panic("dup2: fdalloc");
159	} else if (fdp->fd_ofiles[new]) {
160		if (fdp->fd_ofileflags[new] & UF_MAPPED)
161			(void) munmapfd(p, new);
162		/*
163		 * dup2() must succeed even if the close has an error.
164		 */
165		(void) closef(fdp->fd_ofiles[new], p);
166	}
167	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
168}
169
170/*
171 * Duplicate a file descriptor.
172 */
173#ifndef _SYS_SYSPROTO_H_
174struct dup_args {
175	u_int	fd;
176};
177#endif
178/* ARGSUSED */
179int
180dup(p, uap)
181	struct proc *p;
182	struct dup_args *uap;
183{
184	register struct filedesc *fdp;
185	u_int old;
186	int new, error;
187
188	old = uap->fd;
189	fdp = p->p_fd;
190	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
191		return (EBADF);
192	if ((error = fdalloc(p, 0, &new)))
193		return (error);
194	return (finishdup(fdp, (int)old, new, p->p_retval));
195}
196
197/*
198 * The file control system call.
199 */
200#ifndef _SYS_SYSPROTO_H_
201struct fcntl_args {
202	int	fd;
203	int	cmd;
204	long	arg;
205};
206#endif
207/* ARGSUSED */
208int
209fcntl(p, uap)
210	struct proc *p;
211	register struct fcntl_args *uap;
212{
213	register struct filedesc *fdp = p->p_fd;
214	register struct file *fp;
215	register char *pop;
216	struct vnode *vp;
217	int i, tmp, error, flg = F_POSIX;
218	struct flock fl;
219	u_int newmin;
220
221	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
222	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
223		return (EBADF);
224	pop = &fdp->fd_ofileflags[uap->fd];
225	switch (uap->cmd) {
226
227	case F_DUPFD:
228		newmin = uap->arg;
229		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
230		    newmin >= maxfilesperproc)
231			return (EINVAL);
232		if ((error = fdalloc(p, newmin, &i)))
233			return (error);
234		return (finishdup(fdp, uap->fd, i, p->p_retval));
235
236	case F_GETFD:
237		p->p_retval[0] = *pop & 1;
238		return (0);
239
240	case F_SETFD:
241		*pop = (*pop &~ 1) | (uap->arg & 1);
242		return (0);
243
244	case F_GETFL:
245		p->p_retval[0] = OFLAGS(fp->f_flag);
246		return (0);
247
248	case F_SETFL:
249		fp->f_flag &= ~FCNTLFLAGS;
250		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
251		tmp = fp->f_flag & FNONBLOCK;
252		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
253		if (error)
254			return (error);
255		tmp = fp->f_flag & FASYNC;
256		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
257		if (!error)
258			return (0);
259		fp->f_flag &= ~FNONBLOCK;
260		tmp = 0;
261		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
262		return (error);
263
264	case F_GETOWN:
265		return (fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p));
266
267	case F_SETOWN:
268		return (fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p));
269
270	case F_SETLKW:
271		flg |= F_WAIT;
272		/* Fall into F_SETLK */
273
274	case F_SETLK:
275		if (fp->f_type != DTYPE_VNODE)
276			return (EBADF);
277		vp = (struct vnode *)fp->f_data;
278		/* Copy in the lock structure */
279		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
280		    sizeof(fl));
281		if (error)
282			return (error);
283		if (fl.l_whence == SEEK_CUR)
284			fl.l_start += fp->f_offset;
285		switch (fl.l_type) {
286
287		case F_RDLCK:
288			if ((fp->f_flag & FREAD) == 0)
289				return (EBADF);
290			p->p_flag |= P_ADVLOCK;
291			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg));
292
293		case F_WRLCK:
294			if ((fp->f_flag & FWRITE) == 0)
295				return (EBADF);
296			p->p_flag |= P_ADVLOCK;
297			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg));
298
299		case F_UNLCK:
300			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &fl,
301				F_POSIX));
302
303		default:
304			return (EINVAL);
305		}
306
307	case F_GETLK:
308		if (fp->f_type != DTYPE_VNODE)
309			return (EBADF);
310		vp = (struct vnode *)fp->f_data;
311		/* Copy in the lock structure */
312		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
313		    sizeof(fl));
314		if (error)
315			return (error);
316		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
317		    fl.l_type != F_UNLCK)
318			return (EINVAL);
319		if (fl.l_whence == SEEK_CUR)
320			fl.l_start += fp->f_offset;
321		if ((error = VOP_ADVLOCK(vp,(caddr_t)p->p_leader,F_GETLK,&fl,F_POSIX)))
322			return (error);
323		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
324		    sizeof(fl)));
325
326	default:
327		return (EINVAL);
328	}
329	/* NOTREACHED */
330}
331
332/*
333 * Common code for dup, dup2, and fcntl(F_DUPFD).
334 */
335static int
336finishdup(fdp, old, new, retval)
337	register struct filedesc *fdp;
338	register int old, new;
339	register_t *retval;
340{
341	register struct file *fp;
342
343	fp = fdp->fd_ofiles[old];
344	fdp->fd_ofiles[new] = fp;
345	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
346	fhold(fp);
347	if (new > fdp->fd_lastfile)
348		fdp->fd_lastfile = new;
349	*retval = new;
350	return (0);
351}
352
353/*
354 * If sigio is on the list associated with a process or process group,
355 * disable signalling from the device, remove sigio from the list and
356 * free sigio.
357 */
358void
359funsetown(sigio)
360	struct sigio *sigio;
361{
362	int s;
363
364	if (sigio == NULL)
365		return;
366	s = splhigh();
367	*(sigio->sio_myref) = NULL;
368	splx(s);
369	if (sigio->sio_pgid < 0) {
370		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
371			     sigio, sio_pgsigio);
372	} else /* if ((*sigiop)->sio_pgid > 0) */ {
373		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
374			     sigio, sio_pgsigio);
375	}
376	crfree(sigio->sio_ucred);
377	FREE(sigio, M_SIGIO);
378}
379
380/* Free a list of sigio structures. */
381void
382funsetownlst(sigiolst)
383	struct sigiolst *sigiolst;
384{
385	struct sigio *sigio;
386
387	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
388		funsetown(sigio);
389}
390
391/*
392 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
393 *
394 * After permission checking, add a sigio structure to the sigio list for
395 * the process or process group.
396 */
397int
398fsetown(pgid, sigiop)
399	pid_t pgid;
400	struct sigio **sigiop;
401{
402	struct proc *proc;
403	struct pgrp *pgrp;
404	struct sigio *sigio;
405	int s;
406
407	if (pgid == 0) {
408		funsetown(*sigiop);
409		return (0);
410	}
411	if (pgid > 0) {
412		proc = pfind(pgid);
413		if (proc == NULL)
414			return (ESRCH);
415
416		/*
417		 * Policy - Don't allow a process to FSETOWN a process
418		 * in another session.
419		 *
420		 * Remove this test to allow maximum flexibility or
421		 * restrict FSETOWN to the current process or process
422		 * group for maximum safety.
423		 */
424		if (proc->p_session != curproc->p_session)
425			return (EPERM);
426
427		pgrp = NULL;
428	} else /* if (pgid < 0) */ {
429		pgrp = pgfind(-pgid);
430		if (pgrp == NULL)
431			return (ESRCH);
432
433		/*
434		 * Policy - Don't allow a process to FSETOWN a process
435		 * in another session.
436		 *
437		 * Remove this test to allow maximum flexibility or
438		 * restrict FSETOWN to the current process or process
439		 * group for maximum safety.
440		 */
441		if (pgrp->pg_session != curproc->p_session)
442			return (EPERM);
443
444		proc = NULL;
445	}
446	funsetown(*sigiop);
447	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
448	if (pgid > 0) {
449		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
450		sigio->sio_proc = proc;
451	} else {
452		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
453		sigio->sio_pgrp = pgrp;
454	}
455	sigio->sio_pgid = pgid;
456	crhold(curproc->p_ucred);
457	sigio->sio_ucred = curproc->p_ucred;
458	/* It would be convenient if p_ruid was in ucred. */
459	sigio->sio_ruid = curproc->p_cred->p_ruid;
460	sigio->sio_myref = sigiop;
461	s = splhigh();
462	*sigiop = sigio;
463	splx(s);
464	return (0);
465}
466
467/*
468 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
469 */
470pid_t
471fgetown(sigio)
472	struct sigio *sigio;
473{
474	return (sigio != NULL ? sigio->sio_pgid : 0);
475}
476
477/*
478 * Close a file descriptor.
479 */
480#ifndef _SYS_SYSPROTO_H_
481struct close_args {
482        int     fd;
483};
484#endif
485/* ARGSUSED */
486int
487close(p, uap)
488	struct proc *p;
489	struct close_args *uap;
490{
491	register struct filedesc *fdp = p->p_fd;
492	register struct file *fp;
493	register int fd = uap->fd;
494	register u_char *pf;
495
496	if ((unsigned)fd >= fdp->fd_nfiles ||
497	    (fp = fdp->fd_ofiles[fd]) == NULL)
498		return (EBADF);
499	pf = (u_char *)&fdp->fd_ofileflags[fd];
500	if (*pf & UF_MAPPED)
501		(void) munmapfd(p, fd);
502	fdp->fd_ofiles[fd] = NULL;
503	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
504		fdp->fd_lastfile--;
505	if (fd < fdp->fd_freefile)
506		fdp->fd_freefile = fd;
507	*pf = 0;
508	if (fd < fdp->fd_knlistsize)
509		knote_fdclose(p, fd);
510	return (closef(fp, p));
511}
512
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor, in the old
 * "struct ostat" layout.
 *
 * Returns EBADF if uap->fd is not open, otherwise the error from
 * fo_stat() or from copying the converted structure out to uap->sb.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles)
		return (EBADF);
	fp = fdp->fd_ofiles[uap->fd];
	if (fp == NULL)
		return (EBADF);

	error = fo_stat(fp, &sb, p);
	if (error != 0)
		return (error);
	cvtstat(&sb, &osb);
	return (copyout((caddr_t)&osb, (caddr_t)uap->sb, sizeof (osb)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
546
547/*
548 * Return status information about a file descriptor.
549 */
550#ifndef _SYS_SYSPROTO_H_
551struct fstat_args {
552	int	fd;
553	struct	stat *sb;
554};
555#endif
556/* ARGSUSED */
557int
558fstat(p, uap)
559	struct proc *p;
560	register struct fstat_args *uap;
561{
562	register struct filedesc *fdp = p->p_fd;
563	register struct file *fp;
564	struct stat ub;
565	int error;
566
567	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
568	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
569		return (EBADF);
570	error = fo_stat(fp, &ub, p);
571	if (error == 0)
572		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
573	return (error);
574}
575
576/*
577 * Return status information about a file descriptor.
578 */
579#ifndef _SYS_SYSPROTO_H_
580struct nfstat_args {
581	int	fd;
582	struct	nstat *sb;
583};
584#endif
585/* ARGSUSED */
586int
587nfstat(p, uap)
588	struct proc *p;
589	register struct nfstat_args *uap;
590{
591	register struct filedesc *fdp = p->p_fd;
592	register struct file *fp;
593	struct stat ub;
594	struct nstat nub;
595	int error;
596
597	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
598	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
599		return (EBADF);
600	error = fo_stat(fp, &ub, p);
601	if (error == 0) {
602		cvtnstat(&ub, &nub);
603		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
604	}
605	return (error);
606}
607
608/*
609 * Return pathconf information about a file descriptor.
610 */
611#ifndef _SYS_SYSPROTO_H_
612struct fpathconf_args {
613	int	fd;
614	int	name;
615};
616#endif
617/* ARGSUSED */
618int
619fpathconf(p, uap)
620	struct proc *p;
621	register struct fpathconf_args *uap;
622{
623	struct filedesc *fdp = p->p_fd;
624	struct file *fp;
625	struct vnode *vp;
626
627	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
628	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
629		return (EBADF);
630	switch (fp->f_type) {
631
632	case DTYPE_PIPE:
633	case DTYPE_SOCKET:
634		if (uap->name != _PC_PIPE_BUF)
635			return (EINVAL);
636		p->p_retval[0] = PIPE_BUF;
637		return (0);
638
639	case DTYPE_FIFO:
640	case DTYPE_VNODE:
641		vp = (struct vnode *)fp->f_data;
642		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
643
644	default:
645		return (EOPNOTSUPP);
646	}
647	/*NOTREACHED*/
648}
649
650/*
651 * Allocate a file descriptor for the process.
652 */
653static int fdexpand;
654SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
655
656int
657fdalloc(p, want, result)
658	struct proc *p;
659	int want;
660	int *result;
661{
662	register struct filedesc *fdp = p->p_fd;
663	register int i;
664	int lim, last, nfiles;
665	struct file **newofile;
666	char *newofileflags;
667
668	/*
669	 * Search for a free descriptor starting at the higher
670	 * of want or fd_freefile.  If that fails, consider
671	 * expanding the ofile array.
672	 */
673	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
674	for (;;) {
675		last = min(fdp->fd_nfiles, lim);
676		if ((i = want) < fdp->fd_freefile)
677			i = fdp->fd_freefile;
678		for (; i < last; i++) {
679			if (fdp->fd_ofiles[i] == NULL) {
680				fdp->fd_ofileflags[i] = 0;
681				if (i > fdp->fd_lastfile)
682					fdp->fd_lastfile = i;
683				if (want <= fdp->fd_freefile)
684					fdp->fd_freefile = i;
685				*result = i;
686				return (0);
687			}
688		}
689
690		/*
691		 * No space in current array.  Expand?
692		 */
693		if (fdp->fd_nfiles >= lim)
694			return (EMFILE);
695		if (fdp->fd_nfiles < NDEXTENT)
696			nfiles = NDEXTENT;
697		else
698			nfiles = 2 * fdp->fd_nfiles;
699		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
700		    M_FILEDESC, M_WAITOK);
701		newofileflags = (char *) &newofile[nfiles];
702		/*
703		 * Copy the existing ofile and ofileflags arrays
704		 * and zero the new portion of each array.
705		 */
706		bcopy(fdp->fd_ofiles, newofile,
707			(i = sizeof(struct file *) * fdp->fd_nfiles));
708		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
709		bcopy(fdp->fd_ofileflags, newofileflags,
710			(i = sizeof(char) * fdp->fd_nfiles));
711		bzero(newofileflags + i, nfiles * sizeof(char) - i);
712		if (fdp->fd_nfiles > NDFILE)
713			FREE(fdp->fd_ofiles, M_FILEDESC);
714		fdp->fd_ofiles = newofile;
715		fdp->fd_ofileflags = newofileflags;
716		fdp->fd_nfiles = nfiles;
717		fdexpand++;
718	}
719	return (0);
720}
721
722/*
723 * Check to see whether n user file descriptors
724 * are available to the process p.
725 */
726int
727fdavail(p, n)
728	struct proc *p;
729	register int n;
730{
731	register struct filedesc *fdp = p->p_fd;
732	register struct file **fpp;
733	register int i, lim, last;
734
735	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
736	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
737		return (1);
738
739	last = min(fdp->fd_nfiles, lim);
740	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
741	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
742		if (*fpp == NULL && --n <= 0)
743			return (1);
744	return (0);
745}
746
747/*
748 * Create a new open file structure and allocate
749 * a file decriptor for the process that refers to it.
750 */
751int
752falloc(p, resultfp, resultfd)
753	register struct proc *p;
754	struct file **resultfp;
755	int *resultfd;
756{
757	register struct file *fp, *fq;
758	int error, i;
759
760	if ((error = fdalloc(p, 0, &i)))
761		return (error);
762	if (nfiles >= maxfiles) {
763		tablefull("file");
764		return (ENFILE);
765	}
766	/*
767	 * Allocate a new file descriptor.
768	 * If the process has file descriptor zero open, add to the list
769	 * of open files at that point, otherwise put it at the front of
770	 * the list of open files.
771	 */
772	nfiles++;
773	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
774	bzero(fp, sizeof(struct file));
775	fp->f_count = 1;
776	fp->f_cred = p->p_ucred;
777	fp->f_ops = &badfileops;
778	fp->f_seqcount = 1;
779	crhold(fp->f_cred);
780	if ((fq = p->p_fd->fd_ofiles[0])) {
781		LIST_INSERT_AFTER(fq, fp, f_list);
782	} else {
783		LIST_INSERT_HEAD(&filehead, fp, f_list);
784	}
785	p->p_fd->fd_ofiles[i] = fp;
786	if (resultfp)
787		*resultfp = fp;
788	if (resultfd)
789		*resultfd = i;
790	return (0);
791}
792
793/*
794 * Free a file descriptor.
795 */
796void
797ffree(fp)
798	register struct file *fp;
799{
800	LIST_REMOVE(fp, f_list);
801	crfree(fp->f_cred);
802#if defined(DIAGNOSTIC) || defined(INVARIANTS)
803	fp->f_count = 0;
804#endif
805	nfiles--;
806	FREE(fp, M_FILE);
807}
808
809/*
810 * Build a new filedesc structure.
811 */
812struct filedesc *
813fdinit(p)
814	struct proc *p;
815{
816	register struct filedesc0 *newfdp;
817	register struct filedesc *fdp = p->p_fd;
818
819	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
820	    M_FILEDESC, M_WAITOK);
821	bzero(newfdp, sizeof(struct filedesc0));
822	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
823	if (newfdp->fd_fd.fd_cdir)
824		VREF(newfdp->fd_fd.fd_cdir);
825	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
826	if (newfdp->fd_fd.fd_rdir)
827		VREF(newfdp->fd_fd.fd_rdir);
828	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
829	if (newfdp->fd_fd.fd_jdir)
830		VREF(newfdp->fd_fd.fd_jdir);
831
832	/* Create the file descriptor table. */
833	newfdp->fd_fd.fd_refcnt = 1;
834	newfdp->fd_fd.fd_cmask = cmask;
835	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
836	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
837	newfdp->fd_fd.fd_nfiles = NDFILE;
838	newfdp->fd_fd.fd_knlistsize = -1;
839
840	return (&newfdp->fd_fd);
841}
842
843/*
844 * Share a filedesc structure.
845 */
846struct filedesc *
847fdshare(p)
848	struct proc *p;
849{
850	p->p_fd->fd_refcnt++;
851	return (p->p_fd);
852}
853
854/*
855 * Copy a filedesc structure.
856 */
857struct filedesc *
858fdcopy(p)
859	struct proc *p;
860{
861	register struct filedesc *newfdp, *fdp = p->p_fd;
862	register struct file **fpp;
863	register int i;
864
865	/* Certain daemons might not have file descriptors. */
866	if (fdp == NULL)
867		return (NULL);
868
869	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
870	    M_FILEDESC, M_WAITOK);
871	bcopy(fdp, newfdp, sizeof(struct filedesc));
872	if (newfdp->fd_cdir)
873		VREF(newfdp->fd_cdir);
874	if (newfdp->fd_rdir)
875		VREF(newfdp->fd_rdir);
876	if (newfdp->fd_jdir)
877		VREF(newfdp->fd_jdir);
878	newfdp->fd_refcnt = 1;
879
880	/*
881	 * If the number of open files fits in the internal arrays
882	 * of the open file structure, use them, otherwise allocate
883	 * additional memory for the number of descriptors currently
884	 * in use.
885	 */
886	if (newfdp->fd_lastfile < NDFILE) {
887		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
888		newfdp->fd_ofileflags =
889		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
890		i = NDFILE;
891	} else {
892		/*
893		 * Compute the smallest multiple of NDEXTENT needed
894		 * for the file descriptors currently in use,
895		 * allowing the table to shrink.
896		 */
897		i = newfdp->fd_nfiles;
898		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
899			i /= 2;
900		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
901		    M_FILEDESC, M_WAITOK);
902		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
903	}
904	newfdp->fd_nfiles = i;
905	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
906	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
907
908	/*
909	 * kq descriptors cannot be copied.
910	 */
911	if (newfdp->fd_knlistsize != -1) {
912		fpp = newfdp->fd_ofiles;
913		for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
914			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
915				*fpp = NULL;
916		newfdp->fd_knlist = NULL;
917		newfdp->fd_knlistsize = -1;
918		newfdp->fd_knhash = NULL;
919		newfdp->fd_knhashmask = 0;
920	}
921
922	fpp = newfdp->fd_ofiles;
923	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
924		if (*fpp != NULL)
925			fhold(*fpp);
926	return (newfdp);
927}
928
929/*
930 * Release a filedesc structure.
931 */
932void
933fdfree(p)
934	struct proc *p;
935{
936	register struct filedesc *fdp = p->p_fd;
937	struct file **fpp;
938	register int i;
939
940	/* Certain daemons might not have file descriptors. */
941	if (fdp == NULL)
942		return;
943
944	if (--fdp->fd_refcnt > 0)
945		return;
946	fpp = fdp->fd_ofiles;
947	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
948		if (*fpp)
949			(void) closef(*fpp, p);
950	if (fdp->fd_nfiles > NDFILE)
951		FREE(fdp->fd_ofiles, M_FILEDESC);
952	if (fdp->fd_cdir)
953		vrele(fdp->fd_cdir);
954	if (fdp->fd_rdir)
955		vrele(fdp->fd_rdir);
956	if (fdp->fd_jdir)
957		vrele(fdp->fd_jdir);
958	if (fdp->fd_knlist)
959		FREE(fdp->fd_knlist, M_TEMP);
960	if (fdp->fd_knhash)
961		FREE(fdp->fd_knhash, M_TEMP);
962	FREE(fdp, M_FILEDESC);
963}
964
965/*
966 * For setugid programs, we don't want to people to use that setugidness
967 * to generate error messages which write to a file which otherwise would
968 * otherwise be off-limits to the process.
969 *
970 * This is a gross hack to plug the hole.  A better solution would involve
971 * a special vop or other form of generalized access control mechanism.  We
972 * go ahead and just reject all procfs file systems accesses as dangerous.
973 *
974 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
975 * sufficient.  We also don't for check setugidness since we know we are.
976 */
977static int
978is_unsafe(struct file *fp)
979{
980	if (fp->f_type == DTYPE_VNODE &&
981	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
982		return (1);
983	return (0);
984}
985
986/*
987 * Make this setguid thing safe, if at all possible.
988 */
989void
990setugidsafety(p)
991	struct proc *p;
992{
993	struct filedesc *fdp = p->p_fd;
994	struct file **fpp;
995	char *fdfp;
996	register int i;
997
998	/* Certain daemons might not have file descriptors. */
999	if (fdp == NULL)
1000		return;
1001
1002	fpp = fdp->fd_ofiles;
1003	fdfp = fdp->fd_ofileflags;
1004	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) {
1005		if (i > 2)
1006			break;
1007		if (*fpp != NULL && is_unsafe(*fpp)) {
1008			if ((*fdfp & UF_MAPPED) != 0)
1009				(void) munmapfd(p, i);
1010			(void) closef(*fpp, p);
1011			*fpp = NULL;
1012			*fdfp = 0;
1013			if (i < fdp->fd_freefile)
1014				fdp->fd_freefile = i;
1015		}
1016	}
1017	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1018		fdp->fd_lastfile--;
1019}
1020
1021/*
1022 * Close any files on exec?
1023 */
1024void
1025fdcloseexec(p)
1026	struct proc *p;
1027{
1028	struct filedesc *fdp = p->p_fd;
1029	struct file **fpp;
1030	char *fdfp;
1031	register int i;
1032
1033	/* Certain daemons might not have file descriptors. */
1034	if (fdp == NULL)
1035		return;
1036
1037	fpp = fdp->fd_ofiles;
1038	fdfp = fdp->fd_ofileflags;
1039	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1040		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1041			if (*fdfp & UF_MAPPED)
1042				(void) munmapfd(p, i);
1043			(void) closef(*fpp, p);
1044			*fpp = NULL;
1045			*fdfp = 0;
1046			if (i < fdp->fd_freefile)
1047				fdp->fd_freefile = i;
1048		}
1049	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1050		fdp->fd_lastfile--;
1051}
1052
1053/*
1054 * Internal form of close.
1055 * Decrement reference count on file structure.
1056 * Note: p may be NULL when closing a file
1057 * that was being passed in a message.
1058 */
1059int
1060closef(fp, p)
1061	register struct file *fp;
1062	register struct proc *p;
1063{
1064	struct vnode *vp;
1065	struct flock lf;
1066
1067	if (fp == NULL)
1068		return (0);
1069	/*
1070	 * POSIX record locking dictates that any close releases ALL
1071	 * locks owned by this process.  This is handled by setting
1072	 * a flag in the unlock to free ONLY locks obeying POSIX
1073	 * semantics, and not to free BSD-style file locks.
1074	 * If the descriptor was in a message, POSIX-style locks
1075	 * aren't passed with the descriptor.
1076	 */
1077	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1078		lf.l_whence = SEEK_SET;
1079		lf.l_start = 0;
1080		lf.l_len = 0;
1081		lf.l_type = F_UNLCK;
1082		vp = (struct vnode *)fp->f_data;
1083		(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX);
1084	}
1085	return (fdrop(fp, p));
1086}
1087
1088int
1089fdrop(fp, p)
1090	struct file *fp;
1091	struct proc *p;
1092{
1093	struct flock lf;
1094	struct vnode *vp;
1095	int error;
1096
1097	if (--fp->f_count > 0)
1098		return (0);
1099	if (fp->f_count < 0)
1100		panic("fdrop: count < 0");
1101	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1102		lf.l_whence = SEEK_SET;
1103		lf.l_start = 0;
1104		lf.l_len = 0;
1105		lf.l_type = F_UNLCK;
1106		vp = (struct vnode *)fp->f_data;
1107		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1108	}
1109	if (fp->f_ops != &badfileops)
1110		error = fo_close(fp, p);
1111	else
1112		error = 0;
1113	ffree(fp);
1114	return (error);
1115}
1116
1117/*
1118 * Apply an advisory lock on a file descriptor.
1119 *
1120 * Just attempt to get a record lock of the requested type on
1121 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1122 */
1123#ifndef _SYS_SYSPROTO_H_
1124struct flock_args {
1125	int	fd;
1126	int	how;
1127};
1128#endif
1129/* ARGSUSED */
1130int
1131flock(p, uap)
1132	struct proc *p;
1133	register struct flock_args *uap;
1134{
1135	register struct filedesc *fdp = p->p_fd;
1136	register struct file *fp;
1137	struct vnode *vp;
1138	struct flock lf;
1139
1140	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1141	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1142		return (EBADF);
1143	if (fp->f_type != DTYPE_VNODE)
1144		return (EOPNOTSUPP);
1145	vp = (struct vnode *)fp->f_data;
1146	lf.l_whence = SEEK_SET;
1147	lf.l_start = 0;
1148	lf.l_len = 0;
1149	if (uap->how & LOCK_UN) {
1150		lf.l_type = F_UNLCK;
1151		fp->f_flag &= ~FHASLOCK;
1152		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1153	}
1154	if (uap->how & LOCK_EX)
1155		lf.l_type = F_WRLCK;
1156	else if (uap->how & LOCK_SH)
1157		lf.l_type = F_RDLCK;
1158	else
1159		return (EBADF);
1160	fp->f_flag |= FHASLOCK;
1161	if (uap->how & LOCK_NB)
1162		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1163	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1164}
1165
1166/*
1167 * File Descriptor pseudo-device driver (/dev/fd/).
1168 *
1169 * Opening minor device N dup()s the file (if any) connected to file
1170 * descriptor N belonging to the calling process.  Note that this driver
1171 * consists of only the ``open()'' routine, because all subsequent
1172 * references to this file will be direct to the other driver.
1173 */
1174/* ARGSUSED */
1175static int
1176fdopen(dev, mode, type, p)
1177	dev_t dev;
1178	int mode, type;
1179	struct proc *p;
1180{
1181
1182	/*
1183	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1184	 * the file descriptor being sought for duplication. The error
1185	 * return ensures that the vnode for this device will be released
1186	 * by vn_open. Open will detect this special error and take the
1187	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1188	 * will simply report the error.
1189	 */
1190	p->p_dupfd = minor(dev);
1191	return (ENODEV);
1192}
1193
1194/*
1195 * Duplicate the specified descriptor to a free descriptor.
1196 */
1197int
1198dupfdopen(fdp, indx, dfd, mode, error)
1199	register struct filedesc *fdp;
1200	register int indx, dfd;
1201	int mode;
1202	int error;
1203{
1204	register struct file *wfp;
1205	struct file *fp;
1206
1207	/*
1208	 * If the to-be-dup'd fd number is greater than the allowed number
1209	 * of file descriptors, or the fd to be dup'd has already been
1210	 * closed, reject.  Note, check for new == old is necessary as
1211	 * falloc could allocate an already closed to-be-dup'd descriptor
1212	 * as the new descriptor.
1213	 */
1214	fp = fdp->fd_ofiles[indx];
1215	if ((u_int)dfd >= fdp->fd_nfiles ||
1216	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1217		return (EBADF);
1218
1219	/*
1220	 * There are two cases of interest here.
1221	 *
1222	 * For ENODEV simply dup (dfd) to file descriptor
1223	 * (indx) and return.
1224	 *
1225	 * For ENXIO steal away the file structure from (dfd) and
1226	 * store it in (indx).  (dfd) is effectively closed by
1227	 * this operation.
1228	 *
1229	 * Any other error code is just returned.
1230	 */
1231	switch (error) {
1232	case ENODEV:
1233		/*
1234		 * Check that the mode the file is being opened for is a
1235		 * subset of the mode of the existing descriptor.
1236		 */
1237		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1238			return (EACCES);
1239		fdp->fd_ofiles[indx] = wfp;
1240		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1241		fhold(wfp);
1242		if (indx > fdp->fd_lastfile)
1243			fdp->fd_lastfile = indx;
1244		return (0);
1245
1246	case ENXIO:
1247		/*
1248		 * Steal away the file pointer from dfd, and stuff it into indx.
1249		 */
1250		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1251		fdp->fd_ofiles[dfd] = NULL;
1252		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1253		fdp->fd_ofileflags[dfd] = 0;
1254		/*
1255		 * Complete the clean up of the filedesc structure by
1256		 * recomputing the various hints.
1257		 */
1258		if (indx > fdp->fd_lastfile)
1259			fdp->fd_lastfile = indx;
1260		else
1261			while (fdp->fd_lastfile > 0 &&
1262			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1263				fdp->fd_lastfile--;
1264			if (dfd < fdp->fd_freefile)
1265				fdp->fd_freefile = dfd;
1266		return (0);
1267
1268	default:
1269		return (error);
1270	}
1271	/* NOTREACHED */
1272}
1273
1274/*
1275 * Get file structures.
1276 */
1277static int
1278sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1279{
1280	int error;
1281	struct file *fp;
1282
1283	if (!req->oldptr) {
1284		/*
1285		 * overestimate by 10 files
1286		 */
1287		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1288				(nfiles + 10) * sizeof(struct file)));
1289	}
1290
1291	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1292	if (error)
1293		return (error);
1294
1295	/*
1296	 * followed by an array of file structures
1297	 */
1298	LIST_FOREACH(fp, &filehead, f_list) {
1299		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1300		if (error)
1301			return (error);
1302	}
1303	return (0);
1304}
1305
1306SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1307    0, 0, sysctl_kern_file, "S,file", "Entire file table");
1308
1309SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1310    &maxfilesperproc, 0, "Maximum files allowed open per process");
1311
1312SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1313    &maxfiles, 0, "Maximum number of files");
1314
1315static void
1316fildesc_drvinit(void *unused)
1317{
1318	int fd;
1319
1320	for (fd = 0; fd < NUMFDESC; fd++)
1321		make_dev(&fildesc_cdevsw, fd,
1322		    UID_BIN, GID_BIN, 0666, "fd/%d", fd);
1323	make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
1324	make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
1325	make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
1326}
1327
1328struct fileops badfileops = {
1329	badfo_readwrite,
1330	badfo_readwrite,
1331	badfo_ioctl,
1332	badfo_poll,
1333	badfo_stat,
1334	badfo_close
1335};
1336
/*
 * Stub installed twice in badfileops: ignores every argument and
 * always fails with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct proc *p)
{

	return (EBADF);
}
1348
1349static int
1350badfo_ioctl(fp, com, data, p)
1351	struct file *fp;
1352	u_long com;
1353	caddr_t data;
1354	struct proc *p;
1355{
1356
1357	return (EBADF);
1358}
1359
/*
 * Stub installed in badfileops: ignores every argument and always
 * returns 0.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *cred, struct proc *p)
{

	return (0);
}
1370
/*
 * Stub installed in badfileops: leaves *sb untouched and always
 * fails with EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct proc *p)
{

	return (EBADF);
}
1380
/*
 * Stub installed in badfileops: ignores both arguments and always
 * fails with EBADF.
 */
static int
badfo_close(struct file *fp, struct proc *p)
{

	return (EBADF);
}
1389
1390SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1391					fildesc_drvinit,NULL)
1392