kern_descrip.c revision 62185
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 62185 2000-06-27 23:08:36Z alfred $
40 */
41
42#include "opt_compat.h"
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/sysproto.h>
46#include <sys/conf.h>
47#include <sys/filedesc.h>
48#include <sys/kernel.h>
49#include <sys/sysctl.h>
50#include <sys/vnode.h>
51#include <sys/proc.h>
52#include <sys/file.h>
53#include <sys/stat.h>
54#include <sys/filio.h>
55#include <sys/fcntl.h>
56#include <sys/malloc.h>
57#include <sys/unistd.h>
58#include <sys/resourcevar.h>
59#include <sys/event.h>
60
61#include <vm/vm.h>
62#include <vm/vm_extern.h>
63
64static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
65MALLOC_DEFINE(M_FILE, "file", "Open file structure");
66static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
67
68static	 d_open_t  fdopen;
69#define NUMFDESC 64
70
71#define CDEV_MAJOR 22
72static struct cdevsw fildesc_cdevsw = {
73	/* open */	fdopen,
74	/* close */	noclose,
75	/* read */	noread,
76	/* write */	nowrite,
77	/* ioctl */	noioctl,
78	/* poll */	nopoll,
79	/* mmap */	nommap,
80	/* strategy */	nostrategy,
81	/* name */	"FD",
82	/* maj */	CDEV_MAJOR,
83	/* dump */	nodump,
84	/* psize */	nopsize,
85	/* flags */	0,
86	/* bmaj */	-1
87};
88
89static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
90static int badfo_readwrite __P((struct file *fp, struct uio *uio,
91    struct ucred *cred, int flags, struct proc *p));
92static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
93    struct proc *p));
94static int badfo_poll __P((struct file *fp, int events,
95    struct ucred *cred, struct proc *p));
96static int badfo_stat __P((struct file *fp, struct stat *sb, struct proc *p));
97static int badfo_close __P((struct file *fp, struct proc *p));
98
99/*
100 * Descriptor management.
101 */
102struct filelist filehead;	/* head of list of open files */
103int nfiles;			/* actual number of open files */
104extern int cmask;
105
106/*
107 * System calls on descriptors.
108 */
109#ifndef _SYS_SYSPROTO_H_
110struct getdtablesize_args {
111	int	dummy;
112};
113#endif
114/* ARGSUSED */
115int
116getdtablesize(p, uap)
117	struct proc *p;
118	struct getdtablesize_args *uap;
119{
120
121	p->p_retval[0] =
122	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
123	return (0);
124}
125
126/*
127 * Duplicate a file descriptor to a particular value.
128 */
129#ifndef _SYS_SYSPROTO_H_
130struct dup2_args {
131	u_int	from;
132	u_int	to;
133};
134#endif
135/* ARGSUSED */
136int
137dup2(p, uap)
138	struct proc *p;
139	struct dup2_args *uap;
140{
141	register struct filedesc *fdp = p->p_fd;
142	register u_int old = uap->from, new = uap->to;
143	int i, error;
144
145	if (old >= fdp->fd_nfiles ||
146	    fdp->fd_ofiles[old] == NULL ||
147	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
148	    new >= maxfilesperproc)
149		return (EBADF);
150	if (old == new) {
151		p->p_retval[0] = new;
152		return (0);
153	}
154	if (new >= fdp->fd_nfiles) {
155		if ((error = fdalloc(p, new, &i)))
156			return (error);
157		if (new != i)
158			panic("dup2: fdalloc");
159	} else if (fdp->fd_ofiles[new]) {
160		if (fdp->fd_ofileflags[new] & UF_MAPPED)
161			(void) munmapfd(p, new);
162		/*
163		 * dup2() must succeed even if the close has an error.
164		 */
165		(void) closef(fdp->fd_ofiles[new], p);
166	}
167	return (finishdup(fdp, (int)old, (int)new, p->p_retval));
168}
169
170/*
171 * Duplicate a file descriptor.
172 */
173#ifndef _SYS_SYSPROTO_H_
174struct dup_args {
175	u_int	fd;
176};
177#endif
178/* ARGSUSED */
179int
180dup(p, uap)
181	struct proc *p;
182	struct dup_args *uap;
183{
184	register struct filedesc *fdp;
185	u_int old;
186	int new, error;
187
188	old = uap->fd;
189	fdp = p->p_fd;
190	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
191		return (EBADF);
192	if ((error = fdalloc(p, 0, &new)))
193		return (error);
194	return (finishdup(fdp, (int)old, new, p->p_retval));
195}
196
197/*
198 * The file control system call.
199 */
200#ifndef _SYS_SYSPROTO_H_
201struct fcntl_args {
202	int	fd;
203	int	cmd;
204	long	arg;
205};
206#endif
207/* ARGSUSED */
208int
209fcntl(p, uap)
210	struct proc *p;
211	register struct fcntl_args *uap;
212{
213	register struct filedesc *fdp = p->p_fd;
214	register struct file *fp;
215	register char *pop;
216	struct vnode *vp;
217	int i, tmp, error, flg = F_POSIX;
218	struct flock fl;
219	u_int newmin;
220
221	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
222	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
223		return (EBADF);
224	pop = &fdp->fd_ofileflags[uap->fd];
225	switch (uap->cmd) {
226
227	case F_DUPFD:
228		newmin = uap->arg;
229		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
230		    newmin >= maxfilesperproc)
231			return (EINVAL);
232		if ((error = fdalloc(p, newmin, &i)))
233			return (error);
234		return (finishdup(fdp, uap->fd, i, p->p_retval));
235
236	case F_GETFD:
237		p->p_retval[0] = *pop & 1;
238		return (0);
239
240	case F_SETFD:
241		*pop = (*pop &~ 1) | (uap->arg & 1);
242		return (0);
243
244	case F_GETFL:
245		p->p_retval[0] = OFLAGS(fp->f_flag);
246		return (0);
247
248	case F_SETFL:
249		fp->f_flag &= ~FCNTLFLAGS;
250		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
251		tmp = fp->f_flag & FNONBLOCK;
252		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
253		if (error)
254			return (error);
255		tmp = fp->f_flag & FASYNC;
256		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
257		if (!error)
258			return (0);
259		fp->f_flag &= ~FNONBLOCK;
260		tmp = 0;
261		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
262		return (error);
263
264	case F_GETOWN:
265		return (fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p));
266
267	case F_SETOWN:
268		return (fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p));
269
270	case F_SETLKW:
271		flg |= F_WAIT;
272		/* Fall into F_SETLK */
273
274	case F_SETLK:
275		if (fp->f_type != DTYPE_VNODE)
276			return (EBADF);
277		vp = (struct vnode *)fp->f_data;
278		/* Copy in the lock structure */
279		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
280		    sizeof(fl));
281		if (error)
282			return (error);
283		if (fl.l_whence == SEEK_CUR)
284			fl.l_start += fp->f_offset;
285		switch (fl.l_type) {
286
287		case F_RDLCK:
288			if ((fp->f_flag & FREAD) == 0)
289				return (EBADF);
290			p->p_flag |= P_ADVLOCK;
291			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg));
292
293		case F_WRLCK:
294			if ((fp->f_flag & FWRITE) == 0)
295				return (EBADF);
296			p->p_flag |= P_ADVLOCK;
297			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg));
298
299		case F_UNLCK:
300			return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &fl,
301				F_POSIX));
302
303		default:
304			return (EINVAL);
305		}
306
307	case F_GETLK:
308		if (fp->f_type != DTYPE_VNODE)
309			return (EBADF);
310		vp = (struct vnode *)fp->f_data;
311		/* Copy in the lock structure */
312		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
313		    sizeof(fl));
314		if (error)
315			return (error);
316		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
317		    fl.l_type != F_UNLCK)
318			return (EINVAL);
319		if (fl.l_whence == SEEK_CUR)
320			fl.l_start += fp->f_offset;
321		if ((error = VOP_ADVLOCK(vp,(caddr_t)p->p_leader,F_GETLK,&fl,F_POSIX)))
322			return (error);
323		return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
324		    sizeof(fl)));
325
326	default:
327		return (EINVAL);
328	}
329	/* NOTREACHED */
330}
331
332/*
333 * Common code for dup, dup2, and fcntl(F_DUPFD).
334 */
335static int
336finishdup(fdp, old, new, retval)
337	register struct filedesc *fdp;
338	register int old, new;
339	register_t *retval;
340{
341	register struct file *fp;
342
343	fp = fdp->fd_ofiles[old];
344	fdp->fd_ofiles[new] = fp;
345	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
346	fhold(fp);
347	if (new > fdp->fd_lastfile)
348		fdp->fd_lastfile = new;
349	*retval = new;
350	return (0);
351}
352
353/*
354 * If sigio is on the list associated with a process or process group,
355 * disable signalling from the device, remove sigio from the list and
356 * free sigio.
357 */
358void
359funsetown(sigio)
360	struct sigio *sigio;
361{
362	int s;
363
364	if (sigio == NULL)
365		return;
366	s = splhigh();
367	*(sigio->sio_myref) = NULL;
368	splx(s);
369	if (sigio->sio_pgid < 0) {
370		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
371			     sigio, sio_pgsigio);
372	} else /* if ((*sigiop)->sio_pgid > 0) */ {
373		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
374			     sigio, sio_pgsigio);
375	}
376	crfree(sigio->sio_ucred);
377	FREE(sigio, M_SIGIO);
378}
379
380/* Free a list of sigio structures. */
381void
382funsetownlst(sigiolst)
383	struct sigiolst *sigiolst;
384{
385	struct sigio *sigio;
386
387	while ((sigio = SLIST_FIRST(sigiolst)) != NULL)
388		funsetown(sigio);
389}
390
391/*
392 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
393 *
394 * After permission checking, add a sigio structure to the sigio list for
395 * the process or process group.
396 */
397int
398fsetown(pgid, sigiop)
399	pid_t pgid;
400	struct sigio **sigiop;
401{
402	struct proc *proc;
403	struct pgrp *pgrp;
404	struct sigio *sigio;
405	int s;
406
407	if (pgid == 0) {
408		funsetown(*sigiop);
409		return (0);
410	}
411	if (pgid > 0) {
412		proc = pfind(pgid);
413		if (proc == NULL)
414			return (ESRCH);
415
416		/*
417		 * Policy - Don't allow a process to FSETOWN a process
418		 * in another session.
419		 *
420		 * Remove this test to allow maximum flexibility or
421		 * restrict FSETOWN to the current process or process
422		 * group for maximum safety.
423		 */
424		if (proc->p_session != curproc->p_session)
425			return (EPERM);
426
427		pgrp = NULL;
428	} else /* if (pgid < 0) */ {
429		pgrp = pgfind(-pgid);
430		if (pgrp == NULL)
431			return (ESRCH);
432
433		/*
434		 * Policy - Don't allow a process to FSETOWN a process
435		 * in another session.
436		 *
437		 * Remove this test to allow maximum flexibility or
438		 * restrict FSETOWN to the current process or process
439		 * group for maximum safety.
440		 */
441		if (pgrp->pg_session != curproc->p_session)
442			return (EPERM);
443
444		proc = NULL;
445	}
446	funsetown(*sigiop);
447	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
448	if (pgid > 0) {
449		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
450		sigio->sio_proc = proc;
451	} else {
452		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
453		sigio->sio_pgrp = pgrp;
454	}
455	sigio->sio_pgid = pgid;
456	crhold(curproc->p_ucred);
457	sigio->sio_ucred = curproc->p_ucred;
458	/* It would be convenient if p_ruid was in ucred. */
459	sigio->sio_ruid = curproc->p_cred->p_ruid;
460	sigio->sio_myref = sigiop;
461	s = splhigh();
462	*sigiop = sigio;
463	splx(s);
464	return (0);
465}
466
467/*
468 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
469 */
470pid_t
471fgetown(sigio)
472	struct sigio *sigio;
473{
474	return (sigio != NULL ? sigio->sio_pgid : 0);
475}
476
477/*
478 * Close a file descriptor.
479 */
480#ifndef _SYS_SYSPROTO_H_
481struct close_args {
482        int     fd;
483};
484#endif
485/* ARGSUSED */
486int
487close(p, uap)
488	struct proc *p;
489	struct close_args *uap;
490{
491	register struct filedesc *fdp = p->p_fd;
492	register struct file *fp;
493	register int fd = uap->fd;
494	register u_char *pf;
495
496	if ((unsigned)fd >= fdp->fd_nfiles ||
497	    (fp = fdp->fd_ofiles[fd]) == NULL)
498		return (EBADF);
499	pf = (u_char *)&fdp->fd_ofileflags[fd];
500	if (*pf & UF_MAPPED)
501		(void) munmapfd(p, fd);
502	fdp->fd_ofiles[fd] = NULL;
503	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
504		fdp->fd_lastfile--;
505	if (fd < fdp->fd_freefile)
506		fdp->fd_freefile = fd;
507	*pf = 0;
508	if (fd < fdp->fd_knlistsize)
509		knote_fdclose(p, fd);
510	return (closef(fp, p));
511}
512
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * `struct ostat' layout.
 *
 * Returns EBADF if uap->fd is not open; otherwise the error from
 * fo_stat() or from copying the converted structure out to uap->sb.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	if ((unsigned)uap->fd >= fdp->fd_nfiles)
		return (EBADF);
	fp = fdp->fd_ofiles[uap->fd];
	if (fp == NULL)
		return (EBADF);

	error = fo_stat(fp, &sb, p);
	if (error != 0)
		return (error);
	cvtstat(&sb, &osb);
	return (copyout((caddr_t)&osb, (caddr_t)uap->sb, sizeof (osb)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
546
547/*
548 * Return status information about a file descriptor.
549 */
550#ifndef _SYS_SYSPROTO_H_
551struct fstat_args {
552	int	fd;
553	struct	stat *sb;
554};
555#endif
556/* ARGSUSED */
557int
558fstat(p, uap)
559	struct proc *p;
560	register struct fstat_args *uap;
561{
562	register struct filedesc *fdp = p->p_fd;
563	register struct file *fp;
564	struct stat ub;
565	int error;
566
567	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
568	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
569		return (EBADF);
570	error = fo_stat(fp, &ub, p);
571	if (error == 0)
572		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
573	return (error);
574}
575
576/*
577 * Return status information about a file descriptor.
578 */
579#ifndef _SYS_SYSPROTO_H_
580struct nfstat_args {
581	int	fd;
582	struct	nstat *sb;
583};
584#endif
585/* ARGSUSED */
586int
587nfstat(p, uap)
588	struct proc *p;
589	register struct nfstat_args *uap;
590{
591	register struct filedesc *fdp = p->p_fd;
592	register struct file *fp;
593	struct stat ub;
594	struct nstat nub;
595	int error;
596
597	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
598	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
599		return (EBADF);
600	error = fo_stat(fp, &ub, p);
601	if (error == 0) {
602		cvtnstat(&ub, &nub);
603		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
604	}
605	return (error);
606}
607
608/*
609 * Return pathconf information about a file descriptor.
610 */
611#ifndef _SYS_SYSPROTO_H_
612struct fpathconf_args {
613	int	fd;
614	int	name;
615};
616#endif
617/* ARGSUSED */
618int
619fpathconf(p, uap)
620	struct proc *p;
621	register struct fpathconf_args *uap;
622{
623	struct filedesc *fdp = p->p_fd;
624	struct file *fp;
625	struct vnode *vp;
626
627	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
628	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
629		return (EBADF);
630	switch (fp->f_type) {
631
632	case DTYPE_PIPE:
633	case DTYPE_SOCKET:
634		if (uap->name != _PC_PIPE_BUF)
635			return (EINVAL);
636		p->p_retval[0] = PIPE_BUF;
637		return (0);
638
639	case DTYPE_FIFO:
640	case DTYPE_VNODE:
641		vp = (struct vnode *)fp->f_data;
642		return (VOP_PATHCONF(vp, uap->name, p->p_retval));
643
644	default:
645		return (EOPNOTSUPP);
646	}
647	/*NOTREACHED*/
648}
649
650/*
651 * Allocate a file descriptor for the process.
652 */
653static int fdexpand;
654SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
655
656int
657fdalloc(p, want, result)
658	struct proc *p;
659	int want;
660	int *result;
661{
662	register struct filedesc *fdp = p->p_fd;
663	register int i;
664	int lim, last, nfiles;
665	struct file **newofile;
666	char *newofileflags;
667
668	/*
669	 * Search for a free descriptor starting at the higher
670	 * of want or fd_freefile.  If that fails, consider
671	 * expanding the ofile array.
672	 */
673	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
674	for (;;) {
675		last = min(fdp->fd_nfiles, lim);
676		if ((i = want) < fdp->fd_freefile)
677			i = fdp->fd_freefile;
678		for (; i < last; i++) {
679			if (fdp->fd_ofiles[i] == NULL) {
680				fdp->fd_ofileflags[i] = 0;
681				if (i > fdp->fd_lastfile)
682					fdp->fd_lastfile = i;
683				if (want <= fdp->fd_freefile)
684					fdp->fd_freefile = i;
685				*result = i;
686				return (0);
687			}
688		}
689
690		/*
691		 * No space in current array.  Expand?
692		 */
693		if (fdp->fd_nfiles >= lim)
694			return (EMFILE);
695		if (fdp->fd_nfiles < NDEXTENT)
696			nfiles = NDEXTENT;
697		else
698			nfiles = 2 * fdp->fd_nfiles;
699		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
700		    M_FILEDESC, M_WAITOK);
701		newofileflags = (char *) &newofile[nfiles];
702		/*
703		 * Copy the existing ofile and ofileflags arrays
704		 * and zero the new portion of each array.
705		 */
706		bcopy(fdp->fd_ofiles, newofile,
707			(i = sizeof(struct file *) * fdp->fd_nfiles));
708		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
709		bcopy(fdp->fd_ofileflags, newofileflags,
710			(i = sizeof(char) * fdp->fd_nfiles));
711		bzero(newofileflags + i, nfiles * sizeof(char) - i);
712		if (fdp->fd_nfiles > NDFILE)
713			FREE(fdp->fd_ofiles, M_FILEDESC);
714		fdp->fd_ofiles = newofile;
715		fdp->fd_ofileflags = newofileflags;
716		fdp->fd_nfiles = nfiles;
717		fdexpand++;
718	}
719	return (0);
720}
721
722/*
723 * Check to see whether n user file descriptors
724 * are available to the process p.
725 */
726int
727fdavail(p, n)
728	struct proc *p;
729	register int n;
730{
731	register struct filedesc *fdp = p->p_fd;
732	register struct file **fpp;
733	register int i, lim, last;
734
735	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
736	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
737		return (1);
738
739	last = min(fdp->fd_nfiles, lim);
740	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
741	for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
742		if (*fpp == NULL && --n <= 0)
743			return (1);
744	return (0);
745}
746
747/*
748 * Create a new open file structure and allocate
749 * a file decriptor for the process that refers to it.
750 */
751int
752falloc(p, resultfp, resultfd)
753	register struct proc *p;
754	struct file **resultfp;
755	int *resultfd;
756{
757	register struct file *fp, *fq;
758	int error, i;
759
760	if ((error = fdalloc(p, 0, &i)))
761		return (error);
762	if (nfiles >= maxfiles) {
763		tablefull("file");
764		return (ENFILE);
765	}
766	/*
767	 * Allocate a new file descriptor.
768	 * If the process has file descriptor zero open, add to the list
769	 * of open files at that point, otherwise put it at the front of
770	 * the list of open files.
771	 */
772	nfiles++;
773	MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
774	bzero(fp, sizeof(struct file));
775	fp->f_count = 1;
776	fp->f_cred = p->p_ucred;
777	fp->f_ops = &badfileops;
778	fp->f_seqcount = 1;
779	crhold(fp->f_cred);
780	if ((fq = p->p_fd->fd_ofiles[0])) {
781		LIST_INSERT_AFTER(fq, fp, f_list);
782	} else {
783		LIST_INSERT_HEAD(&filehead, fp, f_list);
784	}
785	p->p_fd->fd_ofiles[i] = fp;
786	if (resultfp)
787		*resultfp = fp;
788	if (resultfd)
789		*resultfd = i;
790	return (0);
791}
792
793/*
794 * Free a file descriptor.
795 */
796void
797ffree(fp)
798	register struct file *fp;
799{
800	LIST_REMOVE(fp, f_list);
801	crfree(fp->f_cred);
802#if defined(DIAGNOSTIC) || defined(INVARIANTS)
803	fp->f_count = 0;
804#endif
805	nfiles--;
806	FREE(fp, M_FILE);
807}
808
809/*
810 * Build a new filedesc structure.
811 */
812struct filedesc *
813fdinit(p)
814	struct proc *p;
815{
816	register struct filedesc0 *newfdp;
817	register struct filedesc *fdp = p->p_fd;
818
819	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
820	    M_FILEDESC, M_WAITOK);
821	bzero(newfdp, sizeof(struct filedesc0));
822	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
823	VREF(newfdp->fd_fd.fd_cdir);
824	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
825	VREF(newfdp->fd_fd.fd_rdir);
826	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
827	if (newfdp->fd_fd.fd_jdir)
828		VREF(newfdp->fd_fd.fd_jdir);
829
830	/* Create the file descriptor table. */
831	newfdp->fd_fd.fd_refcnt = 1;
832	newfdp->fd_fd.fd_cmask = cmask;
833	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
834	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
835	newfdp->fd_fd.fd_nfiles = NDFILE;
836	newfdp->fd_fd.fd_knlistsize = -1;
837
838	return (&newfdp->fd_fd);
839}
840
841/*
842 * Share a filedesc structure.
843 */
844struct filedesc *
845fdshare(p)
846	struct proc *p;
847{
848	p->p_fd->fd_refcnt++;
849	return (p->p_fd);
850}
851
852/*
853 * Copy a filedesc structure.
854 */
855struct filedesc *
856fdcopy(p)
857	struct proc *p;
858{
859	register struct filedesc *newfdp, *fdp = p->p_fd;
860	register struct file **fpp;
861	register int i;
862
863	/* Certain daemons might not have file descriptors. */
864	if (fdp == NULL)
865		return (NULL);
866
867	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
868	    M_FILEDESC, M_WAITOK);
869	bcopy(fdp, newfdp, sizeof(struct filedesc));
870	VREF(newfdp->fd_cdir);
871	VREF(newfdp->fd_rdir);
872	if (newfdp->fd_jdir)
873		VREF(newfdp->fd_jdir);
874	newfdp->fd_refcnt = 1;
875
876	/*
877	 * If the number of open files fits in the internal arrays
878	 * of the open file structure, use them, otherwise allocate
879	 * additional memory for the number of descriptors currently
880	 * in use.
881	 */
882	if (newfdp->fd_lastfile < NDFILE) {
883		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
884		newfdp->fd_ofileflags =
885		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
886		i = NDFILE;
887	} else {
888		/*
889		 * Compute the smallest multiple of NDEXTENT needed
890		 * for the file descriptors currently in use,
891		 * allowing the table to shrink.
892		 */
893		i = newfdp->fd_nfiles;
894		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
895			i /= 2;
896		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
897		    M_FILEDESC, M_WAITOK);
898		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
899	}
900	newfdp->fd_nfiles = i;
901	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
902	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
903
904	/*
905	 * kq descriptors cannot be copied.
906	 */
907	if (newfdp->fd_knlistsize != -1) {
908		fpp = newfdp->fd_ofiles;
909		for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
910			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE)
911				*fpp = NULL;
912		newfdp->fd_knlist = NULL;
913		newfdp->fd_knlistsize = -1;
914		newfdp->fd_knhash = NULL;
915		newfdp->fd_knhashmask = 0;
916	}
917
918	fpp = newfdp->fd_ofiles;
919	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
920		if (*fpp != NULL)
921			fhold(*fpp);
922	return (newfdp);
923}
924
925/*
926 * Release a filedesc structure.
927 */
928void
929fdfree(p)
930	struct proc *p;
931{
932	register struct filedesc *fdp = p->p_fd;
933	struct file **fpp;
934	register int i;
935
936	/* Certain daemons might not have file descriptors. */
937	if (fdp == NULL)
938		return;
939
940	if (--fdp->fd_refcnt > 0)
941		return;
942	fpp = fdp->fd_ofiles;
943	for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
944		if (*fpp)
945			(void) closef(*fpp, p);
946	if (fdp->fd_nfiles > NDFILE)
947		FREE(fdp->fd_ofiles, M_FILEDESC);
948	vrele(fdp->fd_cdir);
949	vrele(fdp->fd_rdir);
950	if (fdp->fd_jdir)
951		vrele(fdp->fd_jdir);
952	if (fdp->fd_knlist)
953		FREE(fdp->fd_knlist, M_TEMP);
954	if (fdp->fd_knhash)
955		FREE(fdp->fd_knhash, M_TEMP);
956	FREE(fdp, M_FILEDESC);
957}
958
959/*
960 * For setugid programs, we don't want to people to use that setugidness
961 * to generate error messages which write to a file which otherwise would
962 * otherwise be off-limits to the process.
963 *
964 * This is a gross hack to plug the hole.  A better solution would involve
965 * a special vop or other form of generalized access control mechanism.  We
966 * go ahead and just reject all procfs file systems accesses as dangerous.
967 *
968 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
969 * sufficient.  We also don't for check setugidness since we know we are.
970 */
971static int
972is_unsafe(struct file *fp)
973{
974	if (fp->f_type == DTYPE_VNODE &&
975	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
976		return (1);
977	return (0);
978}
979
980/*
981 * Make this setguid thing safe, if at all possible.
982 */
983void
984setugidsafety(p)
985	struct proc *p;
986{
987	struct filedesc *fdp = p->p_fd;
988	struct file **fpp;
989	char *fdfp;
990	register int i;
991
992	/* Certain daemons might not have file descriptors. */
993	if (fdp == NULL)
994		return;
995
996	fpp = fdp->fd_ofiles;
997	fdfp = fdp->fd_ofileflags;
998	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) {
999		if (i > 2)
1000			break;
1001		if (*fpp != NULL && is_unsafe(*fpp)) {
1002			if ((*fdfp & UF_MAPPED) != 0)
1003				(void) munmapfd(p, i);
1004			(void) closef(*fpp, p);
1005			*fpp = NULL;
1006			*fdfp = 0;
1007			if (i < fdp->fd_freefile)
1008				fdp->fd_freefile = i;
1009		}
1010	}
1011	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1012		fdp->fd_lastfile--;
1013}
1014
1015/*
1016 * Close any files on exec?
1017 */
1018void
1019fdcloseexec(p)
1020	struct proc *p;
1021{
1022	struct filedesc *fdp = p->p_fd;
1023	struct file **fpp;
1024	char *fdfp;
1025	register int i;
1026
1027	/* Certain daemons might not have file descriptors. */
1028	if (fdp == NULL)
1029		return;
1030
1031	fpp = fdp->fd_ofiles;
1032	fdfp = fdp->fd_ofileflags;
1033	for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1034		if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1035			if (*fdfp & UF_MAPPED)
1036				(void) munmapfd(p, i);
1037			(void) closef(*fpp, p);
1038			*fpp = NULL;
1039			*fdfp = 0;
1040			if (i < fdp->fd_freefile)
1041				fdp->fd_freefile = i;
1042		}
1043	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1044		fdp->fd_lastfile--;
1045}
1046
1047/*
1048 * Internal form of close.
1049 * Decrement reference count on file structure.
1050 * Note: p may be NULL when closing a file
1051 * that was being passed in a message.
1052 */
1053int
1054closef(fp, p)
1055	register struct file *fp;
1056	register struct proc *p;
1057{
1058	struct vnode *vp;
1059	struct flock lf;
1060
1061	if (fp == NULL)
1062		return (0);
1063	/*
1064	 * POSIX record locking dictates that any close releases ALL
1065	 * locks owned by this process.  This is handled by setting
1066	 * a flag in the unlock to free ONLY locks obeying POSIX
1067	 * semantics, and not to free BSD-style file locks.
1068	 * If the descriptor was in a message, POSIX-style locks
1069	 * aren't passed with the descriptor.
1070	 */
1071	if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1072		lf.l_whence = SEEK_SET;
1073		lf.l_start = 0;
1074		lf.l_len = 0;
1075		lf.l_type = F_UNLCK;
1076		vp = (struct vnode *)fp->f_data;
1077		(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &lf, F_POSIX);
1078	}
1079	return (fdrop(fp, p));
1080}
1081
1082int
1083fdrop(fp, p)
1084	struct file *fp;
1085	struct proc *p;
1086{
1087	struct flock lf;
1088	struct vnode *vp;
1089	int error;
1090
1091	if (--fp->f_count > 0)
1092		return (0);
1093	if (fp->f_count < 0)
1094		panic("fdrop: count < 0");
1095	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1096		lf.l_whence = SEEK_SET;
1097		lf.l_start = 0;
1098		lf.l_len = 0;
1099		lf.l_type = F_UNLCK;
1100		vp = (struct vnode *)fp->f_data;
1101		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1102	}
1103	if (fp->f_ops != &badfileops)
1104		error = fo_close(fp, p);
1105	else
1106		error = 0;
1107	ffree(fp);
1108	return (error);
1109}
1110
1111/*
1112 * Apply an advisory lock on a file descriptor.
1113 *
1114 * Just attempt to get a record lock of the requested type on
1115 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1116 */
1117#ifndef _SYS_SYSPROTO_H_
1118struct flock_args {
1119	int	fd;
1120	int	how;
1121};
1122#endif
1123/* ARGSUSED */
1124int
1125flock(p, uap)
1126	struct proc *p;
1127	register struct flock_args *uap;
1128{
1129	register struct filedesc *fdp = p->p_fd;
1130	register struct file *fp;
1131	struct vnode *vp;
1132	struct flock lf;
1133
1134	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1135	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1136		return (EBADF);
1137	if (fp->f_type != DTYPE_VNODE)
1138		return (EOPNOTSUPP);
1139	vp = (struct vnode *)fp->f_data;
1140	lf.l_whence = SEEK_SET;
1141	lf.l_start = 0;
1142	lf.l_len = 0;
1143	if (uap->how & LOCK_UN) {
1144		lf.l_type = F_UNLCK;
1145		fp->f_flag &= ~FHASLOCK;
1146		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1147	}
1148	if (uap->how & LOCK_EX)
1149		lf.l_type = F_WRLCK;
1150	else if (uap->how & LOCK_SH)
1151		lf.l_type = F_RDLCK;
1152	else
1153		return (EBADF);
1154	fp->f_flag |= FHASLOCK;
1155	if (uap->how & LOCK_NB)
1156		return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1157	return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1158}
1159
1160/*
1161 * File Descriptor pseudo-device driver (/dev/fd/).
1162 *
1163 * Opening minor device N dup()s the file (if any) connected to file
1164 * descriptor N belonging to the calling process.  Note that this driver
1165 * consists of only the ``open()'' routine, because all subsequent
1166 * references to this file will be direct to the other driver.
1167 */
1168/* ARGSUSED */
1169static int
1170fdopen(dev, mode, type, p)
1171	dev_t dev;
1172	int mode, type;
1173	struct proc *p;
1174{
1175
1176	/*
1177	 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1178	 * the file descriptor being sought for duplication. The error
1179	 * return ensures that the vnode for this device will be released
1180	 * by vn_open. Open will detect this special error and take the
1181	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1182	 * will simply report the error.
1183	 */
1184	p->p_dupfd = minor(dev);
1185	return (ENODEV);
1186}
1187
1188/*
1189 * Duplicate the specified descriptor to a free descriptor.
1190 */
1191int
1192dupfdopen(fdp, indx, dfd, mode, error)
1193	register struct filedesc *fdp;
1194	register int indx, dfd;
1195	int mode;
1196	int error;
1197{
1198	register struct file *wfp;
1199	struct file *fp;
1200
1201	/*
1202	 * If the to-be-dup'd fd number is greater than the allowed number
1203	 * of file descriptors, or the fd to be dup'd has already been
1204	 * closed, reject.  Note, check for new == old is necessary as
1205	 * falloc could allocate an already closed to-be-dup'd descriptor
1206	 * as the new descriptor.
1207	 */
1208	fp = fdp->fd_ofiles[indx];
1209	if ((u_int)dfd >= fdp->fd_nfiles ||
1210	    (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1211		return (EBADF);
1212
1213	/*
1214	 * There are two cases of interest here.
1215	 *
1216	 * For ENODEV simply dup (dfd) to file descriptor
1217	 * (indx) and return.
1218	 *
1219	 * For ENXIO steal away the file structure from (dfd) and
1220	 * store it in (indx).  (dfd) is effectively closed by
1221	 * this operation.
1222	 *
1223	 * Any other error code is just returned.
1224	 */
1225	switch (error) {
1226	case ENODEV:
1227		/*
1228		 * Check that the mode the file is being opened for is a
1229		 * subset of the mode of the existing descriptor.
1230		 */
1231		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1232			return (EACCES);
1233		fdp->fd_ofiles[indx] = wfp;
1234		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1235		fhold(wfp);
1236		if (indx > fdp->fd_lastfile)
1237			fdp->fd_lastfile = indx;
1238		return (0);
1239
1240	case ENXIO:
1241		/*
1242		 * Steal away the file pointer from dfd, and stuff it into indx.
1243		 */
1244		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1245		fdp->fd_ofiles[dfd] = NULL;
1246		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1247		fdp->fd_ofileflags[dfd] = 0;
1248		/*
1249		 * Complete the clean up of the filedesc structure by
1250		 * recomputing the various hints.
1251		 */
1252		if (indx > fdp->fd_lastfile)
1253			fdp->fd_lastfile = indx;
1254		else
1255			while (fdp->fd_lastfile > 0 &&
1256			       fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1257				fdp->fd_lastfile--;
1258			if (dfd < fdp->fd_freefile)
1259				fdp->fd_freefile = dfd;
1260		return (0);
1261
1262	default:
1263		return (error);
1264	}
1265	/* NOTREACHED */
1266}
1267
1268/*
1269 * Get file structures.
1270 */
1271static int
1272sysctl_kern_file SYSCTL_HANDLER_ARGS
1273{
1274	int error;
1275	struct file *fp;
1276
1277	if (!req->oldptr) {
1278		/*
1279		 * overestimate by 10 files
1280		 */
1281		return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1282				(nfiles + 10) * sizeof(struct file)));
1283	}
1284
1285	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1286	if (error)
1287		return (error);
1288
1289	/*
1290	 * followed by an array of file structures
1291	 */
1292	LIST_FOREACH(fp, &filehead, f_list) {
1293		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1294		if (error)
1295			return (error);
1296	}
1297	return (0);
1298}
1299
1300SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1301    0, 0, sysctl_kern_file, "S,file", "Entire file table");
1302
1303SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
1304    &maxfilesperproc, 0, "Maximum files allowed open per process");
1305
1306SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
1307    &maxfiles, 0, "Maximum number of files");
1308
1309static void
1310fildesc_drvinit(void *unused)
1311{
1312	int fd;
1313
1314	for (fd = 0; fd < NUMFDESC; fd++)
1315		make_dev(&fildesc_cdevsw, fd,
1316		    UID_BIN, GID_BIN, 0666, "fd/%d", fd);
1317	make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "stdin");
1318	make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "stdout");
1319	make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "stderr");
1320}
1321
1322struct fileops badfileops = {
1323	badfo_readwrite,
1324	badfo_readwrite,
1325	badfo_ioctl,
1326	badfo_poll,
1327	badfo_stat,
1328	badfo_close
1329};
1330
/*
 * Read/write stub of badfileops: ignores all of its arguments and
 * always fails with EBADF.
 */
static int
badfo_readwrite(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	int flags;
	struct proc *p;
{

	return (EBADF);
}
1342
1343static int
1344badfo_ioctl(fp, com, data, p)
1345	struct file *fp;
1346	u_long com;
1347	caddr_t data;
1348	struct proc *p;
1349{
1350
1351	return (EBADF);
1352}
1353
/*
 * Poll stub of badfileops: ignores its arguments and always returns 0
 * (unlike the other badfo_* stubs, it does not return EBADF).
 */
static int
badfo_poll(fp, events, cred, p)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct proc *p;
{

	return (0);
}
1364
/*
 * Stat stub of badfileops: never touches the stat buffer and always
 * fails with EBADF.
 */
static int
badfo_stat(fp, sb, p)
	struct file *fp;
	struct stat *sb;
	struct proc *p;
{

	return (EBADF);
}
1374
/*
 * Close stub of badfileops: ignores its arguments and always fails
 * with EBADF.
 */
static int
badfo_close(fp, p)
	struct file *fp;
	struct proc *p;
{

	return (EBADF);
}
1383
1384SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1385					fildesc_drvinit,NULL)
1386