1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 94861 2002-04-16 17:11:34Z jhb $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/lock.h>
47#include <sys/malloc.h>
48#include <sys/mutex.h>
49#include <sys/sysproto.h>
50#include <sys/conf.h>
51#include <sys/filedesc.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54#include <sys/vnode.h>
55#include <sys/proc.h>
56#include <sys/file.h>
57#include <sys/stat.h>
58#include <sys/filio.h>
59#include <sys/fcntl.h>
60#include <sys/unistd.h>
61#include <sys/resourcevar.h>
62#include <sys/event.h>
63#include <sys/sx.h>
64#include <sys/socketvar.h>
65
66#include <machine/limits.h>
67
68#include <vm/vm.h>
69#include <vm/vm_extern.h>
70#include <vm/uma.h>
71
72static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
73static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74
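/* UMA zone from which struct file allocations are made. */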
75uma_zone_t file_zone;
76
77static	 d_open_t  fdopen;
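/*
 * Number of /dev/fd/N nodes pre-created by fildesc_drvinit() when devfs
 * is not present (nodes 0-2 are always created).
 */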
78#define NUMFDESC 64
79
80#define CDEV_MAJOR 22
81static struct cdevsw fildesc_cdevsw = {
82	/* open */	fdopen,
83	/* close */	noclose,
84	/* read */	noread,
85	/* write */	nowrite,
86	/* ioctl */	noioctl,
87	/* poll */	nopoll,
88	/* mmap */	nommap,
89	/* strategy */	nostrategy,
90	/* name */	"FD",
91	/* maj */	CDEV_MAJOR,
92	/* dump */	nodump,
93	/* psize */	nopsize,
94	/* flags */	0,
95};
96
97static int do_dup(struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td);
98static int badfo_readwrite(struct file *fp, struct uio *uio,
99    struct ucred *cred, int flags, struct thread *td);
100static int badfo_ioctl(struct file *fp, u_long com, caddr_t data,
101    struct thread *td);
102static int badfo_poll(struct file *fp, int events,
103    struct ucred *cred, struct thread *td);
104static int badfo_kqfilter(struct file *fp, struct knote *kn);
105static int badfo_stat(struct file *fp, struct stat *sb, struct thread *td);
106static int badfo_close(struct file *fp, struct thread *td);
107
108/*
109 * Descriptor management.
110 */
111struct filelist filehead;	/* head of list of open files */
112int nfiles;			/* actual number of open files */
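/* Default file creation mask (umask), copied into fd_cmask by fdinit(). */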
113extern int cmask;
114struct sx filelist_lock;	/* sx to protect filelist */
115
116/*
117 * System calls on descriptors.
118 */
119#ifndef _SYS_SYSPROTO_H_
120struct getdtablesize_args {
121	int	dummy;
122};
123#endif
124/*
125 * MPSAFE
126 */
127/* ARGSUSED */
128int
129getdtablesize(td, uap)
130	struct thread *td;
131	struct getdtablesize_args *uap;
132{
133	struct proc *p = td->td_proc;
134
135	mtx_lock(&Giant);
136	td->td_retval[0] =
137	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
138	mtx_unlock(&Giant);
139	return (0);
140}
141
142/*
143 * Duplicate a file descriptor to a particular value.
144 *
145 * note: keep in mind that a potential race condition exists when closing
146 * descriptors from a shared descriptor table (via rfork).
147 */
148#ifndef _SYS_SYSPROTO_H_
149struct dup2_args {
150	u_int	from;
151	u_int	to;
152};
153#endif
154/*
155 * MPSAFE
156 */
157/* ARGSUSED */
158int
159dup2(td, uap)
160	struct thread *td;
161	struct dup2_args *uap;
162{
163	struct proc *p = td->td_proc;
164	register struct filedesc *fdp = td->td_proc->p_fd;
165	register u_int old = uap->from, new = uap->to;
166	int i, error;
167
168	FILEDESC_LOCK(fdp);
169retry:
170	if (old >= fdp->fd_nfiles ||
171	    fdp->fd_ofiles[old] == NULL ||
172	    new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
173	    new >= maxfilesperproc) {
174		FILEDESC_UNLOCK(fdp);
175		return (EBADF);
176	}
177	if (old == new) {
178		td->td_retval[0] = new;
179		FILEDESC_UNLOCK(fdp);
180		return (0);
181	}
182	if (new >= fdp->fd_nfiles) {
183		if ((error = fdalloc(td, new, &i))) {
184			FILEDESC_UNLOCK(fdp);
185			return (error);
186		}
187		/*
188		 * fdalloc() may block, retest everything.
189		 */
190		goto retry;
191	}
192	error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
193	return(error);
194}
195
196/*
197 * Duplicate a file descriptor.
198 */
199#ifndef _SYS_SYSPROTO_H_
200struct dup_args {
201	u_int	fd;
202};
203#endif
204/*
205 * MPSAFE
206 */
207/* ARGSUSED */
208int
209dup(td, uap)
210	struct thread *td;
211	struct dup_args *uap;
212{
213	register struct filedesc *fdp;
214	u_int old;
215	int new, error;
216
217	old = uap->fd;
218	fdp = td->td_proc->p_fd;
219	FILEDESC_LOCK(fdp);
220	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
221		FILEDESC_UNLOCK(fdp);
222		return (EBADF);
223	}
224	if ((error = fdalloc(td, 0, &new))) {
225		FILEDESC_UNLOCK(fdp);
226		return (error);
227	}
228	error = do_dup(fdp, (int)old, new, td->td_retval, td);
229	return (error);
230}
231
232/*
233 * The file control system call.
234 */
235#ifndef _SYS_SYSPROTO_H_
236struct fcntl_args {
237	int	fd;
238	int	cmd;
239	long	arg;
240};
241#endif
242/*
243 * MPSAFE
244 */
245/* ARGSUSED */
246int
247fcntl(td, uap)
248	struct thread *td;
249	register struct fcntl_args *uap;
250{
251	register struct proc *p = td->td_proc;
252	register struct filedesc *fdp;
253	register struct file *fp;
254	register char *pop;
255	struct vnode *vp;
256	int i, tmp, error = 0, flg = F_POSIX;
257	struct flock fl;
258	u_int newmin;
259	struct proc *leaderp;
260
261	mtx_lock(&Giant);
262
263	fdp = p->p_fd;
264	FILEDESC_LOCK(fdp);
265	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
266	    (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
267		FILEDESC_UNLOCK(fdp);
268		error = EBADF;
269		goto done2;
270	}
271	pop = &fdp->fd_ofileflags[uap->fd];
272
273	switch (uap->cmd) {
274	case F_DUPFD:
275		newmin = uap->arg;
276		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
277		    newmin >= maxfilesperproc) {
278			FILEDESC_UNLOCK(fdp);
279			error = EINVAL;
280			break;
281		}
282		if ((error = fdalloc(td, newmin, &i))) {
283			FILEDESC_UNLOCK(fdp);
284			break;
285		}
286		error = do_dup(fdp, uap->fd, i, td->td_retval, td);
287		break;
288
289	case F_GETFD:
290		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
291		FILEDESC_UNLOCK(fdp);
292		break;
293
294	case F_SETFD:
295		*pop = (*pop &~ UF_EXCLOSE) |
296			    (uap->arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
297		FILEDESC_UNLOCK(fdp);
298		break;
299
300	case F_GETFL:
301		FILE_LOCK(fp);
302		FILEDESC_UNLOCK(fdp);
303		td->td_retval[0] = OFLAGS(fp->f_flag);
304		FILE_UNLOCK(fp);
305		break;
306
307	case F_SETFL:
308		fhold(fp);
309		FILEDESC_UNLOCK(fdp);
310		fp->f_flag &= ~FCNTLFLAGS;
311		fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
312		tmp = fp->f_flag & FNONBLOCK;
313		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
314		if (error) {
315			fdrop(fp, td);
316			break;
317		}
318		tmp = fp->f_flag & FASYNC;
319		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
320		if (!error) {
321			fdrop(fp, td);
322			break;
323		}
324		fp->f_flag &= ~FNONBLOCK;
325		tmp = 0;
326		(void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
327		fdrop(fp, td);
328		break;
329
330	case F_GETOWN:
331		fhold(fp);
332		FILEDESC_UNLOCK(fdp);
333		error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
334		fdrop(fp, td);
335		break;
336
337	case F_SETOWN:
338		fhold(fp);
339		FILEDESC_UNLOCK(fdp);
340		error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
341		fdrop(fp, td);
342		break;
343
344	case F_SETLKW:
345		flg |= F_WAIT;
346		/* Fall into F_SETLK */
347
348	case F_SETLK:
349		if (fp->f_type != DTYPE_VNODE) {
350			FILEDESC_UNLOCK(fdp);
351			error = EBADF;
352			break;
353		}
354		vp = (struct vnode *)fp->f_data;
355		/*
356		 * copyin/lockop may block
357		 */
358		fhold(fp);
359		FILEDESC_UNLOCK(fdp);
361
362		/* Copy in the lock structure */
363		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
364		    sizeof(fl));
365		if (error) {
366			fdrop(fp, td);
367			break;
368		}
369		if (fl.l_whence == SEEK_CUR) {
370			if (fp->f_offset < 0 ||
371			    (fl.l_start > 0 &&
372			     fp->f_offset > OFF_MAX - fl.l_start)) {
373				fdrop(fp, td);
374				error = EOVERFLOW;
375				break;
376			}
377			fl.l_start += fp->f_offset;
378		}
379
380		switch (fl.l_type) {
381		case F_RDLCK:
382			if ((fp->f_flag & FREAD) == 0) {
383				error = EBADF;
384				break;
385			}
386			PROC_LOCK(p);
387			p->p_flag |= P_ADVLOCK;
388			leaderp = p->p_leader;
389			PROC_UNLOCK(p);
390			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
391			    &fl, flg);
392			break;
393		case F_WRLCK:
394			if ((fp->f_flag & FWRITE) == 0) {
395				error = EBADF;
396				break;
397			}
398			PROC_LOCK(p);
399			p->p_flag |= P_ADVLOCK;
400			leaderp = p->p_leader;
401			PROC_UNLOCK(p);
402			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
403			    &fl, flg);
404			break;
405		case F_UNLCK:
406			PROC_LOCK(p);
407			leaderp = p->p_leader;
408			PROC_UNLOCK(p);
409			error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
410				&fl, F_POSIX);
411			break;
412		default:
413			error = EINVAL;
414			break;
415		}
416		fdrop(fp, td);
417		break;
418
419	case F_GETLK:
420		if (fp->f_type != DTYPE_VNODE) {
421			FILEDESC_UNLOCK(fdp);
422			error = EBADF;
423			break;
424		}
425		vp = (struct vnode *)fp->f_data;
426		/*
427		 * copyin/lockop may block
428		 */
429		fhold(fp);
430		FILEDESC_UNLOCK(fdp);
432
433		/* Copy in the lock structure */
434		error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
435		    sizeof(fl));
436		if (error) {
437			fdrop(fp, td);
438			break;
439		}
440		if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
441		    fl.l_type != F_UNLCK) {
442			fdrop(fp, td);
443			error = EINVAL;
444			break;
445		}
446		if (fl.l_whence == SEEK_CUR) {
447			if ((fl.l_start > 0 &&
448			     fp->f_offset > OFF_MAX - fl.l_start) ||
449			    (fl.l_start < 0 &&
450			     fp->f_offset < OFF_MIN - fl.l_start)) {
451				fdrop(fp, td);
452				error = EOVERFLOW;
453				break;
454			}
455			fl.l_start += fp->f_offset;
456		}
457		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
458			    &fl, F_POSIX);
459		fdrop(fp, td);
460		if (error == 0) {
461			error = copyout((caddr_t)&fl,
462				    (caddr_t)(intptr_t)uap->arg, sizeof(fl));
463		}
464		break;
465	default:
466		FILEDESC_UNLOCK(fdp);
467		error = EINVAL;
468		break;
469	}
470done2:
471	mtx_unlock(&Giant);
472	return (error);
473}
474
475/*
476 * Common code for dup, dup2, and fcntl(F_DUPFD).
477 * filedesc must be locked, but will be unlocked as a side effect.
478 */
479static int
480do_dup(fdp, old, new, retval, td)
481	register struct filedesc *fdp;
482	register int old, new;
483	register_t *retval;
484	struct thread *td;
485{
486	struct file *fp;
487	struct file *delfp;
488
489	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
490
491	/*
492	 * Save info on the descriptor being overwritten.  We have
493	 * to do the unmap now, but we cannot close it without
494	 * introducing an ownership race for the slot.
495	 */
496	delfp = fdp->fd_ofiles[new];
497#if 0
498	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
499		(void) munmapfd(td, new);
500#endif
501
502	/*
503	 * Duplicate the source descriptor, update lastfile
504	 */
505	fp = fdp->fd_ofiles[old];
506	fdp->fd_ofiles[new] = fp;
507	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
508	fhold(fp);
509	if (new > fdp->fd_lastfile)
510		fdp->fd_lastfile = new;
511	*retval = new;
512
513	FILEDESC_UNLOCK(fdp);
514
515	/*
516	 * If we dup'd over a valid file, we now own the reference to it
517	 * and must dispose of it using closef() semantics (as if a
518	 * close() were performed on it).
519	 */
520	if (delfp) {
521		mtx_lock(&Giant);
522		(void) closef(delfp, td);
523		mtx_unlock(&Giant);
524	}
525	return (0);
526}
527
528/*
529 * If sigio is on the list associated with a process or process group,
530 * disable signalling from the device, remove sigio from the list and
531 * free sigio.
532 */
533void
534funsetown(sigio)
535	struct sigio *sigio;
536{
537	int s;
538
539	if (sigio == NULL)
540		return;
541
542	s = splhigh();
543	*(sigio->sio_myref) = NULL;
544	splx(s);
545	if ((sigio)->sio_pgid < 0) {
546		struct pgrp *pg = (sigio)->sio_pgrp;
547		PGRP_LOCK(pg);
548		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
549			     sigio, sio_pgsigio);
550		PGRP_UNLOCK(pg);
551	} else {
552		struct proc *p = (sigio)->sio_proc;
553		PROC_LOCK(p);
554		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
555			     sigio, sio_pgsigio);
556		PROC_UNLOCK(p);
557	}
558	crfree(sigio->sio_ucred);
559	FREE(sigio, M_SIGIO);
560}
561
562/* Free a list of sigio structures. */
563void
564funsetownlst(sigiolst)
565	struct sigiolst *sigiolst;
566{
567	int s;
568	struct sigio *sigio;
569	struct proc *p;
570	struct pgrp *pg;
571
572	sigio = SLIST_FIRST(sigiolst);
573	if (sigio == NULL)
574		return;
575
576	p = NULL;
577	pg = NULL;
578
579	/*
580	 * Every entry of the list should belong
581	 * to a single proc or pgrp.
582	 */
583	if (sigio->sio_pgid < 0) {
584		pg = sigio->sio_pgrp;
585		PGRP_LOCK_ASSERT(pg, MA_OWNED);
586	} else /* if (sigio->sio_pgid > 0) */ {
587		p = sigio->sio_proc;
588		PROC_LOCK_ASSERT(p, MA_OWNED);
589	}
590
591	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
592		s = splhigh();
593		*(sigio->sio_myref) = NULL;
594		splx(s);
595		if (pg != NULL) {
596			KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list"));
597			KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list"));
598			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio);
599			PGRP_UNLOCK(pg);
600			crfree(sigio->sio_ucred);
601			FREE(sigio, M_SIGIO);
602			PGRP_LOCK(pg);
603		} else /* if (p != NULL) */ {
604			KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list"));
605			KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list"));
606			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio);
607			PROC_UNLOCK(p);
608			crfree(sigio->sio_ucred);
609			FREE(sigio, M_SIGIO);
610			PROC_LOCK(p);
611		}
612	}
613}
614
615/*
616 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
617 *
618 * After permission checking, add a sigio structure to the sigio list for
619 * the process or process group.
620 */
621int
622fsetown(pgid, sigiop)
623	pid_t pgid;
624	struct sigio **sigiop;
625{
626	struct proc *proc;
627	struct pgrp *pgrp;
628	struct sigio *sigio;
629	int s, ret;
630
631	if (pgid == 0) {
632		funsetown(*sigiop);
633		return (0);
634	}
635
636	ret = 0;
637
	/* Allocate and fill in the new sigio while holding no locks. */
639	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
640	sigio->sio_pgid = pgid;
641	sigio->sio_ucred = crhold(curthread->td_ucred);
642	sigio->sio_myref = sigiop;
643
644	sx_slock(&proctree_lock);
645	if (pgid > 0) {
646		proc = pfind(pgid);
647		if (proc == NULL) {
648			ret = ESRCH;
649			goto fail;
650		}
651
652		/*
653		 * Policy - Don't allow a process to FSETOWN a process
654		 * in another session.
655		 *
656		 * Remove this test to allow maximum flexibility or
657		 * restrict FSETOWN to the current process or process
658		 * group for maximum safety.
659		 */
660		PROC_UNLOCK(proc);
661		if (proc->p_session != curthread->td_proc->p_session) {
662			ret = EPERM;
663			goto fail;
664		}
665
666		pgrp = NULL;
667	} else /* if (pgid < 0) */ {
668		pgrp = pgfind(-pgid);
669		if (pgrp == NULL) {
670			ret = ESRCH;
671			goto fail;
672		}
673		PGRP_UNLOCK(pgrp);
674
675		/*
676		 * Policy - Don't allow a process to FSETOWN a process
677		 * in another session.
678		 *
679		 * Remove this test to allow maximum flexibility or
680		 * restrict FSETOWN to the current process or process
681		 * group for maximum safety.
682		 */
683		if (pgrp->pg_session != curthread->td_proc->p_session) {
684			ret = EPERM;
685			goto fail;
686		}
687
688		proc = NULL;
689	}
690	funsetown(*sigiop);
691	if (pgid > 0) {
692		PROC_LOCK(proc);
693		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
694		sigio->sio_proc = proc;
695		PROC_UNLOCK(proc);
696	} else {
697		PGRP_LOCK(pgrp);
698		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
699		sigio->sio_pgrp = pgrp;
700		PGRP_UNLOCK(pgrp);
701	}
702	sx_sunlock(&proctree_lock);
703	s = splhigh();
704	*sigiop = sigio;
705	splx(s);
706	return (0);
707
708fail:
709	sx_sunlock(&proctree_lock);
710	crfree(sigio->sio_ucred);
711	FREE(sigio, M_SIGIO);
712	return (ret);
713}
714
715/*
716 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
717 */
718pid_t
719fgetown(sigio)
720	struct sigio *sigio;
721{
722	return (sigio != NULL ? sigio->sio_pgid : 0);
723}
724
725/*
726 * Close a file descriptor.
727 */
728#ifndef _SYS_SYSPROTO_H_
729struct close_args {
730        int     fd;
731};
732#endif
733/*
734 * MPSAFE
735 */
736/* ARGSUSED */
737int
738close(td, uap)
739	struct thread *td;
740	struct close_args *uap;
741{
742	register struct filedesc *fdp;
743	register struct file *fp;
744	register int fd = uap->fd;
745	int error = 0;
746
747	mtx_lock(&Giant);
748	fdp = td->td_proc->p_fd;
749	FILEDESC_LOCK(fdp);
750	if ((unsigned)fd >= fdp->fd_nfiles ||
751	    (fp = fdp->fd_ofiles[fd]) == NULL) {
752		FILEDESC_UNLOCK(fdp);
753		error = EBADF;
754		goto done2;
755	}
756#if 0
757	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
758		(void) munmapfd(td, fd);
759#endif
760	fdp->fd_ofiles[fd] = NULL;
761	fdp->fd_ofileflags[fd] = 0;
762
763	/*
764	 * we now hold the fp reference that used to be owned by the descriptor
765	 * array.
766	 */
767	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
768		fdp->fd_lastfile--;
769	if (fd < fdp->fd_freefile)
770		fdp->fd_freefile = fd;
771	if (fd < fdp->fd_knlistsize) {
772		FILEDESC_UNLOCK(fdp);
773		knote_fdclose(td, fd);
774	} else
775		FILEDESC_UNLOCK(fdp);
776
777	error = closef(fp, td);
778done2:
779	mtx_unlock(&Giant);
780	return(error);
781}
782
783#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
784/*
785 * Return status information about a file descriptor.
786 */
787#ifndef _SYS_SYSPROTO_H_
788struct ofstat_args {
789	int	fd;
790	struct	ostat *sb;
791};
792#endif
793/*
794 * MPSAFE
795 */
796/* ARGSUSED */
797int
798ofstat(td, uap)
799	struct thread *td;
800	register struct ofstat_args *uap;
801{
802	struct file *fp;
803	struct stat ub;
804	struct ostat oub;
805	int error;
806
807	mtx_lock(&Giant);
808	if ((error = fget(td, uap->fd, &fp)) != 0)
809		goto done2;
810	error = fo_stat(fp, &ub, td);
811	if (error == 0) {
812		cvtstat(&ub, &oub);
813		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
814	}
815	fdrop(fp, td);
816done2:
817	mtx_unlock(&Giant);
818	return (error);
819}
820#endif /* COMPAT_43 || COMPAT_SUNOS */
821
822/*
823 * Return status information about a file descriptor.
824 */
825#ifndef _SYS_SYSPROTO_H_
826struct fstat_args {
827	int	fd;
828	struct	stat *sb;
829};
830#endif
831/*
832 * MPSAFE
833 */
834/* ARGSUSED */
835int
836fstat(td, uap)
837	struct thread *td;
838	struct fstat_args *uap;
839{
840	struct file *fp;
841	struct stat ub;
842	int error;
843
844	mtx_lock(&Giant);
845	if ((error = fget(td, uap->fd, &fp)) != 0)
846		goto done2;
847	error = fo_stat(fp, &ub, td);
848	if (error == 0)
849		error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
850	fdrop(fp, td);
851done2:
852	mtx_unlock(&Giant);
853	return (error);
854}
855
856/*
857 * Return status information about a file descriptor.
858 */
859#ifndef _SYS_SYSPROTO_H_
860struct nfstat_args {
861	int	fd;
862	struct	nstat *sb;
863};
864#endif
865/*
866 * MPSAFE
867 */
868/* ARGSUSED */
869int
870nfstat(td, uap)
871	struct thread *td;
872	register struct nfstat_args *uap;
873{
874	struct file *fp;
875	struct stat ub;
876	struct nstat nub;
877	int error;
878
879	mtx_lock(&Giant);
880	if ((error = fget(td, uap->fd, &fp)) != 0)
881		goto done2;
882	error = fo_stat(fp, &ub, td);
883	if (error == 0) {
884		cvtnstat(&ub, &nub);
885		error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
886	}
887	fdrop(fp, td);
888done2:
889	mtx_unlock(&Giant);
890	return (error);
891}
892
893/*
894 * Return pathconf information about a file descriptor.
895 */
896#ifndef _SYS_SYSPROTO_H_
897struct fpathconf_args {
898	int	fd;
899	int	name;
900};
901#endif
902/*
903 * MPSAFE
904 */
905/* ARGSUSED */
906int
907fpathconf(td, uap)
908	struct thread *td;
909	register struct fpathconf_args *uap;
910{
911	struct file *fp;
912	struct vnode *vp;
913	int error;
914
915	if ((error = fget(td, uap->fd, &fp)) != 0)
916		return (error);
917
918	switch (fp->f_type) {
919	case DTYPE_PIPE:
920	case DTYPE_SOCKET:
921		if (uap->name != _PC_PIPE_BUF) {
922			error = EINVAL;
923		} else {
924			td->td_retval[0] = PIPE_BUF;
925			error = 0;
926		}
927		break;
928	case DTYPE_FIFO:
929	case DTYPE_VNODE:
930		vp = (struct vnode *)fp->f_data;
931		mtx_lock(&Giant);
932		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
933		mtx_unlock(&Giant);
934		break;
935	default:
936		error = EOPNOTSUPP;
937		break;
938	}
939	fdrop(fp, td);
940	return(error);
941}
942
943/*
944 * Allocate a file descriptor for the process.
945 */
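/* Number of times fdalloc() has had to expand a descriptor table. */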
946static int fdexpand;
947SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
948
949int
950fdalloc(td, want, result)
951	struct thread *td;
952	int want;
953	int *result;
954{
955	struct proc *p = td->td_proc;
956	register struct filedesc *fdp = td->td_proc->p_fd;
957	register int i;
958	int lim, last, nfiles;
959	struct file **newofile, **oldofile;
960	char *newofileflags;
961
962	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
963
964	/*
965	 * Search for a free descriptor starting at the higher
966	 * of want or fd_freefile.  If that fails, consider
967	 * expanding the ofile array.
968	 */
969	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
970	for (;;) {
971		last = min(fdp->fd_nfiles, lim);
972		if ((i = want) < fdp->fd_freefile)
973			i = fdp->fd_freefile;
974		for (; i < last; i++) {
975			if (fdp->fd_ofiles[i] == NULL) {
976				fdp->fd_ofileflags[i] = 0;
977				if (i > fdp->fd_lastfile)
978					fdp->fd_lastfile = i;
979				if (want <= fdp->fd_freefile)
980					fdp->fd_freefile = i;
981				*result = i;
982				return (0);
983			}
984		}
985
986		/*
987		 * No space in current array.  Expand?
988		 */
989		if (fdp->fd_nfiles >= lim)
990			return (EMFILE);
991		if (fdp->fd_nfiles < NDEXTENT)
992			nfiles = NDEXTENT;
993		else
994			nfiles = 2 * fdp->fd_nfiles;
995		FILEDESC_UNLOCK(fdp);
996		mtx_lock(&Giant);
997		MALLOC(newofile, struct file **, nfiles * OFILESIZE,
998		    M_FILEDESC, M_WAITOK);
999		mtx_unlock(&Giant);
1000		FILEDESC_LOCK(fdp);
1001
1002		/*
		 * Deal with a file-table extension race that might have
		 * occurred while malloc was blocked.
1005		 */
1006		if (fdp->fd_nfiles >= nfiles) {
1007			FILEDESC_UNLOCK(fdp);
1008			mtx_lock(&Giant);
1009			FREE(newofile, M_FILEDESC);
1010			mtx_unlock(&Giant);
1011			FILEDESC_LOCK(fdp);
1012			continue;
1013		}
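		/*
		 * The new table is a single allocation: nfiles file pointers
		 * followed immediately by nfiles per-descriptor flag bytes.
		 */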
1014		newofileflags = (char *) &newofile[nfiles];
1015		/*
1016		 * Copy the existing ofile and ofileflags arrays
1017		 * and zero the new portion of each array.
1018		 */
1019		bcopy(fdp->fd_ofiles, newofile,
1020			(i = sizeof(struct file *) * fdp->fd_nfiles));
1021		bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1022		bcopy(fdp->fd_ofileflags, newofileflags,
1023			(i = sizeof(char) * fdp->fd_nfiles));
1024		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1025		if (fdp->fd_nfiles > NDFILE)
1026			oldofile = fdp->fd_ofiles;
1027		else
1028			oldofile = NULL;
1029		fdp->fd_ofiles = newofile;
1030		fdp->fd_ofileflags = newofileflags;
1031		fdp->fd_nfiles = nfiles;
1032		fdexpand++;
1033		if (oldofile != NULL) {
1034			FILEDESC_UNLOCK(fdp);
1035			mtx_lock(&Giant);
1036			FREE(oldofile, M_FILEDESC);
1037			mtx_unlock(&Giant);
1038			FILEDESC_LOCK(fdp);
1039		}
1040	}
1041	return (0);
1042}
1043
1044/*
1045 * Check to see whether n user file descriptors
1046 * are available to the process p.
1047 */
1048int
1049fdavail(td, n)
1050	struct thread *td;
1051	register int n;
1052{
1053	struct proc *p = td->td_proc;
1054	register struct filedesc *fdp = td->td_proc->p_fd;
1055	register struct file **fpp;
1056	register int i, lim, last;
1057
1058	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1059
1060	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1061	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1062		return (1);
1063
1064	last = min(fdp->fd_nfiles, lim);
1065	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1066	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1067		if (*fpp == NULL && --n <= 0)
1068			return (1);
1069	}
1070	return (0);
1071}
1072
1073/*
1074 * Create a new open file structure and allocate
 * a file descriptor for the process that refers to it.
1076 */
1077int
1078falloc(td, resultfp, resultfd)
1079	register struct thread *td;
1080	struct file **resultfp;
1081	int *resultfd;
1082{
1083	struct proc *p = td->td_proc;
1084	register struct file *fp, *fq;
1085	int error, i;
1086
1087	sx_xlock(&filelist_lock);
1088	if (nfiles >= maxfiles) {
1089		sx_xunlock(&filelist_lock);
1090		tablefull("file");
1091		return (ENFILE);
1092	}
1093	nfiles++;
1094	sx_xunlock(&filelist_lock);
1095	/*
1096	 * Allocate a new file descriptor.
1097	 * If the process has file descriptor zero open, add to the list
1098	 * of open files at that point, otherwise put it at the front of
1099	 * the list of open files.
1100	 */
1101	fp = uma_zalloc(file_zone, M_WAITOK);
1102	bzero(fp, sizeof(*fp));
1103
1104	/*
1105	 * wait until after malloc (which may have blocked) returns before
1106	 * allocating the slot, else a race might have shrunk it if we had
1107	 * allocated it before the malloc.
1108	 */
1109	FILEDESC_LOCK(p->p_fd);
1110	if ((error = fdalloc(td, 0, &i))) {
1111		FILEDESC_UNLOCK(p->p_fd);
1112		sx_xlock(&filelist_lock);
1113		nfiles--;
1114		sx_xunlock(&filelist_lock);
1115		uma_zfree(file_zone, fp);
1116		return (error);
1117	}
1118	fp->f_mtxp = mtx_pool_alloc();
1119	fp->f_gcflag = 0;
1120	fp->f_count = 1;
1121	fp->f_cred = crhold(td->td_ucred);
1122	fp->f_ops = &badfileops;
1123	fp->f_seqcount = 1;
1124	FILEDESC_UNLOCK(p->p_fd);
1125	sx_xlock(&filelist_lock);
1126	FILEDESC_LOCK(p->p_fd);
1127	if ((fq = p->p_fd->fd_ofiles[0])) {
1128		LIST_INSERT_AFTER(fq, fp, f_list);
1129	} else {
1130		LIST_INSERT_HEAD(&filehead, fp, f_list);
1131	}
1132	p->p_fd->fd_ofiles[i] = fp;
1133	FILEDESC_UNLOCK(p->p_fd);
1134	sx_xunlock(&filelist_lock);
1135	if (resultfp)
1136		*resultfp = fp;
1137	if (resultfd)
1138		*resultfd = i;
1139	return (0);
1140}
1141
1142/*
1143 * Free a file descriptor.
1144 */
1145void
1146ffree(fp)
1147	register struct file *fp;
1148{
1149
1150	KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1151	sx_xlock(&filelist_lock);
1152	LIST_REMOVE(fp, f_list);
1153	nfiles--;
1154	sx_xunlock(&filelist_lock);
1155	crfree(fp->f_cred);
1156	uma_zfree(file_zone, fp);
1157}
1158
1159/*
1160 * Build a new filedesc structure.
1161 */
1162struct filedesc *
1163fdinit(td)
1164	struct thread *td;
1165{
1166	register struct filedesc0 *newfdp;
1167	register struct filedesc *fdp = td->td_proc->p_fd;
1168
1169	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1170	    M_FILEDESC, M_WAITOK | M_ZERO);
1171	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1172	FILEDESC_LOCK(&newfdp->fd_fd);
1173	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1174	if (newfdp->fd_fd.fd_cdir)
1175		VREF(newfdp->fd_fd.fd_cdir);
1176	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1177	if (newfdp->fd_fd.fd_rdir)
1178		VREF(newfdp->fd_fd.fd_rdir);
1179	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1180	if (newfdp->fd_fd.fd_jdir)
1181		VREF(newfdp->fd_fd.fd_jdir);
1182
1183	/* Create the file descriptor table. */
1184	newfdp->fd_fd.fd_refcnt = 1;
1185	newfdp->fd_fd.fd_cmask = cmask;
1186	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1187	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1188	newfdp->fd_fd.fd_nfiles = NDFILE;
1189	newfdp->fd_fd.fd_knlistsize = -1;
1190	FILEDESC_UNLOCK(&newfdp->fd_fd);
1191
1192	return (&newfdp->fd_fd);
1193}
1194
1195/*
1196 * Share a filedesc structure.
1197 */
1198struct filedesc *
1199fdshare(p)
1200	struct proc *p;
1201{
1202	FILEDESC_LOCK(p->p_fd);
1203	p->p_fd->fd_refcnt++;
1204	FILEDESC_UNLOCK(p->p_fd);
1205	return (p->p_fd);
1206}
1207
1208/*
1209 * Copy a filedesc structure.
1210 */
1211struct filedesc *
1212fdcopy(td)
1213	struct thread *td;
1214{
1215	register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1216	register struct file **fpp;
1217	register int i, j;
1218
1219	/* Certain daemons might not have file descriptors. */
1220	if (fdp == NULL)
1221		return (NULL);
1222
1223	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1224
1225	FILEDESC_UNLOCK(fdp);
1226	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1227	    M_FILEDESC, M_WAITOK);
1228	FILEDESC_LOCK(fdp);
1229	bcopy(fdp, newfdp, sizeof(struct filedesc));
1230	FILEDESC_UNLOCK(fdp);
1231	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1232	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1233	if (newfdp->fd_cdir)
1234		VREF(newfdp->fd_cdir);
1235	if (newfdp->fd_rdir)
1236		VREF(newfdp->fd_rdir);
1237	if (newfdp->fd_jdir)
1238		VREF(newfdp->fd_jdir);
1239	newfdp->fd_refcnt = 1;
1240
1241	/*
1242	 * If the number of open files fits in the internal arrays
1243	 * of the open file structure, use them, otherwise allocate
1244	 * additional memory for the number of descriptors currently
1245	 * in use.
1246	 */
1247	FILEDESC_LOCK(fdp);
1248	newfdp->fd_lastfile = fdp->fd_lastfile;
1249	newfdp->fd_nfiles = fdp->fd_nfiles;
1250	if (newfdp->fd_lastfile < NDFILE) {
1251		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1252		newfdp->fd_ofileflags =
1253		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1254		i = NDFILE;
1255	} else {
1256		/*
1257		 * Compute the smallest multiple of NDEXTENT needed
1258		 * for the file descriptors currently in use,
1259		 * allowing the table to shrink.
1260		 */
1261retry:
1262		i = newfdp->fd_nfiles;
1263		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1264			i /= 2;
1265		FILEDESC_UNLOCK(fdp);
1266		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1267		    M_FILEDESC, M_WAITOK);
1268		FILEDESC_LOCK(fdp);
1269		newfdp->fd_lastfile = fdp->fd_lastfile;
1270		newfdp->fd_nfiles = fdp->fd_nfiles;
1271		j = newfdp->fd_nfiles;
1272		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1273			j /= 2;
1274		if (i != j) {
1275			/*
1276			 * The size of the original table has changed.
1277			 * Go over once again.
1278			 */
1279			FILEDESC_UNLOCK(fdp);
1280			FREE(newfdp->fd_ofiles, M_FILEDESC);
1281			FILEDESC_LOCK(fdp);
1282			newfdp->fd_lastfile = fdp->fd_lastfile;
1283			newfdp->fd_nfiles = fdp->fd_nfiles;
1284			goto retry;
1285		}
1286		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1287	}
1288	newfdp->fd_nfiles = i;
1289	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1290	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1291
1292	/*
1293	 * kq descriptors cannot be copied.
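	 * Kqueue descriptors are not inherited by the new table, so clear
	 * them and adjust the free-slot and last-file hints accordingly.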
1294	 */
1295	if (newfdp->fd_knlistsize != -1) {
1296		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1297		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1298			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1299				*fpp = NULL;
1300				if (i < newfdp->fd_freefile)
1301					newfdp->fd_freefile = i;
1302			}
1303			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1304				newfdp->fd_lastfile--;
1305		}
1306		newfdp->fd_knlist = NULL;
1307		newfdp->fd_knlistsize = -1;
1308		newfdp->fd_knhash = NULL;
1309		newfdp->fd_knhashmask = 0;
1310	}
1311
1312	fpp = newfdp->fd_ofiles;
1313	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1314		if (*fpp != NULL) {
1315			fhold(*fpp);
1316		}
1317	}
1318	return (newfdp);
1319}
1320
1321/*
1322 * Release a filedesc structure.
1323 */
1324void
1325fdfree(td)
1326	struct thread *td;
1327{
1328	register struct filedesc *fdp;
1329	struct file **fpp;
1330	register int i;
1331
1332	fdp = td->td_proc->p_fd;
1333	/* Certain daemons might not have file descriptors. */
1334	if (fdp == NULL)
1335		return;
1336
1337	FILEDESC_LOCK(fdp);
1338	if (--fdp->fd_refcnt > 0) {
1339		FILEDESC_UNLOCK(fdp);
1340		return;
1341	}
1342	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
1345	 */
1346	FILEDESC_UNLOCK(fdp);
1347	fpp = fdp->fd_ofiles;
1348	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1349		if (*fpp)
1350			(void) closef(*fpp, td);
1351	}
1352
1353	PROC_LOCK(td->td_proc);
1354	td->td_proc->p_fd = NULL;
1355	PROC_UNLOCK(td->td_proc);
1356
1357	if (fdp->fd_nfiles > NDFILE)
1358		FREE(fdp->fd_ofiles, M_FILEDESC);
1359	if (fdp->fd_cdir)
1360		vrele(fdp->fd_cdir);
1361	if (fdp->fd_rdir)
1362		vrele(fdp->fd_rdir);
1363	if (fdp->fd_jdir)
1364		vrele(fdp->fd_jdir);
1365	if (fdp->fd_knlist)
1366		FREE(fdp->fd_knlist, M_KQUEUE);
1367	if (fdp->fd_knhash)
1368		FREE(fdp->fd_knhash, M_KQUEUE);
1369	mtx_destroy(&fdp->fd_mtx);
1370	FREE(fdp, M_FILEDESC);
1371}
1372
1373/*
 * For setugid programs, we don't want people to use that setugidness
 * to generate error messages which write to a file that would
 * otherwise be off-limits to the process.
 *
 * This is a gross hack to plug the hole.  A better solution would involve
 * a special vop or other form of generalized access control mechanism.  We
 * go ahead and just reject all procfs file system accesses as dangerous.
 *
 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
 * sufficient.  We also don't check for setugidness since we know we are.
1384 */
1385static int
1386is_unsafe(struct file *fp)
1387{
1388	if (fp->f_type == DTYPE_VNODE &&
1389	    ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1390		return (1);
1391	return (0);
1392}
1393
1394/*
 * Make this setugid thing safe, if at all possible.
1396 */
1397void
1398setugidsafety(td)
1399	struct thread *td;
1400{
1401	struct filedesc *fdp = td->td_proc->p_fd;
1402	register int i;
1403
1404	/* Certain daemons might not have file descriptors. */
1405	if (fdp == NULL)
1406		return;
1407
1408	/*
1409	 * note: fdp->fd_ofiles may be reallocated out from under us while
1410	 * we are blocked in a close.  Be careful!
1411	 */
1412	FILEDESC_LOCK(fdp);
1413	for (i = 0; i <= fdp->fd_lastfile; i++) {
1414		if (i > 2)
1415			break;
1416		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1417			struct file *fp;
1418
1419#if 0
1420			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1421				(void) munmapfd(td, i);
1422#endif
1423			if (i < fdp->fd_knlistsize) {
1424				FILEDESC_UNLOCK(fdp);
1425				knote_fdclose(td, i);
1426				FILEDESC_LOCK(fdp);
1427			}
1428			/*
1429			 * NULL-out descriptor prior to close to avoid
1430			 * a race while close blocks.
1431			 */
1432			fp = fdp->fd_ofiles[i];
1433			fdp->fd_ofiles[i] = NULL;
1434			fdp->fd_ofileflags[i] = 0;
1435			if (i < fdp->fd_freefile)
1436				fdp->fd_freefile = i;
1437			FILEDESC_UNLOCK(fdp);
1438			(void) closef(fp, td);
1439			FILEDESC_LOCK(fdp);
1440		}
1441	}
1442	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1443		fdp->fd_lastfile--;
1444	FILEDESC_UNLOCK(fdp);
1445}
1446
1447/*
1448 * Close any files on exec?
1449 */
1450void
1451fdcloseexec(td)
1452	struct thread *td;
1453{
1454	struct filedesc *fdp = td->td_proc->p_fd;
1455	register int i;
1456
1457	/* Certain daemons might not have file descriptors. */
1458	if (fdp == NULL)
1459		return;
1460
1461	FILEDESC_LOCK(fdp);
1462
1463	/*
1464	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1465	 * may block and rip them out from under us.
1466	 */
1467	for (i = 0; i <= fdp->fd_lastfile; i++) {
1468		if (fdp->fd_ofiles[i] != NULL &&
1469		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1470			struct file *fp;
1471
1472#if 0
1473			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1474				(void) munmapfd(td, i);
1475#endif
1476			if (i < fdp->fd_knlistsize) {
1477				FILEDESC_UNLOCK(fdp);
1478				knote_fdclose(td, i);
1479				FILEDESC_LOCK(fdp);
1480			}
1481			/*
1482			 * NULL-out descriptor prior to close to avoid
1483			 * a race while close blocks.
1484			 */
1485			fp = fdp->fd_ofiles[i];
1486			fdp->fd_ofiles[i] = NULL;
1487			fdp->fd_ofileflags[i] = 0;
1488			if (i < fdp->fd_freefile)
1489				fdp->fd_freefile = i;
1490			FILEDESC_UNLOCK(fdp);
1491			(void) closef(fp, td);
1492			FILEDESC_LOCK(fdp);
1493		}
1494	}
1495	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1496		fdp->fd_lastfile--;
1497	FILEDESC_UNLOCK(fdp);
1498}
1499
1500/*
1501 * Internal form of close.
1502 * Decrement reference count on file structure.
1503 * Note: td may be NULL when closing a file
1504 * that was being passed in a message.
1505 */
1506int
1507closef(fp, td)
1508	register struct file *fp;
1509	register struct thread *td;
1510{
1511	struct vnode *vp;
1512	struct flock lf;
1513
1514	if (fp == NULL)
1515		return (0);
1516	/*
1517	 * POSIX record locking dictates that any close releases ALL
1518	 * locks owned by this process.  This is handled by setting
1519	 * a flag in the unlock to free ONLY locks obeying POSIX
1520	 * semantics, and not to free BSD-style file locks.
1521	 * If the descriptor was in a message, POSIX-style locks
1522	 * aren't passed with the descriptor.
1523	 */
1524	if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1525	    fp->f_type == DTYPE_VNODE) {
1526		lf.l_whence = SEEK_SET;
1527		lf.l_start = 0;
1528		lf.l_len = 0;
1529		lf.l_type = F_UNLCK;
1530		vp = (struct vnode *)fp->f_data;
1531		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1532		    F_UNLCK, &lf, F_POSIX);
1533	}
1534	return (fdrop(fp, td));
1535}
1536
1537/*
1538 * Drop reference on struct file passed in, may call closef if the
1539 * reference hits zero.
1540 */
1541int
1542fdrop(fp, td)
1543	struct file *fp;
1544	struct thread *td;
1545{
1546
1547	FILE_LOCK(fp);
1548	return (fdrop_locked(fp, td));
1549}
1550
1551/*
1552 * Extract the file pointer associated with the specified descriptor for
1553 * the current user process.
1554 *
1555 * If the descriptor doesn't exist, EBADF is returned.
1556 *
1557 * If the descriptor exists but doesn't match 'flags' then
1558 * return EBADF for read attempts and EINVAL for write attempts.
1559 *
1560 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
 * It should be dropped with fdrop().
 * If it is not set, the refcount will not be bumped; however, the
 * thread's filedesc struct will be returned locked (for fgetsock).
 *
 * If an error occurred, the non-zero error is returned and *fpp is set to NULL.
1566 * Otherwise *fpp is set and zero is returned.
1567 */
1568static __inline
1569int
1570_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1571{
1572	struct filedesc *fdp;
1573	struct file *fp;
1574
1575	*fpp = NULL;
1576	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1577		return(EBADF);
1578	FILEDESC_LOCK(fdp);
1579	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1580		FILEDESC_UNLOCK(fdp);
1581		return(EBADF);
1582	}
1583
1584	/*
	 * Note: FREAD failures return EBADF to maintain backwards
1586	 * compatibility with what routines returned before.
1587	 *
1588	 * Only one flag, or 0, may be specified.
1589	 */
1590	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1591		FILEDESC_UNLOCK(fdp);
1592		return(EBADF);
1593	}
1594	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1595		FILEDESC_UNLOCK(fdp);
1596		return(EINVAL);
1597	}
1598	if (hold) {
1599		fhold(fp);
1600		FILEDESC_UNLOCK(fdp);
1601	}
1602	*fpp = fp;
1603	return(0);
1604}
1605
1606int
1607fget(struct thread *td, int fd, struct file **fpp)
1608{
1609    return(_fget(td, fd, fpp, 0, 1));
1610}
1611
1612int
1613fget_read(struct thread *td, int fd, struct file **fpp)
1614{
1615    return(_fget(td, fd, fpp, FREAD, 1));
1616}
1617
1618int
1619fget_write(struct thread *td, int fd, struct file **fpp)
1620{
1621    return(_fget(td, fd, fpp, FWRITE, 1));
1622}
1623
1624/*
1625 * Like fget() but loads the underlying vnode, or returns an error if
1626 * the descriptor does not represent a vnode.  Note that pipes use vnodes
1627 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1628 * error).  The returned vnode will be vref()d.
1629 */
1630
1631static __inline
1632int
1633_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1634{
1635	struct file *fp;
1636	int error;
1637
1638	*vpp = NULL;
1639	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1640		return (error);
1641	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1642		error = EINVAL;
1643	} else {
1644		*vpp = (struct vnode *)fp->f_data;
1645		vref(*vpp);
1646	}
1647	FILEDESC_UNLOCK(td->td_proc->p_fd);
1648	return (error);
1649}
1650
1651int
1652fgetvp(struct thread *td, int fd, struct vnode **vpp)
1653{
1654	return(_fgetvp(td, fd, vpp, 0));
1655}
1656
1657int
1658fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1659{
1660	return(_fgetvp(td, fd, vpp, FREAD));
1661}
1662
1663int
1664fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1665{
1666	return(_fgetvp(td, fd, vpp, FWRITE));
1667}
1668
1669/*
1670 * Like fget() but loads the underlying socket, or returns an error if
1671 * the descriptor does not represent a socket.
1672 *
1673 * We bump the ref count on the returned socket.  XXX Also obtain the SX lock in
1674 * the future.
1675 */
1676int
1677fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1678{
1679	struct file *fp;
1680	int error;
1681
1682	*spp = NULL;
1683	if (fflagp)
1684		*fflagp = 0;
1685	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1686		return (error);
1687	if (fp->f_type != DTYPE_SOCKET) {
1688		error = ENOTSOCK;
1689	} else {
1690		*spp = (struct socket *)fp->f_data;
1691		if (fflagp)
1692			*fflagp = fp->f_flag;
1693		soref(*spp);
1694	}
1695	FILEDESC_UNLOCK(td->td_proc->p_fd);
1696	return(error);
1697}
1698
1699/*
 * Drop the reference count on the socket and XXX release the SX lock in
1701 * the future.  The last reference closes the socket.
1702 */
1703void
1704fputsock(struct socket *so)
1705{
1706	sorele(so);
1707}
1708
1709/*
1710 * Drop reference on struct file passed in, may call closef if the
1711 * reference hits zero.
1712 * Expects struct file locked, and will unlock it.
1713 */
1714int
1715fdrop_locked(fp, td)
1716	struct file *fp;
1717	struct thread *td;
1718{
1719	struct flock lf;
1720	struct vnode *vp;
1721	int error;
1722
1723	FILE_LOCK_ASSERT(fp, MA_OWNED);
1724
1725	if (--fp->f_count > 0) {
1726		FILE_UNLOCK(fp);
1727		return (0);
1728	}
1729	mtx_lock(&Giant);
1730	if (fp->f_count < 0)
1731		panic("fdrop: count < 0");
1732	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1733		lf.l_whence = SEEK_SET;
1734		lf.l_start = 0;
1735		lf.l_len = 0;
1736		lf.l_type = F_UNLCK;
1737		vp = (struct vnode *)fp->f_data;
1738		FILE_UNLOCK(fp);
1739		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1740	} else
1741		FILE_UNLOCK(fp);
1742	if (fp->f_ops != &badfileops)
1743		error = fo_close(fp, td);
1744	else
1745		error = 0;
1746	ffree(fp);
1747	mtx_unlock(&Giant);
1748	return (error);
1749}
1750
1751/*
1752 * Apply an advisory lock on a file descriptor.
1753 *
1754 * Just attempt to get a record lock of the requested type on
1755 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1756 */
1757#ifndef _SYS_SYSPROTO_H_
1758struct flock_args {
1759	int	fd;
1760	int	how;
1761};
1762#endif
1763/*
1764 * MPSAFE
1765 */
1766/* ARGSUSED */
1767int
1768flock(td, uap)
1769	struct thread *td;
1770	register struct flock_args *uap;
1771{
1772	struct file *fp;
1773	struct vnode *vp;
1774	struct flock lf;
1775	int error;
1776
1777	if ((error = fget(td, uap->fd, &fp)) != 0)
1778		return (error);
1779	if (fp->f_type != DTYPE_VNODE) {
1780		fdrop(fp, td);
1781		return (EOPNOTSUPP);
1782	}
1783
1784	mtx_lock(&Giant);
1785	vp = (struct vnode *)fp->f_data;
1786	lf.l_whence = SEEK_SET;
1787	lf.l_start = 0;
1788	lf.l_len = 0;
1789	if (uap->how & LOCK_UN) {
1790		lf.l_type = F_UNLCK;
1791		FILE_LOCK(fp);
1792		fp->f_flag &= ~FHASLOCK;
1793		FILE_UNLOCK(fp);
1794		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1795		goto done2;
1796	}
1797	if (uap->how & LOCK_EX)
1798		lf.l_type = F_WRLCK;
1799	else if (uap->how & LOCK_SH)
1800		lf.l_type = F_RDLCK;
1801	else {
1802		error = EBADF;
1803		goto done2;
1804	}
1805	FILE_LOCK(fp);
1806	fp->f_flag |= FHASLOCK;
1807	FILE_UNLOCK(fp);
1808	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1809	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1810done2:
1811	fdrop(fp, td);
1812	mtx_unlock(&Giant);
1813	return (error);
1814}
1815
1816/*
1817 * File Descriptor pseudo-device driver (/dev/fd/).
1818 *
1819 * Opening minor device N dup()s the file (if any) connected to file
1820 * descriptor N belonging to the calling process.  Note that this driver
1821 * consists of only the ``open()'' routine, because all subsequent
1822 * references to this file will be direct to the other driver.
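 *
 * For example, opening "/dev/fd/0" gives the caller a new descriptor
 * that refers to the same open file as its descriptor 0.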
1823 */
1824/* ARGSUSED */
1825static int
1826fdopen(dev, mode, type, td)
1827	dev_t dev;
1828	int mode, type;
1829	struct thread *td;
1830{
1831
1832	/*
	 * XXX Kludge: set curthread->td_dupfd to contain the value of
	 * the file descriptor being sought for duplication. The error
1835	 * return ensures that the vnode for this device will be released
1836	 * by vn_open. Open will detect this special error and take the
1837	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1838	 * will simply report the error.
1839	 */
1840	td->td_dupfd = dev2unit(dev);
1841	return (ENODEV);
1842}
1843
1844/*
1845 * Duplicate the specified descriptor to a free descriptor.
1846 */
1847int
1848dupfdopen(td, fdp, indx, dfd, mode, error)
1849	struct thread *td;
1850	struct filedesc *fdp;
1851	int indx, dfd;
1852	int mode;
1853	int error;
1854{
1855	register struct file *wfp;
1856	struct file *fp;
1857
1858	/*
1859	 * If the to-be-dup'd fd number is greater than the allowed number
1860	 * of file descriptors, or the fd to be dup'd has already been
1861	 * closed, then reject.
1862	 */
1863	FILEDESC_LOCK(fdp);
1864	if ((u_int)dfd >= fdp->fd_nfiles ||
1865	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1866		FILEDESC_UNLOCK(fdp);
1867		return (EBADF);
1868	}
1869
1870	/*
1871	 * There are two cases of interest here.
1872	 *
1873	 * For ENODEV simply dup (dfd) to file descriptor
1874	 * (indx) and return.
1875	 *
1876	 * For ENXIO steal away the file structure from (dfd) and
1877	 * store it in (indx).  (dfd) is effectively closed by
1878	 * this operation.
1879	 *
1880	 * Any other error code is just returned.
1881	 */
1882	switch (error) {
1883	case ENODEV:
1884		/*
1885		 * Check that the mode the file is being opened for is a
1886		 * subset of the mode of the existing descriptor.
1887		 */
1888		FILE_LOCK(wfp);
1889		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1890			FILE_UNLOCK(wfp);
1891			FILEDESC_UNLOCK(fdp);
1892			return (EACCES);
1893		}
1894		fp = fdp->fd_ofiles[indx];
1895#if 0
1896		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1897			(void) munmapfd(td, indx);
1898#endif
1899		fdp->fd_ofiles[indx] = wfp;
1900		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1901		fhold_locked(wfp);
1902		FILE_UNLOCK(wfp);
1903		if (indx > fdp->fd_lastfile)
1904			fdp->fd_lastfile = indx;
1905		if (fp != NULL)
1906			FILE_LOCK(fp);
1907		FILEDESC_UNLOCK(fdp);
1908		/*
1909		 * we now own the reference to fp that the ofiles[] array
1910		 * used to own.  Release it.
1911		 */
1912		if (fp != NULL)
1913			fdrop_locked(fp, td);
1914		return (0);
1915
1916	case ENXIO:
1917		/*
1918		 * Steal away the file pointer from dfd, and stuff it into indx.
1919		 */
1920		fp = fdp->fd_ofiles[indx];
1921#if 0
1922		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1923			(void) munmapfd(td, indx);
1924#endif
1925		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1926		fdp->fd_ofiles[dfd] = NULL;
1927		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1928		fdp->fd_ofileflags[dfd] = 0;
1929
1930		/*
1931		 * Complete the clean up of the filedesc structure by
1932		 * recomputing the various hints.
1933		 */
1934		if (indx > fdp->fd_lastfile) {
1935			fdp->fd_lastfile = indx;
1936		} else {
1937			while (fdp->fd_lastfile > 0 &&
1938			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1939				fdp->fd_lastfile--;
1940			}
1941			if (dfd < fdp->fd_freefile)
1942				fdp->fd_freefile = dfd;
1943		}
1944		if (fp != NULL)
1945			FILE_LOCK(fp);
1946		FILEDESC_UNLOCK(fdp);
1947
1948		/*
1949		 * we now own the reference to fp that the ofiles[] array
1950		 * used to own.  Release it.
1951		 */
1952		if (fp != NULL)
1953			fdrop_locked(fp, td);
1954		return (0);
1955
1956	default:
1957		FILEDESC_UNLOCK(fdp);
1958		return (error);
1959	}
1960	/* NOTREACHED */
1961}
1962
1963/*
1964 * Get file structures.
1965 */
1966static int
1967sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1968{
1969	int error;
1970	struct file *fp;
1971
1972	sx_slock(&filelist_lock);
1973	if (!req->oldptr) {
1974		/*
1975		 * overestimate by 10 files
1976		 */
1977		error = SYSCTL_OUT(req, 0, sizeof(filehead) +
1978				   (nfiles + 10) * sizeof(struct file));
1979		sx_sunlock(&filelist_lock);
1980		return (error);
1981	}
1982
1983	error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1984	if (error) {
1985		sx_sunlock(&filelist_lock);
1986		return (error);
1987	}
1988
1989	/*
1990	 * followed by an array of file structures
1991	 */
1992	LIST_FOREACH(fp, &filehead, f_list) {
1993		error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1994		if (error) {
1995			sx_sunlock(&filelist_lock);
1996			return (error);
1997		}
1998	}
1999	sx_sunlock(&filelist_lock);
2000	return (0);
2001}
2002
2003SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2004    0, 0, sysctl_kern_file, "S,file", "Entire file table");
2005
2006SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2007    &maxfilesperproc, 0, "Maximum files allowed open per process");
2008
2009SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2010    &maxfiles, 0, "Maximum number of files");
2011
2012SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2013    &nfiles, 0, "System-wide number of open files");
2014
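/*
 * Create the /dev/fd/0, /dev/fd/1 and /dev/fd/2 nodes and their stdin,
 * stdout and stderr aliases; when devfs is not present, also pre-create
 * /dev/fd/3 through /dev/fd/(NUMFDESC - 1).
 */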
2015static void
2016fildesc_drvinit(void *unused)
2017{
2018	dev_t dev;
2019
2020	dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2021	make_dev_alias(dev, "stdin");
2022	dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2023	make_dev_alias(dev, "stdout");
2024	dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2025	make_dev_alias(dev, "stderr");
2026	if (!devfs_present) {
2027		int fd;
2028
2029		for (fd = 3; fd < NUMFDESC; fd++)
2030			make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2031			    "fd/%d", fd);
2032	}
2033}
2034
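/*
 * Fallback file operations installed by falloc() until the caller
 * initializes the new file: read, write, ioctl, stat and close fail
 * with EBADF, while poll and kqfilter simply return 0.
 */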
2035struct fileops badfileops = {
2036	badfo_readwrite,
2037	badfo_readwrite,
2038	badfo_ioctl,
2039	badfo_poll,
2040	badfo_kqfilter,
2041	badfo_stat,
2042	badfo_close
2043};
2044
2045static int
2046badfo_readwrite(fp, uio, cred, flags, td)
2047	struct file *fp;
2048	struct uio *uio;
2049	struct ucred *cred;
2050	struct thread *td;
2051	int flags;
2052{
2053
2054	return (EBADF);
2055}
2056
2057static int
2058badfo_ioctl(fp, com, data, td)
2059	struct file *fp;
2060	u_long com;
2061	caddr_t data;
2062	struct thread *td;
2063{
2064
2065	return (EBADF);
2066}
2067
2068static int
2069badfo_poll(fp, events, cred, td)
2070	struct file *fp;
2071	int events;
2072	struct ucred *cred;
2073	struct thread *td;
2074{
2075
2076	return (0);
2077}
2078
2079static int
2080badfo_kqfilter(fp, kn)
2081	struct file *fp;
2082	struct knote *kn;
2083{
2084
2085	return (0);
2086}
2087
2088static int
2089badfo_stat(fp, sb, td)
2090	struct file *fp;
2091	struct stat *sb;
2092	struct thread *td;
2093{
2094
2095	return (EBADF);
2096}
2097
2098static int
2099badfo_close(fp, td)
2100	struct file *fp;
2101	struct thread *td;
2102{
2103
2104	return (EBADF);
2105}
2106
2107SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2108					fildesc_drvinit,NULL)
2109
2110static void filelistinit(void *);
2111SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2112
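/*
 * Create the UMA zone used to allocate struct file and initialize the
 * sx lock protecting the global filehead list.
 */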
2113/* ARGSUSED*/
2114static void
2115filelistinit(dummy)
2116	void *dummy;
2117{
2118	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2119	    NULL, NULL, UMA_ALIGN_PTR, 0);
2120
2121	sx_init(&filelist_lock, "filelist lock");
2122}
2123