kern_descrip.c revision 118143
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 *    must display the following acknowledgement:
20 *	This product includes software developed by the University of
21 *	California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 *    may be used to endorse or promote products derived from this software
24 *    without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
39 */
40
41#include <sys/cdefs.h>
42__FBSDID("$FreeBSD: head/sys/kern/kern_descrip.c 118143 2003-07-29 05:23:19Z alc $");
43
44#include "opt_compat.h"
45
46#include <sys/param.h>
47#include <sys/systm.h>
48#include <sys/syscallsubr.h>
49#include <sys/sysproto.h>
50#include <sys/conf.h>
51#include <sys/filedesc.h>
52#include <sys/lock.h>
53#include <sys/kernel.h>
54#include <sys/limits.h>
55#include <sys/malloc.h>
56#include <sys/mutex.h>
57#include <sys/sysctl.h>
58#include <sys/vnode.h>
59#include <sys/mount.h>
60#include <sys/proc.h>
61#include <sys/namei.h>
62#include <sys/file.h>
63#include <sys/stat.h>
64#include <sys/filio.h>
65#include <sys/fcntl.h>
66#include <sys/unistd.h>
67#include <sys/resourcevar.h>
68#include <sys/event.h>
69#include <sys/sx.h>
70#include <sys/socketvar.h>
71#include <sys/signalvar.h>
72
73#include <vm/vm.h>
74#include <vm/vm_extern.h>
75#include <vm/uma.h>
76
77static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
78static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader",
79		     "file desc to leader structures");
80static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
81
82static uma_zone_t file_zone;
83
84static	 d_open_t  fdopen;
85#define	NUMFDESC 64
86
87#define	CDEV_MAJOR 22
88static struct cdevsw fildesc_cdevsw = {
89	.d_open =	fdopen,
90	.d_name =	"FD",
91	.d_maj =	CDEV_MAJOR,
92};
93
94/* How to treat the 'new' parameter when allocating a fd for do_dup(). */
95enum dup_type { DUP_VARIABLE, DUP_FIXED };
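/*
 * DUP_FIXED means dup to exactly the descriptor 'new' (dup2() semantics),
 * closing whatever is already installed there; DUP_VARIABLE means allocate
 * the lowest free descriptor that is >= 'new' (dup() and F_DUPFD semantics).
 */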
96
97static int do_dup(struct thread *td, enum dup_type type, int old, int new,
98    register_t *retval);
99
100/*
101 * Descriptor management.
102 */
103struct filelist filehead;	/* head of list of open files */
104int nfiles;			/* actual number of open files */
105extern int cmask;
106struct sx filelist_lock;	/* sx to protect filelist */
107struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
108
109/*
110 * System calls on descriptors.
111 */
112#ifndef _SYS_SYSPROTO_H_
113struct getdtablesize_args {
114	int	dummy;
115};
116#endif
117/*
118 * MPSAFE
119 */
120/* ARGSUSED */
121int
122getdtablesize(td, uap)
123	struct thread *td;
124	struct getdtablesize_args *uap;
125{
126	struct proc *p = td->td_proc;
127
128	mtx_lock(&Giant);
129	td->td_retval[0] =
130	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
131	mtx_unlock(&Giant);
132	return (0);
133}
134
135/*
136 * Duplicate a file descriptor to a particular value.
137 *
138 * note: keep in mind that a potential race condition exists when closing
139 * descriptors from a shared descriptor table (via rfork).
140 */
141#ifndef _SYS_SYSPROTO_H_
142struct dup2_args {
143	u_int	from;
144	u_int	to;
145};
146#endif
147/*
148 * MPSAFE
149 */
150/* ARGSUSED */
151int
152dup2(td, uap)
153	struct thread *td;
154	struct dup2_args *uap;
155{
156
157	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
158		    td->td_retval));
159}
160
161/*
162 * Duplicate a file descriptor.
163 */
164#ifndef _SYS_SYSPROTO_H_
165struct dup_args {
166	u_int	fd;
167};
168#endif
169/*
170 * MPSAFE
171 */
172/* ARGSUSED */
173int
174dup(td, uap)
175	struct thread *td;
176	struct dup_args *uap;
177{
178
179	return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
180}
181
182/*
183 * The file control system call.
184 */
185#ifndef _SYS_SYSPROTO_H_
186struct fcntl_args {
187	int	fd;
188	int	cmd;
189	long	arg;
190};
191#endif
192/*
193 * MPSAFE
194 */
195/* ARGSUSED */
196int
197fcntl(td, uap)
198	struct thread *td;
199	struct fcntl_args *uap;
200{
201	struct flock fl;
202	intptr_t arg;
203	int error;
204
205	error = 0;
206	switch (uap->cmd) {
207	case F_GETLK:
208	case F_SETLK:
209	case F_SETLKW:
210		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
211		arg = (intptr_t)&fl;
212		break;
213	default:
214		arg = uap->arg;
215		break;
216	}
217	if (error)
218		return (error);
219	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
220	if (error)
221		return (error);
222	if (uap->cmd == F_GETLK)
223		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
224	return (error);
225}
226
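/*
 * kern_fcntl() is the in-kernel workhorse behind fcntl(); the syscall
 * wrapper above only copies the struct flock argument in and out of
 * userland.  Other kernel code can call kern_fcntl() directly (it is
 * declared with the other kern_*() helpers in <sys/syscallsubr.h>), for
 * example (sketch):
 *
 *	error = kern_fcntl(td, fd, F_SETFD, FD_CLOEXEC);
 *
 * For F_GETLK, F_SETLK and F_SETLKW the 'arg' value must point to a
 * struct flock in kernel space.
 */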
227int
228kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
229{
230	struct filedesc *fdp;
231	struct flock *flp;
232	struct file *fp;
233	struct proc *p;
234	char *pop;
235	struct vnode *vp;
236	u_int newmin;
237	int error, flg, tmp;
238
239	error = 0;
240	flg = F_POSIX;
241	p = td->td_proc;
242	fdp = p->p_fd;
243	mtx_lock(&Giant);
244	FILEDESC_LOCK(fdp);
245	if ((unsigned)fd >= fdp->fd_nfiles ||
246	    (fp = fdp->fd_ofiles[fd]) == NULL) {
247		FILEDESC_UNLOCK(fdp);
248		error = EBADF;
249		goto done2;
250	}
251	pop = &fdp->fd_ofileflags[fd];
252
253	switch (cmd) {
254	case F_DUPFD:
255		FILEDESC_UNLOCK(fdp);
256		newmin = arg;
257		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
258		    newmin >= maxfilesperproc) {
259			error = EINVAL;
260			break;
261		}
262		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
263		break;
264
265	case F_GETFD:
266		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
267		FILEDESC_UNLOCK(fdp);
268		break;
269
270	case F_SETFD:
271		*pop = (*pop &~ UF_EXCLOSE) |
272		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
273		FILEDESC_UNLOCK(fdp);
274		break;
275
276	case F_GETFL:
277		FILE_LOCK(fp);
278		FILEDESC_UNLOCK(fdp);
279		td->td_retval[0] = OFLAGS(fp->f_flag);
280		FILE_UNLOCK(fp);
281		break;
282
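	/*
	 * F_SETFL pushes the new FNONBLOCK and FASYNC settings down to the
	 * underlying object via the FIONBIO and FIOASYNC ioctls; if the
	 * FIOASYNC ioctl fails, the FNONBLOCK change is backed out so that
	 * f_flag stays consistent with the object's state.
	 */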
283	case F_SETFL:
284		FILE_LOCK(fp);
285		FILEDESC_UNLOCK(fdp);
286		fhold_locked(fp);
287		fp->f_flag &= ~FCNTLFLAGS;
288		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
289		FILE_UNLOCK(fp);
290		tmp = fp->f_flag & FNONBLOCK;
291		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
292		if (error) {
293			fdrop(fp, td);
294			break;
295		}
296		tmp = fp->f_flag & FASYNC;
297		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
298		if (error == 0) {
299			fdrop(fp, td);
300			break;
301		}
302		FILE_LOCK(fp);
303		fp->f_flag &= ~FNONBLOCK;
304		FILE_UNLOCK(fp);
305		tmp = 0;
306		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
307		fdrop(fp, td);
308		break;
309
310	case F_GETOWN:
311		fhold(fp);
312		FILEDESC_UNLOCK(fdp);
313		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
314		if (error == 0)
315			td->td_retval[0] = tmp;
316		fdrop(fp, td);
317		break;
318
319	case F_SETOWN:
320		fhold(fp);
321		FILEDESC_UNLOCK(fdp);
322		tmp = arg;
323		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
324		fdrop(fp, td);
325		break;
326
327	case F_SETLKW:
328		flg |= F_WAIT;
329		/* FALLTHROUGH to F_SETLK */
330
331	case F_SETLK:
332		if (fp->f_type != DTYPE_VNODE) {
333			FILEDESC_UNLOCK(fdp);
334			error = EBADF;
335			break;
336		}
337
338		flp = (struct flock *)arg;
339		if (flp->l_whence == SEEK_CUR) {
340			if (fp->f_offset < 0 ||
341			    (flp->l_start > 0 &&
342			     fp->f_offset > OFF_MAX - flp->l_start)) {
343				FILEDESC_UNLOCK(fdp);
344				error = EOVERFLOW;
345				break;
346			}
347			flp->l_start += fp->f_offset;
348		}
349
350		/*
351		 * VOP_ADVLOCK() may block.
352		 */
353		fhold(fp);
354		FILEDESC_UNLOCK(fdp);
355		vp = fp->f_vnode;
356
357		switch (flp->l_type) {
358		case F_RDLCK:
359			if ((fp->f_flag & FREAD) == 0) {
360				error = EBADF;
361				break;
362			}
363			PROC_LOCK(p->p_leader);
364			p->p_leader->p_flag |= P_ADVLOCK;
365			PROC_UNLOCK(p->p_leader);
366			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
367			    flp, flg);
368			break;
369		case F_WRLCK:
370			if ((fp->f_flag & FWRITE) == 0) {
371				error = EBADF;
372				break;
373			}
374			PROC_LOCK(p->p_leader);
375			p->p_leader->p_flag |= P_ADVLOCK;
376			PROC_UNLOCK(p->p_leader);
377			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
378			    flp, flg);
379			break;
380		case F_UNLCK:
381			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
382			    flp, F_POSIX);
383			break;
384		default:
385			error = EINVAL;
386			break;
387		}
388		/* Check for race with close */
389		FILEDESC_LOCK(fdp);
390		if ((unsigned) fd >= fdp->fd_nfiles ||
391		    fp != fdp->fd_ofiles[fd]) {
392			FILEDESC_UNLOCK(fdp);
393			flp->l_whence = SEEK_SET;
394			flp->l_start = 0;
395			flp->l_len = 0;
396			flp->l_type = F_UNLCK;
397			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
398					   F_UNLCK, flp, F_POSIX);
399		} else
400			FILEDESC_UNLOCK(fdp);
401		fdrop(fp, td);
402		break;
403
404	case F_GETLK:
405		if (fp->f_type != DTYPE_VNODE) {
406			FILEDESC_UNLOCK(fdp);
407			error = EBADF;
408			break;
409		}
410		flp = (struct flock *)arg;
411		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
412		    flp->l_type != F_UNLCK) {
413			FILEDESC_UNLOCK(fdp);
414			error = EINVAL;
415			break;
416		}
417		if (flp->l_whence == SEEK_CUR) {
418			if ((flp->l_start > 0 &&
419			    fp->f_offset > OFF_MAX - flp->l_start) ||
420			    (flp->l_start < 0 &&
421			     fp->f_offset < OFF_MIN - flp->l_start)) {
422				FILEDESC_UNLOCK(fdp);
423				error = EOVERFLOW;
424				break;
425			}
426			flp->l_start += fp->f_offset;
427		}
428		/*
429		 * VOP_ADVLOCK() may block.
430		 */
431		fhold(fp);
432		FILEDESC_UNLOCK(fdp);
433		vp = fp->f_vnode;
434		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
435		    F_POSIX);
436		fdrop(fp, td);
437		break;
438	default:
439		FILEDESC_UNLOCK(fdp);
440		error = EINVAL;
441		break;
442	}
443done2:
444	mtx_unlock(&Giant);
445	return (error);
446}
447
448/*
449 * Common code for dup, dup2, and fcntl(F_DUPFD).
450 */
451static int
452do_dup(td, type, old, new, retval)
453	enum dup_type type;
454	int old, new;
455	register_t *retval;
456	struct thread *td;
457{
458	struct filedesc *fdp;
459	struct proc *p;
460	struct file *fp;
461	struct file *delfp;
462	int error, newfd;
463	int holdleaders;
464
465	p = td->td_proc;
466	fdp = p->p_fd;
467
468	/*
469	 * Verify we have a valid descriptor to dup from and possibly to
470	 * dup to.
471	 */
472	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
473	    new >= maxfilesperproc)
474		return (EBADF);
475	FILEDESC_LOCK(fdp);
476	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
477		FILEDESC_UNLOCK(fdp);
478		return (EBADF);
479	}
480	if (type == DUP_FIXED && old == new) {
481		*retval = new;
482		FILEDESC_UNLOCK(fdp);
483		return (0);
484	}
485	fp = fdp->fd_ofiles[old];
486	fhold(fp);
487
488	/*
489	 * Expand the table for the new descriptor if needed.  This may
490	 * block and drop and reacquire the filedesc lock.
491	 */
492	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
493		error = fdalloc(td, new, &newfd);
494		if (error) {
495			FILEDESC_UNLOCK(fdp);
496			fdrop(fp, td);
497			return (error);
498		}
499	}
500	if (type == DUP_VARIABLE)
501		new = newfd;
502
503	/*
504	 * If the old file changed out from under us then treat it as a
505	 * bad file descriptor.  Userland should do its own locking to
506	 * avoid this case.
507	 */
508	if (fdp->fd_ofiles[old] != fp) {
509		if (fdp->fd_ofiles[new] == NULL) {
510			if (new < fdp->fd_freefile)
511				fdp->fd_freefile = new;
512			while (fdp->fd_lastfile > 0 &&
513			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
514				fdp->fd_lastfile--;
515		}
516		FILEDESC_UNLOCK(fdp);
517		fdrop(fp, td);
518		return (EBADF);
519	}
520	KASSERT(old != new, ("new fd is same as old"));
521
522	/*
523	 * Save info on the descriptor being overwritten.  We have
524	 * to do the unmap now, but we cannot close it without
525	 * introducing an ownership race for the slot.
526	 */
527	delfp = fdp->fd_ofiles[new];
528	if (delfp != NULL && p->p_fdtol != NULL) {
529		/*
530		 * Ask fdfree() to sleep to ensure that all relevant
531		 * process leaders can be traversed in closef().
532		 */
533		fdp->fd_holdleaderscount++;
534		holdleaders = 1;
535	} else
536		holdleaders = 0;
537	KASSERT(delfp == NULL || type == DUP_FIXED,
538	    ("dup() picked an open file"));
539#if 0
540	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
541		(void) munmapfd(td, new);
542#endif
543
544	/*
545	 * Duplicate the source descriptor, update lastfile
546	 */
547	fdp->fd_ofiles[new] = fp;
548 	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
549	if (new > fdp->fd_lastfile)
550		fdp->fd_lastfile = new;
551	FILEDESC_UNLOCK(fdp);
552	*retval = new;
553
554	/*
555	 * If we dup'd over a valid file, we now own the reference to it
556	 * and must dispose of it using closef() semantics (as if a
557	 * close() were performed on it).
558	 */
559	if (delfp) {
560		mtx_lock(&Giant);
561		(void) closef(delfp, td);
562		mtx_unlock(&Giant);
563		if (holdleaders) {
564			FILEDESC_LOCK(fdp);
565			fdp->fd_holdleaderscount--;
566			if (fdp->fd_holdleaderscount == 0 &&
567			    fdp->fd_holdleaderswakeup != 0) {
568				fdp->fd_holdleaderswakeup = 0;
569				wakeup(&fdp->fd_holdleaderscount);
570			}
571			FILEDESC_UNLOCK(fdp);
572		}
573	}
574	return (0);
575}
576
577/*
578 * If sigio is on the list associated with a process or process group,
579 * disable signalling from the device, remove sigio from the list and
580 * free sigio.
581 */
582void
583funsetown(sigiop)
584	struct sigio **sigiop;
585{
586	struct sigio *sigio;
587
588	SIGIO_LOCK();
589	sigio = *sigiop;
590	if (sigio == NULL) {
591		SIGIO_UNLOCK();
592		return;
593	}
594	*(sigio->sio_myref) = NULL;
595	if ((sigio)->sio_pgid < 0) {
596		struct pgrp *pg = (sigio)->sio_pgrp;
597		PGRP_LOCK(pg);
598		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
599			     sigio, sio_pgsigio);
600		PGRP_UNLOCK(pg);
601	} else {
602		struct proc *p = (sigio)->sio_proc;
603		PROC_LOCK(p);
604		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
605			     sigio, sio_pgsigio);
606		PROC_UNLOCK(p);
607	}
608	SIGIO_UNLOCK();
609	crfree(sigio->sio_ucred);
610	FREE(sigio, M_SIGIO);
611}
612
613/*
614 * Free a list of sigio structures.
615 * We only need to take the SIGIO_LOCK because we have made ourselves
616 * inaccessible to callers of fsetown and therefore do not need to lock
617 * the proc or pgrp struct for the list manipulation.
618 */
619void
620funsetownlst(sigiolst)
621	struct sigiolst *sigiolst;
622{
623	struct proc *p;
624	struct pgrp *pg;
625	struct sigio *sigio;
626
627	sigio = SLIST_FIRST(sigiolst);
628	if (sigio == NULL)
629		return;
630	p = NULL;
631	pg = NULL;
632
633	/*
634	 * Every entry of the list should belong
635	 * to a single proc or pgrp.
636	 */
637	if (sigio->sio_pgid < 0) {
638		pg = sigio->sio_pgrp;
639		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
640	} else /* if (sigio->sio_pgid > 0) */ {
641		p = sigio->sio_proc;
642		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
643	}
644
645	SIGIO_LOCK();
646	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
647		*(sigio->sio_myref) = NULL;
648		if (pg != NULL) {
649			KASSERT(sigio->sio_pgid < 0,
650			    ("Proc sigio in pgrp sigio list"));
651			KASSERT(sigio->sio_pgrp == pg,
652			    ("Bogus pgrp in sigio list"));
653			PGRP_LOCK(pg);
654			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
655			    sio_pgsigio);
656			PGRP_UNLOCK(pg);
657		} else /* if (p != NULL) */ {
658			KASSERT(sigio->sio_pgid > 0,
659			    ("Pgrp sigio in proc sigio list"));
660			KASSERT(sigio->sio_proc == p,
661			    ("Bogus proc in sigio list"));
662			PROC_LOCK(p);
663			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
664			    sio_pgsigio);
665			PROC_UNLOCK(p);
666		}
667		SIGIO_UNLOCK();
668		crfree(sigio->sio_ucred);
669		FREE(sigio, M_SIGIO);
670		SIGIO_LOCK();
671	}
672	SIGIO_UNLOCK();
673}
674
675/*
676 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
677 *
678 * After permission checking, add a sigio structure to the sigio list for
679 * the process or process group.
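 *
 * A positive pgid selects the process with that pid, a negative pgid
 * selects the process group -pgid, and pgid == 0 simply clears the
 * current owner.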
680 */
681int
682fsetown(pgid, sigiop)
683	pid_t pgid;
684	struct sigio **sigiop;
685{
686	struct proc *proc;
687	struct pgrp *pgrp;
688	struct sigio *sigio;
689	int ret;
690
691	if (pgid == 0) {
692		funsetown(sigiop);
693		return (0);
694	}
695
696	ret = 0;
697
698	/* Allocate and fill in the new sigio out of locks. */
699	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
700	sigio->sio_pgid = pgid;
701	sigio->sio_ucred = crhold(curthread->td_ucred);
702	sigio->sio_myref = sigiop;
703
704	sx_slock(&proctree_lock);
705	if (pgid > 0) {
706		proc = pfind(pgid);
707		if (proc == NULL) {
708			ret = ESRCH;
709			goto fail;
710		}
711
712		/*
713		 * Policy - Don't allow a process to FSETOWN a process
714		 * in another session.
715		 *
716		 * Remove this test to allow maximum flexibility or
717		 * restrict FSETOWN to the current process or process
718		 * group for maximum safety.
719		 */
720		PROC_UNLOCK(proc);
721		if (proc->p_session != curthread->td_proc->p_session) {
722			ret = EPERM;
723			goto fail;
724		}
725
726		pgrp = NULL;
727	} else /* if (pgid < 0) */ {
728		pgrp = pgfind(-pgid);
729		if (pgrp == NULL) {
730			ret = ESRCH;
731			goto fail;
732		}
733		PGRP_UNLOCK(pgrp);
734
735		/*
736		 * Policy - Don't allow a process to FSETOWN a process
737		 * in another session.
738		 *
739		 * Remove this test to allow maximum flexibility or
740		 * restrict FSETOWN to the current process or process
741		 * group for maximum safety.
742		 */
743		if (pgrp->pg_session != curthread->td_proc->p_session) {
744			ret = EPERM;
745			goto fail;
746		}
747
748		proc = NULL;
749	}
750	funsetown(sigiop);
751	if (pgid > 0) {
752		PROC_LOCK(proc);
753		/*
754		 * Since funsetownlst() is called without the proctree
755		 * locked, we need to check for P_WEXIT.
756		 * XXX: is ESRCH correct?
757		 */
758		if ((proc->p_flag & P_WEXIT) != 0) {
759			PROC_UNLOCK(proc);
760			ret = ESRCH;
761			goto fail;
762		}
763		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
764		sigio->sio_proc = proc;
765		PROC_UNLOCK(proc);
766	} else {
767		PGRP_LOCK(pgrp);
768		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
769		sigio->sio_pgrp = pgrp;
770		PGRP_UNLOCK(pgrp);
771	}
772	sx_sunlock(&proctree_lock);
773	SIGIO_LOCK();
774	*sigiop = sigio;
775	SIGIO_UNLOCK();
776	return (0);
777
778fail:
779	sx_sunlock(&proctree_lock);
780	crfree(sigio->sio_ucred);
781	FREE(sigio, M_SIGIO);
782	return (ret);
783}
784
785/*
786 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
787 */
788pid_t
789fgetown(sigiop)
790	struct sigio **sigiop;
791{
792	pid_t pgid;
793
794	SIGIO_LOCK();
795	pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
796	SIGIO_UNLOCK();
797	return (pgid);
798}
799
800/*
801 * Close a file descriptor.
802 */
803#ifndef _SYS_SYSPROTO_H_
804struct close_args {
805        int     fd;
806};
807#endif
808/*
809 * MPSAFE
810 */
811/* ARGSUSED */
812int
813close(td, uap)
814	struct thread *td;
815	struct close_args *uap;
816{
817	struct filedesc *fdp;
818	struct file *fp;
819	int fd, error;
820	int holdleaders;
821
822	fd = uap->fd;
823	error = 0;
824	holdleaders = 0;
825	fdp = td->td_proc->p_fd;
826	mtx_lock(&Giant);
827	FILEDESC_LOCK(fdp);
828	if ((unsigned)fd >= fdp->fd_nfiles ||
829	    (fp = fdp->fd_ofiles[fd]) == NULL) {
830		FILEDESC_UNLOCK(fdp);
831		error = EBADF;
832		goto done2;
833	}
834#if 0
835	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
836		(void) munmapfd(td, fd);
837#endif
838	fdp->fd_ofiles[fd] = NULL;
839	fdp->fd_ofileflags[fd] = 0;
840	if (td->td_proc->p_fdtol != NULL) {
841		/*
842		 * Ask fdfree() to sleep to ensure that all relevant
843		 * process leaders can be traversed in closef().
844		 */
845		fdp->fd_holdleaderscount++;
846		holdleaders = 1;
847	}
848
849	/*
850	 * we now hold the fp reference that used to be owned by the descriptor
851	 * array.
852	 */
853	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
854		fdp->fd_lastfile--;
855	if (fd < fdp->fd_freefile)
856		fdp->fd_freefile = fd;
857	if (fd < fdp->fd_knlistsize) {
858		FILEDESC_UNLOCK(fdp);
859		knote_fdclose(td, fd);
860	} else
861		FILEDESC_UNLOCK(fdp);
862
863	error = closef(fp, td);
864done2:
865	mtx_unlock(&Giant);
866	if (holdleaders) {
867		FILEDESC_LOCK(fdp);
868		fdp->fd_holdleaderscount--;
869		if (fdp->fd_holdleaderscount == 0 &&
870		    fdp->fd_holdleaderswakeup != 0) {
871			fdp->fd_holdleaderswakeup = 0;
872			wakeup(&fdp->fd_holdleaderscount);
873		}
874		FILEDESC_UNLOCK(fdp);
875	}
876	return (error);
877}
878
879#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
880/*
881 * Return status information about a file descriptor.
882 */
883#ifndef _SYS_SYSPROTO_H_
884struct ofstat_args {
885	int	fd;
886	struct	ostat *sb;
887};
888#endif
889/*
890 * MPSAFE
891 */
892/* ARGSUSED */
893int
894ofstat(td, uap)
895	struct thread *td;
896	struct ofstat_args *uap;
897{
898	struct file *fp;
899	struct stat ub;
900	struct ostat oub;
901	int error;
902
903	mtx_lock(&Giant);
904	if ((error = fget(td, uap->fd, &fp)) != 0)
905		goto done2;
906	error = fo_stat(fp, &ub, td->td_ucred, td);
907	if (error == 0) {
908		cvtstat(&ub, &oub);
909		error = copyout(&oub, uap->sb, sizeof(oub));
910	}
911	fdrop(fp, td);
912done2:
913	mtx_unlock(&Giant);
914	return (error);
915}
916#endif /* COMPAT_43 || COMPAT_SUNOS */
917
918/*
919 * Return status information about a file descriptor.
920 */
921#ifndef _SYS_SYSPROTO_H_
922struct fstat_args {
923	int	fd;
924	struct	stat *sb;
925};
926#endif
927/*
928 * MPSAFE
929 */
930/* ARGSUSED */
931int
932fstat(td, uap)
933	struct thread *td;
934	struct fstat_args *uap;
935{
936	struct file *fp;
937	struct stat ub;
938	int error;
939
940	mtx_lock(&Giant);
941	if ((error = fget(td, uap->fd, &fp)) != 0)
942		goto done2;
943	error = fo_stat(fp, &ub, td->td_ucred, td);
944	if (error == 0)
945		error = copyout(&ub, uap->sb, sizeof(ub));
946	fdrop(fp, td);
947done2:
948	mtx_unlock(&Giant);
949	return (error);
950}
951
952/*
953 * Return status information about a file descriptor.
954 */
955#ifndef _SYS_SYSPROTO_H_
956struct nfstat_args {
957	int	fd;
958	struct	nstat *sb;
959};
960#endif
961/*
962 * MPSAFE
963 */
964/* ARGSUSED */
965int
966nfstat(td, uap)
967	struct thread *td;
968	struct nfstat_args *uap;
969{
970	struct file *fp;
971	struct stat ub;
972	struct nstat nub;
973	int error;
974
975	mtx_lock(&Giant);
976	if ((error = fget(td, uap->fd, &fp)) != 0)
977		goto done2;
978	error = fo_stat(fp, &ub, td->td_ucred, td);
979	if (error == 0) {
980		cvtnstat(&ub, &nub);
981		error = copyout(&nub, uap->sb, sizeof(nub));
982	}
983	fdrop(fp, td);
984done2:
985	mtx_unlock(&Giant);
986	return (error);
987}
988
989/*
990 * Return pathconf information about a file descriptor.
991 */
992#ifndef _SYS_SYSPROTO_H_
993struct fpathconf_args {
994	int	fd;
995	int	name;
996};
997#endif
998/*
999 * MPSAFE
1000 */
1001/* ARGSUSED */
1002int
1003fpathconf(td, uap)
1004	struct thread *td;
1005	struct fpathconf_args *uap;
1006{
1007	struct file *fp;
1008	struct vnode *vp;
1009	int error;
1010
1011	if ((error = fget(td, uap->fd, &fp)) != 0)
1012		return (error);
1013
1014	/* If asynchronous I/O is available, it works for all descriptors. */
1015	if (uap->name == _PC_ASYNC_IO) {
1016		td->td_retval[0] = async_io_version;
1017		goto out;
1018	}
1019	vp = fp->f_vnode;
1020	if (vp != NULL) {
1021		mtx_lock(&Giant);
1022		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1023		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
1024		VOP_UNLOCK(vp, 0, td);
1025		mtx_unlock(&Giant);
1026	} else if (fp->f_type == DTYPE_PIPE || fp->f_type == DTYPE_SOCKET) {
1027		if (uap->name != _PC_PIPE_BUF) {
1028			error = EINVAL;
1029		} else {
1030			td->td_retval[0] = PIPE_BUF;
1031			error = 0;
1032		}
1033	} else {
1034		error = EOPNOTSUPP;
1035	}
1036out:
1037	fdrop(fp, td);
1038	return (error);
1039}
1040
1041/*
1042 * Allocate a file descriptor for the process.
1043 */
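/* debug.fdexpand counts how many times a descriptor table has been grown. */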
1044static int fdexpand;
1045SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
1046
1047int
1048fdalloc(td, want, result)
1049	struct thread *td;
1050	int want;
1051	int *result;
1052{
1053	struct proc *p = td->td_proc;
1054	struct filedesc *fdp = td->td_proc->p_fd;
1055	int i;
1056	int lim, last, nfiles;
1057	struct file **newofile, **oldofile;
1058	char *newofileflags;
1059
1060	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1061
1062	/*
1063	 * Search for a free descriptor starting at the higher
1064	 * of want or fd_freefile.  If that fails, consider
1065	 * expanding the ofile array.
1066	 */
1067	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1068	for (;;) {
1069		last = min(fdp->fd_nfiles, lim);
1070		i = max(want, fdp->fd_freefile);
1071		for (; i < last; i++) {
1072			if (fdp->fd_ofiles[i] == NULL) {
1073				fdp->fd_ofileflags[i] = 0;
1074				if (i > fdp->fd_lastfile)
1075					fdp->fd_lastfile = i;
1076				if (want <= fdp->fd_freefile)
1077					fdp->fd_freefile = i;
1078				*result = i;
1079				return (0);
1080			}
1081		}
1082
1083		/*
1084		 * No space in current array.  Expand?
1085		 */
1086		if (i >= lim)
1087			return (EMFILE);
1088		if (fdp->fd_nfiles < NDEXTENT)
1089			nfiles = NDEXTENT;
1090		else
1091			nfiles = 2 * fdp->fd_nfiles;
1092		while (nfiles < want)
1093			nfiles <<= 1;
1094		FILEDESC_UNLOCK(fdp);
1095		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
1096
1097		/*
1098		 * Deal with file-table extend race that might have
1099		 * occurred while filedesc was unlocked.
1100		 */
1101		FILEDESC_LOCK(fdp);
1102		if (fdp->fd_nfiles >= nfiles) {
1103			FILEDESC_UNLOCK(fdp);
1104			free(newofile, M_FILEDESC);
1105			FILEDESC_LOCK(fdp);
1106			continue;
1107		}
1108		newofileflags = (char *) &newofile[nfiles];
1109		/*
1110		 * Copy the existing ofile and ofileflags arrays
1111		 * and zero the new portion of each array.
1112		 */
1113		i = fdp->fd_nfiles * sizeof(struct file *);
1114		bcopy(fdp->fd_ofiles, newofile,	i);
1115		bzero((char *)newofile + i,
1116		    nfiles * sizeof(struct file *) - i);
1117		i = fdp->fd_nfiles * sizeof(char);
1118		bcopy(fdp->fd_ofileflags, newofileflags, i);
1119		bzero(newofileflags + i, nfiles * sizeof(char) - i);
1120		if (fdp->fd_nfiles > NDFILE)
1121			oldofile = fdp->fd_ofiles;
1122		else
1123			oldofile = NULL;
1124		fdp->fd_ofiles = newofile;
1125		fdp->fd_ofileflags = newofileflags;
1126		fdp->fd_nfiles = nfiles;
1127		fdexpand++;
1128		if (oldofile != NULL) {
1129			FILEDESC_UNLOCK(fdp);
1130			free(oldofile, M_FILEDESC);
1131			FILEDESC_LOCK(fdp);
1132		}
1133	}
1134}
1135
1136/*
1137 * Check to see whether n user file descriptors
1138 * are available to the process p.
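 * Note that headroom left under the resource limit for growing the table
 * is counted as available even though those slots are not allocated yet.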
1139 */
1140int
1141fdavail(td, n)
1142	struct thread *td;
1143	int n;
1144{
1145	struct proc *p = td->td_proc;
1146	struct filedesc *fdp = td->td_proc->p_fd;
1147	struct file **fpp;
1148	int i, lim, last;
1149
1150	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1151
1152	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1153	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1154		return (1);
1155	last = min(fdp->fd_nfiles, lim);
1156	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1157	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1158		if (*fpp == NULL && --n <= 0)
1159			return (1);
1160	}
1161	return (0);
1162}
1163
1164/*
1165 * Create a new open file structure and allocate
1166 * a file descriptor for the process that refers to it.
1167 */
1168int
1169falloc(td, resultfp, resultfd)
1170	struct thread *td;
1171	struct file **resultfp;
1172	int *resultfd;
1173{
1174	struct proc *p = td->td_proc;
1175	struct file *fp, *fq;
1176	int error, i;
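	/*
	 * Reserve roughly the last 5% of the global file table for the
	 * superuser; ordinary users are refused new files once nfiles
	 * reaches maxuserfiles.
	 */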
1177	int maxuserfiles = maxfiles - (maxfiles / 20);
1178	static struct timeval lastfail;
1179	static int curfail;
1180
1181	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
1182	sx_xlock(&filelist_lock);
1183	if ((nfiles >= maxuserfiles && td->td_ucred->cr_ruid != 0)
1184	   || nfiles >= maxfiles) {
1185		if (ppsratecheck(&lastfail, &curfail, 1)) {
1186			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
1187				td->td_ucred->cr_ruid);
1188		}
1189		sx_xunlock(&filelist_lock);
1190		uma_zfree(file_zone, fp);
1191		return (ENFILE);
1192	}
1193	nfiles++;
1194
1195	/*
1196	 * If the process has file descriptor zero open, add the new file
1197	 * descriptor to the list of open files at that point, otherwise
1198	 * put it at the front of the list of open files.
1199	 */
1200	fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
1201	fp->f_count = 1;
1202	fp->f_cred = crhold(td->td_ucred);
1203	fp->f_ops = &badfileops;
1204	FILEDESC_LOCK(p->p_fd);
1205	if ((fq = p->p_fd->fd_ofiles[0])) {
1206		LIST_INSERT_AFTER(fq, fp, f_list);
1207	} else {
1208		LIST_INSERT_HEAD(&filehead, fp, f_list);
1209	}
1210	sx_xunlock(&filelist_lock);
1211	if ((error = fdalloc(td, 0, &i))) {
1212		FILEDESC_UNLOCK(p->p_fd);
1213		fdrop(fp, td);
1214		return (error);
1215	}
1216	p->p_fd->fd_ofiles[i] = fp;
1217	FILEDESC_UNLOCK(p->p_fd);
1218	if (resultfp)
1219		*resultfp = fp;
1220	if (resultfd)
1221		*resultfd = i;
1222	return (0);
1223}
1224
1225/*
1226 * Free a file structure.
1227 */
1228void
1229ffree(fp)
1230	struct file *fp;
1231{
1232
1233	KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
1234	sx_xlock(&filelist_lock);
1235	LIST_REMOVE(fp, f_list);
1236	nfiles--;
1237	sx_xunlock(&filelist_lock);
1238	crfree(fp->f_cred);
1239	uma_zfree(file_zone, fp);
1240}
1241
1242/*
1243 * Build a new filedesc structure from another.
1244 * Copy the current, root, and jail root vnode references.
1245 */
1246struct filedesc *
1247fdinit(fdp)
1248	struct filedesc *fdp;
1249{
1250	struct filedesc0 *newfdp;
1251
1252	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1253	    M_FILEDESC, M_WAITOK | M_ZERO);
1254	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1255	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1256	if (newfdp->fd_fd.fd_cdir)
1257		VREF(newfdp->fd_fd.fd_cdir);
1258	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1259	if (newfdp->fd_fd.fd_rdir)
1260		VREF(newfdp->fd_fd.fd_rdir);
1261	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1262	if (newfdp->fd_fd.fd_jdir)
1263		VREF(newfdp->fd_fd.fd_jdir);
1264
1265	/* Create the file descriptor table. */
1266	newfdp->fd_fd.fd_refcnt = 1;
1267	newfdp->fd_fd.fd_cmask = cmask;
1268	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1269	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1270	newfdp->fd_fd.fd_nfiles = NDFILE;
1271	newfdp->fd_fd.fd_knlistsize = -1;
1272	return (&newfdp->fd_fd);
1273}
1274
1275/*
1276 * Share a filedesc structure.
1277 */
1278struct filedesc *
1279fdshare(fdp)
1280	struct filedesc *fdp;
1281{
1282	FILEDESC_LOCK(fdp);
1283	fdp->fd_refcnt++;
1284	FILEDESC_UNLOCK(fdp);
1285	return (fdp);
1286}
1287
1288/*
1289 * Copy a filedesc structure.
1290 * A NULL pointer argument returns a NULL reference; this is to ease
1291 * callers, not to catch errors.
1292 */
1293struct filedesc *
1294fdcopy(fdp)
1295	struct filedesc *fdp;
1296{
1297	struct filedesc *newfdp;
1298	struct file **fpp;
1299	int i, j;
1300
1301	/* Certain daemons might not have file descriptors. */
1302	if (fdp == NULL)
1303		return (NULL);
1304
1305	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1306
1307	FILEDESC_UNLOCK(fdp);
1308	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1309	    M_FILEDESC, M_WAITOK);
1310	FILEDESC_LOCK(fdp);
1311	bcopy(fdp, newfdp, sizeof(struct filedesc));
1312	FILEDESC_UNLOCK(fdp);
1313	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1314	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1315	if (newfdp->fd_cdir)
1316		VREF(newfdp->fd_cdir);
1317	if (newfdp->fd_rdir)
1318		VREF(newfdp->fd_rdir);
1319	if (newfdp->fd_jdir)
1320		VREF(newfdp->fd_jdir);
1321	newfdp->fd_refcnt = 1;
1322
1323	/*
1324	 * If the number of open files fits in the internal arrays
1325	 * of the open file structure, use them, otherwise allocate
1326	 * additional memory for the number of descriptors currently
1327	 * in use.
1328	 */
1329	FILEDESC_LOCK(fdp);
1330	newfdp->fd_lastfile = fdp->fd_lastfile;
1331	newfdp->fd_nfiles = fdp->fd_nfiles;
1332	if (newfdp->fd_lastfile < NDFILE) {
1333		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1334		newfdp->fd_ofileflags =
1335		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
1336		i = NDFILE;
1337	} else {
1338		/*
1339		 * Compute the smallest multiple of NDEXTENT needed
1340		 * for the file descriptors currently in use,
1341		 * allowing the table to shrink.
1342		 */
1343retry:
1344		i = newfdp->fd_nfiles;
1345		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1346			i /= 2;
1347		FILEDESC_UNLOCK(fdp);
1348		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1349		    M_FILEDESC, M_WAITOK);
1350		FILEDESC_LOCK(fdp);
1351		newfdp->fd_lastfile = fdp->fd_lastfile;
1352		newfdp->fd_nfiles = fdp->fd_nfiles;
1353		j = newfdp->fd_nfiles;
1354		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1355			j /= 2;
1356		if (i != j) {
1357			/*
1358			 * The size of the original table has changed.
1359			 * Go over once again.
1360			 */
1361			FILEDESC_UNLOCK(fdp);
1362			FREE(newfdp->fd_ofiles, M_FILEDESC);
1363			FILEDESC_LOCK(fdp);
1364			newfdp->fd_lastfile = fdp->fd_lastfile;
1365			newfdp->fd_nfiles = fdp->fd_nfiles;
1366			goto retry;
1367		}
1368		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1369	}
1370	newfdp->fd_nfiles = i;
1371	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1372	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1373
1374	/*
1375	 * kq descriptors cannot be copied.
1376	 */
1377	if (newfdp->fd_knlistsize != -1) {
1378		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1379		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1380			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1381				*fpp = NULL;
1382				if (i < newfdp->fd_freefile)
1383					newfdp->fd_freefile = i;
1384			}
1385			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1386				newfdp->fd_lastfile--;
1387		}
1388		newfdp->fd_knlist = NULL;
1389		newfdp->fd_knlistsize = -1;
1390		newfdp->fd_knhash = NULL;
1391		newfdp->fd_knhashmask = 0;
1392	}
1393
1394	fpp = newfdp->fd_ofiles;
1395	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1396		if (*fpp != NULL)
1397			fhold(*fpp);
1398	}
1399	return (newfdp);
1400}
1401
1402/* A mutex to protect the association between a proc and filedesc. */
1403struct mtx	fdesc_mtx;
1404MTX_SYSINIT(fdesc, &fdesc_mtx, "fdesc", MTX_DEF);
1405
1406/*
1407 * Release a filedesc structure.
1408 */
1409void
1410fdfree(td)
1411	struct thread *td;
1412{
1413	struct filedesc *fdp;
1414	struct file **fpp;
1415	int i;
1416	struct filedesc_to_leader *fdtol;
1417	struct file *fp;
1418	struct vnode *vp;
1419	struct flock lf;
1420
1421	/* Certain daemons might not have file descriptors. */
1422	fdp = td->td_proc->p_fd;
1423	if (fdp == NULL)
1424		return;
1425
1426	/* Check for special need to clear POSIX style locks */
1427	fdtol = td->td_proc->p_fdtol;
1428	if (fdtol != NULL) {
1429		FILEDESC_LOCK(fdp);
1430		KASSERT(fdtol->fdl_refcount > 0,
1431			("filedesc_to_refcount botch: fdl_refcount=%d",
1432			 fdtol->fdl_refcount));
1433		if (fdtol->fdl_refcount == 1 &&
1434		    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
1435			i = 0;
1436			fpp = fdp->fd_ofiles;
1437			for (i = 0, fpp = fdp->fd_ofiles;
1438			     i <= fdp->fd_lastfile;
1439			     i++, fpp++) {
1440				if (*fpp == NULL ||
1441				    (*fpp)->f_type != DTYPE_VNODE)
1442					continue;
1443				fp = *fpp;
1444				fhold(fp);
1445				FILEDESC_UNLOCK(fdp);
1446				lf.l_whence = SEEK_SET;
1447				lf.l_start = 0;
1448				lf.l_len = 0;
1449				lf.l_type = F_UNLCK;
1450				vp = fp->f_vnode;
1451				(void) VOP_ADVLOCK(vp,
1452						   (caddr_t)td->td_proc->
1453						   p_leader,
1454						   F_UNLCK,
1455						   &lf,
1456						   F_POSIX);
1457				FILEDESC_LOCK(fdp);
1458				fdrop(fp, td);
1459				fpp = fdp->fd_ofiles + i;
1460			}
1461		}
1462	retry:
1463		if (fdtol->fdl_refcount == 1) {
1464			if (fdp->fd_holdleaderscount > 0 &&
1465			    (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
1466				/*
1467				 * close() or do_dup() has cleared a reference
1468				 * in a shared file descriptor table.
1469				 */
1470				fdp->fd_holdleaderswakeup = 1;
1471				msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx,
1472				       PLOCK, "fdlhold", 0);
1473				goto retry;
1474			}
1475			if (fdtol->fdl_holdcount > 0) {
1476				/*
1477				 * Ensure that fdtol->fdl_leader
1478				 * remains valid in closef().
1479				 */
1480				fdtol->fdl_wakeup = 1;
1481				msleep(fdtol, &fdp->fd_mtx,
1482				       PLOCK, "fdlhold", 0);
1483				goto retry;
1484			}
1485		}
1486		fdtol->fdl_refcount--;
1487		if (fdtol->fdl_refcount == 0 &&
1488		    fdtol->fdl_holdcount == 0) {
1489			fdtol->fdl_next->fdl_prev = fdtol->fdl_prev;
1490			fdtol->fdl_prev->fdl_next = fdtol->fdl_next;
1491		} else
1492			fdtol = NULL;
1493		td->td_proc->p_fdtol = NULL;
1494		FILEDESC_UNLOCK(fdp);
1495		if (fdtol != NULL)
1496			FREE(fdtol, M_FILEDESC_TO_LEADER);
1497	}
1498	FILEDESC_LOCK(fdp);
1499	if (--fdp->fd_refcnt > 0) {
1500		FILEDESC_UNLOCK(fdp);
1501		return;
1502	}
1503
1504	/*
1505	 * We are the last reference to the structure, so we can
1506	 * safely assume it will not change out from under us.
1507	 */
1508	FILEDESC_UNLOCK(fdp);
1509	fpp = fdp->fd_ofiles;
1510	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1511		if (*fpp)
1512			(void) closef(*fpp, td);
1513	}
1514
1515	/* XXX This should happen earlier. */
1516	mtx_lock(&fdesc_mtx);
1517	td->td_proc->p_fd = NULL;
1518	mtx_unlock(&fdesc_mtx);
1519
1520	if (fdp->fd_nfiles > NDFILE)
1521		FREE(fdp->fd_ofiles, M_FILEDESC);
1522	if (fdp->fd_cdir)
1523		vrele(fdp->fd_cdir);
1524	if (fdp->fd_rdir)
1525		vrele(fdp->fd_rdir);
1526	if (fdp->fd_jdir)
1527		vrele(fdp->fd_jdir);
1528	if (fdp->fd_knlist)
1529		FREE(fdp->fd_knlist, M_KQUEUE);
1530	if (fdp->fd_knhash)
1531		FREE(fdp->fd_knhash, M_KQUEUE);
1532	mtx_destroy(&fdp->fd_mtx);
1533	FREE(fdp, M_FILEDESC);
1534}
1535
1536/*
1537 * For setugid programs, we don't want people to use that setugidness
1538 * to generate error messages which write to a file which would
1539 * otherwise be off-limits to the process.  We check for filesystems where
1540 * the vnode can change out from under us after execve (like [lin]procfs).
1541 *
1542 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1543 * sufficient.  We also don't check for setugidness since we know we are.
1544 */
1545static int
1546is_unsafe(struct file *fp)
1547{
1548	if (fp->f_type == DTYPE_VNODE) {
1549		struct vnode *vp = fp->f_vnode;
1550
1551		if ((vp->v_vflag & VV_PROCDEP) != 0)
1552			return (1);
1553	}
1554	return (0);
1555}
1556
1557/*
1558 * Make this setugid thing safe, if at all possible.
1559 */
1560void
1561setugidsafety(td)
1562	struct thread *td;
1563{
1564	struct filedesc *fdp;
1565	int i;
1566
1567	/* Certain daemons might not have file descriptors. */
1568	fdp = td->td_proc->p_fd;
1569	if (fdp == NULL)
1570		return;
1571
1572	/*
1573	 * Note: fdp->fd_ofiles may be reallocated out from under us while
1574	 * we are blocked in a close.  Be careful!
1575	 */
1576	FILEDESC_LOCK(fdp);
1577	for (i = 0; i <= fdp->fd_lastfile; i++) {
1578		if (i > 2)
1579			break;
1580		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1581			struct file *fp;
1582
1583#if 0
1584			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1585				(void) munmapfd(td, i);
1586#endif
1587			if (i < fdp->fd_knlistsize) {
1588				FILEDESC_UNLOCK(fdp);
1589				knote_fdclose(td, i);
1590				FILEDESC_LOCK(fdp);
1591			}
1592			/*
1593			 * NULL-out descriptor prior to close to avoid
1594			 * a race while close blocks.
1595			 */
1596			fp = fdp->fd_ofiles[i];
1597			fdp->fd_ofiles[i] = NULL;
1598			fdp->fd_ofileflags[i] = 0;
1599			if (i < fdp->fd_freefile)
1600				fdp->fd_freefile = i;
1601			FILEDESC_UNLOCK(fdp);
1602			(void) closef(fp, td);
1603			FILEDESC_LOCK(fdp);
1604		}
1605	}
1606	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1607		fdp->fd_lastfile--;
1608	FILEDESC_UNLOCK(fdp);
1609}
1610
1611/*
1612 * Close any files on exec?
1613 */
1614void
1615fdcloseexec(td)
1616	struct thread *td;
1617{
1618	struct filedesc *fdp;
1619	int i;
1620
1621	/* Certain daemons might not have file descriptors. */
1622	fdp = td->td_proc->p_fd;
1623	if (fdp == NULL)
1624		return;
1625
1626	FILEDESC_LOCK(fdp);
1627
1628	/*
1629	 * We cannot cache fd_ofiles or fd_ofileflags since operations
1630	 * may block and rip them out from under us.
1631	 */
1632	for (i = 0; i <= fdp->fd_lastfile; i++) {
1633		if (fdp->fd_ofiles[i] != NULL &&
1634		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1635			struct file *fp;
1636
1637#if 0
1638			if (fdp->fd_ofileflags[i] & UF_MAPPED)
1639				(void) munmapfd(td, i);
1640#endif
1641			if (i < fdp->fd_knlistsize) {
1642				FILEDESC_UNLOCK(fdp);
1643				knote_fdclose(td, i);
1644				FILEDESC_LOCK(fdp);
1645			}
1646			/*
1647			 * NULL-out descriptor prior to close to avoid
1648			 * a race while close blocks.
1649			 */
1650			fp = fdp->fd_ofiles[i];
1651			fdp->fd_ofiles[i] = NULL;
1652			fdp->fd_ofileflags[i] = 0;
1653			if (i < fdp->fd_freefile)
1654				fdp->fd_freefile = i;
1655			FILEDESC_UNLOCK(fdp);
1656			(void) closef(fp, td);
1657			FILEDESC_LOCK(fdp);
1658		}
1659	}
1660	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1661		fdp->fd_lastfile--;
1662	FILEDESC_UNLOCK(fdp);
1663}
1664
1665/*
1666 * It is unsafe for set[ug]id processes to be started with file
1667 * descriptors 0..2 closed, as these descriptors are given implicit
1668 * significance in the Standard C library.  fdcheckstd() will create a
1669 * descriptor referencing /dev/null for each of stdin, stdout, and
1670 * stderr that is not already open.
1671 */
1672int
1673fdcheckstd(td)
1674	struct thread *td;
1675{
1676	struct nameidata nd;
1677	struct filedesc *fdp;
1678	struct file *fp;
1679	register_t retval;
1680	int fd, i, error, flags, devnull;
1681
1682	fdp = td->td_proc->p_fd;
1683	if (fdp == NULL)
1684		return (0);
1685	devnull = -1;
1686	error = 0;
1687	for (i = 0; i < 3; i++) {
1688		if (fdp->fd_ofiles[i] != NULL)
1689			continue;
1690		if (devnull < 0) {
1691			error = falloc(td, &fp, &fd);
1692			if (error != 0)
1693				break;
1694			KASSERT(fd == i, ("oof, we didn't get our fd"));
1695			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1696			    td);
1697			flags = FREAD | FWRITE;
1698			error = vn_open(&nd, &flags, 0, -1);
1699			if (error != 0) {
1700				FILEDESC_LOCK(fdp);
1701				fdp->fd_ofiles[fd] = NULL;
1702				FILEDESC_UNLOCK(fdp);
1703				fdrop(fp, td);
1704				break;
1705			}
1706			NDFREE(&nd, NDF_ONLY_PNBUF);
1707			fp->f_vnode = nd.ni_vp;
1708			fp->f_data = nd.ni_vp;
1709			fp->f_flag = flags;
1710			fp->f_ops = &vnops;
1711			fp->f_type = DTYPE_VNODE;
1712			VOP_UNLOCK(nd.ni_vp, 0, td);
1713			devnull = fd;
1714		} else {
1715			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
1716			if (error != 0)
1717				break;
1718		}
1719	}
1720	return (error);
1721}
1722
1723/*
1724 * Internal form of close.
1725 * Decrement reference count on file structure.
1726 * Note: td may be NULL when closing a file
1727 * that was being passed in a message.
1728 */
1729int
1730closef(fp, td)
1731	struct file *fp;
1732	struct thread *td;
1733{
1734	struct vnode *vp;
1735	struct flock lf;
1736	struct filedesc_to_leader *fdtol;
1737	struct filedesc *fdp;
1738
1739	if (fp == NULL)
1740		return (0);
1741	/*
1742	 * POSIX record locking dictates that any close releases ALL
1743	 * locks owned by this process.  This is handled by setting
1744	 * a flag in the unlock to free ONLY locks obeying POSIX
1745	 * semantics, and not to free BSD-style file locks.
1746	 * If the descriptor was in a message, POSIX-style locks
1747	 * aren't passed with the descriptor.
1748	 */
1749	if (td != NULL &&
1750	    fp->f_type == DTYPE_VNODE) {
1751		if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) {
1752			lf.l_whence = SEEK_SET;
1753			lf.l_start = 0;
1754			lf.l_len = 0;
1755			lf.l_type = F_UNLCK;
1756			vp = fp->f_vnode;
1757			(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1758					   F_UNLCK, &lf, F_POSIX);
1759		}
1760		fdtol = td->td_proc->p_fdtol;
1761		if (fdtol != NULL) {
1762			/*
1763			 * Handle special case where file descriptor table
1764			 * is shared between multiple process leaders.
1765			 */
1766			fdp = td->td_proc->p_fd;
1767			FILEDESC_LOCK(fdp);
1768			for (fdtol = fdtol->fdl_next;
1769			     fdtol != td->td_proc->p_fdtol;
1770			     fdtol = fdtol->fdl_next) {
1771				if ((fdtol->fdl_leader->p_flag &
1772				     P_ADVLOCK) == 0)
1773					continue;
1774				fdtol->fdl_holdcount++;
1775				FILEDESC_UNLOCK(fdp);
1776				lf.l_whence = SEEK_SET;
1777				lf.l_start = 0;
1778				lf.l_len = 0;
1779				lf.l_type = F_UNLCK;
1780				vp = fp->f_vnode;
1781				(void) VOP_ADVLOCK(vp,
1782						   (caddr_t)fdtol->fdl_leader,
1783						   F_UNLCK, &lf, F_POSIX);
1784				FILEDESC_LOCK(fdp);
1785				fdtol->fdl_holdcount--;
1786				if (fdtol->fdl_holdcount == 0 &&
1787				    fdtol->fdl_wakeup != 0) {
1788					fdtol->fdl_wakeup = 0;
1789					wakeup(fdtol);
1790				}
1791			}
1792			FILEDESC_UNLOCK(fdp);
1793		}
1794	}
1795	return (fdrop(fp, td));
1796}
1797
1798/*
1799 * Drop a reference on the struct file passed in; may call closef() if the
1800 * reference hits zero.
1801 */
1802int
1803fdrop(fp, td)
1804	struct file *fp;
1805	struct thread *td;
1806{
1807
1808	FILE_LOCK(fp);
1809	return (fdrop_locked(fp, td));
1810}
1811
1812/*
1813 * Extract the file pointer associated with the specified descriptor for
1814 * the current user process.
1815 *
1816 * If the descriptor doesn't exist, EBADF is returned.
1817 *
1818 * If the descriptor exists but doesn't match 'flags' then
1819 * return EBADF for read attempts and EINVAL for write attempts.
1820 *
1821 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1822 * It should be dropped with fdrop().
1823 * If it is not set, then the refcount will not be bumped; however, the
1824 * thread's filedesc struct will be returned locked (for fgetsock).
1825 *
1826 * If an error occurred, the non-zero error is returned and *fpp is set to NULL.
1827 * Otherwise *fpp is set and zero is returned.
1828 */
1829static __inline int
1830_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1831{
1832	struct filedesc *fdp;
1833	struct file *fp;
1834
1835	*fpp = NULL;
1836	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1837		return (EBADF);
1838	FILEDESC_LOCK(fdp);
1839	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1840		FILEDESC_UNLOCK(fdp);
1841		return (EBADF);
1842	}
1843
1844	/*
1845	 * Note: FREAD failures return EBADF to maintain backwards
1846	 * compatibility with what routines returned before.
1847	 *
1848	 * Only one flag, or 0, may be specified.
1849	 */
1850	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1851		FILEDESC_UNLOCK(fdp);
1852		return (EBADF);
1853	}
1854	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1855		FILEDESC_UNLOCK(fdp);
1856		return (EINVAL);
1857	}
1858	if (hold) {
1859		fhold(fp);
1860		FILEDESC_UNLOCK(fdp);
1861	}
1862	*fpp = fp;
1863	return (0);
1864}
1865
1866int
1867fget(struct thread *td, int fd, struct file **fpp)
1868{
1869
1870	return(_fget(td, fd, fpp, 0, 1));
1871}
1872
1873int
1874fget_read(struct thread *td, int fd, struct file **fpp)
1875{
1876
1877	return(_fget(td, fd, fpp, FREAD, 1));
1878}
1879
1880int
1881fget_write(struct thread *td, int fd, struct file **fpp)
1882{
1883
1884	return(_fget(td, fd, fpp, FWRITE, 1));
1885}
1886
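/*
 * Typical usage pattern for the fget() family (sketch):
 *
 *	if ((error = fget(td, fd, &fp)) != 0)
 *		return (error);
 *	... use fp ...
 *	fdrop(fp, td);
 */
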
1887/*
1888 * Like fget() but loads the underlying vnode, or returns an error if
1889 * the descriptor does not represent a vnode.  Note that pipes use vnodes
1890 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1891 * error).  The returned vnode will be vref()d.
1892 */
1893static __inline int
1894_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1895{
1896	struct file *fp;
1897	int error;
1898
1899	*vpp = NULL;
1900	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1901		return (error);
1902	if (fp->f_vnode == NULL) {
1903		error = EINVAL;
1904	} else {
1905		*vpp = fp->f_vnode;
1906		vref(*vpp);
1907	}
1908	FILEDESC_UNLOCK(td->td_proc->p_fd);
1909	return (error);
1910}
1911
1912int
1913fgetvp(struct thread *td, int fd, struct vnode **vpp)
1914{
1915
1916	return (_fgetvp(td, fd, vpp, 0));
1917}
1918
1919int
1920fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1921{
1922
1923	return (_fgetvp(td, fd, vpp, FREAD));
1924}
1925
1926int
1927fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1928{
1929
1930	return (_fgetvp(td, fd, vpp, FWRITE));
1931}
1932
1933/*
1934 * Like fget() but loads the underlying socket, or returns an error if
1935 * the descriptor does not represent a socket.
1936 *
1937 * We bump the ref count on the returned socket.  XXX Also obtain the SX
1938 * lock in the future.
1939 */
1940int
1941fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1942{
1943	struct file *fp;
1944	int error;
1945
1946	*spp = NULL;
1947	if (fflagp != NULL)
1948		*fflagp = 0;
1949	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1950		return (error);
1951	if (fp->f_type != DTYPE_SOCKET) {
1952		error = ENOTSOCK;
1953	} else {
1954		*spp = fp->f_data;
1955		if (fflagp)
1956			*fflagp = fp->f_flag;
1957		soref(*spp);
1958	}
1959	FILEDESC_UNLOCK(td->td_proc->p_fd);
1960	return (error);
1961}
1962
1963/*
1964 * Drop the reference count on the socket and XXX release the SX lock in
1965 * the future.  The last reference closes the socket.
1966 */
1967void
1968fputsock(struct socket *so)
1969{
1970
1971	sorele(so);
1972}
1973
1974/*
1975 * Drop a reference on the struct file passed in; may call closef() if the
1976 * reference hits zero.
1977 * Expects struct file locked, and will unlock it.
1978 */
1979int
1980fdrop_locked(fp, td)
1981	struct file *fp;
1982	struct thread *td;
1983{
1984	struct flock lf;
1985	struct vnode *vp;
1986	int error;
1987
1988	FILE_LOCK_ASSERT(fp, MA_OWNED);
1989
1990	if (--fp->f_count > 0) {
1991		FILE_UNLOCK(fp);
1992		return (0);
1993	}
1994	/* We have the last ref so we can proceed without the file lock. */
1995	FILE_UNLOCK(fp);
1996	mtx_lock(&Giant);
1997	if (fp->f_count < 0)
1998		panic("fdrop: count < 0");
1999	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
2000		lf.l_whence = SEEK_SET;
2001		lf.l_start = 0;
2002		lf.l_len = 0;
2003		lf.l_type = F_UNLCK;
2004		vp = fp->f_vnode;
2005		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
2006	}
2007	if (fp->f_ops != &badfileops)
2008		error = fo_close(fp, td);
2009	else
2010		error = 0;
2011	ffree(fp);
2012	mtx_unlock(&Giant);
2013	return (error);
2014}
2015
2016/*
2017 * Apply an advisory lock on a file descriptor.
2018 *
2019 * Just attempt to get a record lock of the requested type on
2020 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
2021 */
2022#ifndef _SYS_SYSPROTO_H_
2023struct flock_args {
2024	int	fd;
2025	int	how;
2026};
2027#endif
2028/*
2029 * MPSAFE
2030 */
2031/* ARGSUSED */
2032int
2033flock(td, uap)
2034	struct thread *td;
2035	struct flock_args *uap;
2036{
2037	struct file *fp;
2038	struct vnode *vp;
2039	struct flock lf;
2040	int error;
2041
2042	if ((error = fget(td, uap->fd, &fp)) != 0)
2043		return (error);
2044	if (fp->f_type != DTYPE_VNODE) {
2045		fdrop(fp, td);
2046		return (EOPNOTSUPP);
2047	}
2048
2049	mtx_lock(&Giant);
2050	vp = fp->f_vnode;
2051	lf.l_whence = SEEK_SET;
2052	lf.l_start = 0;
2053	lf.l_len = 0;
2054	if (uap->how & LOCK_UN) {
2055		lf.l_type = F_UNLCK;
2056		FILE_LOCK(fp);
2057		fp->f_flag &= ~FHASLOCK;
2058		FILE_UNLOCK(fp);
2059		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
2060		goto done2;
2061	}
2062	if (uap->how & LOCK_EX)
2063		lf.l_type = F_WRLCK;
2064	else if (uap->how & LOCK_SH)
2065		lf.l_type = F_RDLCK;
2066	else {
2067		error = EBADF;
2068		goto done2;
2069	}
2070	FILE_LOCK(fp);
2071	fp->f_flag |= FHASLOCK;
2072	FILE_UNLOCK(fp);
2073	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
2074	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
2075done2:
2076	fdrop(fp, td);
2077	mtx_unlock(&Giant);
2078	return (error);
2079}
2080
2081/*
2082 * File Descriptor pseudo-device driver (/dev/fd/).
2083 *
2084 * Opening minor device N dup()s the file (if any) connected to file
2085 * descriptor N belonging to the calling process.  Note that this driver
2086 * consists of only the ``open()'' routine, because all subsequent
2087 * references to this file will be direct to the other driver.
2088 */
2089/* ARGSUSED */
2090static int
2091fdopen(dev, mode, type, td)
2092	dev_t dev;
2093	int mode, type;
2094	struct thread *td;
2095{
2096
2097	/*
2098	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
2099	 * file descriptor being sought for duplication. The error
2100	 * return ensures that the vnode for this device will be released
2101	 * by vn_open. Open will detect this special error and take the
2102	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
2103	 * will simply report the error.
2104	 */
2105	td->td_dupfd = dev2unit(dev);
2106	return (ENODEV);
2107}
2108
2109/*
2110 * Duplicate the specified descriptor to a free descriptor.
2111 */
2112int
2113dupfdopen(td, fdp, indx, dfd, mode, error)
2114	struct thread *td;
2115	struct filedesc *fdp;
2116	int indx, dfd;
2117	int mode;
2118	int error;
2119{
2120	struct file *wfp;
2121	struct file *fp;
2122
2123	/*
2124	 * If the to-be-dup'd fd number is greater than the allowed number
2125	 * of file descriptors, or the fd to be dup'd has already been
2126	 * closed, then reject.
2127	 */
2128	FILEDESC_LOCK(fdp);
2129	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
2130	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
2131		FILEDESC_UNLOCK(fdp);
2132		return (EBADF);
2133	}
2134
2135	/*
2136	 * There are two cases of interest here.
2137	 *
2138	 * For ENODEV simply dup (dfd) to file descriptor
2139	 * (indx) and return.
2140	 *
2141	 * For ENXIO steal away the file structure from (dfd) and
2142	 * store it in (indx).  (dfd) is effectively closed by
2143	 * this operation.
2144	 *
2145	 * Any other error code is just returned.
2146	 */
2147	switch (error) {
2148	case ENODEV:
2149		/*
2150		 * Check that the mode the file is being opened for is a
2151		 * subset of the mode of the existing descriptor.
2152		 */
2153		FILE_LOCK(wfp);
2154		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
2155			FILE_UNLOCK(wfp);
2156			FILEDESC_UNLOCK(fdp);
2157			return (EACCES);
2158		}
2159		fp = fdp->fd_ofiles[indx];
2160#if 0
2161		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2162			(void) munmapfd(td, indx);
2163#endif
2164		fdp->fd_ofiles[indx] = wfp;
2165		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2166		fhold_locked(wfp);
2167		FILE_UNLOCK(wfp);
2168		if (indx > fdp->fd_lastfile)
2169			fdp->fd_lastfile = indx;
2170		if (fp != NULL)
2171			FILE_LOCK(fp);
2172		FILEDESC_UNLOCK(fdp);
2173		/*
2174		 * We now own the reference to fp that the ofiles[] array
2175		 * used to own.  Release it.
2176		 */
2177		if (fp != NULL)
2178			fdrop_locked(fp, td);
2179		return (0);
2180
2181	case ENXIO:
2182		/*
2183		 * Steal away the file pointer from dfd and stuff it into indx.
2184		 */
2185		fp = fdp->fd_ofiles[indx];
2186#if 0
2187		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2188			(void) munmapfd(td, indx);
2189#endif
2190		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2191		fdp->fd_ofiles[dfd] = NULL;
2192		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2193		fdp->fd_ofileflags[dfd] = 0;
2194
2195		/*
2196		 * Complete the cleanup of the filedesc structure by
2197		 * recomputing the various hints.
2198		 */
2199		if (indx > fdp->fd_lastfile) {
2200			fdp->fd_lastfile = indx;
2201		} else {
2202			while (fdp->fd_lastfile > 0 &&
2203			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2204				fdp->fd_lastfile--;
2205			}
2206			if (dfd < fdp->fd_freefile)
2207				fdp->fd_freefile = dfd;
2208		}
2209		if (fp != NULL)
2210			FILE_LOCK(fp);
2211		FILEDESC_UNLOCK(fdp);
2212
2213		/*
2214		 * We now own the reference to fp that the ofiles[] array
2215		 * used to own.  Release it.
2216		 */
2217		if (fp != NULL)
2218			fdrop_locked(fp, td);
2219		return (0);
2220
2221	default:
2222		FILEDESC_UNLOCK(fdp);
2223		return (error);
2224	}
2225	/* NOTREACHED */
2226}
2227
2228
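/*
 * Allocate a filedesc_to_leader structure and link it into the circular
 * list hanging off "old", or make it a singleton ring when there is no
 * previous entry.
 */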
2229struct filedesc_to_leader *
2230filedesc_to_leader_alloc(struct filedesc_to_leader *old,
2231			 struct filedesc *fdp,
2232			 struct proc *leader)
2233{
2234	struct filedesc_to_leader *fdtol;
2235
2236	MALLOC(fdtol, struct filedesc_to_leader *,
2237	       sizeof(struct filedesc_to_leader),
2238	       M_FILEDESC_TO_LEADER,
2239	       M_WAITOK);
2240	fdtol->fdl_refcount = 1;
2241	fdtol->fdl_holdcount = 0;
2242	fdtol->fdl_wakeup = 0;
2243	fdtol->fdl_leader = leader;
2244	if (old != NULL) {
2245		FILEDESC_LOCK(fdp);
2246		fdtol->fdl_next = old->fdl_next;
2247		fdtol->fdl_prev = old;
2248		old->fdl_next = fdtol;
2249		fdtol->fdl_next->fdl_prev = fdtol;
2250		FILEDESC_UNLOCK(fdp);
2251	} else {
2252		fdtol->fdl_next = fdtol;
2253		fdtol->fdl_prev = fdtol;
2254	}
2255	return (fdtol);
2256}
2257
2258/*
2259 * Get file structures.
2260 */
2261static int
2262sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2263{
2264	struct xfile xf;
2265	struct filedesc *fdp;
2266	struct file *fp;
2267	struct proc *p;
2268	int error, n;
2269
2270	/*
2271	 * Note: because the number of file descriptors is calculated
2272	 * in different ways for sizing vs returning the data,
2273	 * there is information leakage from the first loop.  However,
2274	 * it is of a similar order of magnitude to the leakage from
2275	 * global system statistics such as kern.openfiles.
2276	 */
2277	sysctl_wire_old_buffer(req, 0);
2278	if (req->oldptr == NULL) {
2279		n = 16;		/* A slight overestimate. */
2280		sx_slock(&filelist_lock);
2281		LIST_FOREACH(fp, &filehead, f_list) {
2282			/*
2283			 * We should grab the lock, but this is an
2284			 * estimate, so does it really matter?
2285			 */
2286			/* mtx_lock(fp->f_mtxp); */
2287			n += fp->f_count;
2288			/* mtx_unlock(fp->f_mtxp); */
2289		}
2290		sx_sunlock(&filelist_lock);
2291		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
2292	}
2293	error = 0;
2294	bzero(&xf, sizeof(xf));
2295	xf.xf_size = sizeof(xf);
2296	sx_slock(&allproc_lock);
2297	LIST_FOREACH(p, &allproc, p_list) {
2298		PROC_LOCK(p);
2299		if (p_cansee(req->td, p) != 0) {
2300			PROC_UNLOCK(p);
2301			continue;
2302		}
2303		xf.xf_pid = p->p_pid;
2304		xf.xf_uid = p->p_ucred->cr_uid;
2305		PROC_UNLOCK(p);
2306		mtx_lock(&fdesc_mtx);
2307		if ((fdp = p->p_fd) == NULL) {
2308			mtx_unlock(&fdesc_mtx);
2309			continue;
2310		}
2311		FILEDESC_LOCK(fdp);
2312		for (n = 0; n < fdp->fd_nfiles; ++n) {
2313			if ((fp = fdp->fd_ofiles[n]) == NULL)
2314				continue;
2315			xf.xf_fd = n;
2316			xf.xf_file = fp;
2317			xf.xf_data = fp->f_data;
2318			xf.xf_type = fp->f_type;
2319			xf.xf_count = fp->f_count;
2320			xf.xf_msgcount = fp->f_msgcount;
2321			xf.xf_offset = fp->f_offset;
2322			xf.xf_flag = fp->f_flag;
2323			error = SYSCTL_OUT(req, &xf, sizeof(xf));
2324			if (error)
2325				break;
2326		}
2327		FILEDESC_UNLOCK(fdp);
2328		mtx_unlock(&fdesc_mtx);
2329		if (error)
2330			break;
2331	}
2332	sx_sunlock(&allproc_lock);
2333	return (error);
2334}
2335
2336SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2337    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");
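
/*
 * Illustrative sketch (userland, not part of this kernel file): reading the
 * kern.file table exported above.  dump_open_files() is an invented example
 * name, and the sketch assumes struct xfile is declared in <sys/user.h>;
 * adjust the header if it lives elsewhere on your branch.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/user.h>
#include <stdio.h>
#include <stdlib.h>

static void
dump_open_files(void)
{
	struct xfile *xfp;
	size_t len, i;

	/* First call sizes the buffer (the overestimated path above). */
	if (sysctlbyname("kern.file", NULL, &len, NULL, 0) == -1)
		return;
	xfp = malloc(len);
	if (xfp == NULL)
		return;
	if (sysctlbyname("kern.file", xfp, &len, NULL, 0) == 0) {
		for (i = 0; i < len / sizeof(*xfp); i++)
			printf("pid %ld fd %d\n",
			    (long)xfp[i].xf_pid, xfp[i].xf_fd);
	}
	free(xfp);
}
#endif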
2338
2339SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2340    &maxfilesperproc, 0, "Maximum files allowed open per process");
2341
2342SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2343    &maxfiles, 0, "Maximum number of files");
2344
2345SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2346    &nfiles, 0, "System-wide number of open files");
2347
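/*
 * Create the /dev/fd/0, /dev/fd/1 and /dev/fd/2 nodes at driver
 * initialization time, along with the /dev/stdin, /dev/stdout and
 * /dev/stderr aliases.
 */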
2348static void
2349fildesc_drvinit(void *unused)
2350{
2351	dev_t dev;
2352
2353	dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
2354	make_dev_alias(dev, "stdin");
2355	dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
2356	make_dev_alias(dev, "stdout");
2357	dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
2358	make_dev_alias(dev, "stderr");
2359}
2360
2361static fo_rdwr_t	badfo_readwrite;
2362static fo_ioctl_t	badfo_ioctl;
2363static fo_poll_t	badfo_poll;
2364static fo_kqfilter_t	badfo_kqfilter;
2365static fo_stat_t	badfo_stat;
2366static fo_close_t	badfo_close;
2367
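/*
 * Placeholder fileops for file structures that have no usable backing
 * object (e.g. a freshly allocated file before its real fileops are
 * installed); read, write, ioctl, stat and close fail with EBADF, while
 * poll reports no ready events.
 */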
2368struct fileops badfileops = {
2369	.fo_read = badfo_readwrite,
2370	.fo_write = badfo_readwrite,
2371	.fo_ioctl = badfo_ioctl,
2372	.fo_poll = badfo_poll,
2373	.fo_kqfilter = badfo_kqfilter,
2374	.fo_stat = badfo_stat,
2375	.fo_close = badfo_close,
2376};
2377
2378static int
2379badfo_readwrite(fp, uio, active_cred, flags, td)
2380	struct file *fp;
2381	struct uio *uio;
2382	struct ucred *active_cred;
2383	int flags;
2384	struct thread *td;
2385{
2386
2387	return (EBADF);
2388}
2389
2390static int
2391badfo_ioctl(fp, com, data, active_cred, td)
2392	struct file *fp;
2393	u_long com;
2394	void *data;
2395	struct ucred *active_cred;
2396	struct thread *td;
2397{
2398
2399	return (EBADF);
2400}
2401
2402static int
2403badfo_poll(fp, events, active_cred, td)
2404	struct file *fp;
2405	int events;
2406	struct ucred *active_cred;
2407	struct thread *td;
2408{
2409
2410	return (0);
2411}
2412
2413static int
2414badfo_kqfilter(fp, kn)
2415	struct file *fp;
2416	struct knote *kn;
2417{
2418
2419	return (0);
2420}
2421
2422static int
2423badfo_stat(fp, sb, active_cred, td)
2424	struct file *fp;
2425	struct stat *sb;
2426	struct ucred *active_cred;
2427	struct thread *td;
2428{
2429
2430	return (EBADF);
2431}
2432
2433static int
2434badfo_close(fp, td)
2435	struct file *fp;
2436	struct thread *td;
2437{
2438
2439	return (EBADF);
2440}
2441
2442SYSINIT(fildescdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR,
2443    fildesc_drvinit, NULL)
2444
2445static void filelistinit(void *);
2446SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2447
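/*
 * Set up the UMA zone backing struct file allocations, the global file
 * list lock and the sigio lock.
 */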
2448/* ARGSUSED*/
2449static void
2450filelistinit(dummy)
2451	void *dummy;
2452{
2453
2454	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2455	    NULL, NULL, UMA_ALIGN_PTR, 0);
2456	sx_init(&filelist_lock, "filelist lock");
2457	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2458}
2459