Deleted Added
full compact
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
39 * $Id: kern_descrip.c,v 1.54 1998/07/15 06:10:16 bde Exp $
39 * $Id: kern_descrip.c,v 1.55 1998/07/29 17:38:13 bde Exp $
40 */
41
42#include "opt_compat.h"
43#include "opt_devfs.h"
44
45#include <sys/param.h>
46#include <sys/systm.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/kernel.h>
51#include <sys/sysctl.h>
52#include <sys/vnode.h>
53#include <sys/proc.h>
54#include <sys/file.h>
55#include <sys/socketvar.h>
56#include <sys/stat.h>
57#include <sys/filio.h>
58#include <sys/ttycom.h>
59#include <sys/fcntl.h>
60#include <sys/malloc.h>
61#include <sys/unistd.h>
62#include <sys/resourcevar.h>
63#include <sys/pipe.h>
64
65#include <vm/vm.h>
66#include <vm/vm_extern.h>
67
68#ifdef DEVFS
69#include <sys/devfsext.h>
70#endif /*DEVFS*/
71
72static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
73MALLOC_DEFINE(M_FILE, "file", "Open file structure");
74static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
75
76
77static d_open_t fdopen;
78#define NUMFDESC 64
79
80#define CDEV_MAJOR 22
81static struct cdevsw fildesc_cdevsw =
82 { fdopen, noclose, noread, nowrite,
83 noioc, nostop, nullreset, nodevtotty,
84 seltrue, nommap, nostrat };
85
86static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval));
87/*
88 * Descriptor management.
89 */
90struct filelist filehead; /* head of list of open files */
91int nfiles; /* actual number of open files */
92extern int cmask;
93
94/*
95 * System calls on descriptors.
96 */
97#ifndef _SYS_SYSPROTO_H_
98struct getdtablesize_args {
99 int dummy;
100};
101#endif
102/* ARGSUSED */
103int
104getdtablesize(p, uap)
105 struct proc *p;
106 struct getdtablesize_args *uap;
107{
108
109 p->p_retval[0] =
110 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
111 return (0);
112}
113
114/*
115 * Duplicate a file descriptor to a particular value.
116 */
117#ifndef _SYS_SYSPROTO_H_
118struct dup2_args {
119 u_int from;
120 u_int to;
121};
122#endif
123/* ARGSUSED */
124int
125dup2(p, uap)
126 struct proc *p;
127 struct dup2_args *uap;
128{
129 register struct filedesc *fdp = p->p_fd;
130 register u_int old = uap->from, new = uap->to;
131 int i, error;
132
133 if (old >= fdp->fd_nfiles ||
134 fdp->fd_ofiles[old] == NULL ||
135 new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
136 new >= maxfilesperproc)
137 return (EBADF);
138 if (old == new) {
139 p->p_retval[0] = new;
140 return (0);
141 }
142 if (new >= fdp->fd_nfiles) {
143 if ((error = fdalloc(p, new, &i)))
144 return (error);
145 if (new != i)
146 panic("dup2: fdalloc");
147 } else if (fdp->fd_ofiles[new]) {
148 if (fdp->fd_ofileflags[new] & UF_MAPPED)
149 (void) munmapfd(p, new);
150 /*
151 * dup2() must succeed even if the close has an error.
152 */
153 (void) closef(fdp->fd_ofiles[new], p);
154 }
155 return (finishdup(fdp, (int)old, (int)new, p->p_retval));
156}
157
158/*
159 * Duplicate a file descriptor.
160 */
161#ifndef _SYS_SYSPROTO_H_
162struct dup_args {
163 u_int fd;
164};
165#endif
166/* ARGSUSED */
167int
168dup(p, uap)
169 struct proc *p;
170 struct dup_args *uap;
171{
172 register struct filedesc *fdp;
173 u_int old;
174 int new, error;
175
176 old = uap->fd;
177
178#if 0
179 /*
180 * XXX Compatibility
181 */
182 if (old &~ 077) { uap->fd &= 077; return (dup2(p, uap, p->p_retval)); }
183#endif
184
185 fdp = p->p_fd;
186 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL)
187 return (EBADF);
188 if ((error = fdalloc(p, 0, &new)))
189 return (error);
190 return (finishdup(fdp, (int)old, new, p->p_retval));
191}
192
193/*
194 * The file control system call.
195 */
196#ifndef _SYS_SYSPROTO_H_
197struct fcntl_args {
198 int fd;
199 int cmd;
200 long arg;
201};
202#endif
203/* ARGSUSED */
204int
205fcntl(p, uap)
206 struct proc *p;
207 register struct fcntl_args *uap;
208{
209 register struct filedesc *fdp = p->p_fd;
210 register struct file *fp;
211 register char *pop;
212 struct vnode *vp;
213 int i, tmp, error, flg = F_POSIX;
214 struct flock fl;
215 u_int newmin;
216
217 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
218 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
219 return (EBADF);
220 pop = &fdp->fd_ofileflags[uap->fd];
221 switch (uap->cmd) {
222
223 case F_DUPFD:
224 newmin = uap->arg;
225 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
226 newmin >= maxfilesperproc)
227 return (EINVAL);
228 if ((error = fdalloc(p, newmin, &i)))
229 return (error);
230 return (finishdup(fdp, uap->fd, i, p->p_retval));
231
232 case F_GETFD:
233 p->p_retval[0] = *pop & 1;
234 return (0);
235
236 case F_SETFD:
237 *pop = (*pop &~ 1) | (uap->arg & 1);
238 return (0);
239
240 case F_GETFL:
241 p->p_retval[0] = OFLAGS(fp->f_flag);
242 return (0);
243
244 case F_SETFL:
245 fp->f_flag &= ~FCNTLFLAGS;
246 fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
247 tmp = fp->f_flag & FNONBLOCK;
248 error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
249 if (error)
250 return (error);
251 tmp = fp->f_flag & FASYNC;
252 error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
253 if (!error)
254 return (0);
255 fp->f_flag &= ~FNONBLOCK;
256 tmp = 0;
257 (void) (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
258 return (error);
259
260 case F_GETOWN:
260 if (fp->f_type == DTYPE_SOCKET) {
261 p->p_retval[0] = ((struct socket *)fp->f_data)->so_pgid;
262 return (0);
263 }
261 error = (*fp->f_ops->fo_ioctl)
265 (fp, TIOCGPGRP, (caddr_t)p->p_retval, p);
266 p->p_retval[0] = - p->p_retval[0];
262 (fp, FIOGETOWN, (caddr_t)p->p_retval, p);
263 return (error);
264
265 case F_SETOWN:
270 if (fp->f_type == DTYPE_SOCKET) {
271 ((struct socket *)fp->f_data)->so_pgid = uap->arg;
272 return (0);
273 }
274 if (uap->arg <= 0) {
275 uap->arg = -uap->arg;
276 } else {
277 struct proc *p1 = pfind(uap->arg);
278 if (p1 == 0)
279 return (ESRCH);
280 uap->arg = p1->p_pgrp->pg_id;
281 }
266 return ((*fp->f_ops->fo_ioctl)
283 (fp, TIOCSPGRP, (caddr_t)&uap->arg, p));
267 (fp, FIOSETOWN, (caddr_t)&uap->arg, p));
268
269 case F_SETLKW:
270 flg |= F_WAIT;
271 /* Fall into F_SETLK */
272
273 case F_SETLK:
274 if (fp->f_type != DTYPE_VNODE)
275 return (EBADF);
276 vp = (struct vnode *)fp->f_data;
277 /* Copy in the lock structure */
278 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
279 sizeof(fl));
280 if (error)
281 return (error);
282 if (fl.l_whence == SEEK_CUR)
283 fl.l_start += fp->f_offset;
284 switch (fl.l_type) {
285
286 case F_RDLCK:
287 if ((fp->f_flag & FREAD) == 0)
288 return (EBADF);
289 p->p_flag |= P_ADVLOCK;
290 return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
291
292 case F_WRLCK:
293 if ((fp->f_flag & FWRITE) == 0)
294 return (EBADF);
295 p->p_flag |= P_ADVLOCK;
296 return (VOP_ADVLOCK(vp, (caddr_t)p, F_SETLK, &fl, flg));
297
298 case F_UNLCK:
299 return (VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &fl,
300 F_POSIX));
301
302 default:
303 return (EINVAL);
304 }
305
306 case F_GETLK:
307 if (fp->f_type != DTYPE_VNODE)
308 return (EBADF);
309 vp = (struct vnode *)fp->f_data;
310 /* Copy in the lock structure */
311 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
312 sizeof(fl));
313 if (error)
314 return (error);
315 if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
316 fl.l_type != F_UNLCK)
317 return (EINVAL);
318 if (fl.l_whence == SEEK_CUR)
319 fl.l_start += fp->f_offset;
320 if ((error = VOP_ADVLOCK(vp,(caddr_t)p,F_GETLK,&fl,F_POSIX)))
321 return (error);
322 return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg,
323 sizeof(fl)));
324
325 default:
326 return (EINVAL);
327 }
328 /* NOTREACHED */
329}
330
331/*
332 * Common code for dup, dup2, and fcntl(F_DUPFD).
333 */
334static int
335finishdup(fdp, old, new, retval)
336 register struct filedesc *fdp;
337 register int old, new;
338 register_t *retval;
339{
340 register struct file *fp;
341
342 fp = fdp->fd_ofiles[old];
343 fdp->fd_ofiles[new] = fp;
344 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
345 fp->f_count++;
346 if (new > fdp->fd_lastfile)
347 fdp->fd_lastfile = new;
348 *retval = new;
349 return (0);
350}
351
352/*
353 * If sigio is on the list associated with a process or process group,
354 * disable signalling from the device, remove sigio from the list and
355 * free sigio.
356 */
357void
358funsetown(sigio)
359 struct sigio *sigio;
360{
361 int s;
362
363 if (sigio == NULL)
364 return;
365 s = splhigh();
366 *(sigio->sio_myref) = NULL;
367 splx(s);
368 if (sigio->sio_pgid < 0) {
369 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
370 sigio, sio_pgsigio);
371 } else /* if ((*sigiop)->sio_pgid > 0) */ {
372 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
373 sigio, sio_pgsigio);
374 }
375 crfree(sigio->sio_ucred);
376 FREE(sigio, M_SIGIO);
377}
378
379/* Free a list of sigio structures. */
380void
381funsetownlst(sigiolst)
382 struct sigiolst *sigiolst;
383{
384 struct sigio *sigio;
385
386 while ((sigio = sigiolst->slh_first) != NULL)
387 funsetown(sigio);
388}
389
390/*
391 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
392 *
393 * After permission checking, add a sigio structure to the sigio list for
394 * the process or process group.
395 */
396int
397fsetown(pgid, sigiop)
398 pid_t pgid;
399 struct sigio **sigiop;
400{
401 struct proc *proc = NULL;
402 struct pgrp *pgrp = NULL;
403 struct sigio *sigio;
404 int s;
405
406 if (pgid == 0) {
407 funsetown(*sigiop);
408 return (0);
409 } else if (pgid > 0) {
410 proc = pfind(pgid);
411 if (proc == NULL)
412 return (ESRCH);
413 /*
414 * Policy - Don't allow a process to FSETOWN a process
415 * in another session.
416 *
417 * Remove this test to allow maximum flexibility or
418 * restrict FSETOWN to the current process or process
419 * group for maximum safety.
420 */
421 else if (proc->p_session != curproc->p_session)
422 return (EPERM);
423 } else /* if (pgid < 0) */ {
424 pgrp = pgfind(-pgid);
425 if (pgrp == NULL)
426 return (ESRCH);
427 /*
428 * Policy - Don't allow a process to FSETOWN a process
429 * in another session.
430 *
431 * Remove this test to allow maximum flexibility or
432 * restrict FSETOWN to the current process or process
433 * group for maximum safety.
434 */
435 else if (pgrp->pg_session != curproc->p_session)
436 return (EPERM);
437 }
438 funsetown(*sigiop);
439 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO,
440 M_WAITOK);
441 if (pgid > 0) {
442 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
443 sigio->sio_proc = proc;
444 } else {
445 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
446 sigio->sio_pgrp = pgrp;
447 }
448 sigio->sio_pgid = pgid;
449 crhold(curproc->p_ucred);
450 sigio->sio_ucred = curproc->p_ucred;
451 /* It would be convenient if p_ruid was in ucred. */
452 sigio->sio_ruid = curproc->p_cred->p_ruid;
453 sigio->sio_myref = sigiop;
454 s = splhigh();
455 *sigiop = sigio;
456 splx(s);
457 return (0);
458}
459
460/*
461 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
462 */
463pid_t
464fgetown(sigio)
465 struct sigio *sigio;
466{
467 return (sigio != NULL ? sigio->sio_pgid : 0);
468}
469
470/*
471 * Close a file descriptor.
472 */
473#ifndef _SYS_SYSPROTO_H_
474struct close_args {
475 int fd;
476};
477#endif
478/* ARGSUSED */
479int
480close(p, uap)
481 struct proc *p;
482 struct close_args *uap;
483{
484 register struct filedesc *fdp = p->p_fd;
485 register struct file *fp;
486 register int fd = uap->fd;
487 register u_char *pf;
488
489 if ((unsigned)fd >= fdp->fd_nfiles ||
490 (fp = fdp->fd_ofiles[fd]) == NULL)
491 return (EBADF);
492 pf = (u_char *)&fdp->fd_ofileflags[fd];
493 if (*pf & UF_MAPPED)
494 (void) munmapfd(p, fd);
495 fdp->fd_ofiles[fd] = NULL;
496 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
497 fdp->fd_lastfile--;
498 if (fd < fdp->fd_freefile)
499 fdp->fd_freefile = fd;
500 *pf = 0;
501 return (closef(fp, p));
502}
503
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * ofstat: old-format fstat.  Obtains a struct stat for descriptor
 * uap->fd from the type-specific stat routine, converts it to a
 * struct ostat with cvtstat() and copies it out to uap->sb.
 *
 * Returns EBADF if the descriptor is not open, otherwise the error
 * from the stat routine or from copyout().  Panics on an unknown
 * file type.
 *
 * The conversion is performed only when the stat call succeeded;
 * previously cvtstat() was also run on an unfilled struct stat after
 * a failure.
 */
/* ARGSUSED */
int
ofstat(p, uap)
	struct proc *p;
	register struct ofstat_args *uap;
{
	register struct filedesc *fdp = p->p_fd;
	register struct file *fp;
	struct stat ub;
	struct ostat oub;
	int error;

	/* The unsigned compare also rejects negative descriptors. */
	if ((unsigned)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);
	switch (fp->f_type) {

	case DTYPE_FIFO:
	case DTYPE_VNODE:
		error = vn_stat((struct vnode *)fp->f_data, &ub, p);
		break;

	case DTYPE_SOCKET:
		error = soo_stat((struct socket *)fp->f_data, &ub);
		break;

	case DTYPE_PIPE:
		error = pipe_stat((struct pipe *)fp->f_data, &ub);
		break;

	default:
		panic("ofstat");
		/*NOTREACHED*/
	}
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
	}
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
554
555/*
556 * Return status information about a file descriptor.
557 */
558#ifndef _SYS_SYSPROTO_H_
559struct fstat_args {
560 int fd;
561 struct stat *sb;
562};
563#endif
564/* ARGSUSED */
565int
566fstat(p, uap)
567 struct proc *p;
568 register struct fstat_args *uap;
569{
570 register struct filedesc *fdp = p->p_fd;
571 register struct file *fp;
572 struct stat ub;
573 int error;
574
575 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
576 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
577 return (EBADF);
578 switch (fp->f_type) {
579
580 case DTYPE_FIFO:
581 case DTYPE_VNODE:
582 error = vn_stat((struct vnode *)fp->f_data, &ub, p);
583 break;
584
585 case DTYPE_SOCKET:
586 error = soo_stat((struct socket *)fp->f_data, &ub);
587 break;
588
589 case DTYPE_PIPE:
590 error = pipe_stat((struct pipe *)fp->f_data, &ub);
591 break;
592
593 default:
594 panic("fstat");
595 /*NOTREACHED*/
596 }
597 if (error == 0)
598 error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
599 return (error);
600}
601
602/*
603 * Return status information about a file descriptor.
604 */
605#ifndef _SYS_SYSPROTO_H_
606struct nfstat_args {
607 int fd;
608 struct nstat *sb;
609};
610#endif
611/* ARGSUSED */
612int
613nfstat(p, uap)
614 struct proc *p;
615 register struct nfstat_args *uap;
616{
617 register struct filedesc *fdp = p->p_fd;
618 register struct file *fp;
619 struct stat ub;
620 struct nstat nub;
621 int error;
622
623 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
624 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
625 return (EBADF);
626 switch (fp->f_type) {
627
628 case DTYPE_FIFO:
629 case DTYPE_VNODE:
630 error = vn_stat((struct vnode *)fp->f_data, &ub, p);
631 break;
632
633 case DTYPE_SOCKET:
634 error = soo_stat((struct socket *)fp->f_data, &ub);
635 break;
636
637 case DTYPE_PIPE:
638 error = pipe_stat((struct pipe *)fp->f_data, &ub);
639 break;
640
641 default:
642 panic("fstat");
643 /*NOTREACHED*/
644 }
645 if (error == 0) {
646 cvtnstat(&ub, &nub);
647 error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
648 }
649 return (error);
650}
651
652/*
653 * Return pathconf information about a file descriptor.
654 */
655#ifndef _SYS_SYSPROTO_H_
656struct fpathconf_args {
657 int fd;
658 int name;
659};
660#endif
661/* ARGSUSED */
662int
663fpathconf(p, uap)
664 struct proc *p;
665 register struct fpathconf_args *uap;
666{
667 struct filedesc *fdp = p->p_fd;
668 struct file *fp;
669 struct vnode *vp;
670
671 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
672 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
673 return (EBADF);
674 switch (fp->f_type) {
675
676 case DTYPE_PIPE:
677 case DTYPE_SOCKET:
678 if (uap->name != _PC_PIPE_BUF)
679 return (EINVAL);
680 p->p_retval[0] = PIPE_BUF;
681 return (0);
682
683 case DTYPE_FIFO:
684 case DTYPE_VNODE:
685 vp = (struct vnode *)fp->f_data;
686 return (VOP_PATHCONF(vp, uap->name, p->p_retval));
687
688 default:
689 panic("fpathconf");
690 }
691 /*NOTREACHED*/
692}
693
694/*
695 * Allocate a file descriptor for the process.
696 */
697static int fdexpand;
698SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
699
700int
701fdalloc(p, want, result)
702 struct proc *p;
703 int want;
704 int *result;
705{
706 register struct filedesc *fdp = p->p_fd;
707 register int i;
708 int lim, last, nfiles;
709 struct file **newofile;
710 char *newofileflags;
711
712 /*
713 * Search for a free descriptor starting at the higher
714 * of want or fd_freefile. If that fails, consider
715 * expanding the ofile array.
716 */
717 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
718 for (;;) {
719 last = min(fdp->fd_nfiles, lim);
720 if ((i = want) < fdp->fd_freefile)
721 i = fdp->fd_freefile;
722 for (; i < last; i++) {
723 if (fdp->fd_ofiles[i] == NULL) {
724 fdp->fd_ofileflags[i] = 0;
725 if (i > fdp->fd_lastfile)
726 fdp->fd_lastfile = i;
727 if (want <= fdp->fd_freefile)
728 fdp->fd_freefile = i;
729 *result = i;
730 return (0);
731 }
732 }
733
734 /*
735 * No space in current array. Expand?
736 */
737 if (fdp->fd_nfiles >= lim)
738 return (EMFILE);
739 if (fdp->fd_nfiles < NDEXTENT)
740 nfiles = NDEXTENT;
741 else
742 nfiles = 2 * fdp->fd_nfiles;
743 MALLOC(newofile, struct file **, nfiles * OFILESIZE,
744 M_FILEDESC, M_WAITOK);
745 newofileflags = (char *) &newofile[nfiles];
746 /*
747 * Copy the existing ofile and ofileflags arrays
748 * and zero the new portion of each array.
749 */
750 bcopy(fdp->fd_ofiles, newofile,
751 (i = sizeof(struct file *) * fdp->fd_nfiles));
752 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
753 bcopy(fdp->fd_ofileflags, newofileflags,
754 (i = sizeof(char) * fdp->fd_nfiles));
755 bzero(newofileflags + i, nfiles * sizeof(char) - i);
756 if (fdp->fd_nfiles > NDFILE)
757 FREE(fdp->fd_ofiles, M_FILEDESC);
758 fdp->fd_ofiles = newofile;
759 fdp->fd_ofileflags = newofileflags;
760 fdp->fd_nfiles = nfiles;
761 fdexpand++;
762 }
763 return (0);
764}
765
766/*
767 * Check to see whether n user file descriptors
768 * are available to the process p.
769 */
770int
771fdavail(p, n)
772 struct proc *p;
773 register int n;
774{
775 register struct filedesc *fdp = p->p_fd;
776 register struct file **fpp;
777 register int i, lim, last;
778
779 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
780 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
781 return (1);
782
783 last = min(fdp->fd_nfiles, lim);
784 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
785 for (i = last - fdp->fd_freefile; --i >= 0; fpp++)
786 if (*fpp == NULL && --n <= 0)
787 return (1);
788 return (0);
789}
790
791/*
792 * Create a new open file structure and allocate
793 * a file decriptor for the process that refers to it.
794 */
795int
796falloc(p, resultfp, resultfd)
797 register struct proc *p;
798 struct file **resultfp;
799 int *resultfd;
800{
801 register struct file *fp, *fq;
802 int error, i;
803
804 if ((error = fdalloc(p, 0, &i)))
805 return (error);
806 if (nfiles >= maxfiles) {
807 tablefull("file");
808 return (ENFILE);
809 }
810 /*
811 * Allocate a new file descriptor.
812 * If the process has file descriptor zero open, add to the list
813 * of open files at that point, otherwise put it at the front of
814 * the list of open files.
815 */
816 nfiles++;
817 MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK);
818 bzero(fp, sizeof(struct file));
819 if ((fq = p->p_fd->fd_ofiles[0])) {
820 LIST_INSERT_AFTER(fq, fp, f_list);
821 } else {
822 LIST_INSERT_HEAD(&filehead, fp, f_list);
823 }
824 p->p_fd->fd_ofiles[i] = fp;
825 fp->f_count = 1;
826 fp->f_cred = p->p_ucred;
827 fp->f_seqcount = 1;
828 crhold(fp->f_cred);
829 if (resultfp)
830 *resultfp = fp;
831 if (resultfd)
832 *resultfd = i;
833 return (0);
834}
835
836/*
837 * Free a file descriptor.
838 */
839void
840ffree(fp)
841 register struct file *fp;
842{
843 LIST_REMOVE(fp, f_list);
844 crfree(fp->f_cred);
845#ifdef DIAGNOSTIC
846 fp->f_count = 0;
847#endif
848 nfiles--;
849 FREE(fp, M_FILE);
850}
851
852/*
853 * Build a new filedesc structure.
854 */
855struct filedesc *
856fdinit(p)
857 struct proc *p;
858{
859 register struct filedesc0 *newfdp;
860 register struct filedesc *fdp = p->p_fd;
861
862 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
863 M_FILEDESC, M_WAITOK);
864 bzero(newfdp, sizeof(struct filedesc0));
865 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
866 VREF(newfdp->fd_fd.fd_cdir);
867 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
868 VREF(newfdp->fd_fd.fd_rdir);
869
870 /* Create the file descriptor table. */
871 newfdp->fd_fd.fd_refcnt = 1;
872 newfdp->fd_fd.fd_cmask = cmask;
873 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
874 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
875 newfdp->fd_fd.fd_nfiles = NDFILE;
876
877 newfdp->fd_fd.fd_freefile = 0;
878 newfdp->fd_fd.fd_lastfile = 0;
879
880 return (&newfdp->fd_fd);
881}
882
883/*
884 * Share a filedesc structure.
885 */
886struct filedesc *
887fdshare(p)
888 struct proc *p;
889{
890 p->p_fd->fd_refcnt++;
891 return (p->p_fd);
892}
893
894/*
895 * Copy a filedesc structure.
896 */
897struct filedesc *
898fdcopy(p)
899 struct proc *p;
900{
901 register struct filedesc *newfdp, *fdp = p->p_fd;
902 register struct file **fpp;
903 register int i;
904
905/*
906 * Certain daemons might not have file descriptors
907 */
908 if (fdp == NULL)
909 return NULL;
910
911 MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
912 M_FILEDESC, M_WAITOK);
913 bcopy(fdp, newfdp, sizeof(struct filedesc));
914 VREF(newfdp->fd_cdir);
915 VREF(newfdp->fd_rdir);
916 newfdp->fd_refcnt = 1;
917
918 /*
919 * If the number of open files fits in the internal arrays
920 * of the open file structure, use them, otherwise allocate
921 * additional memory for the number of descriptors currently
922 * in use.
923 */
924 if (newfdp->fd_lastfile < NDFILE) {
925 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
926 newfdp->fd_ofileflags =
927 ((struct filedesc0 *) newfdp)->fd_dfileflags;
928 i = NDFILE;
929 } else {
930 /*
931 * Compute the smallest multiple of NDEXTENT needed
932 * for the file descriptors currently in use,
933 * allowing the table to shrink.
934 */
935 i = newfdp->fd_nfiles;
936 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
937 i /= 2;
938 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
939 M_FILEDESC, M_WAITOK);
940 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
941 }
942 newfdp->fd_nfiles = i;
943 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
944 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
945 fpp = newfdp->fd_ofiles;
946 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
947 if (*fpp != NULL)
948 (*fpp)->f_count++;
949 return (newfdp);
950}
951
952/*
953 * Release a filedesc structure.
954 */
955void
956fdfree(p)
957 struct proc *p;
958{
959 register struct filedesc *fdp = p->p_fd;
960 struct file **fpp;
961 register int i;
962
963/*
964 * Certain daemons might not have file descriptors
965 */
966 if (fdp == NULL)
967 return;
968
969 if (--fdp->fd_refcnt > 0)
970 return;
971 fpp = fdp->fd_ofiles;
972 for (i = fdp->fd_lastfile; i-- >= 0; fpp++)
973 if (*fpp)
974 (void) closef(*fpp, p);
975 if (fdp->fd_nfiles > NDFILE)
976 FREE(fdp->fd_ofiles, M_FILEDESC);
977 vrele(fdp->fd_cdir);
978 vrele(fdp->fd_rdir);
979 FREE(fdp, M_FILEDESC);
980}
981
982/*
983 * Close any files on exec?
984 */
985void
986fdcloseexec(p)
987 struct proc *p;
988{
989 struct filedesc *fdp = p->p_fd;
990 struct file **fpp;
991 char *fdfp;
992 register int i;
993
994/*
995 * Certain daemons might not have file descriptors
996 */
997 if (fdp == NULL)
998 return;
999
1000 fpp = fdp->fd_ofiles;
1001 fdfp = fdp->fd_ofileflags;
1002 for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++)
1003 if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) {
1004 if (*fdfp & UF_MAPPED)
1005 (void) munmapfd(p, i);
1006 (void) closef(*fpp, p);
1007 *fpp = NULL;
1008 *fdfp = 0;
1009 if (i < fdp->fd_freefile)
1010 fdp->fd_freefile = i;
1011 }
1012 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1013 fdp->fd_lastfile--;
1014}
1015
1016/*
1017 * Internal form of close.
1018 * Decrement reference count on file structure.
1019 * Note: p may be NULL when closing a file
1020 * that was being passed in a message.
1021 */
1022int
1023closef(fp, p)
1024 register struct file *fp;
1025 register struct proc *p;
1026{
1027 struct vnode *vp;
1028 struct flock lf;
1029 int error;
1030
1031 if (fp == NULL)
1032 return (0);
1033 /*
1034 * POSIX record locking dictates that any close releases ALL
1035 * locks owned by this process. This is handled by setting
1036 * a flag in the unlock to free ONLY locks obeying POSIX
1037 * semantics, and not to free BSD-style file locks.
1038 * If the descriptor was in a message, POSIX-style locks
1039 * aren't passed with the descriptor.
1040 */
1041 if (p && (p->p_flag & P_ADVLOCK) && fp->f_type == DTYPE_VNODE) {
1042 lf.l_whence = SEEK_SET;
1043 lf.l_start = 0;
1044 lf.l_len = 0;
1045 lf.l_type = F_UNLCK;
1046 vp = (struct vnode *)fp->f_data;
1047 (void) VOP_ADVLOCK(vp, (caddr_t)p, F_UNLCK, &lf, F_POSIX);
1048 }
1049 if (--fp->f_count > 0)
1050 return (0);
1051 if (fp->f_count < 0)
1052 panic("closef: count < 0");
1053 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1054 lf.l_whence = SEEK_SET;
1055 lf.l_start = 0;
1056 lf.l_len = 0;
1057 lf.l_type = F_UNLCK;
1058 vp = (struct vnode *)fp->f_data;
1059 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1060 }
1061 if (fp->f_ops)
1062 error = (*fp->f_ops->fo_close)(fp, p);
1063 else
1064 error = 0;
1065 ffree(fp);
1066 return (error);
1067}
1068
1069/*
1070 * Apply an advisory lock on a file descriptor.
1071 *
1072 * Just attempt to get a record lock of the requested type on
1073 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1074 */
1075#ifndef _SYS_SYSPROTO_H_
1076struct flock_args {
1077 int fd;
1078 int how;
1079};
1080#endif
1081/* ARGSUSED */
1082int
1083flock(p, uap)
1084 struct proc *p;
1085 register struct flock_args *uap;
1086{
1087 register struct filedesc *fdp = p->p_fd;
1088 register struct file *fp;
1089 struct vnode *vp;
1090 struct flock lf;
1091
1092 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
1093 (fp = fdp->fd_ofiles[uap->fd]) == NULL)
1094 return (EBADF);
1095 if (fp->f_type != DTYPE_VNODE)
1096 return (EOPNOTSUPP);
1097 vp = (struct vnode *)fp->f_data;
1098 lf.l_whence = SEEK_SET;
1099 lf.l_start = 0;
1100 lf.l_len = 0;
1101 if (uap->how & LOCK_UN) {
1102 lf.l_type = F_UNLCK;
1103 fp->f_flag &= ~FHASLOCK;
1104 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK));
1105 }
1106 if (uap->how & LOCK_EX)
1107 lf.l_type = F_WRLCK;
1108 else if (uap->how & LOCK_SH)
1109 lf.l_type = F_RDLCK;
1110 else
1111 return (EBADF);
1112 fp->f_flag |= FHASLOCK;
1113 if (uap->how & LOCK_NB)
1114 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK));
1115 return (VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT));
1116}
1117
1118/*
1119 * File Descriptor pseudo-device driver (/dev/fd/).
1120 *
1121 * Opening minor device N dup()s the file (if any) connected to file
1122 * descriptor N belonging to the calling process. Note that this driver
1123 * consists of only the ``open()'' routine, because all subsequent
1124 * references to this file will be direct to the other driver.
1125 */
1126/* ARGSUSED */
1127static int
1128fdopen(dev, mode, type, p)
1129 dev_t dev;
1130 int mode, type;
1131 struct proc *p;
1132{
1133
1134 /*
1135 * XXX Kludge: set curproc->p_dupfd to contain the value of the
1136 * the file descriptor being sought for duplication. The error
1137 * return ensures that the vnode for this device will be released
1138 * by vn_open. Open will detect this special error and take the
1139 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1140 * will simply report the error.
1141 */
1142 p->p_dupfd = minor(dev);
1143 return (ENODEV);
1144}
1145
1146/*
1147 * Duplicate the specified descriptor to a free descriptor.
1148 */
1149int
1150dupfdopen(fdp, indx, dfd, mode, error)
1151 register struct filedesc *fdp;
1152 register int indx, dfd;
1153 int mode;
1154 int error;
1155{
1156 register struct file *wfp;
1157 struct file *fp;
1158
1159 /*
1160 * If the to-be-dup'd fd number is greater than the allowed number
1161 * of file descriptors, or the fd to be dup'd has already been
1162 * closed, reject. Note, check for new == old is necessary as
1163 * falloc could allocate an already closed to-be-dup'd descriptor
1164 * as the new descriptor.
1165 */
1166 fp = fdp->fd_ofiles[indx];
1167 if ((u_int)dfd >= fdp->fd_nfiles ||
1168 (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp)
1169 return (EBADF);
1170
1171 /*
1172 * There are two cases of interest here.
1173 *
1174 * For ENODEV simply dup (dfd) to file descriptor
1175 * (indx) and return.
1176 *
1177 * For ENXIO steal away the file structure from (dfd) and
1178 * store it in (indx). (dfd) is effectively closed by
1179 * this operation.
1180 *
1181 * Any other error code is just returned.
1182 */
1183 switch (error) {
1184 case ENODEV:
1185 /*
1186 * Check that the mode the file is being opened for is a
1187 * subset of the mode of the existing descriptor.
1188 */
1189 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag)
1190 return (EACCES);
1191 fdp->fd_ofiles[indx] = wfp;
1192 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1193 wfp->f_count++;
1194 if (indx > fdp->fd_lastfile)
1195 fdp->fd_lastfile = indx;
1196 return (0);
1197
1198 case ENXIO:
1199 /*
1200 * Steal away the file pointer from dfd, and stuff it into indx.
1201 */
1202 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1203 fdp->fd_ofiles[dfd] = NULL;
1204 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1205 fdp->fd_ofileflags[dfd] = 0;
1206 /*
1207 * Complete the clean up of the filedesc structure by
1208 * recomputing the various hints.
1209 */
1210 if (indx > fdp->fd_lastfile)
1211 fdp->fd_lastfile = indx;
1212 else
1213 while (fdp->fd_lastfile > 0 &&
1214 fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1215 fdp->fd_lastfile--;
1216 if (dfd < fdp->fd_freefile)
1217 fdp->fd_freefile = dfd;
1218 return (0);
1219
1220 default:
1221 return (error);
1222 }
1223 /* NOTREACHED */
1224}
1225
1226/*
1227 * Get file structures.
1228 */
1229static int
1230sysctl_kern_file SYSCTL_HANDLER_ARGS
1231{
1232 int error;
1233 struct file *fp;
1234
1235 if (!req->oldptr) {
1236 /*
1237 * overestimate by 10 files
1238 */
1239 return (SYSCTL_OUT(req, 0, sizeof(filehead) +
1240 (nfiles + 10) * sizeof(struct file)));
1241 }
1242
1243 error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1244 if (error)
1245 return (error);
1246
1247 /*
1248 * followed by an array of file structures
1249 */
1250 for (fp = filehead.lh_first; fp != NULL; fp = fp->f_list.le_next) {
1251 error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1252 if (error)
1253 return (error);
1254 }
1255 return (0);
1256}
1257
1258SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
1259 0, 0, sysctl_kern_file, "S,file", "");
1260
1261SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc,
1262 CTLFLAG_RW, &maxfilesperproc, 0, "");
1263
1264SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW, &maxfiles, 0, "");
1265
1266static fildesc_devsw_installed = 0;
1267#ifdef DEVFS
1268static void *devfs_token_stdin;
1269static void *devfs_token_stdout;
1270static void *devfs_token_stderr;
1271static void *devfs_token_fildesc[NUMFDESC];
1272#endif
1273
1274static void fildesc_drvinit(void *unused)
1275{
1276 dev_t dev;
1277#ifdef DEVFS
1278 int fd;
1279#endif
1280
1281 if( ! fildesc_devsw_installed ) {
1282 dev = makedev(CDEV_MAJOR,0);
1283 cdevsw_add(&dev,&fildesc_cdevsw,NULL);
1284 fildesc_devsw_installed = 1;
1285#ifdef DEVFS
1286 for (fd = 0; fd < NUMFDESC; fd++)
1287 devfs_token_fildesc[fd] =
1288 devfs_add_devswf(&fildesc_cdevsw, fd, DV_CHR,
1289 UID_BIN, GID_BIN, 0666,
1290 "fd/%d", fd);
1291 devfs_token_stdin =
1292 devfs_add_devswf(&fildesc_cdevsw, 0, DV_CHR,
1293 UID_ROOT, GID_WHEEL, 0666,
1294 "stdin");
1295 devfs_token_stdout =
1296 devfs_add_devswf(&fildesc_cdevsw, 1, DV_CHR,
1297 UID_ROOT, GID_WHEEL, 0666,
1298 "stdout");
1299 devfs_token_stderr =
1300 devfs_add_devswf(&fildesc_cdevsw, 2, DV_CHR,
1301 UID_ROOT, GID_WHEEL, 0666,
1302 "stderr");
1303#endif
1304 }
1305}
1306
1307SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
1308 fildesc_drvinit,NULL)
1309
1310