kern_descrip.c (92641) -> kern_descrip.c (92654)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 92641 2002-03-19 04:30:04Z alfred $
39 * $FreeBSD: head/sys/kern/kern_descrip.c 92654 2002-03-19 09:11:49Z jeff $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/lock.h>
47#include <sys/malloc.h>
48#include <sys/mutex.h>
49#include <sys/sysproto.h>
50#include <sys/conf.h>
51#include <sys/filedesc.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54#include <sys/vnode.h>
55#include <sys/proc.h>
56#include <sys/file.h>
57#include <sys/stat.h>
58#include <sys/filio.h>
59#include <sys/fcntl.h>
60#include <sys/unistd.h>
61#include <sys/resourcevar.h>
62#include <sys/event.h>
63#include <sys/sx.h>
64#include <sys/socketvar.h>
65
66#include <machine/limits.h>
67
68#include <vm/vm.h>
69#include <vm/vm_extern.h>
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/lock.h>
47#include <sys/malloc.h>
48#include <sys/mutex.h>
49#include <sys/sysproto.h>
50#include <sys/conf.h>
51#include <sys/filedesc.h>
52#include <sys/kernel.h>
53#include <sys/sysctl.h>
54#include <sys/vnode.h>
55#include <sys/proc.h>
56#include <sys/file.h>
57#include <sys/stat.h>
58#include <sys/filio.h>
59#include <sys/fcntl.h>
60#include <sys/unistd.h>
61#include <sys/resourcevar.h>
62#include <sys/event.h>
63#include <sys/sx.h>
64#include <sys/socketvar.h>
65
66#include <machine/limits.h>
67
68#include <vm/vm.h>
69#include <vm/vm_extern.h>
70#include <vm/vm_zone.h>
70
71static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
71
72static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
72MALLOC_DEFINE(M_FILE, "file", "Open file structure");
73static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74
73static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
74
75uma_zone_t file_zone;
76
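The file_zone declared above is only referenced in this hunk; the code that creates the zone is not among the lines shown. A minimal sketch of how such a zone is typically set up once at boot, assuming an init routine hooked in via SYSINIT (the function name filelistinit, the zone name "Files", and the SYSINIT ordering below are illustrative assumptions, not taken from this diff):

	/*
	 * Sketch only: create a UMA zone sized for struct file so that
	 * falloc()/ffree() further down can draw from it with
	 * uma_zalloc()/uma_zfree().
	 */
	static void
	filelistinit(void *dummy)
	{

		file_zone = uma_zcreate("Files", sizeof(struct file),
		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	}
	SYSINIT(filelist, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)

Allocations from the zone honor the usual M_WAITOK/M_NOWAIT flags, which is why the falloc() change later in this diff can pass M_WAITOK to uma_zalloc() just as it previously did to MALLOC().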
75static d_open_t fdopen;
76#define NUMFDESC 64
77
78#define CDEV_MAJOR 22
79static struct cdevsw fildesc_cdevsw = {
80 /* open */ fdopen,
81 /* close */ noclose,
82 /* read */ noread,
83 /* write */ nowrite,
84 /* ioctl */ noioctl,
85 /* poll */ nopoll,
86 /* mmap */ nommap,
87 /* strategy */ nostrategy,
88 /* name */ "FD",
89 /* maj */ CDEV_MAJOR,
90 /* dump */ nodump,
91 /* psize */ nopsize,
92 /* flags */ 0,
93};
94
95static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td));
96static int badfo_readwrite __P((struct file *fp, struct uio *uio,
97 struct ucred *cred, int flags, struct thread *td));
98static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
99 struct thread *td));
100static int badfo_poll __P((struct file *fp, int events,
101 struct ucred *cred, struct thread *td));
102static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
103static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
104static int badfo_close __P((struct file *fp, struct thread *td));
105
106/*
107 * Descriptor management.
108 */
109struct filelist filehead; /* head of list of open files */
110int nfiles; /* actual number of open files */
111extern int cmask;
112struct sx filelist_lock; /* sx to protect filelist */
113
114/*
115 * System calls on descriptors.
116 */
117#ifndef _SYS_SYSPROTO_H_
118struct getdtablesize_args {
119 int dummy;
120};
121#endif
122/*
123 * MPSAFE
124 */
125/* ARGSUSED */
126int
127getdtablesize(td, uap)
128 struct thread *td;
129 struct getdtablesize_args *uap;
130{
131 struct proc *p = td->td_proc;
132
133 mtx_lock(&Giant);
134 td->td_retval[0] =
135 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
136 mtx_unlock(&Giant);
137 return (0);
138}
139
140/*
141 * Duplicate a file descriptor to a particular value.
142 *
143 * note: keep in mind that a potential race condition exists when closing
144 * descriptors from a shared descriptor table (via rfork).
145 */
146#ifndef _SYS_SYSPROTO_H_
147struct dup2_args {
148 u_int from;
149 u_int to;
150};
151#endif
152/*
153 * MPSAFE
154 */
155/* ARGSUSED */
156int
157dup2(td, uap)
158 struct thread *td;
159 struct dup2_args *uap;
160{
161 struct proc *p = td->td_proc;
162 register struct filedesc *fdp = td->td_proc->p_fd;
163 register u_int old = uap->from, new = uap->to;
164 int i, error;
165
166 FILEDESC_LOCK(fdp);
167retry:
168 if (old >= fdp->fd_nfiles ||
169 fdp->fd_ofiles[old] == NULL ||
170 new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
171 new >= maxfilesperproc) {
172 FILEDESC_UNLOCK(fdp);
173 return (EBADF);
174 }
175 if (old == new) {
176 td->td_retval[0] = new;
177 FILEDESC_UNLOCK(fdp);
178 return (0);
179 }
180 if (new >= fdp->fd_nfiles) {
181 if ((error = fdalloc(td, new, &i))) {
182 FILEDESC_UNLOCK(fdp);
183 return (error);
184 }
185 /*
186 * fdalloc() may block, retest everything.
187 */
188 goto retry;
189 }
190 error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
191 return(error);
192}
193
194/*
195 * Duplicate a file descriptor.
196 */
197#ifndef _SYS_SYSPROTO_H_
198struct dup_args {
199 u_int fd;
200};
201#endif
202/*
203 * MPSAFE
204 */
205/* ARGSUSED */
206int
207dup(td, uap)
208 struct thread *td;
209 struct dup_args *uap;
210{
211 register struct filedesc *fdp;
212 u_int old;
213 int new, error;
214
215 old = uap->fd;
216 fdp = td->td_proc->p_fd;
217 FILEDESC_LOCK(fdp);
218 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
219 FILEDESC_UNLOCK(fdp);
220 return (EBADF);
221 }
222 if ((error = fdalloc(td, 0, &new))) {
223 FILEDESC_UNLOCK(fdp);
224 return (error);
225 }
226 error = do_dup(fdp, (int)old, new, td->td_retval, td);
227 return (error);
228}
229
230/*
231 * The file control system call.
232 */
233#ifndef _SYS_SYSPROTO_H_
234struct fcntl_args {
235 int fd;
236 int cmd;
237 long arg;
238};
239#endif
240/*
241 * MPSAFE
242 */
243/* ARGSUSED */
244int
245fcntl(td, uap)
246 struct thread *td;
247 register struct fcntl_args *uap;
248{
249 register struct proc *p = td->td_proc;
250 register struct filedesc *fdp;
251 register struct file *fp;
252 register char *pop;
253 struct vnode *vp;
254 int i, tmp, error = 0, flg = F_POSIX;
255 struct flock fl;
256 u_int newmin;
257 struct proc *leaderp;
258
259 mtx_lock(&Giant);
260
261 fdp = p->p_fd;
262 FILEDESC_LOCK(fdp);
263 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
264 (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
265 FILEDESC_UNLOCK(fdp);
266 error = EBADF;
267 goto done2;
268 }
269 pop = &fdp->fd_ofileflags[uap->fd];
270
271 switch (uap->cmd) {
272 case F_DUPFD:
273 newmin = uap->arg;
274 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
275 newmin >= maxfilesperproc) {
276 FILEDESC_UNLOCK(fdp);
277 error = EINVAL;
278 break;
279 }
280 if ((error = fdalloc(td, newmin, &i))) {
281 FILEDESC_UNLOCK(fdp);
282 break;
283 }
284 error = do_dup(fdp, uap->fd, i, td->td_retval, td);
285 break;
286
287 case F_GETFD:
288 td->td_retval[0] = *pop & 1;
289 FILEDESC_UNLOCK(fdp);
290 break;
291
292 case F_SETFD:
293 *pop = (*pop &~ 1) | (uap->arg & 1);
294 FILEDESC_UNLOCK(fdp);
295 break;
296
297 case F_GETFL:
298 FILE_LOCK(fp);
299 FILEDESC_UNLOCK(fdp);
300 td->td_retval[0] = OFLAGS(fp->f_flag);
301 FILE_UNLOCK(fp);
302 break;
303
304 case F_SETFL:
305 fhold(fp);
306 FILEDESC_UNLOCK(fdp);
307 fp->f_flag &= ~FCNTLFLAGS;
308 fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
309 tmp = fp->f_flag & FNONBLOCK;
310 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
311 if (error) {
312 fdrop(fp, td);
313 break;
314 }
315 tmp = fp->f_flag & FASYNC;
316 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
317 if (!error) {
318 fdrop(fp, td);
319 break;
320 }
321 fp->f_flag &= ~FNONBLOCK;
322 tmp = 0;
323 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
324 fdrop(fp, td);
325 break;
326
327 case F_GETOWN:
328 fhold(fp);
329 FILEDESC_UNLOCK(fdp);
330 error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
331 fdrop(fp, td);
332 break;
333
334 case F_SETOWN:
335 fhold(fp);
336 FILEDESC_UNLOCK(fdp);
337 error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
338 fdrop(fp, td);
339 break;
340
341 case F_SETLKW:
342 flg |= F_WAIT;
343 /* Fall into F_SETLK */
344
345 case F_SETLK:
346 if (fp->f_type != DTYPE_VNODE) {
347 FILEDESC_UNLOCK(fdp);
348 error = EBADF;
349 break;
350 }
351 vp = (struct vnode *)fp->f_data;
352 /*
353 * copyin/lockop may block
354 */
355 fhold(fp);
356 FILEDESC_UNLOCK(fdp);
357 vp = (struct vnode *)fp->f_data;
358
359 /* Copy in the lock structure */
360 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
361 sizeof(fl));
362 if (error) {
363 fdrop(fp, td);
364 break;
365 }
366 if (fl.l_whence == SEEK_CUR) {
367 if (fp->f_offset < 0 ||
368 (fl.l_start > 0 &&
369 fp->f_offset > OFF_MAX - fl.l_start)) {
370 fdrop(fp, td);
371 error = EOVERFLOW;
372 break;
373 }
374 fl.l_start += fp->f_offset;
375 }
376
377 switch (fl.l_type) {
378 case F_RDLCK:
379 if ((fp->f_flag & FREAD) == 0) {
380 error = EBADF;
381 break;
382 }
383 PROC_LOCK(p);
384 p->p_flag |= P_ADVLOCK;
385 leaderp = p->p_leader;
386 PROC_UNLOCK(p);
387 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
388 &fl, flg);
389 break;
390 case F_WRLCK:
391 if ((fp->f_flag & FWRITE) == 0) {
392 error = EBADF;
393 break;
394 }
395 PROC_LOCK(p);
396 p->p_flag |= P_ADVLOCK;
397 leaderp = p->p_leader;
398 PROC_UNLOCK(p);
399 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
400 &fl, flg);
401 break;
402 case F_UNLCK:
403 PROC_LOCK(p);
404 leaderp = p->p_leader;
405 PROC_UNLOCK(p);
406 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
407 &fl, F_POSIX);
408 break;
409 default:
410 error = EINVAL;
411 break;
412 }
413 fdrop(fp, td);
414 break;
415
416 case F_GETLK:
417 if (fp->f_type != DTYPE_VNODE) {
418 FILEDESC_UNLOCK(fdp);
419 error = EBADF;
420 break;
421 }
422 vp = (struct vnode *)fp->f_data;
423 /*
424 * copyin/lockop may block
425 */
426 fhold(fp);
427 FILEDESC_UNLOCK(fdp);
428 vp = (struct vnode *)fp->f_data;
429
430 /* Copy in the lock structure */
431 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
432 sizeof(fl));
433 if (error) {
434 fdrop(fp, td);
435 break;
436 }
437 if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
438 fl.l_type != F_UNLCK) {
439 fdrop(fp, td);
440 error = EINVAL;
441 break;
442 }
443 if (fl.l_whence == SEEK_CUR) {
444 if ((fl.l_start > 0 &&
445 fp->f_offset > OFF_MAX - fl.l_start) ||
446 (fl.l_start < 0 &&
447 fp->f_offset < OFF_MIN - fl.l_start)) {
448 fdrop(fp, td);
449 error = EOVERFLOW;
450 break;
451 }
452 fl.l_start += fp->f_offset;
453 }
454 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
455 &fl, F_POSIX);
456 fdrop(fp, td);
457 if (error == 0) {
458 error = copyout((caddr_t)&fl,
459 (caddr_t)(intptr_t)uap->arg, sizeof(fl));
460 }
461 break;
462 default:
463 FILEDESC_UNLOCK(fdp);
464 error = EINVAL;
465 break;
466 }
467done2:
468 mtx_unlock(&Giant);
469 return (error);
470}
471
472/*
473 * Common code for dup, dup2, and fcntl(F_DUPFD).
474 * filedesc must be locked, but will be unlocked as a side effect.
475 */
476static int
477do_dup(fdp, old, new, retval, td)
478 register struct filedesc *fdp;
479 register int old, new;
480 register_t *retval;
481 struct thread *td;
482{
483 struct file *fp;
484 struct file *delfp;
485
486 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
487
488 /*
489 * Save info on the descriptor being overwritten. We have
490 * to do the unmap now, but we cannot close it without
491 * introducing an ownership race for the slot.
492 */
493 delfp = fdp->fd_ofiles[new];
494#if 0
495 if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
496 (void) munmapfd(td, new);
497#endif
498
499 /*
500 * Duplicate the source descriptor, update lastfile
501 */
502 fp = fdp->fd_ofiles[old];
503 fdp->fd_ofiles[new] = fp;
504 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
505 fhold(fp);
506 if (new > fdp->fd_lastfile)
507 fdp->fd_lastfile = new;
508 *retval = new;
509
510 FILEDESC_UNLOCK(fdp);
511
512 /*
513 * If we dup'd over a valid file, we now own the reference to it
514 * and must dispose of it using closef() semantics (as if a
515 * close() were performed on it).
516 */
517 if (delfp) {
518 mtx_lock(&Giant);
519 (void) closef(delfp, td);
520 mtx_unlock(&Giant);
521 }
522 return (0);
523}
524
525/*
526 * If sigio is on the list associated with a process or process group,
527 * disable signalling from the device, remove sigio from the list and
528 * free sigio.
529 */
530void
531funsetown(sigio)
532 struct sigio *sigio;
533{
534 int s;
535
536 if (sigio == NULL)
537 return;
538
539 s = splhigh();
540 *(sigio->sio_myref) = NULL;
541 splx(s);
542 if ((sigio)->sio_pgid < 0) {
543 struct pgrp *pg = (sigio)->sio_pgrp;
544 PGRP_LOCK(pg);
545 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
546 sigio, sio_pgsigio);
547 PGRP_UNLOCK(pg);
548 } else {
549 struct proc *p = (sigio)->sio_proc;
550 PROC_LOCK(p);
551 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
552 sigio, sio_pgsigio);
553 PROC_UNLOCK(p);
554 }
555 crfree(sigio->sio_ucred);
556 FREE(sigio, M_SIGIO);
557}
558
559/* Free a list of sigio structures. */
560void
561funsetownlst(sigiolst)
562 struct sigiolst *sigiolst;
563{
564 int s;
565 struct sigio *sigio;
566 struct proc *p;
567 struct pgrp *pg;
568
569 sigio = SLIST_FIRST(sigiolst);
570 if (sigio == NULL)
571 return;
572
573 p = NULL;
574 pg = NULL;
575
576 /*
577 * Every entry of the list should belong
578 * to a single proc or pgrp.
579 */
580 if (sigio->sio_pgid < 0) {
581 pg = sigio->sio_pgrp;
582 PGRP_LOCK_ASSERT(pg, MA_OWNED);
583 } else /* if (sigio->sio_pgid > 0) */ {
584 p = sigio->sio_proc;
585 PROC_LOCK_ASSERT(p, MA_OWNED);
586 }
587
588 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
589 s = splhigh();
590 *(sigio->sio_myref) = NULL;
591 splx(s);
592 if (pg != NULL) {
593 KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list"));
594 KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list"));
595 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio);
596 PGRP_UNLOCK(pg);
597 crfree(sigio->sio_ucred);
598 FREE(sigio, M_SIGIO);
599 PGRP_LOCK(pg);
600 } else /* if (p != NULL) */ {
601 KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list"));
602 KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list"));
603 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio);
604 PROC_UNLOCK(p);
605 crfree(sigio->sio_ucred);
606 FREE(sigio, M_SIGIO);
607 PROC_LOCK(p);
608 }
609 }
610}
611
612/*
613 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
614 *
615 * After permission checking, add a sigio structure to the sigio list for
616 * the process or process group.
617 */
618int
619fsetown(pgid, sigiop)
620 pid_t pgid;
621 struct sigio **sigiop;
622{
623 struct proc *proc;
624 struct pgrp *pgrp;
625 struct sigio *sigio;
626 int s, ret;
627
628 if (pgid == 0) {
629 funsetown(*sigiop);
630 return (0);
631 }
632
633 ret = 0;
634
635 /* Allocate and fill in the new sigio out of locks. */
636 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
637 sigio->sio_pgid = pgid;
638 sigio->sio_ucred = crhold(curthread->td_ucred);
639 sigio->sio_myref = sigiop;
640
641 PGRPSESS_SLOCK();
642 if (pgid > 0) {
643 proc = pfind(pgid);
644 if (proc == NULL) {
645 ret = ESRCH;
646 goto fail;
647 }
648
649 /*
650 * Policy - Don't allow a process to FSETOWN a process
651 * in another session.
652 *
653 * Remove this test to allow maximum flexibility or
654 * restrict FSETOWN to the current process or process
655 * group for maximum safety.
656 */
657 PROC_UNLOCK(proc);
658 if (proc->p_session != curthread->td_proc->p_session) {
659 ret = EPERM;
660 goto fail;
661 }
662
663 pgrp = NULL;
664 } else /* if (pgid < 0) */ {
665 pgrp = pgfind(-pgid);
666 if (pgrp == NULL) {
667 ret = ESRCH;
668 goto fail;
669 }
670 PGRP_UNLOCK(pgrp);
671
672 /*
673 * Policy - Don't allow a process to FSETOWN a process
674 * in another session.
675 *
676 * Remove this test to allow maximum flexibility or
677 * restrict FSETOWN to the current process or process
678 * group for maximum safety.
679 */
680 if (pgrp->pg_session != curthread->td_proc->p_session) {
681 ret = EPERM;
682 goto fail;
683 }
684
685 proc = NULL;
686 }
687 funsetown(*sigiop);
688 if (pgid > 0) {
689 PROC_LOCK(proc);
690 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
691 sigio->sio_proc = proc;
692 PROC_UNLOCK(proc);
693 } else {
694 PGRP_LOCK(pgrp);
695 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
696 sigio->sio_pgrp = pgrp;
697 PGRP_UNLOCK(pgrp);
698 }
699 PGRPSESS_SUNLOCK();
700 s = splhigh();
701 *sigiop = sigio;
702 splx(s);
703 return (0);
704
705fail:
706 PGRPSESS_SUNLOCK();
707 crfree(sigio->sio_ucred);
708 FREE(sigio, M_SIGIO);
709 return (ret);
710}
711
712/*
713 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
714 */
715pid_t
716fgetown(sigio)
717 struct sigio *sigio;
718{
719 return (sigio != NULL ? sigio->sio_pgid : 0);
720}
721
722/*
723 * Close a file descriptor.
724 */
725#ifndef _SYS_SYSPROTO_H_
726struct close_args {
727 int fd;
728};
729#endif
730/*
731 * MPSAFE
732 */
733/* ARGSUSED */
734int
735close(td, uap)
736 struct thread *td;
737 struct close_args *uap;
738{
739 register struct filedesc *fdp;
740 register struct file *fp;
741 register int fd = uap->fd;
742 int error = 0;
743
744 mtx_lock(&Giant);
745 fdp = td->td_proc->p_fd;
746 FILEDESC_LOCK(fdp);
747 if ((unsigned)fd >= fdp->fd_nfiles ||
748 (fp = fdp->fd_ofiles[fd]) == NULL) {
749 FILEDESC_UNLOCK(fdp);
750 error = EBADF;
751 goto done2;
752 }
753#if 0
754 if (fdp->fd_ofileflags[fd] & UF_MAPPED)
755 (void) munmapfd(td, fd);
756#endif
757 fdp->fd_ofiles[fd] = NULL;
758 fdp->fd_ofileflags[fd] = 0;
759
760 /*
761 * we now hold the fp reference that used to be owned by the descriptor
762 * array.
763 */
764 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
765 fdp->fd_lastfile--;
766 if (fd < fdp->fd_freefile)
767 fdp->fd_freefile = fd;
768 if (fd < fdp->fd_knlistsize) {
769 FILEDESC_UNLOCK(fdp);
770 knote_fdclose(td, fd);
771 } else
772 FILEDESC_UNLOCK(fdp);
773
774 error = closef(fp, td);
775done2:
776 mtx_unlock(&Giant);
777 return(error);
778}
779
780#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
781/*
782 * Return status information about a file descriptor.
783 */
784#ifndef _SYS_SYSPROTO_H_
785struct ofstat_args {
786 int fd;
787 struct ostat *sb;
788};
789#endif
790/*
791 * MPSAFE
792 */
793/* ARGSUSED */
794int
795ofstat(td, uap)
796 struct thread *td;
797 register struct ofstat_args *uap;
798{
799 struct file *fp;
800 struct stat ub;
801 struct ostat oub;
802 int error;
803
804 mtx_lock(&Giant);
805 if ((error = fget(td, uap->fd, &fp)) != 0)
806 goto done2;
807 error = fo_stat(fp, &ub, td);
808 if (error == 0) {
809 cvtstat(&ub, &oub);
810 error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
811 }
812 fdrop(fp, td);
813done2:
814 mtx_unlock(&Giant);
815 return (error);
816}
817#endif /* COMPAT_43 || COMPAT_SUNOS */
818
819/*
820 * Return status information about a file descriptor.
821 */
822#ifndef _SYS_SYSPROTO_H_
823struct fstat_args {
824 int fd;
825 struct stat *sb;
826};
827#endif
828/*
829 * MPSAFE
830 */
831/* ARGSUSED */
832int
833fstat(td, uap)
834 struct thread *td;
835 struct fstat_args *uap;
836{
837 struct file *fp;
838 struct stat ub;
839 int error;
840
841 mtx_lock(&Giant);
842 if ((error = fget(td, uap->fd, &fp)) != 0)
843 goto done2;
844 error = fo_stat(fp, &ub, td);
845 if (error == 0)
846 error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
847 fdrop(fp, td);
848done2:
849 mtx_unlock(&Giant);
850 return (error);
851}
852
853/*
854 * Return status information about a file descriptor.
855 */
856#ifndef _SYS_SYSPROTO_H_
857struct nfstat_args {
858 int fd;
859 struct nstat *sb;
860};
861#endif
862/*
863 * MPSAFE
864 */
865/* ARGSUSED */
866int
867nfstat(td, uap)
868 struct thread *td;
869 register struct nfstat_args *uap;
870{
871 struct file *fp;
872 struct stat ub;
873 struct nstat nub;
874 int error;
875
876 mtx_lock(&Giant);
877 if ((error = fget(td, uap->fd, &fp)) != 0)
878 goto done2;
879 error = fo_stat(fp, &ub, td);
880 if (error == 0) {
881 cvtnstat(&ub, &nub);
882 error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
883 }
884 fdrop(fp, td);
885done2:
886 mtx_unlock(&Giant);
887 return (error);
888}
889
890/*
891 * Return pathconf information about a file descriptor.
892 */
893#ifndef _SYS_SYSPROTO_H_
894struct fpathconf_args {
895 int fd;
896 int name;
897};
898#endif
899/*
900 * MPSAFE
901 */
902/* ARGSUSED */
903int
904fpathconf(td, uap)
905 struct thread *td;
906 register struct fpathconf_args *uap;
907{
908 struct file *fp;
909 struct vnode *vp;
910 int error;
911
912 if ((error = fget(td, uap->fd, &fp)) != 0)
913 return (error);
914
915 switch (fp->f_type) {
916 case DTYPE_PIPE:
917 case DTYPE_SOCKET:
918 if (uap->name != _PC_PIPE_BUF) {
919 error = EINVAL;
920 } else {
921 td->td_retval[0] = PIPE_BUF;
922 error = 0;
923 }
924 break;
925 case DTYPE_FIFO:
926 case DTYPE_VNODE:
927 vp = (struct vnode *)fp->f_data;
928 mtx_lock(&Giant);
929 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
930 mtx_unlock(&Giant);
931 break;
932 default:
933 error = EOPNOTSUPP;
934 break;
935 }
936 fdrop(fp, td);
937 return(error);
938}
939
940/*
941 * Allocate a file descriptor for the process.
942 */
943static int fdexpand;
944SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
945
946int
947fdalloc(td, want, result)
948 struct thread *td;
949 int want;
950 int *result;
951{
952 struct proc *p = td->td_proc;
953 register struct filedesc *fdp = td->td_proc->p_fd;
954 register int i;
955 int lim, last, nfiles;
956 struct file **newofile, **oldofile;
957 char *newofileflags;
958
959 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
960
961 /*
962 * Search for a free descriptor starting at the higher
963 * of want or fd_freefile. If that fails, consider
964 * expanding the ofile array.
965 */
966 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
967 for (;;) {
968 last = min(fdp->fd_nfiles, lim);
969 if ((i = want) < fdp->fd_freefile)
970 i = fdp->fd_freefile;
971 for (; i < last; i++) {
972 if (fdp->fd_ofiles[i] == NULL) {
973 fdp->fd_ofileflags[i] = 0;
974 if (i > fdp->fd_lastfile)
975 fdp->fd_lastfile = i;
976 if (want <= fdp->fd_freefile)
977 fdp->fd_freefile = i;
978 *result = i;
979 return (0);
980 }
981 }
982
983 /*
984 * No space in current array. Expand?
985 */
986 if (fdp->fd_nfiles >= lim)
987 return (EMFILE);
988 if (fdp->fd_nfiles < NDEXTENT)
989 nfiles = NDEXTENT;
990 else
991 nfiles = 2 * fdp->fd_nfiles;
992 FILEDESC_UNLOCK(fdp);
993 mtx_lock(&Giant);
994 MALLOC(newofile, struct file **, nfiles * OFILESIZE,
995 M_FILEDESC, M_WAITOK);
996 mtx_unlock(&Giant);
997 FILEDESC_LOCK(fdp);
998
999 /*
1000 * deal with file-table extend race that might have occurred
1001 * when malloc was blocked.
1002 */
1003 if (fdp->fd_nfiles >= nfiles) {
1004 FILEDESC_UNLOCK(fdp);
1005 mtx_lock(&Giant);
1006 FREE(newofile, M_FILEDESC);
1007 mtx_unlock(&Giant);
1008 FILEDESC_LOCK(fdp);
1009 continue;
1010 }
1011 newofileflags = (char *) &newofile[nfiles];
1012 /*
1013 * Copy the existing ofile and ofileflags arrays
1014 * and zero the new portion of each array.
1015 */
1016 bcopy(fdp->fd_ofiles, newofile,
1017 (i = sizeof(struct file *) * fdp->fd_nfiles));
1018 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1019 bcopy(fdp->fd_ofileflags, newofileflags,
1020 (i = sizeof(char) * fdp->fd_nfiles));
1021 bzero(newofileflags + i, nfiles * sizeof(char) - i);
1022 if (fdp->fd_nfiles > NDFILE)
1023 oldofile = fdp->fd_ofiles;
1024 else
1025 oldofile = NULL;
1026 fdp->fd_ofiles = newofile;
1027 fdp->fd_ofileflags = newofileflags;
1028 fdp->fd_nfiles = nfiles;
1029 fdexpand++;
1030 if (oldofile != NULL) {
1031 FILEDESC_UNLOCK(fdp);
1032 mtx_lock(&Giant);
1033 FREE(oldofile, M_FILEDESC);
1034 mtx_unlock(&Giant);
1035 FILEDESC_LOCK(fdp);
1036 }
1037 }
1038 return (0);
1039}
1040
1041/*
1042 * Check to see whether n user file descriptors
1043 * are available to the process p.
1044 */
1045int
1046fdavail(td, n)
1047 struct thread *td;
1048 register int n;
1049{
1050 struct proc *p = td->td_proc;
1051 register struct filedesc *fdp = td->td_proc->p_fd;
1052 register struct file **fpp;
1053 register int i, lim, last;
1054
1055 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1056
1057 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1058 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1059 return (1);
1060
1061 last = min(fdp->fd_nfiles, lim);
1062 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1063 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1064 if (*fpp == NULL && --n <= 0)
1065 return (1);
1066 }
1067 return (0);
1068}
1069
1070/*
1071 * Create a new open file structure and allocate
1072 * a file descriptor for the process that refers to it.
1073 */
1074int
1075falloc(td, resultfp, resultfd)
1076 register struct thread *td;
1077 struct file **resultfp;
1078 int *resultfd;
1079{
1080 struct proc *p = td->td_proc;
1081 register struct file *fp, *fq;
1082 int error, i;
1083
1084 sx_xlock(&filelist_lock);
1085 if (nfiles >= maxfiles) {
1086 sx_xunlock(&filelist_lock);
1087 tablefull("file");
1088 return (ENFILE);
1089 }
1090 nfiles++;
1091 sx_xunlock(&filelist_lock);
1092 /*
1093 * Allocate a new file descriptor.
1094 * If the process has file descriptor zero open, add to the list
1095 * of open files at that point, otherwise put it at the front of
1096 * the list of open files.
1097 */
77static d_open_t fdopen;
78#define NUMFDESC 64
79
80#define CDEV_MAJOR 22
81static struct cdevsw fildesc_cdevsw = {
82 /* open */ fdopen,
83 /* close */ noclose,
84 /* read */ noread,
85 /* write */ nowrite,
86 /* ioctl */ noioctl,
87 /* poll */ nopoll,
88 /* mmap */ nommap,
89 /* strategy */ nostrategy,
90 /* name */ "FD",
91 /* maj */ CDEV_MAJOR,
92 /* dump */ nodump,
93 /* psize */ nopsize,
94 /* flags */ 0,
95};
96
97static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct thread *td));
98static int badfo_readwrite __P((struct file *fp, struct uio *uio,
99 struct ucred *cred, int flags, struct thread *td));
100static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data,
101 struct thread *td));
102static int badfo_poll __P((struct file *fp, int events,
103 struct ucred *cred, struct thread *td));
104static int badfo_kqfilter __P((struct file *fp, struct knote *kn));
105static int badfo_stat __P((struct file *fp, struct stat *sb, struct thread *td));
106static int badfo_close __P((struct file *fp, struct thread *td));
107
108/*
109 * Descriptor management.
110 */
111struct filelist filehead; /* head of list of open files */
112int nfiles; /* actual number of open files */
113extern int cmask;
114struct sx filelist_lock; /* sx to protect filelist */
115
116/*
117 * System calls on descriptors.
118 */
119#ifndef _SYS_SYSPROTO_H_
120struct getdtablesize_args {
121 int dummy;
122};
123#endif
124/*
125 * MPSAFE
126 */
127/* ARGSUSED */
128int
129getdtablesize(td, uap)
130 struct thread *td;
131 struct getdtablesize_args *uap;
132{
133 struct proc *p = td->td_proc;
134
135 mtx_lock(&Giant);
136 td->td_retval[0] =
137 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
138 mtx_unlock(&Giant);
139 return (0);
140}
141
142/*
143 * Duplicate a file descriptor to a particular value.
144 *
145 * note: keep in mind that a potential race condition exists when closing
146 * descriptors from a shared descriptor table (via rfork).
147 */
148#ifndef _SYS_SYSPROTO_H_
149struct dup2_args {
150 u_int from;
151 u_int to;
152};
153#endif
154/*
155 * MPSAFE
156 */
157/* ARGSUSED */
158int
159dup2(td, uap)
160 struct thread *td;
161 struct dup2_args *uap;
162{
163 struct proc *p = td->td_proc;
164 register struct filedesc *fdp = td->td_proc->p_fd;
165 register u_int old = uap->from, new = uap->to;
166 int i, error;
167
168 FILEDESC_LOCK(fdp);
169retry:
170 if (old >= fdp->fd_nfiles ||
171 fdp->fd_ofiles[old] == NULL ||
172 new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
173 new >= maxfilesperproc) {
174 FILEDESC_UNLOCK(fdp);
175 return (EBADF);
176 }
177 if (old == new) {
178 td->td_retval[0] = new;
179 FILEDESC_UNLOCK(fdp);
180 return (0);
181 }
182 if (new >= fdp->fd_nfiles) {
183 if ((error = fdalloc(td, new, &i))) {
184 FILEDESC_UNLOCK(fdp);
185 return (error);
186 }
187 /*
188 * fdalloc() may block, retest everything.
189 */
190 goto retry;
191 }
192 error = do_dup(fdp, (int)old, (int)new, td->td_retval, td);
193 return(error);
194}
195
196/*
197 * Duplicate a file descriptor.
198 */
199#ifndef _SYS_SYSPROTO_H_
200struct dup_args {
201 u_int fd;
202};
203#endif
204/*
205 * MPSAFE
206 */
207/* ARGSUSED */
208int
209dup(td, uap)
210 struct thread *td;
211 struct dup_args *uap;
212{
213 register struct filedesc *fdp;
214 u_int old;
215 int new, error;
216
217 old = uap->fd;
218 fdp = td->td_proc->p_fd;
219 FILEDESC_LOCK(fdp);
220 if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
221 FILEDESC_UNLOCK(fdp);
222 return (EBADF);
223 }
224 if ((error = fdalloc(td, 0, &new))) {
225 FILEDESC_UNLOCK(fdp);
226 return (error);
227 }
228 error = do_dup(fdp, (int)old, new, td->td_retval, td);
229 return (error);
230}
231
232/*
233 * The file control system call.
234 */
235#ifndef _SYS_SYSPROTO_H_
236struct fcntl_args {
237 int fd;
238 int cmd;
239 long arg;
240};
241#endif
242/*
243 * MPSAFE
244 */
245/* ARGSUSED */
246int
247fcntl(td, uap)
248 struct thread *td;
249 register struct fcntl_args *uap;
250{
251 register struct proc *p = td->td_proc;
252 register struct filedesc *fdp;
253 register struct file *fp;
254 register char *pop;
255 struct vnode *vp;
256 int i, tmp, error = 0, flg = F_POSIX;
257 struct flock fl;
258 u_int newmin;
259 struct proc *leaderp;
260
261 mtx_lock(&Giant);
262
263 fdp = p->p_fd;
264 FILEDESC_LOCK(fdp);
265 if ((unsigned)uap->fd >= fdp->fd_nfiles ||
266 (fp = fdp->fd_ofiles[uap->fd]) == NULL) {
267 FILEDESC_UNLOCK(fdp);
268 error = EBADF;
269 goto done2;
270 }
271 pop = &fdp->fd_ofileflags[uap->fd];
272
273 switch (uap->cmd) {
274 case F_DUPFD:
275 newmin = uap->arg;
276 if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
277 newmin >= maxfilesperproc) {
278 FILEDESC_UNLOCK(fdp);
279 error = EINVAL;
280 break;
281 }
282 if ((error = fdalloc(td, newmin, &i))) {
283 FILEDESC_UNLOCK(fdp);
284 break;
285 }
286 error = do_dup(fdp, uap->fd, i, td->td_retval, td);
287 break;
288
289 case F_GETFD:
290 td->td_retval[0] = *pop & 1;
291 FILEDESC_UNLOCK(fdp);
292 break;
293
294 case F_SETFD:
295 *pop = (*pop &~ 1) | (uap->arg & 1);
296 FILEDESC_UNLOCK(fdp);
297 break;
298
299 case F_GETFL:
300 FILE_LOCK(fp);
301 FILEDESC_UNLOCK(fdp);
302 td->td_retval[0] = OFLAGS(fp->f_flag);
303 FILE_UNLOCK(fp);
304 break;
305
306 case F_SETFL:
307 fhold(fp);
308 FILEDESC_UNLOCK(fdp);
309 fp->f_flag &= ~FCNTLFLAGS;
310 fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS;
311 tmp = fp->f_flag & FNONBLOCK;
312 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
313 if (error) {
314 fdrop(fp, td);
315 break;
316 }
317 tmp = fp->f_flag & FASYNC;
318 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td);
319 if (!error) {
320 fdrop(fp, td);
321 break;
322 }
323 fp->f_flag &= ~FNONBLOCK;
324 tmp = 0;
325 (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td);
326 fdrop(fp, td);
327 break;
328
329 case F_GETOWN:
330 fhold(fp);
331 FILEDESC_UNLOCK(fdp);
332 error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td);
333 fdrop(fp, td);
334 break;
335
336 case F_SETOWN:
337 fhold(fp);
338 FILEDESC_UNLOCK(fdp);
339 error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td);
340 fdrop(fp, td);
341 break;
342
343 case F_SETLKW:
344 flg |= F_WAIT;
345 /* Fall into F_SETLK */
346
347 case F_SETLK:
348 if (fp->f_type != DTYPE_VNODE) {
349 FILEDESC_UNLOCK(fdp);
350 error = EBADF;
351 break;
352 }
353 vp = (struct vnode *)fp->f_data;
354 /*
355 * copyin/lockop may block
356 */
357 fhold(fp);
358 FILEDESC_UNLOCK(fdp);
359 vp = (struct vnode *)fp->f_data;
360
361 /* Copy in the lock structure */
362 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
363 sizeof(fl));
364 if (error) {
365 fdrop(fp, td);
366 break;
367 }
368 if (fl.l_whence == SEEK_CUR) {
369 if (fp->f_offset < 0 ||
370 (fl.l_start > 0 &&
371 fp->f_offset > OFF_MAX - fl.l_start)) {
372 fdrop(fp, td);
373 error = EOVERFLOW;
374 break;
375 }
376 fl.l_start += fp->f_offset;
377 }
378
379 switch (fl.l_type) {
380 case F_RDLCK:
381 if ((fp->f_flag & FREAD) == 0) {
382 error = EBADF;
383 break;
384 }
385 PROC_LOCK(p);
386 p->p_flag |= P_ADVLOCK;
387 leaderp = p->p_leader;
388 PROC_UNLOCK(p);
389 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
390 &fl, flg);
391 break;
392 case F_WRLCK:
393 if ((fp->f_flag & FWRITE) == 0) {
394 error = EBADF;
395 break;
396 }
397 PROC_LOCK(p);
398 p->p_flag |= P_ADVLOCK;
399 leaderp = p->p_leader;
400 PROC_UNLOCK(p);
401 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK,
402 &fl, flg);
403 break;
404 case F_UNLCK:
405 PROC_LOCK(p);
406 leaderp = p->p_leader;
407 PROC_UNLOCK(p);
408 error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK,
409 &fl, F_POSIX);
410 break;
411 default:
412 error = EINVAL;
413 break;
414 }
415 fdrop(fp, td);
416 break;
417
418 case F_GETLK:
419 if (fp->f_type != DTYPE_VNODE) {
420 FILEDESC_UNLOCK(fdp);
421 error = EBADF;
422 break;
423 }
424 vp = (struct vnode *)fp->f_data;
425 /*
426 * copyin/lockop may block
427 */
428 fhold(fp);
429 FILEDESC_UNLOCK(fdp);
430 vp = (struct vnode *)fp->f_data;
431
432 /* Copy in the lock structure */
433 error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl,
434 sizeof(fl));
435 if (error) {
436 fdrop(fp, td);
437 break;
438 }
439 if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK &&
440 fl.l_type != F_UNLCK) {
441 fdrop(fp, td);
442 error = EINVAL;
443 break;
444 }
445 if (fl.l_whence == SEEK_CUR) {
446 if ((fl.l_start > 0 &&
447 fp->f_offset > OFF_MAX - fl.l_start) ||
448 (fl.l_start < 0 &&
449 fp->f_offset < OFF_MIN - fl.l_start)) {
450 fdrop(fp, td);
451 error = EOVERFLOW;
452 break;
453 }
454 fl.l_start += fp->f_offset;
455 }
456 error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK,
457 &fl, F_POSIX);
458 fdrop(fp, td);
459 if (error == 0) {
460 error = copyout((caddr_t)&fl,
461 (caddr_t)(intptr_t)uap->arg, sizeof(fl));
462 }
463 break;
464 default:
465 FILEDESC_UNLOCK(fdp);
466 error = EINVAL;
467 break;
468 }
469done2:
470 mtx_unlock(&Giant);
471 return (error);
472}
473
474/*
475 * Common code for dup, dup2, and fcntl(F_DUPFD).
476 * filedesc must be locked, but will be unlocked as a side effect.
477 */
478static int
479do_dup(fdp, old, new, retval, td)
480 register struct filedesc *fdp;
481 register int old, new;
482 register_t *retval;
483 struct thread *td;
484{
485 struct file *fp;
486 struct file *delfp;
487
488 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
489
490 /*
491 * Save info on the descriptor being overwritten. We have
492 * to do the unmap now, but we cannot close it without
493 * introducing an ownership race for the slot.
494 */
495 delfp = fdp->fd_ofiles[new];
496#if 0
497 if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
498 (void) munmapfd(td, new);
499#endif
500
501 /*
502 * Duplicate the source descriptor, update lastfile
503 */
504 fp = fdp->fd_ofiles[old];
505 fdp->fd_ofiles[new] = fp;
506 fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
507 fhold(fp);
508 if (new > fdp->fd_lastfile)
509 fdp->fd_lastfile = new;
510 *retval = new;
511
512 FILEDESC_UNLOCK(fdp);
513
514 /*
515 * If we dup'd over a valid file, we now own the reference to it
516 * and must dispose of it using closef() semantics (as if a
517 * close() were performed on it).
518 */
519 if (delfp) {
520 mtx_lock(&Giant);
521 (void) closef(delfp, td);
522 mtx_unlock(&Giant);
523 }
524 return (0);
525}
526
527/*
528 * If sigio is on the list associated with a process or process group,
529 * disable signalling from the device, remove sigio from the list and
530 * free sigio.
531 */
532void
533funsetown(sigio)
534 struct sigio *sigio;
535{
536 int s;
537
538 if (sigio == NULL)
539 return;
540
541 s = splhigh();
542 *(sigio->sio_myref) = NULL;
543 splx(s);
544 if ((sigio)->sio_pgid < 0) {
545 struct pgrp *pg = (sigio)->sio_pgrp;
546 PGRP_LOCK(pg);
547 SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
548 sigio, sio_pgsigio);
549 PGRP_UNLOCK(pg);
550 } else {
551 struct proc *p = (sigio)->sio_proc;
552 PROC_LOCK(p);
553 SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
554 sigio, sio_pgsigio);
555 PROC_UNLOCK(p);
556 }
557 crfree(sigio->sio_ucred);
558 FREE(sigio, M_SIGIO);
559}
560
561/* Free a list of sigio structures. */
562void
563funsetownlst(sigiolst)
564 struct sigiolst *sigiolst;
565{
566 int s;
567 struct sigio *sigio;
568 struct proc *p;
569 struct pgrp *pg;
570
571 sigio = SLIST_FIRST(sigiolst);
572 if (sigio == NULL)
573 return;
574
575 p = NULL;
576 pg = NULL;
577
578 /*
579 * Every entry of the list should belong
580 * to a single proc or pgrp.
581 */
582 if (sigio->sio_pgid < 0) {
583 pg = sigio->sio_pgrp;
584 PGRP_LOCK_ASSERT(pg, MA_OWNED);
585 } else /* if (sigio->sio_pgid > 0) */ {
586 p = sigio->sio_proc;
587 PROC_LOCK_ASSERT(p, MA_OWNED);
588 }
589
590 while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
591 s = splhigh();
592 *(sigio->sio_myref) = NULL;
593 splx(s);
594 if (pg != NULL) {
595 KASSERT(sigio->sio_pgid < 0, ("Proc sigio in pgrp sigio list"));
596 KASSERT(sigio->sio_pgrp == pg, ("Bogus pgrp in sigio list"));
597 SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio, sio_pgsigio);
598 PGRP_UNLOCK(pg);
599 crfree(sigio->sio_ucred);
600 FREE(sigio, M_SIGIO);
601 PGRP_LOCK(pg);
602 } else /* if (p != NULL) */ {
603 KASSERT(sigio->sio_pgid > 0, ("Pgrp sigio in proc sigio list"));
604 KASSERT(sigio->sio_proc == p, ("Bogus proc in sigio list"));
605 SLIST_REMOVE(&p->p_sigiolst, sigio, sigio, sio_pgsigio);
606 PROC_UNLOCK(p);
607 crfree(sigio->sio_ucred);
608 FREE(sigio, M_SIGIO);
609 PROC_LOCK(p);
610 }
611 }
612}
613
614/*
615 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
616 *
617 * After permission checking, add a sigio structure to the sigio list for
618 * the process or process group.
619 */
620int
621fsetown(pgid, sigiop)
622 pid_t pgid;
623 struct sigio **sigiop;
624{
625 struct proc *proc;
626 struct pgrp *pgrp;
627 struct sigio *sigio;
628 int s, ret;
629
630 if (pgid == 0) {
631 funsetown(*sigiop);
632 return (0);
633 }
634
635 ret = 0;
636
637 /* Allocate and fill in the new sigio out of locks. */
638 MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
639 sigio->sio_pgid = pgid;
640 sigio->sio_ucred = crhold(curthread->td_ucred);
641 sigio->sio_myref = sigiop;
642
643 PGRPSESS_SLOCK();
644 if (pgid > 0) {
645 proc = pfind(pgid);
646 if (proc == NULL) {
647 ret = ESRCH;
648 goto fail;
649 }
650
651 /*
652 * Policy - Don't allow a process to FSETOWN a process
653 * in another session.
654 *
655 * Remove this test to allow maximum flexibility or
656 * restrict FSETOWN to the current process or process
657 * group for maximum safety.
658 */
659 PROC_UNLOCK(proc);
660 if (proc->p_session != curthread->td_proc->p_session) {
661 ret = EPERM;
662 goto fail;
663 }
664
665 pgrp = NULL;
666 } else /* if (pgid < 0) */ {
667 pgrp = pgfind(-pgid);
668 if (pgrp == NULL) {
669 ret = ESRCH;
670 goto fail;
671 }
672 PGRP_UNLOCK(pgrp);
673
674 /*
675 * Policy - Don't allow a process to FSETOWN a process
676 * in another session.
677 *
678 * Remove this test to allow maximum flexibility or
679 * restrict FSETOWN to the current process or process
680 * group for maximum safety.
681 */
682 if (pgrp->pg_session != curthread->td_proc->p_session) {
683 ret = EPERM;
684 goto fail;
685 }
686
687 proc = NULL;
688 }
689 funsetown(*sigiop);
690 if (pgid > 0) {
691 PROC_LOCK(proc);
692 SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
693 sigio->sio_proc = proc;
694 PROC_UNLOCK(proc);
695 } else {
696 PGRP_LOCK(pgrp);
697 SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
698 sigio->sio_pgrp = pgrp;
699 PGRP_UNLOCK(pgrp);
700 }
701 PGRPSESS_SUNLOCK();
702 s = splhigh();
703 *sigiop = sigio;
704 splx(s);
705 return (0);
706
707fail:
708 PGRPSESS_SUNLOCK();
709 crfree(sigio->sio_ucred);
710 FREE(sigio, M_SIGIO);
711 return (ret);
712}
713
714/*
715 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
716 */
717pid_t
718fgetown(sigio)
719 struct sigio *sigio;
720{
721 return (sigio != NULL ? sigio->sio_pgid : 0);
722}
723
724/*
725 * Close a file descriptor.
726 */
727#ifndef _SYS_SYSPROTO_H_
728struct close_args {
729 int fd;
730};
731#endif
732/*
733 * MPSAFE
734 */
735/* ARGSUSED */
736int
737close(td, uap)
738 struct thread *td;
739 struct close_args *uap;
740{
741 register struct filedesc *fdp;
742 register struct file *fp;
743 register int fd = uap->fd;
744 int error = 0;
745
746 mtx_lock(&Giant);
747 fdp = td->td_proc->p_fd;
748 FILEDESC_LOCK(fdp);
749 if ((unsigned)fd >= fdp->fd_nfiles ||
750 (fp = fdp->fd_ofiles[fd]) == NULL) {
751 FILEDESC_UNLOCK(fdp);
752 error = EBADF;
753 goto done2;
754 }
755#if 0
756 if (fdp->fd_ofileflags[fd] & UF_MAPPED)
757 (void) munmapfd(td, fd);
758#endif
759 fdp->fd_ofiles[fd] = NULL;
760 fdp->fd_ofileflags[fd] = 0;
761
762 /*
763 * we now hold the fp reference that used to be owned by the descriptor
764 * array.
765 */
766 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
767 fdp->fd_lastfile--;
768 if (fd < fdp->fd_freefile)
769 fdp->fd_freefile = fd;
770 if (fd < fdp->fd_knlistsize) {
771 FILEDESC_UNLOCK(fdp);
772 knote_fdclose(td, fd);
773 } else
774 FILEDESC_UNLOCK(fdp);
775
776 error = closef(fp, td);
777done2:
778 mtx_unlock(&Giant);
779 return(error);
780}
781
782#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
783/*
784 * Return status information about a file descriptor.
785 */
786#ifndef _SYS_SYSPROTO_H_
787struct ofstat_args {
788 int fd;
789 struct ostat *sb;
790};
791#endif
792/*
793 * MPSAFE
794 */
795/* ARGSUSED */
796int
797ofstat(td, uap)
798 struct thread *td;
799 register struct ofstat_args *uap;
800{
801 struct file *fp;
802 struct stat ub;
803 struct ostat oub;
804 int error;
805
806 mtx_lock(&Giant);
807 if ((error = fget(td, uap->fd, &fp)) != 0)
808 goto done2;
809 error = fo_stat(fp, &ub, td);
810 if (error == 0) {
811 cvtstat(&ub, &oub);
812 error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub));
813 }
814 fdrop(fp, td);
815done2:
816 mtx_unlock(&Giant);
817 return (error);
818}
819#endif /* COMPAT_43 || COMPAT_SUNOS */
820
821/*
822 * Return status information about a file descriptor.
823 */
824#ifndef _SYS_SYSPROTO_H_
825struct fstat_args {
826 int fd;
827 struct stat *sb;
828};
829#endif
830/*
831 * MPSAFE
832 */
833/* ARGSUSED */
834int
835fstat(td, uap)
836 struct thread *td;
837 struct fstat_args *uap;
838{
839 struct file *fp;
840 struct stat ub;
841 int error;
842
843 mtx_lock(&Giant);
844 if ((error = fget(td, uap->fd, &fp)) != 0)
845 goto done2;
846 error = fo_stat(fp, &ub, td);
847 if (error == 0)
848 error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub));
849 fdrop(fp, td);
850done2:
851 mtx_unlock(&Giant);
852 return (error);
853}
854
855/*
856 * Return status information about a file descriptor.
857 */
858#ifndef _SYS_SYSPROTO_H_
859struct nfstat_args {
860 int fd;
861 struct nstat *sb;
862};
863#endif
864/*
865 * MPSAFE
866 */
867/* ARGSUSED */
868int
869nfstat(td, uap)
870 struct thread *td;
871 register struct nfstat_args *uap;
872{
873 struct file *fp;
874 struct stat ub;
875 struct nstat nub;
876 int error;
877
878 mtx_lock(&Giant);
879 if ((error = fget(td, uap->fd, &fp)) != 0)
880 goto done2;
881 error = fo_stat(fp, &ub, td);
882 if (error == 0) {
883 cvtnstat(&ub, &nub);
884 error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub));
885 }
886 fdrop(fp, td);
887done2:
888 mtx_unlock(&Giant);
889 return (error);
890}
891
892/*
893 * Return pathconf information about a file descriptor.
894 */
895#ifndef _SYS_SYSPROTO_H_
896struct fpathconf_args {
897 int fd;
898 int name;
899};
900#endif
901/*
902 * MPSAFE
903 */
904/* ARGSUSED */
905int
906fpathconf(td, uap)
907 struct thread *td;
908 register struct fpathconf_args *uap;
909{
910 struct file *fp;
911 struct vnode *vp;
912 int error;
913
914 if ((error = fget(td, uap->fd, &fp)) != 0)
915 return (error);
916
917 switch (fp->f_type) {
918 case DTYPE_PIPE:
919 case DTYPE_SOCKET:
920 if (uap->name != _PC_PIPE_BUF) {
921 error = EINVAL;
922 } else {
923 td->td_retval[0] = PIPE_BUF;
924 error = 0;
925 }
926 break;
927 case DTYPE_FIFO:
928 case DTYPE_VNODE:
929 vp = (struct vnode *)fp->f_data;
930 mtx_lock(&Giant);
931 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
932 mtx_unlock(&Giant);
933 break;
934 default:
935 error = EOPNOTSUPP;
936 break;
937 }
938 fdrop(fp, td);
939 return(error);
940}
941
942/*
943 * Allocate a file descriptor for the process.
944 */
945static int fdexpand;
946SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
947
948int
949fdalloc(td, want, result)
950 struct thread *td;
951 int want;
952 int *result;
953{
954 struct proc *p = td->td_proc;
955 register struct filedesc *fdp = td->td_proc->p_fd;
956 register int i;
957 int lim, last, nfiles;
958 struct file **newofile, **oldofile;
959 char *newofileflags;
960
961 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
962
963 /*
964 * Search for a free descriptor starting at the higher
965 * of want or fd_freefile. If that fails, consider
966 * expanding the ofile array.
967 */
968 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
969 for (;;) {
970 last = min(fdp->fd_nfiles, lim);
971 if ((i = want) < fdp->fd_freefile)
972 i = fdp->fd_freefile;
973 for (; i < last; i++) {
974 if (fdp->fd_ofiles[i] == NULL) {
975 fdp->fd_ofileflags[i] = 0;
976 if (i > fdp->fd_lastfile)
977 fdp->fd_lastfile = i;
978 if (want <= fdp->fd_freefile)
979 fdp->fd_freefile = i;
980 *result = i;
981 return (0);
982 }
983 }
984
985 /*
986 * No space in current array. Expand?
987 */
988 if (fdp->fd_nfiles >= lim)
989 return (EMFILE);
990 if (fdp->fd_nfiles < NDEXTENT)
991 nfiles = NDEXTENT;
992 else
993 nfiles = 2 * fdp->fd_nfiles;
994 FILEDESC_UNLOCK(fdp);
995 mtx_lock(&Giant);
996 MALLOC(newofile, struct file **, nfiles * OFILESIZE,
997 M_FILEDESC, M_WAITOK);
998 mtx_unlock(&Giant);
999 FILEDESC_LOCK(fdp);
1000
1001 /*
1002 * deal with file-table extend race that might have occurred
1003 * when malloc was blocked.
1004 */
1005 if (fdp->fd_nfiles >= nfiles) {
1006 FILEDESC_UNLOCK(fdp);
1007 mtx_lock(&Giant);
1008 FREE(newofile, M_FILEDESC);
1009 mtx_unlock(&Giant);
1010 FILEDESC_LOCK(fdp);
1011 continue;
1012 }
1013 newofileflags = (char *) &newofile[nfiles];
1014 /*
1015 * Copy the existing ofile and ofileflags arrays
1016 * and zero the new portion of each array.
1017 */
1018 bcopy(fdp->fd_ofiles, newofile,
1019 (i = sizeof(struct file *) * fdp->fd_nfiles));
1020 bzero((char *)newofile + i, nfiles * sizeof(struct file *) - i);
1021 bcopy(fdp->fd_ofileflags, newofileflags,
1022 (i = sizeof(char) * fdp->fd_nfiles));
1023 bzero(newofileflags + i, nfiles * sizeof(char) - i);
1024 if (fdp->fd_nfiles > NDFILE)
1025 oldofile = fdp->fd_ofiles;
1026 else
1027 oldofile = NULL;
1028 fdp->fd_ofiles = newofile;
1029 fdp->fd_ofileflags = newofileflags;
1030 fdp->fd_nfiles = nfiles;
1031 fdexpand++;
1032 if (oldofile != NULL) {
1033 FILEDESC_UNLOCK(fdp);
1034 mtx_lock(&Giant);
1035 FREE(oldofile, M_FILEDESC);
1036 mtx_unlock(&Giant);
1037 FILEDESC_LOCK(fdp);
1038 }
1039 }
1040 return (0);
1041}
1042
1043/*
1044 * Check to see whether n user file descriptors
1045 * are available to the process p.
1046 */
1047int
1048fdavail(td, n)
1049 struct thread *td;
1050 register int n;
1051{
1052 struct proc *p = td->td_proc;
1053 register struct filedesc *fdp = td->td_proc->p_fd;
1054 register struct file **fpp;
1055 register int i, lim, last;
1056
1057 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1058
1059 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1060 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1061 return (1);
1062
1063 last = min(fdp->fd_nfiles, lim);
1064 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1065 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1066 if (*fpp == NULL && --n <= 0)
1067 return (1);
1068 }
1069 return (0);
1070}
1071
1072/*
1073 * Create a new open file structure and allocate
1074 * a file descriptor for the process that refers to it.
1075 */
1076int
1077falloc(td, resultfp, resultfd)
1078 register struct thread *td;
1079 struct file **resultfp;
1080 int *resultfd;
1081{
1082 struct proc *p = td->td_proc;
1083 register struct file *fp, *fq;
1084 int error, i;
1085
1086 sx_xlock(&filelist_lock);
1087 if (nfiles >= maxfiles) {
1088 sx_xunlock(&filelist_lock);
1089 tablefull("file");
1090 return (ENFILE);
1091 }
1092 nfiles++;
1093 sx_xunlock(&filelist_lock);
1094 /*
1095 * Allocate a new file descriptor.
1096 * If the process has file descriptor zero open, add to the list
1097 * of open files at that point, otherwise put it at the front of
1098 * the list of open files.
1099 */
1098 MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO);
1100 fp = uma_zalloc(file_zone, M_WAITOK);
1101 bzero(fp, sizeof(*fp));
1099
1100 /*
1101 * wait until after malloc (which may have blocked) returns before
1102 * allocating the slot, else a race might have shrunk it if we had
1103 * allocated it before the malloc.
1104 */
1105 FILEDESC_LOCK(p->p_fd);
1106 if ((error = fdalloc(td, 0, &i))) {
1107 FILEDESC_UNLOCK(p->p_fd);
1108 sx_xlock(&filelist_lock);
1109 nfiles--;
1110 sx_xunlock(&filelist_lock);
1102
1103 /*
1104 * wait until after malloc (which may have blocked) returns before
1105 * allocating the slot, else a race might have shrunk it if we had
1106 * allocated it before the malloc.
1107 */
1108 FILEDESC_LOCK(p->p_fd);
1109 if ((error = fdalloc(td, 0, &i))) {
1110 FILEDESC_UNLOCK(p->p_fd);
1111 sx_xlock(&filelist_lock);
1112 nfiles--;
1113 sx_xunlock(&filelist_lock);
1111 FREE(fp, M_FILE);
1114 uma_zfree(file_zone, fp);
1112 return (error);
1113 }
1114 fp->f_mtxp = mtx_pool_alloc();
1115 fp->f_gcflag = 0;
1116 fp->f_count = 1;
1117 fp->f_cred = crhold(td->td_ucred);
1118 fp->f_ops = &badfileops;
1119 fp->f_seqcount = 1;
1120 FILEDESC_UNLOCK(p->p_fd);
1121 sx_xlock(&filelist_lock);
1122 FILEDESC_LOCK(p->p_fd);
1123 if ((fq = p->p_fd->fd_ofiles[0])) {
1124 LIST_INSERT_AFTER(fq, fp, f_list);
1125 } else {
1126 LIST_INSERT_HEAD(&filehead, fp, f_list);
1127 }
1128 p->p_fd->fd_ofiles[i] = fp;
1129 FILEDESC_UNLOCK(p->p_fd);
1130 sx_xunlock(&filelist_lock);
1131 if (resultfp)
1132 *resultfp = fp;
1133 if (resultfd)
1134 *resultfd = i;
1135 return (0);
1136}
1137
1138/*
1139 * Free a file descriptor.
1140 */
1141void
1142ffree(fp)
1143 register struct file *fp;
1144{
1145
1146 KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1147 sx_xlock(&filelist_lock);
1148 LIST_REMOVE(fp, f_list);
1149 nfiles--;
1150 sx_xunlock(&filelist_lock);
1151 crfree(fp->f_cred);
1115 return (error);
1116 }
1117 fp->f_mtxp = mtx_pool_alloc();
1118 fp->f_gcflag = 0;
1119 fp->f_count = 1;
1120 fp->f_cred = crhold(td->td_ucred);
1121 fp->f_ops = &badfileops;
1122 fp->f_seqcount = 1;
1123 FILEDESC_UNLOCK(p->p_fd);
1124 sx_xlock(&filelist_lock);
1125 FILEDESC_LOCK(p->p_fd);
1126 if ((fq = p->p_fd->fd_ofiles[0])) {
1127 LIST_INSERT_AFTER(fq, fp, f_list);
1128 } else {
1129 LIST_INSERT_HEAD(&filehead, fp, f_list);
1130 }
1131 p->p_fd->fd_ofiles[i] = fp;
1132 FILEDESC_UNLOCK(p->p_fd);
1133 sx_xunlock(&filelist_lock);
1134 if (resultfp)
1135 *resultfp = fp;
1136 if (resultfd)
1137 *resultfd = i;
1138 return (0);
1139}
1140
1141/*
1142 * Free a file descriptor.
1143 */
1144void
1145ffree(fp)
1146 register struct file *fp;
1147{
1148
1149 KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!"));
1150 sx_xlock(&filelist_lock);
1151 LIST_REMOVE(fp, f_list);
1152 nfiles--;
1153 sx_xunlock(&filelist_lock);
1154 crfree(fp->f_cred);
1152 FREE(fp, M_FILE);
1155 uma_zfree(file_zone, fp);
1153}
1154
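For context on how the falloc()/ffree() pair above is consumed elsewhere in the kernel, a hedged sketch of a typical caller from this era follows; the function name example_open, the choice of vnode payload, and the fileops assignment are illustrative assumptions, not code from this file:

	/*
	 * Sketch only: allocate a file and descriptor slot with falloc(),
	 * then fill in the type-specific parts.  falloc() returns with
	 * f_count == 1, f_ops pointing at badfileops, and the new file
	 * already installed in the descriptor table at index fd.
	 */
	static int
	example_open(struct thread *td, struct vnode *vp)
	{
		struct file *fp;
		int fd, error;

		error = falloc(td, &fp, &fd);
		if (error)
			return (error);
		fp->f_flag = FREAD;
		fp->f_type = DTYPE_VNODE;
		fp->f_ops = &vnops;		/* fileops appropriate to the type */
		fp->f_data = (caddr_t)vp;	/* per-type payload */
		td->td_retval[0] = fd;
		return (0);
	}

Whether a caller also needs an extra fhold()/fdrop() around this window depends on how it guards against another thread closing the new descriptor; that question is outside the scope of this diff.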
1155/*
1156 * Build a new filedesc structure.
1157 */
1158struct filedesc *
1159fdinit(td)
1160 struct thread *td;
1161{
1162 register struct filedesc0 *newfdp;
1163 register struct filedesc *fdp = td->td_proc->p_fd;
1164
1165 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1166 M_FILEDESC, M_WAITOK | M_ZERO);
1167 mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF);
1168 FILEDESC_LOCK(&newfdp->fd_fd);
1169 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1170 if (newfdp->fd_fd.fd_cdir)
1171 VREF(newfdp->fd_fd.fd_cdir);
1172 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1173 if (newfdp->fd_fd.fd_rdir)
1174 VREF(newfdp->fd_fd.fd_rdir);
1175 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1176 if (newfdp->fd_fd.fd_jdir)
1177 VREF(newfdp->fd_fd.fd_jdir);
1178
1179 /* Create the file descriptor table. */
1180 newfdp->fd_fd.fd_refcnt = 1;
1181 newfdp->fd_fd.fd_cmask = cmask;
1182 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1183 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1184 newfdp->fd_fd.fd_nfiles = NDFILE;
1185 newfdp->fd_fd.fd_knlistsize = -1;
1186 FILEDESC_UNLOCK(&newfdp->fd_fd);
1187
1188 return (&newfdp->fd_fd);
1189}
1190
1191/*
1192 * Share a filedesc structure.
1193 */
1194struct filedesc *
1195fdshare(p)
1196 struct proc *p;
1197{
1198 FILEDESC_LOCK(p->p_fd);
1199 p->p_fd->fd_refcnt++;
1200 FILEDESC_UNLOCK(p->p_fd);
1201 return (p->p_fd);
1202}
1203
1204/*
1205 * Copy a filedesc structure.
1206 */
1207struct filedesc *
1208fdcopy(td)
1209 struct thread *td;
1210{
1211 register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1212 register struct file **fpp;
1213 register int i, j;
1214
1215 /* Certain daemons might not have file descriptors. */
1216 if (fdp == NULL)
1217 return (NULL);
1218
1219 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1220
1221 FILEDESC_UNLOCK(fdp);
1222 MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1223 M_FILEDESC, M_WAITOK);
1224 FILEDESC_LOCK(fdp);
1225 bcopy(fdp, newfdp, sizeof(struct filedesc));
1226 FILEDESC_UNLOCK(fdp);
1227 bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1228 mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF);
1229 if (newfdp->fd_cdir)
1230 VREF(newfdp->fd_cdir);
1231 if (newfdp->fd_rdir)
1232 VREF(newfdp->fd_rdir);
1233 if (newfdp->fd_jdir)
1234 VREF(newfdp->fd_jdir);
1235 newfdp->fd_refcnt = 1;
1236
1237 /*
1238 * If the number of open files fits in the internal arrays
1239 * of the open file structure, use them, otherwise allocate
1240 * additional memory for the number of descriptors currently
1241 * in use.
1242 */
1243 FILEDESC_LOCK(fdp);
1244 newfdp->fd_lastfile = fdp->fd_lastfile;
1245 newfdp->fd_nfiles = fdp->fd_nfiles;
1246 if (newfdp->fd_lastfile < NDFILE) {
1247 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1248 newfdp->fd_ofileflags =
1249 ((struct filedesc0 *) newfdp)->fd_dfileflags;
1250 i = NDFILE;
1251 } else {
1252 /*
1253 * Compute the smallest multiple of NDEXTENT needed
1254 * for the file descriptors currently in use,
1255 * allowing the table to shrink.
1256 */
1257retry:
1258 i = newfdp->fd_nfiles;
1259 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1260 i /= 2;
1261 FILEDESC_UNLOCK(fdp);
1262 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1263 M_FILEDESC, M_WAITOK);
1264 FILEDESC_LOCK(fdp);
1265 newfdp->fd_lastfile = fdp->fd_lastfile;
1266 newfdp->fd_nfiles = fdp->fd_nfiles;
1267 j = newfdp->fd_nfiles;
1268 while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1269 j /= 2;
1270 if (i != j) {
1271 /*
1272 * The size of the original table has changed.
1273 * Go over once again.
1274 */
1275 FILEDESC_UNLOCK(fdp);
1276 FREE(newfdp->fd_ofiles, M_FILEDESC);
1277 FILEDESC_LOCK(fdp);
1278 newfdp->fd_lastfile = fdp->fd_lastfile;
1279 newfdp->fd_nfiles = fdp->fd_nfiles;
1280 goto retry;
1281 }
1282 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1283 }
1284 newfdp->fd_nfiles = i;
1285 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1286 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1287
1288 /*
1289 * kq descriptors cannot be copied.
1290 */
1291 if (newfdp->fd_knlistsize != -1) {
1292 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1293 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1294 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1295 *fpp = NULL;
1296 if (i < newfdp->fd_freefile)
1297 newfdp->fd_freefile = i;
1298 }
1299 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1300 newfdp->fd_lastfile--;
1301 }
1302 newfdp->fd_knlist = NULL;
1303 newfdp->fd_knlistsize = -1;
1304 newfdp->fd_knhash = NULL;
1305 newfdp->fd_knhashmask = 0;
1306 }
1307
1308 fpp = newfdp->fd_ofiles;
1309 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1310 if (*fpp != NULL) {
1311 fhold(*fpp);
1312 }
1313 }
1314 return (newfdp);
1315}
1316
1317/*
1318 * Release a filedesc structure.
1319 */
1320void
1321fdfree(td)
1322 struct thread *td;
1323{
1324 register struct filedesc *fdp;
1325 struct file **fpp;
1326 register int i;
1327
1328 fdp = td->td_proc->p_fd;
1329 /* Certain daemons might not have file descriptors. */
1330 if (fdp == NULL)
1331 return;
1332
1333 FILEDESC_LOCK(fdp);
1334 if (--fdp->fd_refcnt > 0) {
1335 FILEDESC_UNLOCK(fdp);
1336 return;
1337 }
1338 /*
1339 * we are the last reference to the structure, we can
1340 * safely assume it will not change out from under us.
1341 */
1342 FILEDESC_UNLOCK(fdp);
1343 fpp = fdp->fd_ofiles;
1344 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1345 if (*fpp)
1346 (void) closef(*fpp, td);
1347 }
1348
1349 PROC_LOCK(td->td_proc);
1350 td->td_proc->p_fd = NULL;
1351 PROC_UNLOCK(td->td_proc);
1352
1353 if (fdp->fd_nfiles > NDFILE)
1354 FREE(fdp->fd_ofiles, M_FILEDESC);
1355 if (fdp->fd_cdir)
1356 vrele(fdp->fd_cdir);
1357 if (fdp->fd_rdir)
1358 vrele(fdp->fd_rdir);
1359 if (fdp->fd_jdir)
1360 vrele(fdp->fd_jdir);
1361 if (fdp->fd_knlist)
1362 FREE(fdp->fd_knlist, M_KQUEUE);
1363 if (fdp->fd_knhash)
1364 FREE(fdp->fd_knhash, M_KQUEUE);
1365 mtx_destroy(&fdp->fd_mtx);
1366 FREE(fdp, M_FILEDESC);
1367}
1368
1369/*
1370 * For setugid programs, we don't want to people to use that setugidness
1371 * to generate error messages which write to a file which otherwise would
1372 * otherwise be off-limits to the process.
1373 *
1374 * This is a gross hack to plug the hole. A better solution would involve
1375 * a special vop or other form of generalized access control mechanism. We
1376 * go ahead and just reject all procfs file systems accesses as dangerous.
1377 *
1378 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1379 * sufficient. We also don't for check setugidness since we know we are.
1380 */
1381static int
1382is_unsafe(struct file *fp)
1383{
1384 if (fp->f_type == DTYPE_VNODE &&
1385 ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1386 return (1);
1387 return (0);
1388}
1389
1390/*
1391 * Make this setguid thing safe, if at all possible.
1392 */
1393void
1394setugidsafety(td)
1395 struct thread *td;
1396{
1397 struct filedesc *fdp = td->td_proc->p_fd;
1398 register int i;
1399
1400 /* Certain daemons might not have file descriptors. */
1401 if (fdp == NULL)
1402 return;
1403
1404 /*
1405 * note: fdp->fd_ofiles may be reallocated out from under us while
1406 * we are blocked in a close. Be careful!
1407 */
1408 FILEDESC_LOCK(fdp);
1409 for (i = 0; i <= fdp->fd_lastfile; i++) {
1410 if (i > 2)
1411 break;
1412 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1413 struct file *fp;
1414
1415#if 0
1416 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1417 (void) munmapfd(td, i);
1418#endif
1419 if (i < fdp->fd_knlistsize) {
1420 FILEDESC_UNLOCK(fdp);
1421 knote_fdclose(td, i);
1422 FILEDESC_LOCK(fdp);
1423 }
1424 /*
1425 * NULL-out descriptor prior to close to avoid
1426 * a race while close blocks.
1427 */
1428 fp = fdp->fd_ofiles[i];
1429 fdp->fd_ofiles[i] = NULL;
1430 fdp->fd_ofileflags[i] = 0;
1431 if (i < fdp->fd_freefile)
1432 fdp->fd_freefile = i;
1433 FILEDESC_UNLOCK(fdp);
1434 (void) closef(fp, td);
1435 FILEDESC_LOCK(fdp);
1436 }
1437 }
1438 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1439 fdp->fd_lastfile--;
1440 FILEDESC_UNLOCK(fdp);
1441}
1442
1443/*
1444 * Close any files on exec?
1445 */
1446void
1447fdcloseexec(td)
1448 struct thread *td;
1449{
1450 struct filedesc *fdp = td->td_proc->p_fd;
1451 register int i;
1452
1453 /* Certain daemons might not have file descriptors. */
1454 if (fdp == NULL)
1455 return;
1456
1457 FILEDESC_LOCK(fdp);
1458
1459 /*
1460 * We cannot cache fd_ofiles or fd_ofileflags since operations
1461 * may block and rip them out from under us.
1462 */
1463 for (i = 0; i <= fdp->fd_lastfile; i++) {
1464 if (fdp->fd_ofiles[i] != NULL &&
1465 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1466 struct file *fp;
1467
1468#if 0
1469 if (fdp->fd_ofileflags[i] & UF_MAPPED)
1470 (void) munmapfd(td, i);
1471#endif
1472 if (i < fdp->fd_knlistsize) {
1473 FILEDESC_UNLOCK(fdp);
1474 knote_fdclose(td, i);
1475 FILEDESC_LOCK(fdp);
1476 }
1477 /*
1478 * NULL-out descriptor prior to close to avoid
1479 * a race while close blocks.
1480 */
1481 fp = fdp->fd_ofiles[i];
1482 fdp->fd_ofiles[i] = NULL;
1483 fdp->fd_ofileflags[i] = 0;
1484 if (i < fdp->fd_freefile)
1485 fdp->fd_freefile = i;
1486 FILEDESC_UNLOCK(fdp);
1487 (void) closef(fp, td);
1488 FILEDESC_LOCK(fdp);
1489 }
1490 }
1491 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1492 fdp->fd_lastfile--;
1493 FILEDESC_UNLOCK(fdp);
1494}
1495
1496/*
1497 * Internal form of close.
1498 * Decrement reference count on file structure.
1499 * Note: td may be NULL when closing a file
1500 * that was being passed in a message.
1501 */
1502int
1503closef(fp, td)
1504 register struct file *fp;
1505 register struct thread *td;
1506{
1507 struct vnode *vp;
1508 struct flock lf;
1509
1510 if (fp == NULL)
1511 return (0);
1512 /*
1513 * POSIX record locking dictates that any close releases ALL
1514 * locks owned by this process. This is handled by setting
1515 * a flag in the unlock to free ONLY locks obeying POSIX
1516 * semantics, and not to free BSD-style file locks.
1517 * If the descriptor was in a message, POSIX-style locks
1518 * aren't passed with the descriptor.
1519 */
1520 if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1521 fp->f_type == DTYPE_VNODE) {
1522 lf.l_whence = SEEK_SET;
1523 lf.l_start = 0;
1524 lf.l_len = 0;
1525 lf.l_type = F_UNLCK;
1526 vp = (struct vnode *)fp->f_data;
1527 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1528 F_UNLCK, &lf, F_POSIX);
1529 }
1530 return (fdrop(fp, td));
1531}
1532
1533/*
1534 * Drop reference on struct file passed in, may call closef if the
1535 * reference hits zero.
1536 */
1537int
1538fdrop(fp, td)
1539 struct file *fp;
1540 struct thread *td;
1541{
1542
1543 FILE_LOCK(fp);
1544 return (fdrop_locked(fp, td));
1545}
1546
1547/*
1548 * Extract the file pointer associated with the specified descriptor for
1549 * the current user process.
1550 *
1551 * If the descriptor doesn't exist, EBADF is returned.
1552 *
1553 * If the descriptor exists but doesn't match 'flags' then
1554 * return EBADF for read attempts and EINVAL for write attempts.
1555 *
1556 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1557 * It should be droped with fdrop().
1558 * If it is not set, then the refcount will not be bumped however the
1559 * thread's filedesc struct will be returned locked (for fgetsock).
1560 *
1561 * If an error occured the non-zero error is returned and *fpp is set to NULL.
1562 * Otherwise *fpp is set and zero is returned.
1563 */
1564static __inline
1565int
1566_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1567{
1568 struct filedesc *fdp;
1569 struct file *fp;
1570
1571 *fpp = NULL;
1572 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1573 return(EBADF);
1574 FILEDESC_LOCK(fdp);
1575 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1576 FILEDESC_UNLOCK(fdp);
1577 return(EBADF);
1578 }
1579
1580 /*
1581 * Note: FREAD failures returns EBADF to maintain backwards
1582 * compatibility with what routines returned before.
1583 *
1584 * Only one flag, or 0, may be specified.
1585 */
1586 if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1587 FILEDESC_UNLOCK(fdp);
1588 return(EBADF);
1589 }
1590 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1591 FILEDESC_UNLOCK(fdp);
1592 return(EINVAL);
1593 }
1594 if (hold) {
1595 fhold(fp);
1596 FILEDESC_UNLOCK(fdp);
1597 }
1598 *fpp = fp;
1599 return(0);
1600}
1601
1602int
1603fget(struct thread *td, int fd, struct file **fpp)
1604{
1605 return(_fget(td, fd, fpp, 0, 1));
1606}
1607
1608int
1609fget_read(struct thread *td, int fd, struct file **fpp)
1610{
1611 return(_fget(td, fd, fpp, FREAD, 1));
1612}
1613
1614int
1615fget_write(struct thread *td, int fd, struct file **fpp)
1616{
1617 return(_fget(td, fd, fpp, FWRITE, 1));
1618}
1619
1620/*
1621 * Like fget() but loads the underlying vnode, or returns an error if
1622 * the descriptor does not represent a vnode. Note that pipes use vnodes
1623 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1624 * error). The returned vnode will be vref()d.
1625 */
1626
1627static __inline
1628int
1629_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1630{
1631 struct file *fp;
1632 int error;
1633
1634 *vpp = NULL;
1635 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1636 return (error);
1637 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1638 error = EINVAL;
1639 } else {
1640 *vpp = (struct vnode *)fp->f_data;
1641 vref(*vpp);
1642 }
1643 FILEDESC_UNLOCK(td->td_proc->p_fd);
1644 return (error);
1645}
1646
1647int
1648fgetvp(struct thread *td, int fd, struct vnode **vpp)
1649{
1650 return(_fgetvp(td, fd, vpp, 0));
1651}
1652
1653int
1654fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1655{
1656 return(_fgetvp(td, fd, vpp, FREAD));
1657}
1658
1659int
1660fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1661{
1662 return(_fgetvp(td, fd, vpp, FWRITE));
1663}
1664
1665/*
1666 * Like fget() but loads the underlying socket, or returns an error if
1667 * the descriptor does not represent a socket.
1668 *
1669 * We bump the ref count on the returned socket. XXX Also obtain the SX lock in
1670 * the future.
1671 */
1672int
1673fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1674{
1675 struct file *fp;
1676 int error;
1677
1678 *spp = NULL;
1679 if (fflagp)
1680 *fflagp = 0;
1681 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1682 return (error);
1683 if (fp->f_type != DTYPE_SOCKET) {
1684 error = ENOTSOCK;
1685 } else {
1686 *spp = (struct socket *)fp->f_data;
1687 if (fflagp)
1688 *fflagp = fp->f_flag;
1689 soref(*spp);
1690 }
1691 FILEDESC_UNLOCK(td->td_proc->p_fd);
1692 return(error);
1693}
1694
1695/*
1696 * Drop the reference count on the the socket and XXX release the SX lock in
1697 * the future. The last reference closes the socket.
1698 */
1699void
1700fputsock(struct socket *so)
1701{
1702 sorele(so);
1703}
1704
1705/*
1706 * Drop reference on struct file passed in, may call closef if the
1707 * reference hits zero.
1708 * Expects struct file locked, and will unlock it.
1709 */
1710int
1711fdrop_locked(fp, td)
1712 struct file *fp;
1713 struct thread *td;
1714{
1715 struct flock lf;
1716 struct vnode *vp;
1717 int error;
1718
1719 FILE_LOCK_ASSERT(fp, MA_OWNED);
1720
1721 if (--fp->f_count > 0) {
1722 FILE_UNLOCK(fp);
1723 return (0);
1724 }
1725 mtx_lock(&Giant);
1726 if (fp->f_count < 0)
1727 panic("fdrop: count < 0");
1728 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1729 lf.l_whence = SEEK_SET;
1730 lf.l_start = 0;
1731 lf.l_len = 0;
1732 lf.l_type = F_UNLCK;
1733 vp = (struct vnode *)fp->f_data;
1734 FILE_UNLOCK(fp);
1735 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1736 } else
1737 FILE_UNLOCK(fp);
1738 if (fp->f_ops != &badfileops)
1739 error = fo_close(fp, td);
1740 else
1741 error = 0;
1742 ffree(fp);
1743 mtx_unlock(&Giant);
1744 return (error);
1745}
1746
1747/*
1748 * Apply an advisory lock on a file descriptor.
1749 *
1750 * Just attempt to get a record lock of the requested type on
1751 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1752 */
1753#ifndef _SYS_SYSPROTO_H_
1754struct flock_args {
1755 int fd;
1756 int how;
1757};
1758#endif
1759/*
1760 * MPSAFE
1761 */
1762/* ARGSUSED */
1763int
1764flock(td, uap)
1765 struct thread *td;
1766 register struct flock_args *uap;
1767{
1768 struct file *fp;
1769 struct vnode *vp;
1770 struct flock lf;
1771 int error;
1772
1773 if ((error = fget(td, uap->fd, &fp)) != 0)
1774 return (error);
1775 if (fp->f_type != DTYPE_VNODE) {
1776 fdrop(fp, td);
1777 return (EOPNOTSUPP);
1778 }
1779
1780 mtx_lock(&Giant);
1781 vp = (struct vnode *)fp->f_data;
1782 lf.l_whence = SEEK_SET;
1783 lf.l_start = 0;
1784 lf.l_len = 0;
1785 if (uap->how & LOCK_UN) {
1786 lf.l_type = F_UNLCK;
1787 FILE_LOCK(fp);
1788 fp->f_flag &= ~FHASLOCK;
1789 FILE_UNLOCK(fp);
1790 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1791 goto done2;
1792 }
1793 if (uap->how & LOCK_EX)
1794 lf.l_type = F_WRLCK;
1795 else if (uap->how & LOCK_SH)
1796 lf.l_type = F_RDLCK;
1797 else {
1798 error = EBADF;
1799 goto done2;
1800 }
1801 FILE_LOCK(fp);
1802 fp->f_flag |= FHASLOCK;
1803 FILE_UNLOCK(fp);
1804 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1805 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1806done2:
1807 fdrop(fp, td);
1808 mtx_unlock(&Giant);
1809 return (error);
1810}
1811
1812/*
1813 * File Descriptor pseudo-device driver (/dev/fd/).
1814 *
1815 * Opening minor device N dup()s the file (if any) connected to file
1816 * descriptor N belonging to the calling process. Note that this driver
1817 * consists of only the ``open()'' routine, because all subsequent
1818 * references to this file will be direct to the other driver.
1819 */
1820/* ARGSUSED */
1821static int
1822fdopen(dev, mode, type, td)
1823 dev_t dev;
1824 int mode, type;
1825 struct thread *td;
1826{
1827
1828 /*
1829 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1830 * the file descriptor being sought for duplication. The error
1831 * return ensures that the vnode for this device will be released
1832 * by vn_open. Open will detect this special error and take the
1833 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1834 * will simply report the error.
1835 */
1836 td->td_dupfd = dev2unit(dev);
1837 return (ENODEV);
1838}
1839
1840/*
1841 * Duplicate the specified descriptor to a free descriptor.
1842 */
1843int
1844dupfdopen(td, fdp, indx, dfd, mode, error)
1845 struct thread *td;
1846 struct filedesc *fdp;
1847 int indx, dfd;
1848 int mode;
1849 int error;
1850{
1851 register struct file *wfp;
1852 struct file *fp;
1853
1854 /*
1855 * If the to-be-dup'd fd number is greater than the allowed number
1856 * of file descriptors, or the fd to be dup'd has already been
1857 * closed, then reject.
1858 */
1859 FILEDESC_LOCK(fdp);
1860 if ((u_int)dfd >= fdp->fd_nfiles ||
1861 (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1862 FILEDESC_UNLOCK(fdp);
1863 return (EBADF);
1864 }
1865
1866 /*
1867 * There are two cases of interest here.
1868 *
1869 * For ENODEV simply dup (dfd) to file descriptor
1870 * (indx) and return.
1871 *
1872 * For ENXIO steal away the file structure from (dfd) and
1873 * store it in (indx). (dfd) is effectively closed by
1874 * this operation.
1875 *
1876 * Any other error code is just returned.
1877 */
1878 switch (error) {
1879 case ENODEV:
1880 /*
1881 * Check that the mode the file is being opened for is a
1882 * subset of the mode of the existing descriptor.
1883 */
1884 FILE_LOCK(wfp);
1885 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1886 FILE_UNLOCK(wfp);
1887 FILEDESC_UNLOCK(fdp);
1888 return (EACCES);
1889 }
1890 fp = fdp->fd_ofiles[indx];
1891#if 0
1892 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1893 (void) munmapfd(td, indx);
1894#endif
1895 fdp->fd_ofiles[indx] = wfp;
1896 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1897 fhold_locked(wfp);
1898 FILE_UNLOCK(wfp);
1899 if (indx > fdp->fd_lastfile)
1900 fdp->fd_lastfile = indx;
1901 if (fp != NULL)
1902 FILE_LOCK(fp);
1903 FILEDESC_UNLOCK(fdp);
1904 /*
1905 * we now own the reference to fp that the ofiles[] array
1906 * used to own. Release it.
1907 */
1908 if (fp != NULL)
1909 fdrop_locked(fp, td);
1910 return (0);
1911
1912 case ENXIO:
1913 /*
1914 * Steal away the file pointer from dfd, and stuff it into indx.
1915 */
1916 fp = fdp->fd_ofiles[indx];
1917#if 0
1918 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1919 (void) munmapfd(td, indx);
1920#endif
1921 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1922 fdp->fd_ofiles[dfd] = NULL;
1923 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1924 fdp->fd_ofileflags[dfd] = 0;
1925
1926 /*
1927 * Complete the clean up of the filedesc structure by
1928 * recomputing the various hints.
1929 */
1930 if (indx > fdp->fd_lastfile) {
1931 fdp->fd_lastfile = indx;
1932 } else {
1933 while (fdp->fd_lastfile > 0 &&
1934 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1935 fdp->fd_lastfile--;
1936 }
1937 if (dfd < fdp->fd_freefile)
1938 fdp->fd_freefile = dfd;
1939 }
1940 if (fp != NULL)
1941 FILE_LOCK(fp);
1942 FILEDESC_UNLOCK(fdp);
1943
1944 /*
1945 * we now own the reference to fp that the ofiles[] array
1946 * used to own. Release it.
1947 */
1948 if (fp != NULL)
1949 fdrop_locked(fp, td);
1950 return (0);
1951
1952 default:
1953 FILEDESC_UNLOCK(fdp);
1954 return (error);
1955 }
1956 /* NOTREACHED */
1957}
1958
1959/*
1960 * Get file structures.
1961 */
1962static int
1963sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1964{
1965 int error;
1966 struct file *fp;
1967
1968 sx_slock(&filelist_lock);
1969 if (!req->oldptr) {
1970 /*
1971 * overestimate by 10 files
1972 */
1973 error = SYSCTL_OUT(req, 0, sizeof(filehead) +
1974 (nfiles + 10) * sizeof(struct file));
1975 sx_sunlock(&filelist_lock);
1976 return (error);
1977 }
1978
1979 error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1980 if (error) {
1981 sx_sunlock(&filelist_lock);
1982 return (error);
1983 }
1984
1985 /*
1986 * followed by an array of file structures
1987 */
1988 LIST_FOREACH(fp, &filehead, f_list) {
1989 error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1990 if (error) {
1991 sx_sunlock(&filelist_lock);
1992 return (error);
1993 }
1994 }
1995 sx_sunlock(&filelist_lock);
1996 return (0);
1997}
1998
1999SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2000 0, 0, sysctl_kern_file, "S,file", "Entire file table");
2001
2002SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2003 &maxfilesperproc, 0, "Maximum files allowed open per process");
2004
2005SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2006 &maxfiles, 0, "Maximum number of files");
2007
2008SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2009 &nfiles, 0, "System-wide number of open files");
2010
2011static void
2012fildesc_drvinit(void *unused)
2013{
2014 dev_t dev;
2015
2016 dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2017 make_dev_alias(dev, "stdin");
2018 dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2019 make_dev_alias(dev, "stdout");
2020 dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2021 make_dev_alias(dev, "stderr");
2022 if (!devfs_present) {
2023 int fd;
2024
2025 for (fd = 3; fd < NUMFDESC; fd++)
2026 make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2027 "fd/%d", fd);
2028 }
2029}
2030
2031struct fileops badfileops = {
2032 badfo_readwrite,
2033 badfo_readwrite,
2034 badfo_ioctl,
2035 badfo_poll,
2036 badfo_kqfilter,
2037 badfo_stat,
2038 badfo_close
2039};
2040
2041static int
2042badfo_readwrite(fp, uio, cred, flags, td)
2043 struct file *fp;
2044 struct uio *uio;
2045 struct ucred *cred;
2046 struct thread *td;
2047 int flags;
2048{
2049
2050 return (EBADF);
2051}
2052
2053static int
2054badfo_ioctl(fp, com, data, td)
2055 struct file *fp;
2056 u_long com;
2057 caddr_t data;
2058 struct thread *td;
2059{
2060
2061 return (EBADF);
2062}
2063
2064static int
2065badfo_poll(fp, events, cred, td)
2066 struct file *fp;
2067 int events;
2068 struct ucred *cred;
2069 struct thread *td;
2070{
2071
2072 return (0);
2073}
2074
2075static int
2076badfo_kqfilter(fp, kn)
2077 struct file *fp;
2078 struct knote *kn;
2079{
2080
2081 return (0);
2082}
2083
2084static int
2085badfo_stat(fp, sb, td)
2086 struct file *fp;
2087 struct stat *sb;
2088 struct thread *td;
2089{
2090
2091 return (EBADF);
2092}
2093
2094static int
2095badfo_close(fp, td)
2096 struct file *fp;
2097 struct thread *td;
2098{
2099
2100 return (EBADF);
2101}
2102
2103SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2104 fildesc_drvinit,NULL)
2105
2106static void filelistinit __P((void *));
2107SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2108
2109/* ARGSUSED*/
2110static void
2111filelistinit(dummy)
2112 void *dummy;
2113{
1156}
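
/*
 * Descriptive note (added): ffree() above undoes the file allocation earlier
 * in this file.  It expects the final reference to be gone already
 * (f_count == 0), unlinks the file from filehead, drops the credential
 * reference, and returns the storage to file_zone.
 */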
1157
1158/*
1159 * Build a new filedesc structure.
1160 */
1161struct filedesc *
1162fdinit(td)
1163 struct thread *td;
1164{
1165 register struct filedesc0 *newfdp;
1166 register struct filedesc *fdp = td->td_proc->p_fd;
1167
1168 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1169 M_FILEDESC, M_WAITOK | M_ZERO);
1170 mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF);
1171 FILEDESC_LOCK(&newfdp->fd_fd);
1172 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1173 if (newfdp->fd_fd.fd_cdir)
1174 VREF(newfdp->fd_fd.fd_cdir);
1175 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1176 if (newfdp->fd_fd.fd_rdir)
1177 VREF(newfdp->fd_fd.fd_rdir);
1178 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1179 if (newfdp->fd_fd.fd_jdir)
1180 VREF(newfdp->fd_fd.fd_jdir);
1181
1182 /* Create the file descriptor table. */
1183 newfdp->fd_fd.fd_refcnt = 1;
1184 newfdp->fd_fd.fd_cmask = cmask;
1185 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1186 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1187 newfdp->fd_fd.fd_nfiles = NDFILE;
1188 newfdp->fd_fd.fd_knlistsize = -1;
1189 FILEDESC_UNLOCK(&newfdp->fd_fd);
1190
1191 return (&newfdp->fd_fd);
1192}
1193
1194/*
1195 * Share a filedesc structure.
1196 */
1197struct filedesc *
1198fdshare(p)
1199 struct proc *p;
1200{
1201 FILEDESC_LOCK(p->p_fd);
1202 p->p_fd->fd_refcnt++;
1203 FILEDESC_UNLOCK(p->p_fd);
1204 return (p->p_fd);
1205}
1206
1207/*
1208 * Copy a filedesc structure.
1209 */
1210struct filedesc *
1211fdcopy(td)
1212 struct thread *td;
1213{
1214 register struct filedesc *newfdp, *fdp = td->td_proc->p_fd;
1215 register struct file **fpp;
1216 register int i, j;
1217
1218 /* Certain daemons might not have file descriptors. */
1219 if (fdp == NULL)
1220 return (NULL);
1221
1222 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1223
1224 FILEDESC_UNLOCK(fdp);
1225 MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1226 M_FILEDESC, M_WAITOK);
1227 FILEDESC_LOCK(fdp);
1228 bcopy(fdp, newfdp, sizeof(struct filedesc));
1229 FILEDESC_UNLOCK(fdp);
1230 bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1231 mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF);
1232 if (newfdp->fd_cdir)
1233 VREF(newfdp->fd_cdir);
1234 if (newfdp->fd_rdir)
1235 VREF(newfdp->fd_rdir);
1236 if (newfdp->fd_jdir)
1237 VREF(newfdp->fd_jdir);
1238 newfdp->fd_refcnt = 1;
1239
1240 /*
1241 * If the number of open files fits in the internal arrays
1242 * of the open file structure, use them, otherwise allocate
1243 * additional memory for the number of descriptors currently
1244 * in use.
1245 */
1246 FILEDESC_LOCK(fdp);
1247 newfdp->fd_lastfile = fdp->fd_lastfile;
1248 newfdp->fd_nfiles = fdp->fd_nfiles;
1249 if (newfdp->fd_lastfile < NDFILE) {
1250 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1251 newfdp->fd_ofileflags =
1252 ((struct filedesc0 *) newfdp)->fd_dfileflags;
1253 i = NDFILE;
1254 } else {
1255 /*
1256 * Compute the smallest multiple of NDEXTENT needed
1257 * for the file descriptors currently in use,
1258 * allowing the table to shrink.
1259 */
1260retry:
1261 i = newfdp->fd_nfiles;
1262 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1263 i /= 2;
1264 FILEDESC_UNLOCK(fdp);
1265 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1266 M_FILEDESC, M_WAITOK);
1267 FILEDESC_LOCK(fdp);
1268 newfdp->fd_lastfile = fdp->fd_lastfile;
1269 newfdp->fd_nfiles = fdp->fd_nfiles;
1270 j = newfdp->fd_nfiles;
1271 while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1272 j /= 2;
1273 if (i != j) {
1274 /*
1275 * The size of the original table has changed.
1276 * Go over once again.
1277 */
1278 FILEDESC_UNLOCK(fdp);
1279 FREE(newfdp->fd_ofiles, M_FILEDESC);
1280 FILEDESC_LOCK(fdp);
1281 newfdp->fd_lastfile = fdp->fd_lastfile;
1282 newfdp->fd_nfiles = fdp->fd_nfiles;
1283 goto retry;
1284 }
1285 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1286 }
1287 newfdp->fd_nfiles = i;
1288 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1289 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1290
1291 /*
1292 * kq descriptors cannot be copied.
1293 */
1294 if (newfdp->fd_knlistsize != -1) {
1295 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1296 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1297 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1298 *fpp = NULL;
1299 if (i < newfdp->fd_freefile)
1300 newfdp->fd_freefile = i;
1301 }
1302 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1303 newfdp->fd_lastfile--;
1304 }
1305 newfdp->fd_knlist = NULL;
1306 newfdp->fd_knlistsize = -1;
1307 newfdp->fd_knhash = NULL;
1308 newfdp->fd_knhashmask = 0;
1309 }
1310
1311 fpp = newfdp->fd_ofiles;
1312 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1313 if (*fpp != NULL) {
1314 fhold(*fpp);
1315 }
1316 }
1317 return (newfdp);
1318}
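
/*
 * Sizing example (illustrative, added): the retry loop above repeatedly
 * halves the copy's table size while it is still larger than both
 * 2 * NDEXTENT and twice the highest descriptor in use, so a parent table
 * that grew to hold a temporary burst of descriptors shrinks back down in
 * the copy.
 */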
1319
1320/*
1321 * Release a filedesc structure.
1322 */
1323void
1324fdfree(td)
1325 struct thread *td;
1326{
1327 register struct filedesc *fdp;
1328 struct file **fpp;
1329 register int i;
1330
1331 fdp = td->td_proc->p_fd;
1332 /* Certain daemons might not have file descriptors. */
1333 if (fdp == NULL)
1334 return;
1335
1336 FILEDESC_LOCK(fdp);
1337 if (--fdp->fd_refcnt > 0) {
1338 FILEDESC_UNLOCK(fdp);
1339 return;
1340 }
1341 /*
1342	 * We are the last reference to the structure, so we can
1343	 * safely assume it will not change out from under us.
1344 */
1345 FILEDESC_UNLOCK(fdp);
1346 fpp = fdp->fd_ofiles;
1347 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1348 if (*fpp)
1349 (void) closef(*fpp, td);
1350 }
1351
1352 PROC_LOCK(td->td_proc);
1353 td->td_proc->p_fd = NULL;
1354 PROC_UNLOCK(td->td_proc);
1355
1356 if (fdp->fd_nfiles > NDFILE)
1357 FREE(fdp->fd_ofiles, M_FILEDESC);
1358 if (fdp->fd_cdir)
1359 vrele(fdp->fd_cdir);
1360 if (fdp->fd_rdir)
1361 vrele(fdp->fd_rdir);
1362 if (fdp->fd_jdir)
1363 vrele(fdp->fd_jdir);
1364 if (fdp->fd_knlist)
1365 FREE(fdp->fd_knlist, M_KQUEUE);
1366 if (fdp->fd_knhash)
1367 FREE(fdp->fd_knhash, M_KQUEUE);
1368 mtx_destroy(&fdp->fd_mtx);
1369 FREE(fdp, M_FILEDESC);
1370}
1371
1372/*
1373 * For setugid programs, we don't want people to use that setugidness
1374 * to generate error messages which write to a file which would otherwise
1375 * be off-limits to the process.
1376 *
1377 * This is a gross hack to plug the hole. A better solution would involve
1378 * a special vop or other form of generalized access control mechanism. We
1379 * go ahead and just reject all procfs file systems accesses as dangerous.
1380 *
1381 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1382 * sufficient.  We also don't check for setugidness since we know we are.
1383 */
1384static int
1385is_unsafe(struct file *fp)
1386{
1387 if (fp->f_type == DTYPE_VNODE &&
1388 ((struct vnode *)(fp->f_data))->v_tag == VT_PROCFS)
1389 return (1);
1390 return (0);
1391}
1392
1393/*
1394 * Make this setugid thing safe, if at all possible.
1395 */
1396void
1397setugidsafety(td)
1398 struct thread *td;
1399{
1400 struct filedesc *fdp = td->td_proc->p_fd;
1401 register int i;
1402
1403 /* Certain daemons might not have file descriptors. */
1404 if (fdp == NULL)
1405 return;
1406
1407 /*
1408 * note: fdp->fd_ofiles may be reallocated out from under us while
1409 * we are blocked in a close. Be careful!
1410 */
1411 FILEDESC_LOCK(fdp);
1412 for (i = 0; i <= fdp->fd_lastfile; i++) {
1413 if (i > 2)
1414 break;
1415 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1416 struct file *fp;
1417
1418#if 0
1419 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1420 (void) munmapfd(td, i);
1421#endif
1422 if (i < fdp->fd_knlistsize) {
1423 FILEDESC_UNLOCK(fdp);
1424 knote_fdclose(td, i);
1425 FILEDESC_LOCK(fdp);
1426 }
1427 /*
1428 * NULL-out descriptor prior to close to avoid
1429 * a race while close blocks.
1430 */
1431 fp = fdp->fd_ofiles[i];
1432 fdp->fd_ofiles[i] = NULL;
1433 fdp->fd_ofileflags[i] = 0;
1434 if (i < fdp->fd_freefile)
1435 fdp->fd_freefile = i;
1436 FILEDESC_UNLOCK(fdp);
1437 (void) closef(fp, td);
1438 FILEDESC_LOCK(fdp);
1439 }
1440 }
1441 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1442 fdp->fd_lastfile--;
1443 FILEDESC_UNLOCK(fdp);
1444}
1445
1446/*
1447 * Close any files on exec?
1448 */
1449void
1450fdcloseexec(td)
1451 struct thread *td;
1452{
1453 struct filedesc *fdp = td->td_proc->p_fd;
1454 register int i;
1455
1456 /* Certain daemons might not have file descriptors. */
1457 if (fdp == NULL)
1458 return;
1459
1460 FILEDESC_LOCK(fdp);
1461
1462 /*
1463 * We cannot cache fd_ofiles or fd_ofileflags since operations
1464 * may block and rip them out from under us.
1465 */
1466 for (i = 0; i <= fdp->fd_lastfile; i++) {
1467 if (fdp->fd_ofiles[i] != NULL &&
1468 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1469 struct file *fp;
1470
1471#if 0
1472 if (fdp->fd_ofileflags[i] & UF_MAPPED)
1473 (void) munmapfd(td, i);
1474#endif
1475 if (i < fdp->fd_knlistsize) {
1476 FILEDESC_UNLOCK(fdp);
1477 knote_fdclose(td, i);
1478 FILEDESC_LOCK(fdp);
1479 }
1480 /*
1481 * NULL-out descriptor prior to close to avoid
1482 * a race while close blocks.
1483 */
1484 fp = fdp->fd_ofiles[i];
1485 fdp->fd_ofiles[i] = NULL;
1486 fdp->fd_ofileflags[i] = 0;
1487 if (i < fdp->fd_freefile)
1488 fdp->fd_freefile = i;
1489 FILEDESC_UNLOCK(fdp);
1490 (void) closef(fp, td);
1491 FILEDESC_LOCK(fdp);
1492 }
1493 }
1494 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1495 fdp->fd_lastfile--;
1496 FILEDESC_UNLOCK(fdp);
1497}
1498
1499/*
1500 * Internal form of close.
1501 * Decrement reference count on file structure.
1502 * Note: td may be NULL when closing a file
1503 * that was being passed in a message.
1504 */
1505int
1506closef(fp, td)
1507 register struct file *fp;
1508 register struct thread *td;
1509{
1510 struct vnode *vp;
1511 struct flock lf;
1512
1513 if (fp == NULL)
1514 return (0);
1515 /*
1516 * POSIX record locking dictates that any close releases ALL
1517 * locks owned by this process. This is handled by setting
1518 * a flag in the unlock to free ONLY locks obeying POSIX
1519 * semantics, and not to free BSD-style file locks.
1520 * If the descriptor was in a message, POSIX-style locks
1521 * aren't passed with the descriptor.
1522 */
1523 if (td && (td->td_proc->p_flag & P_ADVLOCK) &&
1524 fp->f_type == DTYPE_VNODE) {
1525 lf.l_whence = SEEK_SET;
1526 lf.l_start = 0;
1527 lf.l_len = 0;
1528 lf.l_type = F_UNLCK;
1529 vp = (struct vnode *)fp->f_data;
1530 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1531 F_UNLCK, &lf, F_POSIX);
1532 }
1533 return (fdrop(fp, td));
1534}
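
/*
 * Descriptive note (added): two kinds of lock owners are involved here.
 * POSIX (fcntl-style) locks are owned by the process leader and are released
 * on every close above, while flock-style locks are owned by the struct file
 * itself and are only released in fdrop_locked() when the last reference
 * goes away (see the FHASLOCK handling there).
 */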
1535
1536/*
1537 * Drop reference on struct file passed in, may call closef if the
1538 * reference hits zero.
1539 */
1540int
1541fdrop(fp, td)
1542 struct file *fp;
1543 struct thread *td;
1544{
1545
1546 FILE_LOCK(fp);
1547 return (fdrop_locked(fp, td));
1548}
1549
1550/*
1551 * Extract the file pointer associated with the specified descriptor for
1552 * the current user process.
1553 *
1554 * If the descriptor doesn't exist, EBADF is returned.
1555 *
1556 * If the descriptor exists but doesn't match 'flags' then
1557 * return EBADF for read attempts and EINVAL for write attempts.
1558 *
1559 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1560 * It should be dropped with fdrop().
1561 * If it is not set, the refcount will not be bumped; however, the
1562 * thread's filedesc struct will be returned locked (for fgetsock).
1563 *
1564 * If an error occurred, the non-zero error is returned and *fpp is set to NULL.
1565 * Otherwise *fpp is set and zero is returned.
1566 */
1567static __inline
1568int
1569_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1570{
1571 struct filedesc *fdp;
1572 struct file *fp;
1573
1574 *fpp = NULL;
1575 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1576 return(EBADF);
1577 FILEDESC_LOCK(fdp);
1578 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1579 FILEDESC_UNLOCK(fdp);
1580 return(EBADF);
1581 }
1582
1583 /*
1584	 * Note: FREAD failures return EBADF to maintain backwards
1585 * compatibility with what routines returned before.
1586 *
1587 * Only one flag, or 0, may be specified.
1588 */
1589 if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1590 FILEDESC_UNLOCK(fdp);
1591 return(EBADF);
1592 }
1593 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1594 FILEDESC_UNLOCK(fdp);
1595 return(EINVAL);
1596 }
1597 if (hold) {
1598 fhold(fp);
1599 FILEDESC_UNLOCK(fdp);
1600 }
1601 *fpp = fp;
1602 return(0);
1603}
1604
1605int
1606fget(struct thread *td, int fd, struct file **fpp)
1607{
1608 return(_fget(td, fd, fpp, 0, 1));
1609}
1610
1611int
1612fget_read(struct thread *td, int fd, struct file **fpp)
1613{
1614 return(_fget(td, fd, fpp, FREAD, 1));
1615}
1616
1617int
1618fget_write(struct thread *td, int fd, struct file **fpp)
1619{
1620 return(_fget(td, fd, fpp, FWRITE, 1));
1621}
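
/*
 * Usage sketch (illustrative, not part of the original source): a caller of
 * the hold != 0 wrappers above owns a reference and must release it:
 *
 *	struct file *fp;
 *	int error;
 *
 *	if ((error = fget_write(td, fd, &fp)) != 0)
 *		return (error);
 *	... operate on fp ...
 *	fdrop(fp, td);
 *
 * The hold == 0 path, used by _fgetvp() and fgetsock() below, instead
 * returns with the thread's filedesc still locked so those routines can
 * take their own reference (vref()/soref()) before unlocking.
 */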
1622
1623/*
1624 * Like fget() but loads the underlying vnode, or returns an error if
1625 * the descriptor does not represent a vnode. Note that pipes use vnodes
1626 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1627 * error). The returned vnode will be vref()d.
1628 */
1629
1630static __inline
1631int
1632_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1633{
1634 struct file *fp;
1635 int error;
1636
1637 *vpp = NULL;
1638 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1639 return (error);
1640 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1641 error = EINVAL;
1642 } else {
1643 *vpp = (struct vnode *)fp->f_data;
1644 vref(*vpp);
1645 }
1646 FILEDESC_UNLOCK(td->td_proc->p_fd);
1647 return (error);
1648}
1649
1650int
1651fgetvp(struct thread *td, int fd, struct vnode **vpp)
1652{
1653 return(_fgetvp(td, fd, vpp, 0));
1654}
1655
1656int
1657fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1658{
1659 return(_fgetvp(td, fd, vpp, FREAD));
1660}
1661
1662int
1663fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1664{
1665 return(_fgetvp(td, fd, vpp, FWRITE));
1666}
1667
1668/*
1669 * Like fget() but loads the underlying socket, or returns an error if
1670 * the descriptor does not represent a socket.
1671 *
1672 * We bump the ref count on the returned socket. XXX Also obtain the SX lock in
1673 * the future.
1674 */
1675int
1676fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1677{
1678 struct file *fp;
1679 int error;
1680
1681 *spp = NULL;
1682 if (fflagp)
1683 *fflagp = 0;
1684 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1685 return (error);
1686 if (fp->f_type != DTYPE_SOCKET) {
1687 error = ENOTSOCK;
1688 } else {
1689 *spp = (struct socket *)fp->f_data;
1690 if (fflagp)
1691 *fflagp = fp->f_flag;
1692 soref(*spp);
1693 }
1694 FILEDESC_UNLOCK(td->td_proc->p_fd);
1695 return(error);
1696}
1697
1698/*
1699 * Drop the reference count on the socket and XXX release the SX lock in
1700 * the future. The last reference closes the socket.
1701 */
1702void
1703fputsock(struct socket *so)
1704{
1705 sorele(so);
1706}
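
/*
 * Pairing note (added): a socket obtained through fgetsock() carries a
 * soref() reference and must be released with fputsock(); the final
 * sorele() closes the socket.
 */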
1707
1708/*
1709 * Drop reference on struct file passed in, may call closef if the
1710 * reference hits zero.
1711 * Expects struct file locked, and will unlock it.
1712 */
1713int
1714fdrop_locked(fp, td)
1715 struct file *fp;
1716 struct thread *td;
1717{
1718 struct flock lf;
1719 struct vnode *vp;
1720 int error;
1721
1722 FILE_LOCK_ASSERT(fp, MA_OWNED);
1723
1724 if (--fp->f_count > 0) {
1725 FILE_UNLOCK(fp);
1726 return (0);
1727 }
1728 mtx_lock(&Giant);
1729 if (fp->f_count < 0)
1730 panic("fdrop: count < 0");
1731 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1732 lf.l_whence = SEEK_SET;
1733 lf.l_start = 0;
1734 lf.l_len = 0;
1735 lf.l_type = F_UNLCK;
1736 vp = (struct vnode *)fp->f_data;
1737 FILE_UNLOCK(fp);
1738 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1739 } else
1740 FILE_UNLOCK(fp);
1741 if (fp->f_ops != &badfileops)
1742 error = fo_close(fp, td);
1743 else
1744 error = 0;
1745 ffree(fp);
1746 mtx_unlock(&Giant);
1747 return (error);
1748}
1749
1750/*
1751 * Apply an advisory lock on a file descriptor.
1752 *
1753 * Just attempt to get a record lock of the requested type on
1754 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1755 */
1756#ifndef _SYS_SYSPROTO_H_
1757struct flock_args {
1758 int fd;
1759 int how;
1760};
1761#endif
1762/*
1763 * MPSAFE
1764 */
1765/* ARGSUSED */
1766int
1767flock(td, uap)
1768 struct thread *td;
1769 register struct flock_args *uap;
1770{
1771 struct file *fp;
1772 struct vnode *vp;
1773 struct flock lf;
1774 int error;
1775
1776 if ((error = fget(td, uap->fd, &fp)) != 0)
1777 return (error);
1778 if (fp->f_type != DTYPE_VNODE) {
1779 fdrop(fp, td);
1780 return (EOPNOTSUPP);
1781 }
1782
1783 mtx_lock(&Giant);
1784 vp = (struct vnode *)fp->f_data;
1785 lf.l_whence = SEEK_SET;
1786 lf.l_start = 0;
1787 lf.l_len = 0;
1788 if (uap->how & LOCK_UN) {
1789 lf.l_type = F_UNLCK;
1790 FILE_LOCK(fp);
1791 fp->f_flag &= ~FHASLOCK;
1792 FILE_UNLOCK(fp);
1793 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1794 goto done2;
1795 }
1796 if (uap->how & LOCK_EX)
1797 lf.l_type = F_WRLCK;
1798 else if (uap->how & LOCK_SH)
1799 lf.l_type = F_RDLCK;
1800 else {
1801 error = EBADF;
1802 goto done2;
1803 }
1804 FILE_LOCK(fp);
1805 fp->f_flag |= FHASLOCK;
1806 FILE_UNLOCK(fp);
1807 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1808 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1809done2:
1810 fdrop(fp, td);
1811 mtx_unlock(&Giant);
1812 return (error);
1813}
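
/*
 * Example (assumed typical userland usage, not taken from this file): a
 * non-blocking exclusive request such as flock(fd, LOCK_EX | LOCK_NB) maps
 * here to an F_WRLCK/F_SETLK VOP_ADVLOCK() call with F_FLOCK but without
 * F_WAIT, so it is expected to fail immediately rather than block when the
 * lock is contended.
 */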
1814
1815/*
1816 * File Descriptor pseudo-device driver (/dev/fd/).
1817 *
1818 * Opening minor device N dup()s the file (if any) connected to file
1819 * descriptor N belonging to the calling process. Note that this driver
1820 * consists of only the ``open()'' routine, because all subsequent
1822 * references to this file will go directly to the other driver.
1822 */
1823/* ARGSUSED */
1824static int
1825fdopen(dev, mode, type, td)
1826 dev_t dev;
1827 int mode, type;
1828 struct thread *td;
1829{
1830
1831 /*
1832 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1833	 * file descriptor being sought for duplication. The error
1834 * return ensures that the vnode for this device will be released
1835 * by vn_open. Open will detect this special error and take the
1836 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1837 * will simply report the error.
1838 */
1839 td->td_dupfd = dev2unit(dev);
1840 return (ENODEV);
1841}
1842
1843/*
1844 * Duplicate the specified descriptor to a free descriptor.
1845 */
1846int
1847dupfdopen(td, fdp, indx, dfd, mode, error)
1848 struct thread *td;
1849 struct filedesc *fdp;
1850 int indx, dfd;
1851 int mode;
1852 int error;
1853{
1854 register struct file *wfp;
1855 struct file *fp;
1856
1857 /*
1858 * If the to-be-dup'd fd number is greater than the allowed number
1859 * of file descriptors, or the fd to be dup'd has already been
1860 * closed, then reject.
1861 */
1862 FILEDESC_LOCK(fdp);
1863 if ((u_int)dfd >= fdp->fd_nfiles ||
1864 (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1865 FILEDESC_UNLOCK(fdp);
1866 return (EBADF);
1867 }
1868
1869 /*
1870 * There are two cases of interest here.
1871 *
1872 * For ENODEV simply dup (dfd) to file descriptor
1873 * (indx) and return.
1874 *
1875 * For ENXIO steal away the file structure from (dfd) and
1876 * store it in (indx). (dfd) is effectively closed by
1877 * this operation.
1878 *
1879 * Any other error code is just returned.
1880 */
1881 switch (error) {
1882 case ENODEV:
1883 /*
1884 * Check that the mode the file is being opened for is a
1885 * subset of the mode of the existing descriptor.
1886 */
1887 FILE_LOCK(wfp);
1888 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
1889 FILE_UNLOCK(wfp);
1890 FILEDESC_UNLOCK(fdp);
1891 return (EACCES);
1892 }
1893 fp = fdp->fd_ofiles[indx];
1894#if 0
1895 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1896 (void) munmapfd(td, indx);
1897#endif
1898 fdp->fd_ofiles[indx] = wfp;
1899 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1900 fhold_locked(wfp);
1901 FILE_UNLOCK(wfp);
1902 if (indx > fdp->fd_lastfile)
1903 fdp->fd_lastfile = indx;
1904 if (fp != NULL)
1905 FILE_LOCK(fp);
1906 FILEDESC_UNLOCK(fdp);
1907 /*
1908 * we now own the reference to fp that the ofiles[] array
1909 * used to own. Release it.
1910 */
1911 if (fp != NULL)
1912 fdrop_locked(fp, td);
1913 return (0);
1914
1915 case ENXIO:
1916 /*
1917 * Steal away the file pointer from dfd, and stuff it into indx.
1918 */
1919 fp = fdp->fd_ofiles[indx];
1920#if 0
1921 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
1922 (void) munmapfd(td, indx);
1923#endif
1924 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
1925 fdp->fd_ofiles[dfd] = NULL;
1926 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
1927 fdp->fd_ofileflags[dfd] = 0;
1928
1929 /*
1930 * Complete the clean up of the filedesc structure by
1931 * recomputing the various hints.
1932 */
1933 if (indx > fdp->fd_lastfile) {
1934 fdp->fd_lastfile = indx;
1935 } else {
1936 while (fdp->fd_lastfile > 0 &&
1937 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
1938 fdp->fd_lastfile--;
1939 }
1940 if (dfd < fdp->fd_freefile)
1941 fdp->fd_freefile = dfd;
1942 }
1943 if (fp != NULL)
1944 FILE_LOCK(fp);
1945 FILEDESC_UNLOCK(fdp);
1946
1947 /*
1948 * we now own the reference to fp that the ofiles[] array
1949 * used to own. Release it.
1950 */
1951 if (fp != NULL)
1952 fdrop_locked(fp, td);
1953 return (0);
1954
1955 default:
1956 FILEDESC_UNLOCK(fdp);
1957 return (error);
1958 }
1959 /* NOTREACHED */
1960}
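
/*
 * Flow sketch (added): open("/dev/fd/N") reaches fdopen() above, which
 * stashes N in td_dupfd and fails with ENODEV; the open path then calls
 * dupfdopen() with that error, landing in the ENODEV case, which simply
 * dups descriptor N into the newly allocated slot.
 */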
1961
1962/*
1963 * Get file structures.
1964 */
1965static int
1966sysctl_kern_file(SYSCTL_HANDLER_ARGS)
1967{
1968 int error;
1969 struct file *fp;
1970
1971 sx_slock(&filelist_lock);
1972 if (!req->oldptr) {
1973 /*
1974 * overestimate by 10 files
1975 */
1976 error = SYSCTL_OUT(req, 0, sizeof(filehead) +
1977 (nfiles + 10) * sizeof(struct file));
1978 sx_sunlock(&filelist_lock);
1979 return (error);
1980 }
1981
1982 error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead));
1983 if (error) {
1984 sx_sunlock(&filelist_lock);
1985 return (error);
1986 }
1987
1988 /*
1989 * followed by an array of file structures
1990 */
1991 LIST_FOREACH(fp, &filehead, f_list) {
1992 error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file));
1993 if (error) {
1994 sx_sunlock(&filelist_lock);
1995 return (error);
1996 }
1997 }
1998 sx_sunlock(&filelist_lock);
1999 return (0);
2000}
2001
2002SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
2003 0, 0, sysctl_kern_file, "S,file", "Entire file table");
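
/*
 * Reader's note (added): the handler above exports the filehead list head
 * followed by one struct file per open file; when no old buffer is supplied
 * it overestimates the required size by 10 files to allow for files opened
 * while the copy is in progress.
 */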
2004
2005SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
2006 &maxfilesperproc, 0, "Maximum files allowed open per process");
2007
2008SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
2009 &maxfiles, 0, "Maximum number of files");
2010
2011SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
2012 &nfiles, 0, "System-wide number of open files");
2013
2014static void
2015fildesc_drvinit(void *unused)
2016{
2017 dev_t dev;
2018
2019 dev = make_dev(&fildesc_cdevsw, 0, UID_BIN, GID_BIN, 0666, "fd/0");
2020 make_dev_alias(dev, "stdin");
2021 dev = make_dev(&fildesc_cdevsw, 1, UID_BIN, GID_BIN, 0666, "fd/1");
2022 make_dev_alias(dev, "stdout");
2023 dev = make_dev(&fildesc_cdevsw, 2, UID_BIN, GID_BIN, 0666, "fd/2");
2024 make_dev_alias(dev, "stderr");
2025 if (!devfs_present) {
2026 int fd;
2027
2028 for (fd = 3; fd < NUMFDESC; fd++)
2029 make_dev(&fildesc_cdevsw, fd, UID_BIN, GID_BIN, 0666,
2030 "fd/%d", fd);
2031 }
2032}
2033
2034struct fileops badfileops = {
2035 badfo_readwrite,
2036 badfo_readwrite,
2037 badfo_ioctl,
2038 badfo_poll,
2039 badfo_kqfilter,
2040 badfo_stat,
2041 badfo_close
2042};
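
/*
 * Descriptive note (added): badfileops is installed as the initial f_ops
 * when a new struct file is allocated earlier in this file; _fget() rejects
 * such not-yet-initialized files with EBADF and fdrop_locked() skips
 * fo_close() for them, so a partially constructed file can be torn down
 * safely.
 */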
2043
2044static int
2045badfo_readwrite(fp, uio, cred, flags, td)
2046 struct file *fp;
2047 struct uio *uio;
2048 struct ucred *cred;
2049 struct thread *td;
2050 int flags;
2051{
2052
2053 return (EBADF);
2054}
2055
2056static int
2057badfo_ioctl(fp, com, data, td)
2058 struct file *fp;
2059 u_long com;
2060 caddr_t data;
2061 struct thread *td;
2062{
2063
2064 return (EBADF);
2065}
2066
2067static int
2068badfo_poll(fp, events, cred, td)
2069 struct file *fp;
2070 int events;
2071 struct ucred *cred;
2072 struct thread *td;
2073{
2074
2075 return (0);
2076}
2077
2078static int
2079badfo_kqfilter(fp, kn)
2080 struct file *fp;
2081 struct knote *kn;
2082{
2083
2084 return (0);
2085}
2086
2087static int
2088badfo_stat(fp, sb, td)
2089 struct file *fp;
2090 struct stat *sb;
2091 struct thread *td;
2092{
2093
2094 return (EBADF);
2095}
2096
2097static int
2098badfo_close(fp, td)
2099 struct file *fp;
2100 struct thread *td;
2101{
2102
2103 return (EBADF);
2104}
2105
2106SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
2107 fildesc_drvinit,NULL)
2108
2109static void filelistinit __P((void *));
2110SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2111
2112/* ARGSUSED*/
2113static void
2114filelistinit(dummy)
2115 void *dummy;
2116{
2117 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2118 NULL, NULL, UMA_ALIGN_PTR, 0);
2119
2120 sx_init(&filelist_lock, "filelist lock");
2121}