Deleted Added
full compact
kern_descrip.c (114216) kern_descrip.c (114293)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_descrip.c 8.6 (Berkeley) 4/19/94
39 * $FreeBSD: head/sys/kern/kern_descrip.c 114216 2003-04-29 13:36:06Z kan $
39 * $FreeBSD: head/sys/kern/kern_descrip.c 114293 2003-04-30 12:57:40Z markm $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/lock.h>
51#include <sys/kernel.h>
52#include <sys/limits.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55#include <sys/sysctl.h>
56#include <sys/vnode.h>
57#include <sys/mount.h>
58#include <sys/proc.h>
59#include <sys/namei.h>
60#include <sys/file.h>
61#include <sys/stat.h>
62#include <sys/filio.h>
63#include <sys/fcntl.h>
64#include <sys/unistd.h>
65#include <sys/resourcevar.h>
66#include <sys/event.h>
67#include <sys/sx.h>
68#include <sys/socketvar.h>
69#include <sys/signalvar.h>
70
71#include <vm/vm.h>
72#include <vm/vm_extern.h>
73#include <vm/uma.h>
74
75static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
76static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
77
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/lock.h>
51#include <sys/kernel.h>
52#include <sys/limits.h>
53#include <sys/malloc.h>
54#include <sys/mutex.h>
55#include <sys/sysctl.h>
56#include <sys/vnode.h>
57#include <sys/mount.h>
58#include <sys/proc.h>
59#include <sys/namei.h>
60#include <sys/file.h>
61#include <sys/stat.h>
62#include <sys/filio.h>
63#include <sys/fcntl.h>
64#include <sys/unistd.h>
65#include <sys/resourcevar.h>
66#include <sys/event.h>
67#include <sys/sx.h>
68#include <sys/socketvar.h>
69#include <sys/signalvar.h>
70
71#include <vm/vm.h>
72#include <vm/vm_extern.h>
73#include <vm/uma.h>
74
75static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
76static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
77
78uma_zone_t file_zone;
78static uma_zone_t file_zone;
79
80static d_open_t fdopen;
81#define NUMFDESC 64
82
83#define CDEV_MAJOR 22
84static struct cdevsw fildesc_cdevsw = {
85 .d_open = fdopen,
86 .d_name = "FD",
87 .d_maj = CDEV_MAJOR,
88};
89
90/* How to treat 'new' parameter when allocating a fd for do_dup(). */
91enum dup_type { DUP_VARIABLE, DUP_FIXED };
92
93static int do_dup(struct thread *td, enum dup_type type, int old, int new,
94 register_t *retval);
95
96/*
97 * Descriptor management.
98 */
99struct filelist filehead; /* head of list of open files */
100int nfiles; /* actual number of open files */
101extern int cmask;
102struct sx filelist_lock; /* sx to protect filelist */
103struct mtx sigio_lock; /* mtx to protect pointers to sigio */
104
105/*
106 * System calls on descriptors.
107 */
108#ifndef _SYS_SYSPROTO_H_
109struct getdtablesize_args {
110 int dummy;
111};
112#endif
113/*
114 * MPSAFE
115 */
116/* ARGSUSED */
117int
118getdtablesize(td, uap)
119 struct thread *td;
120 struct getdtablesize_args *uap;
121{
122 struct proc *p = td->td_proc;
123
124 mtx_lock(&Giant);
125 td->td_retval[0] =
126 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
127 mtx_unlock(&Giant);
128 return (0);
129}
130
131/*
132 * Duplicate a file descriptor to a particular value.
133 *
134 * note: keep in mind that a potential race condition exists when closing
135 * descriptors from a shared descriptor table (via rfork).
136 */
137#ifndef _SYS_SYSPROTO_H_
138struct dup2_args {
139 u_int from;
140 u_int to;
141};
142#endif
143/*
144 * MPSAFE
145 */
146/* ARGSUSED */
147int
148dup2(td, uap)
149 struct thread *td;
150 struct dup2_args *uap;
151{
152
153 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
154 td->td_retval));
155}
156
157/*
158 * Duplicate a file descriptor.
159 */
160#ifndef _SYS_SYSPROTO_H_
161struct dup_args {
162 u_int fd;
163};
164#endif
165/*
166 * MPSAFE
167 */
168/* ARGSUSED */
169int
170dup(td, uap)
171 struct thread *td;
172 struct dup_args *uap;
173{
174
175 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
176}
177
178/*
179 * The file control system call.
180 */
181#ifndef _SYS_SYSPROTO_H_
182struct fcntl_args {
183 int fd;
184 int cmd;
185 long arg;
186};
187#endif
188/*
189 * MPSAFE
190 */
191/* ARGSUSED */
192int
193fcntl(td, uap)
194 struct thread *td;
195 struct fcntl_args *uap;
196{
197 struct flock fl;
198 intptr_t arg;
199 int error;
200
201 error = 0;
202 switch (uap->cmd) {
203 case F_GETLK:
204 case F_SETLK:
205 case F_SETLKW:
206 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
207 arg = (intptr_t)&fl;
208 break;
209 default:
210 arg = uap->arg;
211 break;
212 }
213 if (error)
214 return (error);
215 error = kern_fcntl(td, uap->fd, uap->cmd, arg);
216 if (error)
217 return (error);
218 if (uap->cmd == F_GETLK)
219 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
220 return (error);
221}
222
/*
 * In-kernel implementation of fcntl(2).  For the locking commands 'arg'
 * is a pointer to an in-kernel struct flock; otherwise it is the raw
 * user argument.  Runs under Giant; each case is responsible for
 * dropping the filedesc lock before anything that may sleep.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	/* Validate the descriptor while the table is locked. */
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte (UF_EXCLOSE lives here). */
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			error = EINVAL;
			break;
		}
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		/* Hold our own reference; fo_ioctl() below may sleep. */
		fhold_locked(fp);
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		/*
		 * Push the new FNONBLOCK/FASYNC settings down to the
		 * object via ioctl.  NOTE(review): f_flag is re-read here
		 * without FILE_LOCK — presumably tolerable under Giant;
		 * confirm against the file locking rules.
		 */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			/* Both ioctls succeeded; done. */
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: back out the FNONBLOCK change. */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Guard against off_t overflow when rebasing. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;

		/* POSIX locks are owned by the process group leader. */
		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/*
		 * Check for race with close: if the descriptor was closed
		 * (or replaced) while we slept in VOP_ADVLOCK(), undo the
		 * lock we just acquired so it does not leak.
		 */
		FILEDESC_LOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			FILEDESC_UNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Guard against off_t overflow in both directions. */
			if ((flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
443
444/*
445 * Common code for dup, dup2, and fcntl(F_DUPFD).
446 */
/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * 'type' selects fixed-target (dup2) vs. lowest-free (dup/F_DUPFD)
 * allocation of the new slot.  On success *retval holds the new fd.
 */
static int
do_dup(td, type, old, new, retval)
	enum dup_type type;
	int old, new;
	register_t *retval;
	struct thread *td;
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, newfd;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfilesperproc)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	/* dup2(fd, fd) on a valid fd is a no-op returning fd. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_UNLOCK(fdp);
		return (0);
	}
	/* Take a reference so fp survives the lock drops below. */
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * Expand the table for the new descriptor if needed. This may
	 * block and drop and reacquire the filedesc lock.
	 */
	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
		error = fdalloc(td, new, &newfd);
		if (error) {
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}
	if (type == DUP_VARIABLE)
		new = newfd;

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* Release the slot fdalloc() may have reserved for us. */
		if (fdp->fd_ofiles[new] == NULL) {
			if (new < fdp->fd_freefile)
				fdp->fd_freefile = new;
			while (fdp->fd_lastfile > 0 &&
			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
				fdp->fd_lastfile--;
		}
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new, ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten. We have
	 * to do the unmap now, but we cannot close it without
	 * introducing an ownership race for the slot.
	 */
	delfp = fdp->fd_ofiles[new];
	KASSERT(delfp == NULL || type == DUP_FIXED,
	    ("dup() picked an open file"));
#if 0
	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
		(void) munmapfd(td, new);
#endif

	/*
	 * Duplicate the source descriptor, update lastfile
	 */
	fdp->fd_ofiles[new] = fp;
	/* The dup does not inherit close-on-exec, per POSIX. */
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	FILEDESC_UNLOCK(fdp);
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 */
	if (delfp) {
		mtx_lock(&Giant);
		(void) closef(delfp, td);
		mtx_unlock(&Giant);
	}
	return (0);
}
552
553/*
554 * If sigio is on the list associated with a process or process group,
555 * disable signalling from the device, remove sigio from the list and
556 * free sigio.
557 */
/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.  Safe to call with *sigiop == NULL.
 */
void
funsetown(sigiop)
	struct sigio **sigiop;
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's back-pointer first so nobody else finds us. */
	*(sigio->sio_myref) = NULL;
	/* Negative sio_pgid means a process group owner, positive a proc. */
	if ((sigio)->sio_pgid < 0) {
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	/* Free outside the SIGIO lock. */
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}
588
589/*
590 * Free a list of sigio structures.
591 * We only need to lock the SIGIO_LOCK because we have made ourselves
592 * inaccessable to callers of fsetown and therefore do not need to lock
593 * the proc or pgrp struct for the list manipulation.
594 */
/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessable to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(sigiolst)
	struct sigiolst *sigiolst;
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/*
		 * Drop the SIGIO lock around the free: crfree()/FREE()
		 * must not be called with it held, and the list head is
		 * re-read after reacquiring.
		 */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}
650
651/*
652 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
653 *
654 * After permission checking, add a sigio structure to the sigio list for
655 * the process or process group.
656 */
/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.  pgid > 0 names a process, pgid < 0 a
 * process group, pgid == 0 clears the owner.
 */
int
fsetown(pgid, sigiop)
	pid_t pgid;
	struct sigio **sigiop;
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		/*
		 * pfind() returns the proc locked; drop it before the
		 * session check.  NOTE(review): p_session is read after
		 * PROC_UNLOCK — presumably stable while proctree_lock is
		 * held; confirm against the proc locking protocol.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	/* Detach any previous owner before installing the new one. */
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	/* Error path: we still own the freshly allocated sigio. */
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}
760
761/*
762 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
763 */
764pid_t
765fgetown(sigiop)
766 struct sigio **sigiop;
767{
768 pid_t pgid;
769
770 SIGIO_LOCK();
771 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
772 SIGIO_UNLOCK();
773 return (pgid);
774}
775
776/*
777 * Close a file descriptor.
778 */
779#ifndef _SYS_SYSPROTO_H_
780struct close_args {
781 int fd;
782};
783#endif
784/*
785 * MPSAFE
786 */
787/* ARGSUSED */
/*
 * Close a file descriptor: detach the slot from the table, update the
 * free/last hints, tear down any attached knotes, then drop the file's
 * reference via closef().
 */
int
close(td, uap)
	struct thread *td;
	struct close_args *uap;
{
	struct filedesc *fdp;
	struct file *fp;
	int fd, error;

	fd = uap->fd;
	error = 0;
	fdp = td->td_proc->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
#if 0
	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
		(void) munmapfd(td, fd);
#endif
	/* Clear the slot while the table is still locked. */
	fdp->fd_ofiles[fd] = NULL;
	fdp->fd_ofileflags[fd] = 0;

	/*
	 * we now hold the fp reference that used to be owned by the descriptor
	 * array.
	 */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	/* knote_fdclose() must run without the filedesc lock held. */
	if (fd < fdp->fd_knlistsize) {
		FILEDESC_UNLOCK(fdp);
		knote_fdclose(td, fd);
	} else
		FILEDESC_UNLOCK(fdp);

	error = closef(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}
834
835#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
836/*
837 * Return status information about a file descriptor.
838 */
839#ifndef _SYS_SYSPROTO_H_
840struct ofstat_args {
841 int fd;
842 struct ostat *sb;
843};
844#endif
845/*
846 * MPSAFE
847 */
848/* ARGSUSED */
849int
850ofstat(td, uap)
851 struct thread *td;
852 struct ofstat_args *uap;
853{
854 struct file *fp;
855 struct stat ub;
856 struct ostat oub;
857 int error;
858
859 mtx_lock(&Giant);
860 if ((error = fget(td, uap->fd, &fp)) != 0)
861 goto done2;
862 error = fo_stat(fp, &ub, td->td_ucred, td);
863 if (error == 0) {
864 cvtstat(&ub, &oub);
865 error = copyout(&oub, uap->sb, sizeof(oub));
866 }
867 fdrop(fp, td);
868done2:
869 mtx_unlock(&Giant);
870 return (error);
871}
872#endif /* COMPAT_43 || COMPAT_SUNOS */
873
874/*
875 * Return status information about a file descriptor.
876 */
877#ifndef _SYS_SYSPROTO_H_
878struct fstat_args {
879 int fd;
880 struct stat *sb;
881};
882#endif
883/*
884 * MPSAFE
885 */
886/* ARGSUSED */
887int
888fstat(td, uap)
889 struct thread *td;
890 struct fstat_args *uap;
891{
892 struct file *fp;
893 struct stat ub;
894 int error;
895
896 mtx_lock(&Giant);
897 if ((error = fget(td, uap->fd, &fp)) != 0)
898 goto done2;
899 error = fo_stat(fp, &ub, td->td_ucred, td);
900 if (error == 0)
901 error = copyout(&ub, uap->sb, sizeof(ub));
902 fdrop(fp, td);
903done2:
904 mtx_unlock(&Giant);
905 return (error);
906}
907
908/*
909 * Return status information about a file descriptor.
910 */
911#ifndef _SYS_SYSPROTO_H_
912struct nfstat_args {
913 int fd;
914 struct nstat *sb;
915};
916#endif
917/*
918 * MPSAFE
919 */
920/* ARGSUSED */
921int
922nfstat(td, uap)
923 struct thread *td;
924 struct nfstat_args *uap;
925{
926 struct file *fp;
927 struct stat ub;
928 struct nstat nub;
929 int error;
930
931 mtx_lock(&Giant);
932 if ((error = fget(td, uap->fd, &fp)) != 0)
933 goto done2;
934 error = fo_stat(fp, &ub, td->td_ucred, td);
935 if (error == 0) {
936 cvtnstat(&ub, &nub);
937 error = copyout(&nub, uap->sb, sizeof(nub));
938 }
939 fdrop(fp, td);
940done2:
941 mtx_unlock(&Giant);
942 return (error);
943}
944
945/*
946 * Return pathconf information about a file descriptor.
947 */
948#ifndef _SYS_SYSPROTO_H_
949struct fpathconf_args {
950 int fd;
951 int name;
952};
953#endif
954/*
955 * MPSAFE
956 */
957/* ARGSUSED */
958int
959fpathconf(td, uap)
960 struct thread *td;
961 struct fpathconf_args *uap;
962{
963 struct file *fp;
964 struct vnode *vp;
965 int error;
966
967 if ((error = fget(td, uap->fd, &fp)) != 0)
968 return (error);
969
970 /* If asynchronous I/O is available, it works for all descriptors. */
971 if (uap->name == _PC_ASYNC_IO) {
972 td->td_retval[0] = async_io_version;
973 goto out;
974 }
975 switch (fp->f_type) {
976 case DTYPE_PIPE:
977 case DTYPE_SOCKET:
978 if (uap->name != _PC_PIPE_BUF) {
979 error = EINVAL;
980 } else {
981 td->td_retval[0] = PIPE_BUF;
982 error = 0;
983 }
984 break;
985 case DTYPE_FIFO:
986 case DTYPE_VNODE:
987 vp = fp->f_data;
988 mtx_lock(&Giant);
989 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
990 mtx_unlock(&Giant);
991 break;
992 default:
993 error = EOPNOTSUPP;
994 break;
995 }
996out:
997 fdrop(fp, td);
998 return (error);
999}
1000
1001/*
1002 * Allocate a file descriptor for the process.
1003 */
1004static int fdexpand;
1005SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");
1006
/*
 * Allocate a file descriptor >= 'want' for the current process, storing
 * it in *result.  Called (and returns) with the filedesc lock held; the
 * lock is transiently dropped around malloc()/free() when the table
 * must grow, with a retry loop to absorb concurrent expansion.
 */
int
fdalloc(td, want, result)
	struct thread *td;
	int want;
	int *result;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	int lim, last, nfiles;
	struct file **newofile, **oldofile;
	char *newofileflags;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		i = max(want, fdp->fd_freefile);
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				/* Found a free slot; claim it. */
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (i >= lim)
			return (EMFILE);
		/* Grow geometrically, at least to NDEXTENT and past want. */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		while (nfiles < want)
			nfiles <<= 1;
		FILEDESC_UNLOCK(fdp);
		/*
		 * XXX malloc() calls uma_large_malloc() for sizes larger
		 * than KMEM_ZMAX bytes.  uma_large_malloc() requires Giant.
		 */
		mtx_lock(&Giant);
		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
		mtx_unlock(&Giant);

		/*
		 * Deal with file-table extend race that might have
		 * occurred while filedesc was unlocked.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_nfiles >= nfiles) {
			/* Someone else grew it; discard ours and retry. */
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(newofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
			continue;
		}
		/* The flag bytes live right after the pointer array. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		i = fdp->fd_nfiles * sizeof(struct file *);
		bcopy(fdp->fd_ofiles, newofile, i);
		bzero((char *)newofile + i,
		    nfiles * sizeof(struct file *) - i);
		i = fdp->fd_nfiles * sizeof(char);
		bcopy(fdp->fd_ofileflags, newofileflags, i);
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/* The initial NDFILE-sized table is embedded; don't free it. */
		if (fdp->fd_nfiles > NDFILE)
			oldofile = fdp->fd_ofiles;
		else
			oldofile = NULL;
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		if (oldofile != NULL) {
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(oldofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
		}
	}
	/* NOTREACHED: the loop only exits via the returns above. */
	return (0);
}
1108
1109/*
1110 * Check to see whether n user file descriptors
1111 * are available to the process p.
1112 */
1113int
1114fdavail(td, n)
1115 struct thread *td;
1116 int n;
1117{
1118 struct proc *p = td->td_proc;
1119 struct filedesc *fdp = td->td_proc->p_fd;
1120 struct file **fpp;
1121 int i, lim, last;
1122
1123 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1124
1125 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1126 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1127 return (1);
1128 last = min(fdp->fd_nfiles, lim);
1129 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1130 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1131 if (*fpp == NULL && --n <= 0)
1132 return (1);
1133 }
1134 return (0);
1135}
1136
1137/*
1138 * Create a new open file structure and allocate
1139 * a file decriptor for the process that refers to it.
1140 */
/*
 * Create a new open file structure and allocate a file descriptor for
 * the process that refers to it.  On success the new file (refcount 1,
 * badfileops until the caller finishes it) is installed in the table
 * and optionally returned via *resultfp / *resultfd.
 */
int
falloc(td, resultfp, resultfd)
	struct thread *td;
	struct file **resultfp;
	int *resultfd;
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	sx_xlock(&filelist_lock);
	/* Enforce the system-wide open-file limit. */
	if (nfiles >= maxfiles) {
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		tablefull("file");
		return (ENFILE);
	}
	nfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc();
	fp->f_gcflag = 0;
	fp->f_count = 1;
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	fp->f_seqcount = 1;
	/*
	 * Filedesc lock is taken before filelist_lock is released:
	 * the insertion point read from the table must stay coherent
	 * with the global file list.
	 */
	FILEDESC_LOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_UNLOCK(p->p_fd);
		/* fdrop() undoes the list insertion and frees fp. */
		fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_UNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}
1192
1193/*
1194 * Free a file descriptor.
1195 */
1196void
1197ffree(fp)
1198 struct file *fp;
1199{
1200
1201 KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
1202 sx_xlock(&filelist_lock);
1203 LIST_REMOVE(fp, f_list);
1204 nfiles--;
1205 sx_xunlock(&filelist_lock);
1206 crfree(fp->f_cred);
1207 uma_zfree(file_zone, fp);
1208}
1209
1210/*
1211 * Build a new filedesc structure from another.
1212 * Copy the current, root, and jail root vnode references.
1213 */
1214struct filedesc *
1215fdinit(fdp)
1216 struct filedesc *fdp;
1217{
1218 struct filedesc0 *newfdp;
1219
1220 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1221 M_FILEDESC, M_WAITOK | M_ZERO);
1222 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1223 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1224 if (newfdp->fd_fd.fd_cdir)
1225 VREF(newfdp->fd_fd.fd_cdir);
1226 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1227 if (newfdp->fd_fd.fd_rdir)
1228 VREF(newfdp->fd_fd.fd_rdir);
1229 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1230 if (newfdp->fd_fd.fd_jdir)
1231 VREF(newfdp->fd_fd.fd_jdir);
1232
1233 /* Create the file descriptor table. */
1234 newfdp->fd_fd.fd_refcnt = 1;
1235 newfdp->fd_fd.fd_cmask = cmask;
1236 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1237 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1238 newfdp->fd_fd.fd_nfiles = NDFILE;
1239 newfdp->fd_fd.fd_knlistsize = -1;
1240 return (&newfdp->fd_fd);
1241}
1242
1243/*
1244 * Share a filedesc structure.
1245 */
1246struct filedesc *
1247fdshare(fdp)
1248 struct filedesc *fdp;
1249{
1250 FILEDESC_LOCK(fdp);
1251 fdp->fd_refcnt++;
1252 FILEDESC_UNLOCK(fdp);
1253 return (fdp);
1254}
1255
1256/*
1257 * Copy a filedesc structure.
1258 * A NULL pointer in returns a NULL reference, this is to ease callers,
1259 * not catch errors.
1260 */
1261struct filedesc *
1262fdcopy(fdp)
1263 struct filedesc *fdp;
1264{
1265 struct filedesc *newfdp;
1266 struct file **fpp;
1267 int i, j;
1268
1269 /* Certain daemons might not have file descriptors. */
1270 if (fdp == NULL)
1271 return (NULL);
1272
1273 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1274
1275 FILEDESC_UNLOCK(fdp);
1276 MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
1277 M_FILEDESC, M_WAITOK);
1278 FILEDESC_LOCK(fdp);
1279 bcopy(fdp, newfdp, sizeof(struct filedesc));
1280 FILEDESC_UNLOCK(fdp);
1281 bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
1282 mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1283 if (newfdp->fd_cdir)
1284 VREF(newfdp->fd_cdir);
1285 if (newfdp->fd_rdir)
1286 VREF(newfdp->fd_rdir);
1287 if (newfdp->fd_jdir)
1288 VREF(newfdp->fd_jdir);
1289 newfdp->fd_refcnt = 1;
1290
1291 /*
1292 * If the number of open files fits in the internal arrays
1293 * of the open file structure, use them, otherwise allocate
1294 * additional memory for the number of descriptors currently
1295 * in use.
1296 */
1297 FILEDESC_LOCK(fdp);
1298 newfdp->fd_lastfile = fdp->fd_lastfile;
1299 newfdp->fd_nfiles = fdp->fd_nfiles;
1300 if (newfdp->fd_lastfile < NDFILE) {
1301 newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
1302 newfdp->fd_ofileflags =
1303 ((struct filedesc0 *) newfdp)->fd_dfileflags;
1304 i = NDFILE;
1305 } else {
1306 /*
1307 * Compute the smallest multiple of NDEXTENT needed
1308 * for the file descriptors currently in use,
1309 * allowing the table to shrink.
1310 */
1311retry:
1312 i = newfdp->fd_nfiles;
1313 while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
1314 i /= 2;
1315 FILEDESC_UNLOCK(fdp);
1316 MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
1317 M_FILEDESC, M_WAITOK);
1318 FILEDESC_LOCK(fdp);
1319 newfdp->fd_lastfile = fdp->fd_lastfile;
1320 newfdp->fd_nfiles = fdp->fd_nfiles;
1321 j = newfdp->fd_nfiles;
1322 while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
1323 j /= 2;
1324 if (i != j) {
1325 /*
1326 * The size of the original table has changed.
1327 * Go over once again.
1328 */
1329 FILEDESC_UNLOCK(fdp);
1330 FREE(newfdp->fd_ofiles, M_FILEDESC);
1331 FILEDESC_LOCK(fdp);
1332 newfdp->fd_lastfile = fdp->fd_lastfile;
1333 newfdp->fd_nfiles = fdp->fd_nfiles;
1334 goto retry;
1335 }
1336 newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
1337 }
1338 newfdp->fd_nfiles = i;
1339 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
1340 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
1341
1342 /*
1343 * kq descriptors cannot be copied.
1344 */
1345 if (newfdp->fd_knlistsize != -1) {
1346 fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
1347 for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
1348 if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
1349 *fpp = NULL;
1350 if (i < newfdp->fd_freefile)
1351 newfdp->fd_freefile = i;
1352 }
1353 if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
1354 newfdp->fd_lastfile--;
1355 }
1356 newfdp->fd_knlist = NULL;
1357 newfdp->fd_knlistsize = -1;
1358 newfdp->fd_knhash = NULL;
1359 newfdp->fd_knhashmask = 0;
1360 }
1361
1362 fpp = newfdp->fd_ofiles;
1363 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
1364 if (*fpp != NULL)
1365 fhold(*fpp);
1366 }
1367 return (newfdp);
1368}
1369
1370/* A mutex to protect the association between a proc and filedesc. */
1371struct mtx fdesc_mtx;
1372MTX_SYSINIT(fdesc, &fdesc_mtx, "fdesc", MTX_DEF);
1373
1374/*
1375 * Release a filedesc structure.
1376 */
1377void
1378fdfree(td)
1379 struct thread *td;
1380{
1381 struct filedesc *fdp;
1382 struct file **fpp;
1383 int i;
1384
1385 /* Certain daemons might not have file descriptors. */
1386 fdp = td->td_proc->p_fd;
1387 if (fdp == NULL)
1388 return;
1389
1390 FILEDESC_LOCK(fdp);
1391 if (--fdp->fd_refcnt > 0) {
1392 FILEDESC_UNLOCK(fdp);
1393 return;
1394 }
1395
1396 /*
1397 * We are the last reference to the structure, so we can
1398 * safely assume it will not change out from under us.
1399 */
1400 FILEDESC_UNLOCK(fdp);
1401 fpp = fdp->fd_ofiles;
1402 for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
1403 if (*fpp)
1404 (void) closef(*fpp, td);
1405 }
1406
1407 /* XXX This should happen earlier. */
1408 mtx_lock(&fdesc_mtx);
1409 td->td_proc->p_fd = NULL;
1410 mtx_unlock(&fdesc_mtx);
1411
1412 if (fdp->fd_nfiles > NDFILE)
1413 FREE(fdp->fd_ofiles, M_FILEDESC);
1414 if (fdp->fd_cdir)
1415 vrele(fdp->fd_cdir);
1416 if (fdp->fd_rdir)
1417 vrele(fdp->fd_rdir);
1418 if (fdp->fd_jdir)
1419 vrele(fdp->fd_jdir);
1420 if (fdp->fd_knlist)
1421 FREE(fdp->fd_knlist, M_KQUEUE);
1422 if (fdp->fd_knhash)
1423 FREE(fdp->fd_knhash, M_KQUEUE);
1424 mtx_destroy(&fdp->fd_mtx);
1425 FREE(fdp, M_FILEDESC);
1426}
1427
1428/*
1429 * For setugid programs, we don't want to people to use that setugidness
1430 * to generate error messages which write to a file which otherwise would
1431 * otherwise be off-limits to the process. We check for filesystems where
1432 * the vnode can change out from under us after execve (like [lin]procfs).
1433 *
1434 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1435 * sufficient. We also don't for check setugidness since we know we are.
1436 */
1437static int
1438is_unsafe(struct file *fp)
1439{
1440 if (fp->f_type == DTYPE_VNODE) {
1441 struct vnode *vp = fp->f_data;
1442
1443 if ((vp->v_vflag & VV_PROCDEP) != 0)
1444 return (1);
1445 }
1446 return (0);
1447}
1448
1449/*
1450 * Make this setguid thing safe, if at all possible.
1451 */
1452void
1453setugidsafety(td)
1454 struct thread *td;
1455{
1456 struct filedesc *fdp;
1457 int i;
1458
1459 /* Certain daemons might not have file descriptors. */
1460 fdp = td->td_proc->p_fd;
1461 if (fdp == NULL)
1462 return;
1463
1464 /*
1465 * Note: fdp->fd_ofiles may be reallocated out from under us while
1466 * we are blocked in a close. Be careful!
1467 */
1468 FILEDESC_LOCK(fdp);
1469 for (i = 0; i <= fdp->fd_lastfile; i++) {
1470 if (i > 2)
1471 break;
1472 if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
1473 struct file *fp;
1474
1475#if 0
1476 if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
1477 (void) munmapfd(td, i);
1478#endif
1479 if (i < fdp->fd_knlistsize) {
1480 FILEDESC_UNLOCK(fdp);
1481 knote_fdclose(td, i);
1482 FILEDESC_LOCK(fdp);
1483 }
1484 /*
1485 * NULL-out descriptor prior to close to avoid
1486 * a race while close blocks.
1487 */
1488 fp = fdp->fd_ofiles[i];
1489 fdp->fd_ofiles[i] = NULL;
1490 fdp->fd_ofileflags[i] = 0;
1491 if (i < fdp->fd_freefile)
1492 fdp->fd_freefile = i;
1493 FILEDESC_UNLOCK(fdp);
1494 (void) closef(fp, td);
1495 FILEDESC_LOCK(fdp);
1496 }
1497 }
1498 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1499 fdp->fd_lastfile--;
1500 FILEDESC_UNLOCK(fdp);
1501}
1502
1503/*
1504 * Close any files on exec?
1505 */
1506void
1507fdcloseexec(td)
1508 struct thread *td;
1509{
1510 struct filedesc *fdp;
1511 int i;
1512
1513 /* Certain daemons might not have file descriptors. */
1514 fdp = td->td_proc->p_fd;
1515 if (fdp == NULL)
1516 return;
1517
1518 FILEDESC_LOCK(fdp);
1519
1520 /*
1521 * We cannot cache fd_ofiles or fd_ofileflags since operations
1522 * may block and rip them out from under us.
1523 */
1524 for (i = 0; i <= fdp->fd_lastfile; i++) {
1525 if (fdp->fd_ofiles[i] != NULL &&
1526 (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
1527 struct file *fp;
1528
1529#if 0
1530 if (fdp->fd_ofileflags[i] & UF_MAPPED)
1531 (void) munmapfd(td, i);
1532#endif
1533 if (i < fdp->fd_knlistsize) {
1534 FILEDESC_UNLOCK(fdp);
1535 knote_fdclose(td, i);
1536 FILEDESC_LOCK(fdp);
1537 }
1538 /*
1539 * NULL-out descriptor prior to close to avoid
1540 * a race while close blocks.
1541 */
1542 fp = fdp->fd_ofiles[i];
1543 fdp->fd_ofiles[i] = NULL;
1544 fdp->fd_ofileflags[i] = 0;
1545 if (i < fdp->fd_freefile)
1546 fdp->fd_freefile = i;
1547 FILEDESC_UNLOCK(fdp);
1548 (void) closef(fp, td);
1549 FILEDESC_LOCK(fdp);
1550 }
1551 }
1552 while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
1553 fdp->fd_lastfile--;
1554 FILEDESC_UNLOCK(fdp);
1555}
1556
1557/*
1558 * It is unsafe for set[ug]id processes to be started with file
1559 * descriptors 0..2 closed, as these descriptors are given implicit
1560 * significance in the Standard C library. fdcheckstd() will create a
1561 * descriptor referencing /dev/null for each of stdin, stdout, and
1562 * stderr that is not already open.
1563 */
1564int
1565fdcheckstd(td)
1566 struct thread *td;
1567{
1568 struct nameidata nd;
1569 struct filedesc *fdp;
1570 struct file *fp;
1571 register_t retval;
1572 int fd, i, error, flags, devnull;
1573
1574 fdp = td->td_proc->p_fd;
1575 if (fdp == NULL)
1576 return (0);
1577 devnull = -1;
1578 error = 0;
1579 for (i = 0; i < 3; i++) {
1580 if (fdp->fd_ofiles[i] != NULL)
1581 continue;
1582 if (devnull < 0) {
1583 error = falloc(td, &fp, &fd);
1584 if (error != 0)
1585 break;
1586 KASSERT(fd == i, ("oof, we didn't get our fd"));
1587 NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
1588 td);
1589 flags = FREAD | FWRITE;
1590 error = vn_open(&nd, &flags, 0);
1591 if (error != 0) {
1592 FILEDESC_LOCK(fdp);
1593 fdp->fd_ofiles[fd] = NULL;
1594 FILEDESC_UNLOCK(fdp);
1595 fdrop(fp, td);
1596 break;
1597 }
1598 NDFREE(&nd, NDF_ONLY_PNBUF);
1599 fp->f_data = nd.ni_vp;
1600 fp->f_flag = flags;
1601 fp->f_ops = &vnops;
1602 fp->f_type = DTYPE_VNODE;
1603 VOP_UNLOCK(nd.ni_vp, 0, td);
1604 devnull = fd;
1605 } else {
1606 error = do_dup(td, DUP_FIXED, devnull, i, &retval);
1607 if (error != 0)
1608 break;
1609 }
1610 }
1611 return (error);
1612}
1613
1614/*
1615 * Internal form of close.
1616 * Decrement reference count on file structure.
1617 * Note: td may be NULL when closing a file
1618 * that was being passed in a message.
1619 */
1620int
1621closef(fp, td)
1622 struct file *fp;
1623 struct thread *td;
1624{
1625 struct vnode *vp;
1626 struct flock lf;
1627
1628 if (fp == NULL)
1629 return (0);
1630 /*
1631 * POSIX record locking dictates that any close releases ALL
1632 * locks owned by this process. This is handled by setting
1633 * a flag in the unlock to free ONLY locks obeying POSIX
1634 * semantics, and not to free BSD-style file locks.
1635 * If the descriptor was in a message, POSIX-style locks
1636 * aren't passed with the descriptor.
1637 */
1638 if (td != NULL && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0 &&
1639 fp->f_type == DTYPE_VNODE) {
1640 lf.l_whence = SEEK_SET;
1641 lf.l_start = 0;
1642 lf.l_len = 0;
1643 lf.l_type = F_UNLCK;
1644 vp = fp->f_data;
1645 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1646 F_UNLCK, &lf, F_POSIX);
1647 }
1648 return (fdrop(fp, td));
1649}
1650
1651/*
1652 * Drop reference on struct file passed in, may call closef if the
1653 * reference hits zero.
1654 */
1655int
1656fdrop(fp, td)
1657 struct file *fp;
1658 struct thread *td;
1659{
1660
1661 FILE_LOCK(fp);
1662 return (fdrop_locked(fp, td));
1663}
1664
1665/*
1666 * Extract the file pointer associated with the specified descriptor for
1667 * the current user process.
1668 *
1669 * If the descriptor doesn't exist, EBADF is returned.
1670 *
1671 * If the descriptor exists but doesn't match 'flags' then
1672 * return EBADF for read attempts and EINVAL for write attempts.
1673 *
1674 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1675 * It should be droped with fdrop().
1676 * If it is not set, then the refcount will not be bumped however the
1677 * thread's filedesc struct will be returned locked (for fgetsock).
1678 *
1679 * If an error occured the non-zero error is returned and *fpp is set to NULL.
1680 * Otherwise *fpp is set and zero is returned.
1681 */
1682static __inline int
1683_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
1684{
1685 struct filedesc *fdp;
1686 struct file *fp;
1687
1688 *fpp = NULL;
1689 if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
1690 return (EBADF);
1691 FILEDESC_LOCK(fdp);
1692 if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
1693 FILEDESC_UNLOCK(fdp);
1694 return (EBADF);
1695 }
1696
1697 /*
1698 * Note: FREAD failures returns EBADF to maintain backwards
1699 * compatibility with what routines returned before.
1700 *
1701 * Only one flag, or 0, may be specified.
1702 */
1703 if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
1704 FILEDESC_UNLOCK(fdp);
1705 return (EBADF);
1706 }
1707 if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
1708 FILEDESC_UNLOCK(fdp);
1709 return (EINVAL);
1710 }
1711 if (hold) {
1712 fhold(fp);
1713 FILEDESC_UNLOCK(fdp);
1714 }
1715 *fpp = fp;
1716 return (0);
1717}
1718
/*
 * Look up a descriptor with no access-mode check; the returned file
 * is held and must be released with fdrop().
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0, 1);
	return (error);
}
1725
1726int
1727fget_read(struct thread *td, int fd, struct file **fpp)
1728{
1729
1730 return(_fget(td, fd, fpp, FREAD, 1));
1731}
1732
1733int
1734fget_write(struct thread *td, int fd, struct file **fpp)
1735{
1736
1737 return(_fget(td, fd, fpp, FWRITE, 1));
1738}
1739
1740/*
1741 * Like fget() but loads the underlying vnode, or returns an error if
1742 * the descriptor does not represent a vnode. Note that pipes use vnodes
1743 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1744 * error). The returned vnode will be vref()d.
1745 */
1746static __inline int
1747_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1748{
1749 struct file *fp;
1750 int error;
1751
1752 *vpp = NULL;
1753 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1754 return (error);
1755 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1756 error = EINVAL;
1757 } else {
1758 *vpp = fp->f_data;
1759 vref(*vpp);
1760 }
1761 FILEDESC_UNLOCK(td->td_proc->p_fd);
1762 return (error);
1763}
1764
/*
 * Fetch the vref()d vnode behind a descriptor, with no access-mode
 * check.
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1771
1772int
1773fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1774{
1775
1776 return (_fgetvp(td, fd, vpp, FREAD));
1777}
1778
1779int
1780fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1781{
1782
1783 return (_fgetvp(td, fd, vpp, FWRITE));
1784}
1785
1786/*
1787 * Like fget() but loads the underlying socket, or returns an error if
1788 * the descriptor does not represent a socket.
1789 *
1790 * We bump the ref count on the returned socket. XXX Also obtain the SX
1791 * lock in the future.
1792 */
1793int
1794fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1795{
1796 struct file *fp;
1797 int error;
1798
1799 *spp = NULL;
1800 if (fflagp != NULL)
1801 *fflagp = 0;
1802 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1803 return (error);
1804 if (fp->f_type != DTYPE_SOCKET) {
1805 error = ENOTSOCK;
1806 } else {
1807 *spp = fp->f_data;
1808 if (fflagp)
1809 *fflagp = fp->f_flag;
1810 soref(*spp);
1811 }
1812 FILEDESC_UNLOCK(td->td_proc->p_fd);
1813 return (error);
1814}
1815
1816/*
1817 * Drop the reference count on the the socket and XXX release the SX lock in
1818 * the future. The last reference closes the socket.
1819 */
1820void
1821fputsock(struct socket *so)
1822{
1823
1824 sorele(so);
1825}
1826
1827/*
1828 * Drop reference on struct file passed in, may call closef if the
1829 * reference hits zero.
1830 * Expects struct file locked, and will unlock it.
1831 */
1832int
1833fdrop_locked(fp, td)
1834 struct file *fp;
1835 struct thread *td;
1836{
1837 struct flock lf;
1838 struct vnode *vp;
1839 int error;
1840
1841 FILE_LOCK_ASSERT(fp, MA_OWNED);
1842
1843 if (--fp->f_count > 0) {
1844 FILE_UNLOCK(fp);
1845 return (0);
1846 }
1847 mtx_lock(&Giant);
1848 if (fp->f_count < 0)
1849 panic("fdrop: count < 0");
1850 if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
1851 lf.l_whence = SEEK_SET;
1852 lf.l_start = 0;
1853 lf.l_len = 0;
1854 lf.l_type = F_UNLCK;
1855 vp = fp->f_data;
1856 FILE_UNLOCK(fp);
1857 (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1858 } else
1859 FILE_UNLOCK(fp);
1860 if (fp->f_ops != &badfileops)
1861 error = fo_close(fp, td);
1862 else
1863 error = 0;
1864 ffree(fp);
1865 mtx_unlock(&Giant);
1866 return (error);
1867}
1868
1869/*
1870 * Apply an advisory lock on a file descriptor.
1871 *
1872 * Just attempt to get a record lock of the requested type on
1873 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1874 */
1875#ifndef _SYS_SYSPROTO_H_
1876struct flock_args {
1877 int fd;
1878 int how;
1879};
1880#endif
1881/*
1882 * MPSAFE
1883 */
1884/* ARGSUSED */
1885int
1886flock(td, uap)
1887 struct thread *td;
1888 struct flock_args *uap;
1889{
1890 struct file *fp;
1891 struct vnode *vp;
1892 struct flock lf;
1893 int error;
1894
1895 if ((error = fget(td, uap->fd, &fp)) != 0)
1896 return (error);
1897 if (fp->f_type != DTYPE_VNODE) {
1898 fdrop(fp, td);
1899 return (EOPNOTSUPP);
1900 }
1901
1902 mtx_lock(&Giant);
1903 vp = fp->f_data;
1904 lf.l_whence = SEEK_SET;
1905 lf.l_start = 0;
1906 lf.l_len = 0;
1907 if (uap->how & LOCK_UN) {
1908 lf.l_type = F_UNLCK;
1909 FILE_LOCK(fp);
1910 fp->f_flag &= ~FHASLOCK;
1911 FILE_UNLOCK(fp);
1912 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
1913 goto done2;
1914 }
1915 if (uap->how & LOCK_EX)
1916 lf.l_type = F_WRLCK;
1917 else if (uap->how & LOCK_SH)
1918 lf.l_type = F_RDLCK;
1919 else {
1920 error = EBADF;
1921 goto done2;
1922 }
1923 FILE_LOCK(fp);
1924 fp->f_flag |= FHASLOCK;
1925 FILE_UNLOCK(fp);
1926 error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
1927 (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
1928done2:
1929 fdrop(fp, td);
1930 mtx_unlock(&Giant);
1931 return (error);
1932}
1933
1934/*
1935 * File Descriptor pseudo-device driver (/dev/fd/).
1936 *
1937 * Opening minor device N dup()s the file (if any) connected to file
1938 * descriptor N belonging to the calling process. Note that this driver
1939 * consists of only the ``open()'' routine, because all subsequent
1940 * references to this file will be direct to the other driver.
1941 */
1942/* ARGSUSED */
1943static int
1944fdopen(dev, mode, type, td)
1945 dev_t dev;
1946 int mode, type;
1947 struct thread *td;
1948{
1949
1950 /*
1951 * XXX Kludge: set curthread->td_dupfd to contain the value of the
1952 * the file descriptor being sought for duplication. The error
1953 * return ensures that the vnode for this device will be released
1954 * by vn_open. Open will detect this special error and take the
1955 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
1956 * will simply report the error.
1957 */
1958 td->td_dupfd = dev2unit(dev);
1959 return (ENODEV);
1960}
1961
1962/*
1963 * Duplicate the specified descriptor to a free descriptor.
1964 */
1965int
1966dupfdopen(td, fdp, indx, dfd, mode, error)
1967 struct thread *td;
1968 struct filedesc *fdp;
1969 int indx, dfd;
1970 int mode;
1971 int error;
1972{
1973 struct file *wfp;
1974 struct file *fp;
1975
1976 /*
1977 * If the to-be-dup'd fd number is greater than the allowed number
1978 * of file descriptors, or the fd to be dup'd has already been
1979 * closed, then reject.
1980 */
1981 FILEDESC_LOCK(fdp);
1982 if (dfd < 0 || dfd >= fdp->fd_nfiles ||
1983 (wfp = fdp->fd_ofiles[dfd]) == NULL) {
1984 FILEDESC_UNLOCK(fdp);
1985 return (EBADF);
1986 }
1987
1988 /*
1989 * There are two cases of interest here.
1990 *
1991 * For ENODEV simply dup (dfd) to file descriptor
1992 * (indx) and return.
1993 *
1994 * For ENXIO steal away the file structure from (dfd) and
1995 * store it in (indx). (dfd) is effectively closed by
1996 * this operation.
1997 *
1998 * Any other error code is just returned.
1999 */
2000 switch (error) {
2001 case ENODEV:
2002 /*
2003 * Check that the mode the file is being opened for is a
2004 * subset of the mode of the existing descriptor.
2005 */
2006 FILE_LOCK(wfp);
2007 if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
2008 FILE_UNLOCK(wfp);
2009 FILEDESC_UNLOCK(fdp);
2010 return (EACCES);
2011 }
2012 fp = fdp->fd_ofiles[indx];
2013#if 0
2014 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2015 (void) munmapfd(td, indx);
2016#endif
2017 fdp->fd_ofiles[indx] = wfp;
2018 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2019 fhold_locked(wfp);
2020 FILE_UNLOCK(wfp);
2021 if (indx > fdp->fd_lastfile)
2022 fdp->fd_lastfile = indx;
2023 if (fp != NULL)
2024 FILE_LOCK(fp);
2025 FILEDESC_UNLOCK(fdp);
2026 /*
2027 * We now own the reference to fp that the ofiles[] array
2028 * used to own. Release it.
2029 */
2030 if (fp != NULL)
2031 fdrop_locked(fp, td);
2032 return (0);
2033
2034 case ENXIO:
2035 /*
2036 * Steal away the file pointer from dfd and stuff it into indx.
2037 */
2038 fp = fdp->fd_ofiles[indx];
2039#if 0
2040 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
2041 (void) munmapfd(td, indx);
2042#endif
2043 fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
2044 fdp->fd_ofiles[dfd] = NULL;
2045 fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
2046 fdp->fd_ofileflags[dfd] = 0;
2047
2048 /*
2049 * Complete the clean up of the filedesc structure by
2050 * recomputing the various hints.
2051 */
2052 if (indx > fdp->fd_lastfile) {
2053 fdp->fd_lastfile = indx;
2054 } else {
2055 while (fdp->fd_lastfile > 0 &&
2056 fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
2057 fdp->fd_lastfile--;
2058 }
2059 if (dfd < fdp->fd_freefile)
2060 fdp->fd_freefile = dfd;
2061 }
2062 if (fp != NULL)
2063 FILE_LOCK(fp);
2064 FILEDESC_UNLOCK(fdp);
2065
2066 /*
2067 * we now own the reference to fp that the ofiles[] array
2068 * used to own. Release it.
2069 */
2070 if (fp != NULL)
2071 fdrop_locked(fp, td);
2072 return (0);
2073
2074 default:
2075 FILEDESC_UNLOCK(fdp);
2076 return (error);
2077 }
2078 /* NOTREACHED */
2079}
2080
2081/*
2082 * Get file structures.
2083 */
2084static int
2085sysctl_kern_file(SYSCTL_HANDLER_ARGS)
2086{
2087 struct xfile xf;
2088 struct filedesc *fdp;
2089 struct file *fp;
2090 struct proc *p;
2091 int error, n;
2092
2093 sysctl_wire_old_buffer(req, 0);
2094 if (req->oldptr == NULL) {
2095 n = 16; /* A slight overestimate. */
2096 sx_slock(&filelist_lock);
2097 LIST_FOREACH(fp, &filehead, f_list) {
2098 /*
2099 * We should grab the lock, but this is an
2100 * estimate, so does it really matter?
2101 */
2102 /* mtx_lock(fp->f_mtxp); */
2103 n += fp->f_count;
2104 /* mtx_unlock(f->f_mtxp); */
2105 }
2106 sx_sunlock(&filelist_lock);
2107 return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
2108 }
2109 error = 0;
2110 bzero(&xf, sizeof(xf));
2111 xf.xf_size = sizeof(xf);
2112 sx_slock(&allproc_lock);
2113 LIST_FOREACH(p, &allproc, p_list) {
2114 PROC_LOCK(p);
2115 xf.xf_pid = p->p_pid;
2116 xf.xf_uid = p->p_ucred->cr_uid;
2117 PROC_UNLOCK(p);
2118 mtx_lock(&fdesc_mtx);
2119 if ((fdp = p->p_fd) == NULL) {
2120 mtx_unlock(&fdesc_mtx);
2121 continue;
2122 }
2123 FILEDESC_LOCK(fdp);
2124 for (n = 0; n < fdp->fd_nfiles; ++n) {
2125 if ((fp = fdp->fd_ofiles[n]) == NULL)
2126 continue;
2127 xf.xf_fd = n;
2128 xf.xf_file = fp;
2129 xf.xf_data = fp->f_data;
2130 xf.xf_type = fp->f_type;
2131 xf.xf_count = fp->f_count;
2132 xf.xf_msgcount = fp->f_msgcount;
2133 xf.xf_offset = fp->f_offset;
2134 xf.xf_flag = fp->f_flag;
2135 error = SYSCTL_OUT(req, &xf, sizeof(xf));
2136 if (error)
2137 break;
2138 }
2139 FILEDESC_UNLOCK(fdp);
2140 mtx_unlock(&fdesc_mtx);
2141 if (error)
2142 break;
2143 }
2144 sx_sunlock(&allproc_lock);
2145 return (error);
2146}
2147
/* Export the open-file table and its tunables via sysctl. */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2159
/*
 * Create the /dev/fd/0-2 device nodes and their conventional
 * stdin/stdout/stderr aliases at driver initialization time.
 */
static void
fildesc_drvinit(void *unused)
{
	dev_t dev;

	dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
	make_dev_alias(dev, "stdin");
	dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
	make_dev_alias(dev, "stdout");
	dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
	make_dev_alias(dev, "stderr");
}
2172
/*
 * Fallback fileops installed on a struct file before it is fully
 * opened (see falloc); every operation fails or is a no-op.
 */
static fo_rdwr_t	badfo_readwrite;
static fo_ioctl_t	badfo_ioctl;
static fo_poll_t	badfo_poll;
static fo_kqfilter_t	badfo_kqfilter;
static fo_stat_t	badfo_stat;
static fo_close_t	badfo_close;

struct fileops badfileops = {
	badfo_readwrite,
	badfo_readwrite,
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close,
	0
};
2190
2191static int
2192badfo_readwrite(fp, uio, active_cred, flags, td)
2193 struct file *fp;
2194 struct uio *uio;
2195 struct ucred *active_cred;
2196 struct thread *td;
2197 int flags;
2198{
2199
2200 return (EBADF);
2201}
2202
2203static int
2204badfo_ioctl(fp, com, data, active_cred, td)
2205 struct file *fp;
2206 u_long com;
2207 void *data;
2208 struct ucred *active_cred;
2209 struct thread *td;
2210{
2211
2212 return (EBADF);
2213}
2214
/*
 * badfileops poll method: reports no events ready.
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2225
/*
 * badfileops kqfilter method: nothing to attach; returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2234
2235static int
2236badfo_stat(fp, sb, active_cred, td)
2237 struct file *fp;
2238 struct stat *sb;
2239 struct ucred *active_cred;
2240 struct thread *td;
2241{
2242
2243 return (EBADF);
2244}
2245
2246static int
2247badfo_close(fp, td)
2248 struct file *fp;
2249 struct thread *td;
2250{
2251
2252 return (EBADF);
2253}
2254
/* Register the /dev/fd driver and the file-list initializer. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
					fildesc_drvinit,NULL)

static void filelistinit(void *);
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2260
2261/* ARGSUSED*/
2262static void
2263filelistinit(dummy)
2264 void *dummy;
2265{
2266
2267 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2268 NULL, NULL, UMA_ALIGN_PTR, 0);
2269 sx_init(&filelist_lock, "filelist lock");
2270 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2271}
79
static d_open_t	fdopen;
/* Number of /dev/fd minor devices supported. */
#define NUMFDESC 64

/* Character device major number for the /dev/fd driver. */
#define CDEV_MAJOR 22
static struct cdevsw fildesc_cdevsw = {
	.d_open =	fdopen,
	.d_name =	"FD",
	.d_maj =	CDEV_MAJOR,
};

/* How to treat 'new' parameter when allocating a fd for do_dup(). */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);

/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;
struct sx filelist_lock;	/* sx to protect filelist */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
104
105/*
106 * System calls on descriptors.
107 */
108#ifndef _SYS_SYSPROTO_H_
109struct getdtablesize_args {
110 int dummy;
111};
112#endif
113/*
114 * MPSAFE
115 */
116/* ARGSUSED */
117int
118getdtablesize(td, uap)
119 struct thread *td;
120 struct getdtablesize_args *uap;
121{
122 struct proc *p = td->td_proc;
123
124 mtx_lock(&Giant);
125 td->td_retval[0] =
126 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
127 mtx_unlock(&Giant);
128 return (0);
129}
130
131/*
132 * Duplicate a file descriptor to a particular value.
133 *
134 * note: keep in mind that a potential race condition exists when closing
135 * descriptors from a shared descriptor table (via rfork).
136 */
137#ifndef _SYS_SYSPROTO_H_
138struct dup2_args {
139 u_int from;
140 u_int to;
141};
142#endif
143/*
144 * MPSAFE
145 */
146/* ARGSUSED */
147int
148dup2(td, uap)
149 struct thread *td;
150 struct dup2_args *uap;
151{
152
153 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
154 td->td_retval));
155}
156
157/*
158 * Duplicate a file descriptor.
159 */
160#ifndef _SYS_SYSPROTO_H_
161struct dup_args {
162 u_int fd;
163};
164#endif
165/*
166 * MPSAFE
167 */
168/* ARGSUSED */
169int
170dup(td, uap)
171 struct thread *td;
172 struct dup_args *uap;
173{
174
175 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
176}
177
178/*
179 * The file control system call.
180 */
181#ifndef _SYS_SYSPROTO_H_
182struct fcntl_args {
183 int fd;
184 int cmd;
185 long arg;
186};
187#endif
188/*
189 * MPSAFE
190 */
191/* ARGSUSED */
192int
193fcntl(td, uap)
194 struct thread *td;
195 struct fcntl_args *uap;
196{
197 struct flock fl;
198 intptr_t arg;
199 int error;
200
201 error = 0;
202 switch (uap->cmd) {
203 case F_GETLK:
204 case F_SETLK:
205 case F_SETLKW:
206 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
207 arg = (intptr_t)&fl;
208 break;
209 default:
210 arg = uap->arg;
211 break;
212 }
213 if (error)
214 return (error);
215 error = kern_fcntl(td, uap->fd, uap->cmd, arg);
216 if (error)
217 return (error);
218 if (uap->cmd == F_GETLK)
219 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
220 return (error);
221}
222
/*
 * Back end for fcntl(2).  Perform operation 'cmd' with argument 'arg'
 * on descriptor 'fd' of the calling thread's process.  For the locking
 * commands 'arg' is a pointer to an in-kernel struct flock (see
 * fcntl() above); otherwise it is the raw integer argument.
 * Returns 0 or an errno value.
 *
 * Locking: entered with no locks; takes Giant for the duration and
 * the filedesc lock while the descriptor table is examined.  Each
 * case below is responsible for dropping the filedesc lock itself.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	/* Translate the descriptor number into a file pointer. */
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte (UF_EXCLOSE etc.). */
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		/* Duplicate fd to the lowest free descriptor >= arg. */
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			error = EINVAL;
			break;
		}
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		/* Report the close-on-exec flag. */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		/* Set or clear the close-on-exec flag from FD_CLOEXEC. */
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		/* Return the open-file status flags in open(2) form. */
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		/*
		 * Replace the fcntl-settable flags, then push the
		 * FNONBLOCK and FASYNC settings down to the object via
		 * ioctl.  A reference is held across the fo_ioctl()
		 * calls since they may block.
		 */
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		fhold_locked(fp);
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			/* Both ioctls succeeded; we are done. */
			fdrop(fp, td);
			break;
		}
		/*
		 * FIOASYNC failed: back out the FNONBLOCK setting so the
		 * object and f_flag stay consistent.
		 */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		/* Query the SIGIO/SIGURG recipient via the object. */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		/* Set the SIGIO/SIGURG recipient via the object. */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		/* Like F_SETLK, but sleep until the lock is available. */
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		/* POSIX advisory locking only works on vnodes. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/*
			 * Convert a SEEK_CUR-relative range to absolute,
			 * rejecting offsets that would overflow off_t.
			 */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;

		/*
		 * Locks are owned by the process leader so that all
		 * threads/processes sharing the leader share the locks.
		 */
		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Check for race with close */
		FILEDESC_LOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			/*
			 * The descriptor was closed (or replaced) while we
			 * slept in VOP_ADVLOCK(); undo the lock we just
			 * acquired, since close() would have released it.
			 */
			FILEDESC_UNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		/* Query for a lock that would block the described one. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Convert to absolute, checking both directions. */
			if ((flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
443
/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * 'type' selects DUP_FIXED (dup2 semantics: the new descriptor is
 * exactly 'new') or DUP_VARIABLE (lowest free descriptor >= 'new').
 * On success the resulting descriptor number is stored in *retval.
 * Returns 0 or an errno value.
 */
static int
do_dup(td, type, old, new, retval)
	enum dup_type type;
	int old, new;
	register_t *retval;
	struct thread *td;
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, newfd;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfilesperproc)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	/* dup2(fd, fd) is a no-op that just reports the descriptor. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_UNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	/* Hold a reference so 'fp' survives while locks are dropped. */
	fhold(fp);

	/*
	 * Expand the table for the new descriptor if needed. This may
	 * block and drop and reacquire the filedesc lock.
	 */
	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
		error = fdalloc(td, new, &newfd);
		if (error) {
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}
	if (type == DUP_VARIABLE)
		new = newfd;

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor.  Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* Give back the slot fdalloc() may have reserved for us. */
		if (fdp->fd_ofiles[new] == NULL) {
			if (new < fdp->fd_freefile)
				fdp->fd_freefile = new;
			while (fdp->fd_lastfile > 0 &&
			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
				fdp->fd_lastfile--;
		}
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new, ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten. We have
	 * to do the unmap now, but we cannot close it without
	 * introducing an ownership race for the slot.
	 */
	delfp = fdp->fd_ofiles[new];
	KASSERT(delfp == NULL || type == DUP_FIXED,
	    ("dup() picked an open file"));
#if 0
	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
		(void) munmapfd(td, new);
#endif

	/*
	 * Duplicate the source descriptor, update lastfile.
	 * The new descriptor does not inherit close-on-exec.
	 */
	fdp->fd_ofiles[new] = fp;
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	FILEDESC_UNLOCK(fdp);
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 */
	if (delfp) {
		mtx_lock(&Giant);
		(void) closef(delfp, td);
		mtx_unlock(&Giant);
	}
	return (0);
}
552
/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 */
void
funsetown(sigiop)
	struct sigio **sigiop;
{
	struct sigio *sigio;

	/* Snapshot the registration under the global SIGIO lock. */
	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's back-pointer so it cannot be found again. */
	*(sigio->sio_myref) = NULL;
	/* Negative sio_pgid denotes a process group, positive a process. */
	if ((sigio)->sio_pgid < 0) {
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	/* Free the credential reference and the structure itself. */
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}
588
/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessable to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(sigiolst)
	struct sigiolst *sigiolst;
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	/*
	 * Drain the list, dropping the SIGIO lock around each free
	 * (crfree()/FREE() may block) and re-taking it to fetch the
	 * next entry.
	 */
	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}
650
/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 *
 * pgid == 0 clears the registration; pgid > 0 names a process; pgid < 0
 * names the process group -pgid.  Returns 0 or an errno value.
 */
int
fsetown(pgid, sigiop)
	pid_t pgid;
	struct sigio **sigiop;
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	/* proctree_lock keeps the target's session stable below. */
	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	/* Tear down any previous registration before installing ours. */
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	/* Publish the registration last, under the SIGIO lock. */
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}
760
761/*
762 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
763 */
764pid_t
765fgetown(sigiop)
766 struct sigio **sigiop;
767{
768 pid_t pgid;
769
770 SIGIO_LOCK();
771 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
772 SIGIO_UNLOCK();
773 return (pgid);
774}
775
/*
 * Close a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct close_args {
	int	fd;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
close(td, uap)
	struct thread *td;
	struct close_args *uap;
{
	struct filedesc *fdp;
	struct file *fp;
	int fd, error;

	fd = uap->fd;
	error = 0;
	fdp = td->td_proc->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
#if 0
	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
		(void) munmapfd(td, fd);
#endif
	/* Remove the descriptor from the table before anything can block. */
	fdp->fd_ofiles[fd] = NULL;
	fdp->fd_ofileflags[fd] = 0;

	/*
	 * we now hold the fp reference that used to be owned by the descriptor
	 * array.
	 */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	/* Detach any knotes attached to this descriptor. */
	if (fd < fdp->fd_knlistsize) {
		FILEDESC_UNLOCK(fdp);
		knote_fdclose(td, fd);
	} else
		FILEDESC_UNLOCK(fdp);

	/* Release the reference; closef() does POSIX lock cleanup too. */
	error = closef(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}
834
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * ofstat: return status information about a file descriptor in the
 * old (4.3BSD) stat format.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct ostat oub;
	struct stat ub;
	struct file *fp;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error == 0) {
		error = fo_stat(fp, &ub, td->td_ucred, td);
		if (error == 0) {
			/* Convert to the old stat layout for userland. */
			cvtstat(&ub, &oub);
			error = copyout(&oub, uap->sb, sizeof(oub));
		}
		fdrop(fp, td);
	}
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
873
874/*
875 * Return status information about a file descriptor.
876 */
877#ifndef _SYS_SYSPROTO_H_
878struct fstat_args {
879 int fd;
880 struct stat *sb;
881};
882#endif
883/*
884 * MPSAFE
885 */
886/* ARGSUSED */
887int
888fstat(td, uap)
889 struct thread *td;
890 struct fstat_args *uap;
891{
892 struct file *fp;
893 struct stat ub;
894 int error;
895
896 mtx_lock(&Giant);
897 if ((error = fget(td, uap->fd, &fp)) != 0)
898 goto done2;
899 error = fo_stat(fp, &ub, td->td_ucred, td);
900 if (error == 0)
901 error = copyout(&ub, uap->sb, sizeof(ub));
902 fdrop(fp, td);
903done2:
904 mtx_unlock(&Giant);
905 return (error);
906}
907
908/*
909 * Return status information about a file descriptor.
910 */
911#ifndef _SYS_SYSPROTO_H_
912struct nfstat_args {
913 int fd;
914 struct nstat *sb;
915};
916#endif
917/*
918 * MPSAFE
919 */
920/* ARGSUSED */
921int
922nfstat(td, uap)
923 struct thread *td;
924 struct nfstat_args *uap;
925{
926 struct file *fp;
927 struct stat ub;
928 struct nstat nub;
929 int error;
930
931 mtx_lock(&Giant);
932 if ((error = fget(td, uap->fd, &fp)) != 0)
933 goto done2;
934 error = fo_stat(fp, &ub, td->td_ucred, td);
935 if (error == 0) {
936 cvtnstat(&ub, &nub);
937 error = copyout(&nub, uap->sb, sizeof(nub));
938 }
939 fdrop(fp, td);
940done2:
941 mtx_unlock(&Giant);
942 return (error);
943}
944
/*
 * Return pathconf information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct fpathconf_args {
	int	fd;
	int	name;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
fpathconf(td, uap)
	struct thread *td;
	struct fpathconf_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);

	/* If asynchronous I/O is available, it works for all descriptors. */
	if (uap->name == _PC_ASYNC_IO) {
		td->td_retval[0] = async_io_version;
		goto out;
	}
	/* Dispatch on the descriptor type. */
	switch (fp->f_type) {
	case DTYPE_PIPE:
	case DTYPE_SOCKET:
		/* Pipes and sockets only support the _PC_PIPE_BUF query. */
		if (uap->name != _PC_PIPE_BUF) {
			error = EINVAL;
		} else {
			td->td_retval[0] = PIPE_BUF;
			error = 0;
		}
		break;
	case DTYPE_FIFO:
	case DTYPE_VNODE:
		/* Ask the filesystem; VOP_PATHCONF() needs Giant here. */
		vp = fp->f_data;
		mtx_lock(&Giant);
		error = VOP_PATHCONF(vp, uap->name, td->td_retval);
		mtx_unlock(&Giant);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
out:
	fdrop(fp, td);
	return (error);
}
1000
/*
 * Allocate a file descriptor for the process.
 */
/* Count of descriptor-table expansions, exported via sysctl. */
static int fdexpand;
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");

/*
 * Find a free descriptor slot >= 'want' and return its index in
 * *result, growing the ofile array as necessary.  Called (and returns)
 * with the filedesc lock held, but may drop and reacquire it while
 * allocating memory, so callers must re-validate cached state.
 */
int
fdalloc(td, want, result)
	struct thread *td;
	int want;
	int *result;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	int lim, last, nfiles;
	struct file **newofile, **oldofile;
	char *newofileflags;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		i = max(want, fdp->fd_freefile);
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				/* Claim the slot and update bookkeeping. */
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (i >= lim)
			return (EMFILE);
		/* Double the table size (at least NDEXTENT, >= want). */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		while (nfiles < want)
			nfiles <<= 1;
		FILEDESC_UNLOCK(fdp);
		/*
		 * XXX malloc() calls uma_large_malloc() for sizes larger
		 * than KMEM_ZMAX bytes.  uma_large_malloc() requires Giant.
		 */
		mtx_lock(&Giant);
		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
		mtx_unlock(&Giant);

		/*
		 * Deal with file-table extend race that might have
		 * occurred while filedesc was unlocked.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_nfiles >= nfiles) {
			/* Someone else grew the table; discard ours. */
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(newofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
			continue;
		}
		/* The flag bytes live directly after the pointer array. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		i = fdp->fd_nfiles * sizeof(struct file *);
		bcopy(fdp->fd_ofiles, newofile, i);
		bzero((char *)newofile + i,
		    nfiles * sizeof(struct file *) - i);
		i = fdp->fd_nfiles * sizeof(char);
		bcopy(fdp->fd_ofileflags, newofileflags, i);
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/*
		 * The initial NDFILE-sized arrays are embedded in struct
		 * filedesc0 and must not be freed.
		 */
		if (fdp->fd_nfiles > NDFILE)
			oldofile = fdp->fd_ofiles;
		else
			oldofile = NULL;
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		if (oldofile != NULL) {
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(oldofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
		}
	}
	return (0);
}
1108
1109/*
1110 * Check to see whether n user file descriptors
1111 * are available to the process p.
1112 */
1113int
1114fdavail(td, n)
1115 struct thread *td;
1116 int n;
1117{
1118 struct proc *p = td->td_proc;
1119 struct filedesc *fdp = td->td_proc->p_fd;
1120 struct file **fpp;
1121 int i, lim, last;
1122
1123 FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
1124
1125 lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
1126 if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
1127 return (1);
1128 last = min(fdp->fd_nfiles, lim);
1129 fpp = &fdp->fd_ofiles[fdp->fd_freefile];
1130 for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
1131 if (*fpp == NULL && --n <= 0)
1132 return (1);
1133 }
1134 return (0);
1135}
1136
/*
 * Create a new open file structure and allocate
 * a file decriptor for the process that refers to it.
 *
 * On success the new file (with one reference) is stored in *resultfp
 * and its descriptor number in *resultfd (either may be NULL).
 */
int
falloc(td, resultfp, resultfd)
	struct thread *td;
	struct file **resultfp;
	int *resultfd;
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	/* Enforce the global open-file limit. */
	sx_xlock(&filelist_lock);
	if (nfiles >= maxfiles) {
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		tablefull("file");
		return (ENFILE);
	}
	nfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc();
	fp->f_gcflag = 0;
	fp->f_count = 1;
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	fp->f_seqcount = 1;
	FILEDESC_LOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	/* Grab a descriptor slot; fdalloc() may drop/retake the fd lock. */
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_UNLOCK(p->p_fd);
		/* fdrop() releases our reference and frees the file. */
		fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_UNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}
1192
1193/*
1194 * Free a file descriptor.
1195 */
1196void
1197ffree(fp)
1198 struct file *fp;
1199{
1200
1201 KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
1202 sx_xlock(&filelist_lock);
1203 LIST_REMOVE(fp, f_list);
1204 nfiles--;
1205 sx_xunlock(&filelist_lock);
1206 crfree(fp->f_cred);
1207 uma_zfree(file_zone, fp);
1208}
1209
1210/*
1211 * Build a new filedesc structure from another.
1212 * Copy the current, root, and jail root vnode references.
1213 */
1214struct filedesc *
1215fdinit(fdp)
1216 struct filedesc *fdp;
1217{
1218 struct filedesc0 *newfdp;
1219
1220 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1221 M_FILEDESC, M_WAITOK | M_ZERO);
1222 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1223 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1224 if (newfdp->fd_fd.fd_cdir)
1225 VREF(newfdp->fd_fd.fd_cdir);
1226 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1227 if (newfdp->fd_fd.fd_rdir)
1228 VREF(newfdp->fd_fd.fd_rdir);
1229 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1230 if (newfdp->fd_fd.fd_jdir)
1231 VREF(newfdp->fd_fd.fd_jdir);
1232
1233 /* Create the file descriptor table. */
1234 newfdp->fd_fd.fd_refcnt = 1;
1235 newfdp->fd_fd.fd_cmask = cmask;
1236 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1237 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1238 newfdp->fd_fd.fd_nfiles = NDFILE;
1239 newfdp->fd_fd.fd_knlistsize = -1;
1240 return (&newfdp->fd_fd);
1241}
1242
1243/*
1244 * Share a filedesc structure.
1245 */
1246struct filedesc *
1247fdshare(fdp)
1248 struct filedesc *fdp;
1249{
1250 FILEDESC_LOCK(fdp);
1251 fdp->fd_refcnt++;
1252 FILEDESC_UNLOCK(fdp);
1253 return (fdp);
1254}
1255
/*
 * Copy a filedesc structure.
 * A NULL pointer in returns a NULL reference, this is to ease callers,
 * not catch errors.
 *
 * Called with the filedesc lock held; the lock is dropped and retaken
 * repeatedly around allocations, so table sizes are re-read each time.
 * kqueue descriptors are not copied.
 */
struct filedesc *
fdcopy(fdp)
	struct filedesc *fdp;
{
	struct filedesc *newfdp;
	struct file **fpp;
	int i, j;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/* Drop the lock around the (possibly sleeping) allocation. */
	FILEDESC_UNLOCK(fdp);
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	FILEDESC_LOCK(fdp);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	FILEDESC_UNLOCK(fdp);
	/* The copied mutex is not usable; reinitialize it. */
	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	if (newfdp->fd_cdir)
		VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	if (newfdp->fd_jdir)
		VREF(newfdp->fd_jdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	FILEDESC_LOCK(fdp);
	newfdp->fd_lastfile = fdp->fd_lastfile;
	newfdp->fd_nfiles = fdp->fd_nfiles;
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
retry:
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		FILEDESC_UNLOCK(fdp);
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		FILEDESC_LOCK(fdp);
		/* Re-read sizes; the table may have grown while unlocked. */
		newfdp->fd_lastfile = fdp->fd_lastfile;
		newfdp->fd_nfiles = fdp->fd_nfiles;
		j = newfdp->fd_nfiles;
		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
			j /= 2;
		if (i != j) {
			/*
			 * The size of the original table has changed.
			 * Go over once again.
			 */
			FILEDESC_UNLOCK(fdp);
			FREE(newfdp->fd_ofiles, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			newfdp->fd_lastfile = fdp->fd_lastfile;
			newfdp->fd_nfiles = fdp->fd_nfiles;
			goto retry;
		}
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));

	/*
	 * kq descriptors cannot be copied.
	 */
	if (newfdp->fd_knlistsize != -1) {
		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
				*fpp = NULL;
				if (i < newfdp->fd_freefile)
					newfdp->fd_freefile = i;
			}
			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
				newfdp->fd_lastfile--;
		}
		newfdp->fd_knlist = NULL;
		newfdp->fd_knlistsize = -1;
		newfdp->fd_knhash = NULL;
		newfdp->fd_knhashmask = 0;
	}

	/* Take a reference on every file the copy now points at. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp != NULL)
			fhold(*fpp);
	}
	return (newfdp);
}
1369
/* A mutex to protect the association between a proc and filedesc. */
struct mtx fdesc_mtx;
MTX_SYSINIT(fdesc, &fdesc_mtx, "fdesc", MTX_DEF);

/*
 * Release a filedesc structure.
 *
 * Drops one reference; when the last reference goes away, closes every
 * remaining open file, detaches the structure from the process, and
 * frees all associated storage.
 */
void
fdfree(td)
	struct thread *td;
{
	struct filedesc *fdp;
	struct file **fpp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_LOCK(fdp);
	if (--fdp->fd_refcnt > 0) {
		FILEDESC_UNLOCK(fdp);
		return;
	}

	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	FILEDESC_UNLOCK(fdp);
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/* An ofiles array larger than NDFILE was separately allocated. */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (fdp->fd_cdir)
		vrele(fdp->fd_cdir);
	if (fdp->fd_rdir)
		vrele(fdp->fd_rdir);
	if (fdp->fd_jdir)
		vrele(fdp->fd_jdir);
	if (fdp->fd_knlist)
		FREE(fdp->fd_knlist, M_KQUEUE);
	if (fdp->fd_knhash)
		FREE(fdp->fd_knhash, M_KQUEUE);
	mtx_destroy(&fdp->fd_mtx);
	FREE(fdp, M_FILEDESC);
}
1427
1428/*
1429 * For setugid programs, we don't want to people to use that setugidness
1430 * to generate error messages which write to a file which otherwise would
1431 * otherwise be off-limits to the process. We check for filesystems where
1432 * the vnode can change out from under us after execve (like [lin]procfs).
1433 *
1434 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1435 * sufficient. We also don't for check setugidness since we know we are.
1436 */
1437static int
1438is_unsafe(struct file *fp)
1439{
1440 if (fp->f_type == DTYPE_VNODE) {
1441 struct vnode *vp = fp->f_data;
1442
1443 if ((vp->v_vflag & VV_PROCDEP) != 0)
1444 return (1);
1445 }
1446 return (0);
1447}
1448
/*
 * Make this setguid thing safe, if at all possible.
 *
 * Closes descriptors 0..2 that refer to process-dependent files (see
 * is_unsafe() above) so a setugid image cannot be tricked into writing
 * through them.
 */
void
setugidsafety(td)
	struct thread *td;
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * Note: fdp->fd_ofiles may be reallocated out from under us while
	 * we are blocked in a close.  Be careful!
	 */
	FILEDESC_LOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		/* Only stdin/stdout/stderr are of interest. */
		if (i > 2)
			break;
		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
			struct file *fp;

#if 0
			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
				(void) munmapfd(td, i);
#endif
			/* Detach any knotes on this descriptor first. */
			if (i < fdp->fd_knlistsize) {
				FILEDESC_UNLOCK(fdp);
				knote_fdclose(td, i);
				FILEDESC_LOCK(fdp);
			}
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			if (i < fdp->fd_freefile)
				fdp->fd_freefile = i;
			FILEDESC_UNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_LOCK(fdp);
		}
	}
	/* Shrink fd_lastfile past any trailing closed slots. */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	FILEDESC_UNLOCK(fdp);
}
1502
/*
 * Close any files on exec?
 *
 * Walks the descriptor table and closes every descriptor marked
 * UF_EXCLOSE (set via fcntl F_SETFD / FD_CLOEXEC).
 */
void
fdcloseexec(td)
	struct thread *td;
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_LOCK(fdp);

	/*
	 * We cannot cache fd_ofiles or fd_ofileflags since operations
	 * may block and rip them out from under us.
	 */
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		if (fdp->fd_ofiles[i] != NULL &&
		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
			struct file *fp;

#if 0
			if (fdp->fd_ofileflags[i] & UF_MAPPED)
				(void) munmapfd(td, i);
#endif
			/* Detach any knotes on this descriptor first. */
			if (i < fdp->fd_knlistsize) {
				FILEDESC_UNLOCK(fdp);
				knote_fdclose(td, i);
				FILEDESC_LOCK(fdp);
			}
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			if (i < fdp->fd_freefile)
				fdp->fd_freefile = i;
			FILEDESC_UNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_LOCK(fdp);
		}
	}
	/* Shrink fd_lastfile past any trailing closed slots. */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	FILEDESC_UNLOCK(fdp);
}
1556
1557/*
1558 * It is unsafe for set[ug]id processes to be started with file
1559 * descriptors 0..2 closed, as these descriptors are given implicit
1560 * significance in the Standard C library. fdcheckstd() will create a
1561 * descriptor referencing /dev/null for each of stdin, stdout, and
1562 * stderr that is not already open.
1563 */
int
fdcheckstd(td)
	struct thread *td;
{
	struct nameidata nd;
	struct filedesc *fdp;
	struct file *fp;
	register_t retval;
	int fd, i, error, flags, devnull;

	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (0);
	devnull = -1;	/* fd of the /dev/null instance once opened */
	error = 0;
	for (i = 0; i < 3; i++) {
		if (fdp->fd_ofiles[i] != NULL)
			continue;
		if (devnull < 0) {
			/*
			 * First hole: open /dev/null into a fresh file.
			 * falloc() picks the lowest free slot, which must
			 * be i since everything below i is occupied.
			 */
			error = falloc(td, &fp, &fd);
			if (error != 0)
				break;
			KASSERT(fd == i, ("oof, we didn't get our fd"));
			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
			    td);
			flags = FREAD | FWRITE;
			error = vn_open(&nd, &flags, 0);
			if (error != 0) {
				/* Undo falloc(): clear the slot, drop fp. */
				FILEDESC_LOCK(fdp);
				fdp->fd_ofiles[fd] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
				break;
			}
			NDFREE(&nd, NDF_ONLY_PNBUF);
			fp->f_data = nd.ni_vp;
			fp->f_flag = flags;
			fp->f_ops = &vnops;
			fp->f_type = DTYPE_VNODE;
			/* vn_open() returned the vnode locked; release it. */
			VOP_UNLOCK(nd.ni_vp, 0, td);
			devnull = fd;
		} else {
			/* Later holes: just dup the /dev/null descriptor. */
			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
			if (error != 0)
				break;
		}
	}
	return (error);
}
1613
1614/*
1615 * Internal form of close.
1616 * Decrement reference count on file structure.
1617 * Note: td may be NULL when closing a file
1618 * that was being passed in a message.
1619 */
1620int
1621closef(fp, td)
1622 struct file *fp;
1623 struct thread *td;
1624{
1625 struct vnode *vp;
1626 struct flock lf;
1627
1628 if (fp == NULL)
1629 return (0);
1630 /*
1631 * POSIX record locking dictates that any close releases ALL
1632 * locks owned by this process. This is handled by setting
1633 * a flag in the unlock to free ONLY locks obeying POSIX
1634 * semantics, and not to free BSD-style file locks.
1635 * If the descriptor was in a message, POSIX-style locks
1636 * aren't passed with the descriptor.
1637 */
1638 if (td != NULL && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0 &&
1639 fp->f_type == DTYPE_VNODE) {
1640 lf.l_whence = SEEK_SET;
1641 lf.l_start = 0;
1642 lf.l_len = 0;
1643 lf.l_type = F_UNLCK;
1644 vp = fp->f_data;
1645 (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
1646 F_UNLCK, &lf, F_POSIX);
1647 }
1648 return (fdrop(fp, td));
1649}
1650
1651/*
1652 * Drop reference on struct file passed in, may call closef if the
1653 * reference hits zero.
1654 */
/*
 * Drop a reference on a file structure.  Acquires the file lock and
 * hands it to fdrop_locked(), which consumes (unlocks) it and frees
 * the file when the last reference goes away.
 */
int
fdrop(struct file *fp, struct thread *td)
{

	FILE_LOCK(fp);
	return (fdrop_locked(fp, td));
}
1664
1665/*
1666 * Extract the file pointer associated with the specified descriptor for
1667 * the current user process.
1668 *
1669 * If the descriptor doesn't exist, EBADF is returned.
1670 *
1671 * If the descriptor exists but doesn't match 'flags' then
1672 * return EBADF for read attempts and EINVAL for write attempts.
1673 *
1674 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
1675 * It should be droped with fdrop().
1676 * If it is not set, then the refcount will not be bumped however the
1677 * thread's filedesc struct will be returned locked (for fgetsock).
1678 *
1679 * If an error occured the non-zero error is returned and *fpp is set to NULL.
1680 * Otherwise *fpp is set and zero is returned.
1681 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
{
	struct filedesc *fdp;
	struct file *fp;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	/* Reject closed slots and descriptors already marked defunct. */
	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * Note: FREAD failures returns EBADF to maintain backwards
	 * compatibility with what routines returned before.
	 *
	 * Only one flag, or 0, may be specified.
	 */
	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EINVAL);
	}
	if (hold) {
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
	}
	/* If !hold we return with the filedesc lock still held. */
	*fpp = fp;
	return (0);
}
1718
/* Return the file behind fd with a reference held; release with fdrop(). */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, 0, 1));
}
1725
/* As fget(), but fail with EBADF if the descriptor lacks FREAD. */
int
fget_read(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FREAD, 1));
}
1732
/* As fget(), but fail with EINVAL if the descriptor lacks FWRITE. */
int
fget_write(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FWRITE, 1));
}
1739
1740/*
1741 * Like fget() but loads the underlying vnode, or returns an error if
1742 * the descriptor does not represent a vnode. Note that pipes use vnodes
1743 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1744 * error). The returned vnode will be vref()d.
1745 */
1746static __inline int
1747_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1748{
1749 struct file *fp;
1750 int error;
1751
1752 *vpp = NULL;
1753 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1754 return (error);
1755 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1756 error = EINVAL;
1757 } else {
1758 *vpp = fp->f_data;
1759 vref(*vpp);
1760 }
1761 FILEDESC_UNLOCK(td->td_proc->p_fd);
1762 return (error);
1763}
1764
/* Return the vref()d vnode behind fd; no access-mode requirement. */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}
1771
/* As fgetvp(), requesting read access (FREAD) on the descriptor. */
int
fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FREAD));
}
1778
/* As fgetvp(), requesting write access (FWRITE) on the descriptor. */
int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FWRITE));
}
1785
1786/*
1787 * Like fget() but loads the underlying socket, or returns an error if
1788 * the descriptor does not represent a socket.
1789 *
1790 * We bump the ref count on the returned socket. XXX Also obtain the SX
1791 * lock in the future.
1792 */
int
fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
{
	struct file *fp;
	int error;

	*spp = NULL;
	if (fflagp != NULL)
		*fflagp = 0;
	/* hold == 0: _fget() returns with the filedesc lock held. */
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		error = ENOTSOCK;
	} else {
		*spp = fp->f_data;
		if (fflagp)
			*fflagp = fp->f_flag;
		/* Reference the socket for the caller; drop via fputsock(). */
		soref(*spp);
	}
	FILEDESC_UNLOCK(td->td_proc->p_fd);
	return (error);
}
1815
1816/*
1817 * Drop the reference count on the the socket and XXX release the SX lock in
1818 * the future. The last reference closes the socket.
1819 */
void
fputsock(struct socket *so)
{

	/* Drop the reference taken by fgetsock(); last one closes. */
	sorele(so);
}
1826
1827/*
1828 * Drop reference on struct file passed in, may call closef if the
1829 * reference hits zero.
1830 * Expects struct file locked, and will unlock it.
1831 */
int
fdrop_locked(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct flock lf;
	struct vnode *vp;
	int error;

	FILE_LOCK_ASSERT(fp, MA_OWNED);

	/* Fast path: not the last reference; just unlock and return. */
	if (--fp->f_count > 0) {
		FILE_UNLOCK(fp);
		return (0);
	}
	/* Last reference: tear the file down under Giant. */
	mtx_lock(&Giant);
	if (fp->f_count < 0)
		panic("fdrop: count < 0");
	/* Final close of a flock()ed vnode drops the advisory lock. */
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = fp->f_data;
		FILE_UNLOCK(fp);
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	} else
		FILE_UNLOCK(fp);
	/* Defunct files (badfileops) have no close method to invoke. */
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	else
		error = 0;
	ffree(fp);
	mtx_unlock(&Giant);
	return (error);
}
1868
1869/*
1870 * Apply an advisory lock on a file descriptor.
1871 *
1872 * Just attempt to get a record lock of the requested type on
1873 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1874 */
1875#ifndef _SYS_SYSPROTO_H_
1876struct flock_args {
1877 int fd;
1878 int how;
1879};
1880#endif
1881/*
1882 * MPSAFE
1883 */
1884/* ARGSUSED */
int
flock(td, uap)
	struct thread *td;
	struct flock_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	/* flock() only works on vnode-backed descriptors. */
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	mtx_lock(&Giant);
	vp = fp->f_data;
	/* Lock the whole file: whence SEEK_SET, start 0, len 0. */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		FILE_LOCK(fp);
		fp->f_flag &= ~FHASLOCK;
		FILE_UNLOCK(fp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		error = EBADF;
		goto done2;
	}
	/* FHASLOCK tells the last fdrop to release this flock. */
	FILE_LOCK(fp);
	fp->f_flag |= FHASLOCK;
	FILE_UNLOCK(fp);
	/* LOCK_NB requests a non-blocking attempt (no F_WAIT). */
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	mtx_unlock(&Giant);
	return (error);
}
1933
1934/*
1935 * File Descriptor pseudo-device driver (/dev/fd/).
1936 *
1937 * Opening minor device N dup()s the file (if any) connected to file
1938 * descriptor N belonging to the calling process. Note that this driver
1939 * consists of only the ``open()'' routine, because all subsequent
1940 * references to this file will be direct to the other driver.
1941 */
1942/* ARGSUSED */
/* Open handler for /dev/fd/N: never succeeds directly (see below). */
static int
fdopen(dev, mode, type, td)
	dev_t dev;
	int mode, type;
	struct thread *td;
{

	/*
	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
	 * the file descriptor being sought for duplication. The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open. Open will detect this special error and take the
	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	td->td_dupfd = dev2unit(dev);
	return (ENODEV);
}
1961
1962/*
1963 * Duplicate the specified descriptor to a free descriptor.
1964 */
int
dupfdopen(td, fdp, indx, dfd, mode, error)
	struct thread *td;
	struct filedesc *fdp;
	int indx, dfd;
	int mode;
	int error;
{
	struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_LOCK(fdp);
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx). (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		FILE_LOCK(wfp);
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILE_UNLOCK(wfp);
			FILEDESC_UNLOCK(fdp);
			return (EACCES);
		}
		/* Remember whatever previously occupied slot indx. */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fhold_locked(wfp);
		FILE_UNLOCK(wfp);
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		/*
		 * fdrop_locked() expects the file locked; take the lock
		 * before dropping the filedesc lock.
		 */
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own. Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;

		/*
		 * Complete the clean up of the filedesc structure by
		 * recomputing the various hints.
		 */
		if (indx > fdp->fd_lastfile) {
			fdp->fd_lastfile = indx;
		} else {
			while (fdp->fd_lastfile > 0 &&
			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
				fdp->fd_lastfile--;
			}
			if (dfd < fdp->fd_freefile)
				fdp->fd_freefile = dfd;
		}
		/* As above: lock fp for the fdrop_locked() below. */
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);

		/*
		 * we now own the reference to fp that the ofiles[] array
		 * used to own. Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	default:
		FILEDESC_UNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}
2080
2081/*
2082 * Get file structures.
2083 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	/* Wire the user buffer so SYSCTL_OUT cannot fault with locks held. */
	sysctl_wire_old_buffer(req, 0);
	if (req->oldptr == NULL) {
		/*
		 * Size query: estimate the number of records from the
		 * per-file reference counts, plus some slack.
		 */
		n = 16; /* A slight overestimate. */
		sx_slock(&filelist_lock);
		LIST_FOREACH(fp, &filehead, f_list) {
			/*
			 * We should grab the lock, but this is an
			 * estimate, so does it really matter?
			 */
			/* mtx_lock(fp->f_mtxp); */
			n += fp->f_count;
			/* mtx_unlock(f->f_mtxp); */
		}
		sx_sunlock(&filelist_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		/* fdesc_mtx keeps p_fd from being torn down under us. */
		mtx_lock(&fdesc_mtx);
		if ((fdp = p->p_fd) == NULL) {
			mtx_unlock(&fdesc_mtx);
			continue;
		}
		FILEDESC_LOCK(fdp);
		/* Emit one xfile record per open descriptor slot. */
		for (n = 0; n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = fp->f_msgcount;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_UNLOCK(fdp);
		mtx_unlock(&fdesc_mtx);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}
2147
/* kern.file: dump the entire open-file table as struct xfile records. */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

/* kern.maxfilesperproc: per-process open-file limit (writable). */
SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

/* kern.maxfiles: system-wide open-file limit (writable). */
SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

/* kern.openfiles: current count of open files (read-only). */
SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2159
2160static void
2161fildesc_drvinit(void *unused)
2162{
2163 dev_t dev;
2164
2165 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
2166 make_dev_alias(dev, "stdin");
2167 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
2168 make_dev_alias(dev, "stdout");
2169 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
2170 make_dev_alias(dev, "stderr");
2171}
2172
/* Stub handlers for defunct descriptors: every operation fails. */
static fo_rdwr_t badfo_readwrite;
static fo_ioctl_t badfo_ioctl;
static fo_poll_t badfo_poll;
static fo_kqfilter_t badfo_kqfilter;
static fo_stat_t badfo_stat;
static fo_close_t badfo_close;

/*
 * Operations vector for a file whose backing object is gone; _fget()
 * treats f_ops == &badfileops as a closed descriptor.  Positional
 * order presumably follows struct fileops (read, write, ioctl, poll,
 * kqfilter, stat, close, flags) — confirm against sys/file.h.
 */
struct fileops badfileops = {
	badfo_readwrite,
	badfo_readwrite,
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close,
	0
};
2190
/*
 * Read/write handler for badfileops: always rejects with EBADF.
 */
static int
badfo_readwrite(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EBADF);
}
2202
2203static int
2204badfo_ioctl(fp, com, data, active_cred, td)
2205 struct file *fp;
2206 u_long com;
2207 void *data;
2208 struct ucred *active_cred;
2209 struct thread *td;
2210{
2211
2212 return (EBADF);
2213}
2214
/*
 * Poll handler for badfileops: reports no events (returns 0).
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2225
/*
 * Kqueue-filter handler for badfileops; always returns 0 without
 * attaching anything.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2234
/*
 * Stat handler for badfileops: always rejects with EBADF.
 */
static int
badfo_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{

	return (EBADF);
}
2245
/*
 * Close handler for badfileops: always rejects with EBADF.
 */
static int
badfo_close(struct file *fp, struct thread *td)
{

	return (EBADF);
}
2254
/* Create the /dev/fd nodes once device drivers are initialized. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
	fildesc_drvinit,NULL)

static void filelistinit(void *);
/* Set up the file zone and global file locks during lock setup. */
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2260
2261/* ARGSUSED*/
static void
filelistinit(dummy)
	void *dummy;
{

	/* UMA zone backing all struct file allocations. */
	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, 0);
	/* Guards the global filehead list (see sysctl_kern_file). */
	sx_init(&filelist_lock, "filelist lock");
	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
}