/* sys/kern/kern_descrip.c — FreeBSD, diff r111708 -> r111815; diff-viewer chrome removed. */
/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
 * $FreeBSD: head/sys/kern/kern_descrip.c 111815 2003-03-03 12:15:54Z phk $
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/lock.h>
51#include <sys/kernel.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54#include <sys/sysctl.h>
55#include <sys/vnode.h>
56#include <sys/mount.h>
57#include <sys/proc.h>
58#include <sys/namei.h>
59#include <sys/file.h>
60#include <sys/stat.h>
61#include <sys/filio.h>
62#include <sys/fcntl.h>
63#include <sys/unistd.h>
64#include <sys/resourcevar.h>
65#include <sys/event.h>
66#include <sys/sx.h>
67#include <sys/socketvar.h>
68#include <sys/signalvar.h>
69
70#include <machine/limits.h>
71
72#include <vm/vm.h>
73#include <vm/vm_extern.h>
74#include <vm/uma.h>
75
76static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
77static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
78
79uma_zone_t file_zone;
80
81static d_open_t fdopen;
82#define NUMFDESC 64
83
84#define CDEV_MAJOR 22
85static struct cdevsw fildesc_cdevsw = {
40 */
41
42#include "opt_compat.h"
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/syscallsubr.h>
47#include <sys/sysproto.h>
48#include <sys/conf.h>
49#include <sys/filedesc.h>
50#include <sys/lock.h>
51#include <sys/kernel.h>
52#include <sys/malloc.h>
53#include <sys/mutex.h>
54#include <sys/sysctl.h>
55#include <sys/vnode.h>
56#include <sys/mount.h>
57#include <sys/proc.h>
58#include <sys/namei.h>
59#include <sys/file.h>
60#include <sys/stat.h>
61#include <sys/filio.h>
62#include <sys/fcntl.h>
63#include <sys/unistd.h>
64#include <sys/resourcevar.h>
65#include <sys/event.h>
66#include <sys/sx.h>
67#include <sys/socketvar.h>
68#include <sys/signalvar.h>
69
70#include <machine/limits.h>
71
72#include <vm/vm.h>
73#include <vm/vm_extern.h>
74#include <vm/uma.h>
75
76static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table");
77static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures");
78
79uma_zone_t file_zone;
80
81static d_open_t fdopen;
82#define NUMFDESC 64
83
84#define CDEV_MAJOR 22
85static struct cdevsw fildesc_cdevsw = {
86 /* open */ fdopen,
87 /* close */ noclose,
88 /* read */ noread,
89 /* write */ nowrite,
90 /* ioctl */ noioctl,
91 /* poll */ nopoll,
92 /* mmap */ nommap,
93 /* strategy */ nostrategy,
94 /* name */ "FD",
95 /* maj */ CDEV_MAJOR,
96 /* dump */ nodump,
97 /* psize */ nopsize,
98 /* flags */ 0,
86 .d_open = fdopen,
87 .d_name = "FD",
88 .d_maj = CDEV_MAJOR,
99};
100
/*
 * How to treat the 'new' parameter when allocating a fd for do_dup():
 * DUP_VARIABLE picks the lowest free descriptor >= new (dup(2)/F_DUPFD);
 * DUP_FIXED uses exactly 'new' (dup2(2)).
 */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);

/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;		/* default file creation mask; defined elsewhere */
struct sx filelist_lock;	/* sx to protect filehead/nfiles */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
115
116/*
117 * System calls on descriptors.
118 */
119#ifndef _SYS_SYSPROTO_H_
120struct getdtablesize_args {
121 int dummy;
122};
123#endif
124/*
125 * MPSAFE
126 */
127/* ARGSUSED */
128int
129getdtablesize(td, uap)
130 struct thread *td;
131 struct getdtablesize_args *uap;
132{
133 struct proc *p = td->td_proc;
134
135 mtx_lock(&Giant);
136 td->td_retval[0] =
137 min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
138 mtx_unlock(&Giant);
139 return (0);
140}
141
142/*
143 * Duplicate a file descriptor to a particular value.
144 *
145 * note: keep in mind that a potential race condition exists when closing
146 * descriptors from a shared descriptor table (via rfork).
147 */
148#ifndef _SYS_SYSPROTO_H_
149struct dup2_args {
150 u_int from;
151 u_int to;
152};
153#endif
154/*
155 * MPSAFE
156 */
157/* ARGSUSED */
158int
159dup2(td, uap)
160 struct thread *td;
161 struct dup2_args *uap;
162{
163
164 return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
165 td->td_retval));
166}
167
168/*
169 * Duplicate a file descriptor.
170 */
171#ifndef _SYS_SYSPROTO_H_
172struct dup_args {
173 u_int fd;
174};
175#endif
176/*
177 * MPSAFE
178 */
179/* ARGSUSED */
180int
181dup(td, uap)
182 struct thread *td;
183 struct dup_args *uap;
184{
185
186 return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
187}
188
189/*
190 * The file control system call.
191 */
192#ifndef _SYS_SYSPROTO_H_
193struct fcntl_args {
194 int fd;
195 int cmd;
196 long arg;
197};
198#endif
199/*
200 * MPSAFE
201 */
202/* ARGSUSED */
203int
204fcntl(td, uap)
205 struct thread *td;
206 struct fcntl_args *uap;
207{
208 struct flock fl;
209 intptr_t arg;
210 int error;
211
212 error = 0;
213 switch (uap->cmd) {
214 case F_GETLK:
215 case F_SETLK:
216 case F_SETLKW:
217 error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
218 arg = (intptr_t)&fl;
219 break;
220 default:
221 arg = uap->arg;
222 break;
223 }
224 if (error)
225 return (error);
226 error = kern_fcntl(td, uap->fd, uap->cmd, arg);
227 if (error)
228 return (error);
229 if (uap->cmd == F_GETLK)
230 error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
231 return (error);
232}
233
/*
 * In-kernel implementation of fcntl(2).  'arg' is either an immediate
 * integer value or, for the lock commands, a pointer to an in-kernel
 * struct flock (see fcntl() above).  Runs under Giant; each case is
 * responsible for dropping the filedesc lock before returning control
 * here.  Returns 0 or an errno value.
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	/* Validate fd and fetch its file under the filedesc lock. */
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		/* Reject a minimum beyond the per-process descriptor limit. */
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			error = EINVAL;
			break;
		}
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		/* Per-descriptor close-on-exec flag, not a file flag. */
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		/* Lock hand-off: take the file lock before dropping fdp's. */
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		fhold_locked(fp);
		/* Replace only the fcntl-settable flags; access mode is fixed. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		/*
		 * Push the new non-blocking / async settings down to the
		 * object via ioctl.  NOTE(review): f_flag is re-read here
		 * without the file lock — presumably benign in this era;
		 * confirm against the locking notes in sys/file.h.
		 */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			/* Both ioctls accepted the new flags: done. */
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: roll back the FIONBIO change as well. */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		/* Hold fp across the (possibly sleeping) ioctl. */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		/* Same as F_SETLK but sleep until the lock can be granted. */
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		/* Advisory locks only apply to vnode-backed descriptors. */
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Guard the l_start += f_offset add against overflow. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;

		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			/* Locks are owned by the process-group leader. */
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/*
		 * Check for race with close: if the descriptor was closed
		 * (or replaced) while we slept in VOP_ADVLOCK(), undo any
		 * lock we may have acquired, since close would have
		 * released it.
		 */
		FILEDESC_LOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			FILEDESC_UNLOCK(fdp);
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Overflow check in both directions for the offset add. */
			if ((flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
454
/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * 'type' selects the target policy (see enum dup_type): DUP_FIXED dups
 * 'old' exactly onto 'new'; DUP_VARIABLE treats 'new' as a minimum and
 * picks the lowest free slot.  On success the chosen descriptor is
 * stored in *retval and 0 is returned; otherwise an errno is returned.
 */
static int
do_dup(td, type, old, new, retval)
	enum dup_type type;
	int old, new;
	register_t *retval;
	struct thread *td;
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, newfd;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfilesperproc)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	/* dup2(fd, fd) is a no-op that just reports fd back. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_UNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * Expand the table for the new descriptor if needed.  This may
	 * block and drop and reacquire the filedesc lock.
	 */
	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
		error = fdalloc(td, new, &newfd);
		if (error) {
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}
	if (type == DUP_VARIABLE)
		new = newfd;

	/*
	 * If the old file changed out from under us (fdalloc() may have
	 * slept) then treat it as a bad file descriptor.  Userland
	 * should do its own locking to avoid this case.  Before failing,
	 * release the slot fdalloc() may have reserved for us.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		if (fdp->fd_ofiles[new] == NULL) {
			if (new < fdp->fd_freefile)
				fdp->fd_freefile = new;
			while (fdp->fd_lastfile > 0 &&
			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
				fdp->fd_lastfile--;
		}
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new, ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten.  We have
	 * to do the unmap now, but we cannot close it without
	 * introducing an ownership race for the slot.
	 */
	delfp = fdp->fd_ofiles[new];
	KASSERT(delfp == NULL || type == DUP_FIXED,
	    ("dup() picked an open file"));
#if 0
	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
		(void) munmapfd(td, new);
#endif

	/*
	 * Duplicate the source descriptor, update lastfile.  The copy
	 * never inherits close-on-exec (POSIX dup semantics).
	 */
	fdp->fd_ofiles[new] = fp;
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	FILEDESC_UNLOCK(fdp);
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 */
	if (delfp) {
		mtx_lock(&Giant);
		(void) closef(delfp, td);
		mtx_unlock(&Giant);
	}
	return (0);
}
563
/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.  Safe to call with *sigiop already NULL.
 */
void
funsetown(sigiop)
	struct sigio **sigiop;
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's back-pointer so nobody else finds this sigio. */
	*(sigio->sio_myref) = NULL;
	/* Negative sio_pgid means a process group owner; positive a proc. */
	if ((sigio)->sio_pgid < 0) {
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	/* Free the credential reference and the structure outside all locks. */
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}
599
/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessable to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(sigiolst)
	struct sigiolst *sigiolst;
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp; peek at the first entry to decide
	 * which, and assert consistency for the rest below.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/* Drop SIGIO lock around the (possibly sleeping) frees. */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}
661
/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.  pgid > 0 names a process, pgid < 0 a
 * process group, and pgid == 0 clears the owner.  Returns 0 or errno.
 */
int
fsetown(pgid, sigiop)
	pid_t pgid;
	struct sigio **sigiop;
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		/*
		 * NOTE(review): pfind() appears to return the proc locked
		 * (it is unlocked here before use); p_session is then read
		 * unlocked, presumably stable under the shared
		 * proctree_lock held above — confirm against proc(9).
		 */
		PROC_UNLOCK(proc);
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	/* Detach any previous owner before installing the new one. */
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}
771
772/*
773 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
774 */
775pid_t
776fgetown(sigiop)
777 struct sigio **sigiop;
778{
779 pid_t pgid;
780
781 SIGIO_LOCK();
782 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
783 SIGIO_UNLOCK();
784 return (pgid);
785}
786
/*
 * Close a file descriptor.  Clears the descriptor slot under the
 * filedesc lock, detaches any knotes registered on the fd, then
 * disposes of the file reference via closef().
 */
#ifndef _SYS_SYSPROTO_H_
struct close_args {
        int     fd;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
close(td, uap)
	struct thread *td;
	struct close_args *uap;
{
	struct filedesc *fdp;
	struct file *fp;
	int fd, error;

	fd = uap->fd;
	error = 0;
	fdp = td->td_proc->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
#if 0
	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
		(void) munmapfd(td, fd);
#endif
	fdp->fd_ofiles[fd] = NULL;
	fdp->fd_ofileflags[fd] = 0;

	/*
	 * we now hold the fp reference that used to be owned by the descriptor
	 * array.  Shrink the bookkeeping hints before dropping the lock.
	 */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	/* Detach knotes only if the fd is inside the tracked knlist range. */
	if (fd < fdp->fd_knlistsize) {
		FILEDESC_UNLOCK(fdp);
		knote_fdclose(td, fd);
	} else
		FILEDESC_UNLOCK(fdp);

	error = closef(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}
845
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor in the old
 * (pre-4.4BSD) struct ostat layout.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(td, uap)
	struct thread *td;
	struct ofstat_args *uap;
{
	struct file *fp;
	struct stat sb;
	struct ostat osb;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error != 0)
		goto done2;
	error = fo_stat(fp, &sb, td->td_ucred, td);
	if (error == 0) {
		/* Translate to the legacy layout before copying out. */
		cvtstat(&sb, &osb);
		error = copyout(&osb, uap->sb, sizeof(osb));
	}
	fdrop(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
884
885/*
886 * Return status information about a file descriptor.
887 */
888#ifndef _SYS_SYSPROTO_H_
889struct fstat_args {
890 int fd;
891 struct stat *sb;
892};
893#endif
894/*
895 * MPSAFE
896 */
897/* ARGSUSED */
898int
899fstat(td, uap)
900 struct thread *td;
901 struct fstat_args *uap;
902{
903 struct file *fp;
904 struct stat ub;
905 int error;
906
907 mtx_lock(&Giant);
908 if ((error = fget(td, uap->fd, &fp)) != 0)
909 goto done2;
910 error = fo_stat(fp, &ub, td->td_ucred, td);
911 if (error == 0)
912 error = copyout(&ub, uap->sb, sizeof(ub));
913 fdrop(fp, td);
914done2:
915 mtx_unlock(&Giant);
916 return (error);
917}
918
919/*
920 * Return status information about a file descriptor.
921 */
922#ifndef _SYS_SYSPROTO_H_
923struct nfstat_args {
924 int fd;
925 struct nstat *sb;
926};
927#endif
928/*
929 * MPSAFE
930 */
931/* ARGSUSED */
932int
933nfstat(td, uap)
934 struct thread *td;
935 struct nfstat_args *uap;
936{
937 struct file *fp;
938 struct stat ub;
939 struct nstat nub;
940 int error;
941
942 mtx_lock(&Giant);
943 if ((error = fget(td, uap->fd, &fp)) != 0)
944 goto done2;
945 error = fo_stat(fp, &ub, td->td_ucred, td);
946 if (error == 0) {
947 cvtnstat(&ub, &nub);
948 error = copyout(&nub, uap->sb, sizeof(nub));
949 }
950 fdrop(fp, td);
951done2:
952 mtx_unlock(&Giant);
953 return (error);
954}
955
956/*
957 * Return pathconf information about a file descriptor.
958 */
959#ifndef _SYS_SYSPROTO_H_
960struct fpathconf_args {
961 int fd;
962 int name;
963};
964#endif
965/*
966 * MPSAFE
967 */
968/* ARGSUSED */
969int
970fpathconf(td, uap)
971 struct thread *td;
972 struct fpathconf_args *uap;
973{
974 struct file *fp;
975 struct vnode *vp;
976 int error;
977
978 if ((error = fget(td, uap->fd, &fp)) != 0)
979 return (error);
980
981 /* If asynchronous I/O is available, it works for all descriptors. */
982 if (uap->name == _PC_ASYNC_IO) {
983 td->td_retval[0] = async_io_version;
984 goto out;
985 }
986 switch (fp->f_type) {
987 case DTYPE_PIPE:
988 case DTYPE_SOCKET:
989 if (uap->name != _PC_PIPE_BUF) {
990 error = EINVAL;
991 } else {
992 td->td_retval[0] = PIPE_BUF;
993 error = 0;
994 }
995 break;
996 case DTYPE_FIFO:
997 case DTYPE_VNODE:
998 vp = fp->f_data;
999 mtx_lock(&Giant);
1000 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
1001 mtx_unlock(&Giant);
1002 break;
1003 default:
1004 error = EOPNOTSUPP;
1005 break;
1006 }
1007out:
1008 fdrop(fp, td);
1009 return (error);
1010}
1011
/*
 * Allocate a file descriptor for the process.  The caller must hold the
 * filedesc lock; the lock may be dropped and reacquired while the ofile
 * table is grown.  On success the chosen fd is stored in *result and
 * its slot is left NULL for the caller to fill.
 */
static int fdexpand;	/* count of table expansions, exported read-only */
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");

int
fdalloc(td, want, result)
	struct thread *td;
	int want;
	int *result;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	int lim, last, nfiles;
	struct file **newofile, **oldofile;
	char *newofileflags;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile.  If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		i = max(want, fdp->fd_freefile);
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				/* Found a slot: update the allocation hints. */
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array.  Expand?
		 */
		if (i >= lim)
			return (EMFILE);
		/* Grow geometrically, at least to NDEXTENT and past 'want'. */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		while (nfiles < want)
			nfiles <<= 1;
		FILEDESC_UNLOCK(fdp);
		/*
		 * XXX malloc() calls uma_large_malloc() for sizes larger
		 * than KMEM_ZMAX bytes.  uma_large_malloc() requires Giant.
		 */
		mtx_lock(&Giant);
		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
		mtx_unlock(&Giant);

		/*
		 * Deal with file-table extend race that might have
		 * occurred while filedesc was unlocked.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_nfiles >= nfiles) {
			/* Someone else grew the table; retry the scan. */
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(newofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
			continue;
		}
		/* Flags array lives immediately after the pointer array. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		i = fdp->fd_nfiles * sizeof(struct file *);
		bcopy(fdp->fd_ofiles, newofile, i);
		bzero((char *)newofile + i,
		    nfiles * sizeof(struct file *) - i);
		i = fdp->fd_nfiles * sizeof(char);
		bcopy(fdp->fd_ofileflags, newofileflags, i);
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/* The initial NDFILE-sized table is embedded; don't free it. */
		if (fdp->fd_nfiles > NDFILE)
			oldofile = fdp->fd_ofiles;
		else
			oldofile = NULL;
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		if (oldofile != NULL) {
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(oldofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
		}
	}
	/* NOTREACHED: the for(;;) loop only exits via return above. */
	return (0);
}
1119
/*
 * Check to see whether n user file descriptors
 * are available to the process p.  Returns 1 if they are, 0 otherwise.
 * The caller must hold the filedesc lock.
 */
int
fdavail(td, n)
	struct thread *td;
	int n;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	/*
	 * Slots beyond the current table size (up to the limit) are all
	 * free; credit them against n first.
	 */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	/* Then count NULL slots from the free-slot hint up to the limit. */
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}
1147
/*
 * Create a new open file structure and allocate
 * a file decriptor for the process that refers to it.  On success the
 * new file (refcount 1, badfileops) is optionally returned in *resultfp
 * and its descriptor in *resultfd.
 */
int
falloc(td, resultfp, resultfd)
	struct thread *td;
	struct file **resultfp;
	int *resultfd;
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	sx_xlock(&filelist_lock);
	/* Enforce the global open-file limit. */
	if (nfiles >= maxfiles) {
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		tablefull("file");
		return (ENFILE);
	}
	nfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc();
	fp->f_gcflag = 0;
	fp->f_count = 1;
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;	/* until a real fileops is installed */
	fp->f_seqcount = 1;
	/* filedesc lock is taken before filelist_lock is released. */
	FILEDESC_LOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_UNLOCK(p->p_fd);
		/* Drops our reference; ffree() will undo the list insert. */
		fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_UNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}
1203
/*
 * Free a file descriptor.
 *
 * Called once the last reference is gone (f_count == 0): unlink the
 * file from the global list, drop the credential reference, and
 * return the structure to its zone.
 */
void
ffree(fp)
	struct file *fp;
{

	KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
	sx_xlock(&filelist_lock);
	LIST_REMOVE(fp, f_list);
	nfiles--;
	sx_xunlock(&filelist_lock);
	crfree(fp->f_cred);
	uma_zfree(file_zone, fp);
}
1220
/*
 * Build a new filedesc structure from another.
 * Copy the current, root, and jail root vnode references.
 *
 * The descriptor table itself starts empty (embedded NDFILE-slot
 * arrays); only the directory vnodes are inherited from fdp.
 */
struct filedesc *
fdinit(fdp)
	struct filedesc *fdp;
{
	struct filedesc0 *newfdp;

	MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK | M_ZERO);
	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	/* Inherit and reference cwd, root, and jail root vnodes. */
	newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
	if (newfdp->fd_fd.fd_cdir)
		VREF(newfdp->fd_fd.fd_cdir);
	newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
	if (newfdp->fd_fd.fd_rdir)
		VREF(newfdp->fd_fd.fd_rdir);
	newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
	if (newfdp->fd_fd.fd_jdir)
		VREF(newfdp->fd_fd.fd_jdir);

	/* Create the file descriptor table. */
	newfdp->fd_fd.fd_refcnt = 1;
	newfdp->fd_fd.fd_cmask = cmask;
	newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
	newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
	newfdp->fd_fd.fd_nfiles = NDFILE;
	newfdp->fd_fd.fd_knlistsize = -1;	/* no knotes yet */
	return (&newfdp->fd_fd);
}
1253
/*
 * Share a filedesc structure.
 *
 * Bumps the reference count (e.g. for rfork(RFFDG)-style sharing)
 * and returns the same structure.
 */
struct filedesc *
fdshare(fdp)
	struct filedesc *fdp;
{
	FILEDESC_LOCK(fdp);
	fdp->fd_refcnt++;
	FILEDESC_UNLOCK(fdp);
	return (fdp);
}
1266
/*
 * Copy a filedesc structure.
 * A NULL pointer in returns a NULL reference, this is to ease callers,
 * not catch errors.
 *
 * Entered (and exited) with fdp locked.  The lock is dropped around
 * each sleeping allocation, so the source table may grow or shrink
 * underneath us; the retry loop below re-checks the computed size
 * after every allocation.  kqueue descriptors are not copied.
 */
struct filedesc *
fdcopy(fdp)
	struct filedesc *fdp;
{
	struct filedesc *newfdp;
	struct file **fpp;
	int i, j;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	FILEDESC_UNLOCK(fdp);
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	FILEDESC_LOCK(fdp);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	FILEDESC_UNLOCK(fdp);
	/* The copied mutex is stale; re-initialize our own. */
	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	if (newfdp->fd_cdir)
		VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	if (newfdp->fd_jdir)
		VREF(newfdp->fd_jdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	FILEDESC_LOCK(fdp);
	newfdp->fd_lastfile = fdp->fd_lastfile;
	newfdp->fd_nfiles = fdp->fd_nfiles;
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
retry:
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		/* Drop the lock to sleep in MALLOC, then re-validate. */
		FILEDESC_UNLOCK(fdp);
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		FILEDESC_LOCK(fdp);
		newfdp->fd_lastfile = fdp->fd_lastfile;
		newfdp->fd_nfiles = fdp->fd_nfiles;
		j = newfdp->fd_nfiles;
		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
			j /= 2;
		if (i != j) {
			/*
			 * The size of the original table has changed.
			 * Go over once again.
			 */
			FILEDESC_UNLOCK(fdp);
			FREE(newfdp->fd_ofiles, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			newfdp->fd_lastfile = fdp->fd_lastfile;
			newfdp->fd_nfiles = fdp->fd_nfiles;
			goto retry;
		}
		/* Flag bytes live immediately after the pointer array. */
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));

	/*
	 * kq descriptors cannot be copied.
	 */
	if (newfdp->fd_knlistsize != -1) {
		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
				*fpp = NULL;
				if (i < newfdp->fd_freefile)
					newfdp->fd_freefile = i;
			}
			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
				newfdp->fd_lastfile--;
		}
		newfdp->fd_knlist = NULL;
		newfdp->fd_knlistsize = -1;
		newfdp->fd_knhash = NULL;
		newfdp->fd_knhashmask = 0;
	}

	/* Take a reference on every copied file. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp != NULL)
			fhold(*fpp);
	}
	return (newfdp);
}
1380
/* A mutex to protect the association between a proc and filedesc. */
/* Initialized automatically at boot via MTX_SYSINIT. */
struct mtx fdesc_mtx;
MTX_SYSINIT(fdesc, &fdesc_mtx, "fdesc", MTX_DEF);
1384
/*
 * Release a filedesc structure.
 *
 * Drops one reference; the last reference closes every remaining
 * file, detaches the table from the process, and frees all storage.
 */
void
fdfree(td)
	struct thread *td;
{
	struct filedesc *fdp;
	struct file **fpp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_LOCK(fdp);
	if (--fdp->fd_refcnt > 0) {
		FILEDESC_UNLOCK(fdp);
		return;
	}

	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	FILEDESC_UNLOCK(fdp);
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/* The table was malloc'd only if it outgrew the embedded one. */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (fdp->fd_cdir)
		vrele(fdp->fd_cdir);
	if (fdp->fd_rdir)
		vrele(fdp->fd_rdir);
	if (fdp->fd_jdir)
		vrele(fdp->fd_jdir);
	if (fdp->fd_knlist)
		FREE(fdp->fd_knlist, M_KQUEUE);
	if (fdp->fd_knhash)
		FREE(fdp->fd_knhash, M_KQUEUE);
	mtx_destroy(&fdp->fd_mtx);
	FREE(fdp, M_FILEDESC);
}
1438
1439/*
1440 * For setugid programs, we don't want to people to use that setugidness
1441 * to generate error messages which write to a file which otherwise would
1442 * otherwise be off-limits to the process. We check for filesystems where
1443 * the vnode can change out from under us after execve (like [lin]procfs).
1444 *
1445 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1446 * sufficient. We also don't for check setugidness since we know we are.
1447 */
1448static int
1449is_unsafe(struct file *fp)
1450{
1451 if (fp->f_type == DTYPE_VNODE) {
1452 struct vnode *vp = fp->f_data;
1453
1454 if ((vp->v_vflag & VV_PROCDEP) != 0)
1455 return (1);
1456 }
1457 return (0);
1458}
1459
/*
 * Make this setguid thing safe, if at all possible.
 *
 * Closes descriptors 0..2 that refer to "unsafe" vnodes (see
 * is_unsafe()) before a setugid image runs.
 */
void
setugidsafety(td)
	struct thread *td;
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * Note: fdp->fd_ofiles may be reallocated out from under us while
	 * we are blocked in a close.  Be careful!
	 */
	FILEDESC_LOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		/* Only stdin/stdout/stderr are of interest. */
		if (i > 2)
			break;
		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
			struct file *fp;

#if 0
			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
				(void) munmapfd(td, i);
#endif
			/* Detach any knotes first (drops the lock). */
			if (i < fdp->fd_knlistsize) {
				FILEDESC_UNLOCK(fdp);
				knote_fdclose(td, i);
				FILEDESC_LOCK(fdp);
			}
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			if (i < fdp->fd_freefile)
				fdp->fd_freefile = i;
			FILEDESC_UNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_LOCK(fdp);
		}
	}
	/* Recompute the highest-numbered open descriptor. */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	FILEDESC_UNLOCK(fdp);
}
1513
/*
 * Close any files on exec?
 *
 * Closes every descriptor marked UF_EXCLOSE (close-on-exec) across
 * an execve.
 */
void
fdcloseexec(td)
	struct thread *td;
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_LOCK(fdp);

	/*
	 * We cannot cache fd_ofiles or fd_ofileflags since operations
	 * may block and rip them out from under us.
	 */
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		if (fdp->fd_ofiles[i] != NULL &&
		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
			struct file *fp;

#if 0
			if (fdp->fd_ofileflags[i] & UF_MAPPED)
				(void) munmapfd(td, i);
#endif
			/* Detach any knotes first (drops the lock). */
			if (i < fdp->fd_knlistsize) {
				FILEDESC_UNLOCK(fdp);
				knote_fdclose(td, i);
				FILEDESC_LOCK(fdp);
			}
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			if (i < fdp->fd_freefile)
				fdp->fd_freefile = i;
			FILEDESC_UNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_LOCK(fdp);
		}
	}
	/* Recompute the highest-numbered open descriptor. */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	FILEDESC_UNLOCK(fdp);
}
1567
/*
 * It is unsafe for set[ug]id processes to be started with file
 * descriptors 0..2 closed, as these descriptors are given implicit
 * significance in the Standard C library.  fdcheckstd() will create a
 * descriptor referencing /dev/null for each of stdin, stdout, and
 * stderr that is not already open.
 *
 * /dev/null is opened once; subsequent holes are filled by dup'ing
 * that first descriptor.  Returns 0 or the first error encountered.
 */
int
fdcheckstd(td)
	struct thread *td;
{
	struct nameidata nd;
	struct filedesc *fdp;
	struct file *fp;
	register_t retval;
	int fd, i, error, flags, devnull;

	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (0);
	devnull = -1;		/* fd of the first /dev/null we open */
	error = 0;
	for (i = 0; i < 3; i++) {
		if (fdp->fd_ofiles[i] != NULL)
			continue;
		if (devnull < 0) {
			error = falloc(td, &fp, &fd);
			if (error != 0)
				break;
			/* falloc() must hand out the lowest free fd. */
			KASSERT(fd == i, ("oof, we didn't get our fd"));
			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
			    td);
			flags = FREAD | FWRITE;
			error = vn_open(&nd, &flags, 0);
			if (error != 0) {
				/* Undo falloc(): clear slot, drop our ref. */
				FILEDESC_LOCK(fdp);
				fdp->fd_ofiles[fd] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
				break;
			}
			NDFREE(&nd, NDF_ONLY_PNBUF);
			fp->f_data = nd.ni_vp;
			fp->f_flag = flags;
			fp->f_ops = &vnops;
			fp->f_type = DTYPE_VNODE;
			VOP_UNLOCK(nd.ni_vp, 0, td);
			devnull = fd;
		} else {
			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
			if (error != 0)
				break;
		}
	}
	return (error);
}
1624
/*
 * Internal form of close.
 * Decrement reference count on file structure.
 * Note: td may be NULL when closing a file
 * that was being passed in a message.
 */
int
closef(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct vnode *vp;
	struct flock lf;

	if (fp == NULL)
		return (0);
	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process.  This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (td != NULL && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0 &&
	    fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;		/* whole file */
		lf.l_type = F_UNLCK;
		vp = fp->f_data;
		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
		    F_UNLCK, &lf, F_POSIX);
	}
	return (fdrop(fp, td));
}
1661
/*
 * Drop reference on struct file passed in, may call closef if the
 * reference hits zero.
 *
 * Convenience wrapper: takes the file lock and hands off to
 * fdrop_locked(), which unlocks it.
 */
int
fdrop(fp, td)
	struct file *fp;
	struct thread *td;
{

	FILE_LOCK(fp);
	return (fdrop_locked(fp, td));
}
1675
/*
 * Extract the file pointer associated with the specified descriptor for
 * the current user process.
 *
 * If the descriptor doesn't exist, EBADF is returned.
 *
 * If the descriptor exists but doesn't match 'flags' then
 * return EBADF for read attempts and EINVAL for write attempts.
 *
 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
 * It should be droped with fdrop().
 * If it is not set, then the refcount will not be bumped however the
 * thread's filedesc struct will be returned locked (for fgetsock).
 *
 * If an error occured the non-zero error is returned and *fpp is set to NULL.
 * Otherwise *fpp is set and zero is returned.
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
{
	struct filedesc *fdp;
	struct file *fp;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	/* Reject descriptors that were never fully opened. */
	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * Note: FREAD failures returns EBADF to maintain backwards
	 * compatibility with what routines returned before.
	 *
	 * Only one flag, or 0, may be specified.
	 */
	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EINVAL);
	}
	if (hold) {
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
	}
	*fpp = fp;
	return (0);
}
1729
/*
 * Look up fd in td's table; on success *fpp holds a referenced file
 * pointer the caller must release with fdrop().
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, 0, 1));
}
1736
/*
 * As fget(), but additionally requires the file to be open for
 * reading (EBADF otherwise, per _fget()'s compatibility note).
 */
int
fget_read(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FREAD, 1));
}
1743
/*
 * As fget(), but additionally requires the file to be open for
 * writing (EINVAL otherwise).
 */
int
fget_write(struct thread *td, int fd, struct file **fpp)
{

	return(_fget(td, fd, fpp, FWRITE, 1));
}
1750
/*
 * Like fget() but loads the underlying vnode, or returns an error if
 * the descriptor does not represent a vnode.  Note that pipes use vnodes
 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
 * error).  The returned vnode will be vref()d.
 *
 * Uses _fget() with hold == 0, so the filedesc lock is held across
 * the vref() and released here before returning.
 */
static __inline int
_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
{
	struct file *fp;
	int error;

	*vpp = NULL;
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
		error = EINVAL;
	} else {
		*vpp = fp->f_data;
		vref(*vpp);
	}
	FILEDESC_UNLOCK(td->td_proc->p_fd);
	return (error);
}
1775
/* Fetch the vref()d vnode behind fd; no access-mode requirement. */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, 0));
}
1782
/* As fgetvp(), requesting read access. */
int
fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FREAD));
}
1789
/* As fgetvp(), requesting write access. */
int
fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
{

	return (_fgetvp(td, fd, vpp, FWRITE));
}
1796
/*
 * Like fget() but loads the underlying socket, or returns an error if
 * the descriptor does not represent a socket.
 *
 * We bump the ref count on the returned socket.  XXX Also obtain the SX
 * lock in the future.
 *
 * On success *spp holds a soref()'d socket; *fflagp (if non-NULL)
 * receives the file's f_flag.
 */
int
fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
{
	struct file *fp;
	int error;

	*spp = NULL;
	if (fflagp != NULL)
		*fflagp = 0;
	if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		error = ENOTSOCK;
	} else {
		*spp = fp->f_data;
		if (fflagp)
			*fflagp = fp->f_flag;
		soref(*spp);
	}
	/* _fget(hold == 0) left the filedesc locked; release it now. */
	FILEDESC_UNLOCK(td->td_proc->p_fd);
	return (error);
}
1826
/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future.  The last reference closes the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1837
/*
 * Drop reference on struct file passed in, may call closef if the
 * reference hits zero.
 * Expects struct file locked, and will unlock it.
 *
 * The last reference releases any FHASLOCK advisory lock, invokes the
 * file's close method, and frees the structure via ffree().
 */
int
fdrop_locked(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct flock lf;
	struct vnode *vp;
	int error;

	FILE_LOCK_ASSERT(fp, MA_OWNED);

	if (--fp->f_count > 0) {
		FILE_UNLOCK(fp);
		return (0);
	}
	/* Last reference: tear the file down under Giant. */
	mtx_lock(&Giant);
	if (fp->f_count < 0)
		panic("fdrop: count < 0");
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		/* Release the flock()-style lock on the whole file. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = fp->f_data;
		FILE_UNLOCK(fp);
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	} else
		FILE_UNLOCK(fp);
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	else
		error = 0;
	ffree(fp);
	mtx_unlock(&Giant);
	return (error);
}
1879
/*
 * Apply an advisory lock on a file descriptor.
 *
 * Just attempt to get a record lock of the requested type on
 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int	fd;
	int	how;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
flock(td, uap)
	struct thread *td;
	struct flock_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	/* flock() only works on vnode-backed descriptors. */
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	mtx_lock(&Giant);
	vp = fp->f_data;
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;		/* whole file */
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		FILE_LOCK(fp);
		fp->f_flag &= ~FHASLOCK;
		FILE_UNLOCK(fp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		/* Neither shared, exclusive, nor unlock requested. */
		error = EBADF;
		goto done2;
	}
	FILE_LOCK(fp);
	fp->f_flag |= FHASLOCK;
	FILE_UNLOCK(fp);
	/* LOCK_NB requests a non-blocking attempt. */
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	mtx_unlock(&Giant);
	return (error);
}
1944
/*
 * File Descriptor pseudo-device driver (/dev/fd/).
 *
 * Opening minor device N dup()s the file (if any) connected to file
 * descriptor N belonging to the calling process.  Note that this driver
 * consists of only the ``open()'' routine, because all subsequent
 * references to this file will be direct to the other driver.
 */
/* ARGSUSED */
static int
fdopen(dev, mode, type, td)
	dev_t dev;
	int mode, type;
	struct thread *td;
{

	/*
	 * XXX Kludge: set curthread->td_dupfd to contain the value of the
	 * the file descriptor being sought for duplication.  The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open.  Open will detect this special error and take the
	 * actions in dupfdopen below.  Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	td->td_dupfd = dev2unit(dev);
	return (ENODEV);
}
1972
/*
 * Duplicate the specified descriptor to a free descriptor.
 *
 * Called from the open path after fdopen() returned its sentinel
 * error; 'error' selects the behavior (see the switch below), 'dfd'
 * is the descriptor to duplicate, and 'indx' the slot already
 * reserved for the open.
 */
int
dupfdopen(td, fdp, indx, dfd, mode, error)
	struct thread *td;
	struct filedesc *fdp;
	int indx, dfd;
	int mode;
	int error;
{
	struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_LOCK(fdp);
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx).  (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		FILE_LOCK(wfp);
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILE_UNLOCK(wfp);
			FILEDESC_UNLOCK(fdp);
			return (EACCES);
		}
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fhold_locked(wfp);	/* reference for the new slot */
		FILE_UNLOCK(wfp);
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;

		/*
		 * Complete the clean up of the filedesc structure by
		 * recomputing the various hints.
		 */
		if (indx > fdp->fd_lastfile) {
			fdp->fd_lastfile = indx;
		} else {
			while (fdp->fd_lastfile > 0 &&
			   fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
				fdp->fd_lastfile--;
			}
			if (dfd < fdp->fd_freefile)
				fdp->fd_freefile = dfd;
		}
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);

		/*
		 * we now own the reference to fp that the ofiles[] array
		 * used to own.  Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	default:
		FILEDESC_UNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}
2091
/*
 * Get file structures.
 *
 * sysctl handler that exports one struct xfile per open descriptor in
 * every process.  With a NULL oldptr it only reports an (over)estimate
 * of the required buffer size.
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	sysctl_wire_old_buffer(req, 0);
	if (req->oldptr == NULL) {
		n = 16;		/* A slight overestimate. */
		sx_slock(&filelist_lock);
		LIST_FOREACH(fp, &filehead, f_list) {
			/*
			 * We should grab the lock, but this is an
			 * estimate, so does it really matter?
			 */
			/* mtx_lock(fp->f_mtxp); */
			n += fp->f_count;
			/* mtx_unlock(f->f_mtxp); */
		}
		sx_sunlock(&filelist_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		/* fdesc_mtx stabilizes the proc <-> filedesc link. */
		mtx_lock(&fdesc_mtx);
		if ((fdp = p->p_fd) == NULL) {
			mtx_unlock(&fdesc_mtx);
			continue;
		}
		FILEDESC_LOCK(fdp);
		for (n = 0; n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = fp->f_msgcount;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_UNLOCK(fdp);
		mtx_unlock(&fdesc_mtx);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}
2158
/* sysctl knobs exported by the descriptor subsystem. */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2170
2171static void
2172fildesc_drvinit(void *unused)
2173{
2174 dev_t dev;
2175
2176 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
2177 make_dev_alias(dev, "stdin");
2178 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
2179 make_dev_alias(dev, "stdout");
2180 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
2181 make_dev_alias(dev, "stderr");
2182}
2183
/*
 * Method table installed in newly allocated (and torn-down) files so
 * that any operation on a not-fully-opened descriptor fails cleanly.
 */
static fo_rdwr_t badfo_readwrite;
static fo_ioctl_t badfo_ioctl;
static fo_poll_t badfo_poll;
static fo_kqfilter_t badfo_kqfilter;
static fo_stat_t badfo_stat;
static fo_close_t badfo_close;

struct fileops badfileops = {
	badfo_readwrite,
	badfo_readwrite,
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close,
	0
};
2201
2202static int
2203badfo_readwrite(fp, uio, active_cred, flags, td)
2204 struct file *fp;
2205 struct uio *uio;
2206 struct ucred *active_cred;
2207 struct thread *td;
2208 int flags;
2209{
2210
2211 return (EBADF);
2212}
2213
2214static int
2215badfo_ioctl(fp, com, data, active_cred, td)
2216 struct file *fp;
2217 u_long com;
2218 void *data;
2219 struct ucred *active_cred;
2220 struct thread *td;
2221{
2222
2223 return (EBADF);
2224}
2225
/* poll method for badfileops: reports no events ready. */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2236
/* kqfilter method for badfileops: attaches nothing, reports success. */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2245
2246static int
2247badfo_stat(fp, sb, active_cred, td)
2248 struct file *fp;
2249 struct stat *sb;
2250 struct ucred *active_cred;
2251 struct thread *td;
2252{
2253
2254 return (EBADF);
2255}
2256
2257static int
2258badfo_close(fp, td)
2259 struct file *fp;
2260 struct thread *td;
2261{
2262
2263 return (EBADF);
2264}
2265
/* Boot-time registrations: /dev/fd nodes and file-list setup. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
	fildesc_drvinit,NULL)

static void filelistinit(void *);
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2271
2272/* ARGSUSED*/
2273static void
2274filelistinit(dummy)
2275 void *dummy;
2276{
2277
2278 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2279 NULL, NULL, UMA_ALIGN_PTR, 0);
2280 sx_init(&filelist_lock, "filelist lock");
2281 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2282}
89};
90
/* How to treat 'new' parameter when allocating a fd for do_dup(). */
enum dup_type { DUP_VARIABLE, DUP_FIXED };

static int do_dup(struct thread *td, enum dup_type type, int old, int new,
    register_t *retval);

/*
 * Descriptor management.
 */
struct filelist filehead;	/* head of list of open files */
int nfiles;			/* actual number of open files */
extern int cmask;		/* default file creation mask */
struct sx filelist_lock;	/* sx to protect filelist */
struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
105
/*
 * System calls on descriptors.
 */
#ifndef _SYS_SYSPROTO_H_
struct getdtablesize_args {
	int	dummy;
};
#endif
/*
 * MPSAFE
 *
 * Returns the per-process descriptor limit: the smaller of
 * RLIMIT_NOFILE and maxfilesperproc.
 */
/* ARGSUSED */
int
getdtablesize(td, uap)
	struct thread *td;
	struct getdtablesize_args *uap;
{
	struct proc *p = td->td_proc;

	mtx_lock(&Giant);
	td->td_retval[0] =
	    min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	mtx_unlock(&Giant);
	return (0);
}
131
/*
 * Duplicate a file descriptor to a particular value.
 *
 * note: keep in mind that a potential race condition exists when closing
 * descriptors from a shared descriptor table (via rfork).
 */
#ifndef _SYS_SYSPROTO_H_
struct dup2_args {
	u_int	from;
	u_int	to;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
dup2(td, uap)
	struct thread *td;
	struct dup2_args *uap;
{

	/* DUP_FIXED: the target descriptor number is mandated. */
	return (do_dup(td, DUP_FIXED, (int)uap->from, (int)uap->to,
	    td->td_retval));
}
157
/*
 * Duplicate a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct dup_args {
	u_int	fd;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
dup(td, uap)
	struct thread *td;
	struct dup_args *uap;
{

	/* DUP_VARIABLE: do_dup() picks the lowest free descriptor. */
	return (do_dup(td, DUP_VARIABLE, (int)uap->fd, 0, td->td_retval));
}
178
/*
 * The file control system call.
 *
 * Thin wrapper: copies in the struct flock for the locking commands,
 * dispatches to kern_fcntl(), and copies the result back for F_GETLK.
 */
#ifndef _SYS_SYSPROTO_H_
struct fcntl_args {
	int	fd;
	int	cmd;
	long	arg;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
fcntl(td, uap)
	struct thread *td;
	struct fcntl_args *uap;
{
	struct flock fl;
	intptr_t arg;
	int error;

	error = 0;
	switch (uap->cmd) {
	case F_GETLK:
	case F_SETLK:
	case F_SETLKW:
		/* These commands pass a userland struct flock pointer. */
		error = copyin((void *)(intptr_t)uap->arg, &fl, sizeof(fl));
		arg = (intptr_t)&fl;
		break;
	default:
		arg = uap->arg;
		break;
	}
	if (error)
		return (error);
	error = kern_fcntl(td, uap->fd, uap->cmd, arg);
	if (error)
		return (error);
	if (uap->cmd == F_GETLK)
		error = copyout(&fl, (void *)(intptr_t)uap->arg, sizeof(fl));
	return (error);
}
223
/*
 * Backend for fcntl(2).  "arg" is either an integer argument or, for the
 * F_GETLK/F_SETLK/F_SETLKW commands, a pointer to an in-kernel struct
 * flock (see fcntl() above).  Returns 0 or an errno; integer results are
 * left in td->td_retval[0].
 */
int
kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
{
	struct filedesc *fdp;
	struct flock *flp;
	struct file *fp;
	struct proc *p;
	char *pop;
	struct vnode *vp;
	u_int newmin;
	int error, flg, tmp;

	error = 0;
	flg = F_POSIX;
	p = td->td_proc;
	fdp = p->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	/* Translate fd to its struct file while holding the filedesc lock. */
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
	/* Per-descriptor flag byte; UF_EXCLOSE (close-on-exec) lives here. */
	pop = &fdp->fd_ofileflags[fd];

	switch (cmd) {
	case F_DUPFD:
		FILEDESC_UNLOCK(fdp);
		newmin = arg;
		/* NOTE(review): p_rlimit is read without the proc lock. */
		if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
		    newmin >= maxfilesperproc) {
			error = EINVAL;
			break;
		}
		error = do_dup(td, DUP_VARIABLE, fd, newmin, td->td_retval);
		break;

	case F_GETFD:
		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
		FILEDESC_UNLOCK(fdp);
		break;

	case F_SETFD:
		*pop = (*pop &~ UF_EXCLOSE) |
		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
		FILEDESC_UNLOCK(fdp);
		break;

	case F_GETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		td->td_retval[0] = OFLAGS(fp->f_flag);
		FILE_UNLOCK(fp);
		break;

	case F_SETFL:
		FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		fhold_locked(fp);
		/* Only the FCNTLFLAGS subset of f_flag may be changed here. */
		fp->f_flag &= ~FCNTLFLAGS;
		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
		FILE_UNLOCK(fp);
		/* Push the new blocking/async modes down to the object. */
		tmp = fp->f_flag & FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		if (error) {
			fdrop(fp, td);
			break;
		}
		tmp = fp->f_flag & FASYNC;
		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
		if (error == 0) {
			fdrop(fp, td);
			break;
		}
		/* FIOASYNC failed: back out the FIONBIO change as well. */
		FILE_LOCK(fp);
		fp->f_flag &= ~FNONBLOCK;
		FILE_UNLOCK(fp);
		tmp = 0;
		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_GETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
		if (error == 0)
			td->td_retval[0] = tmp;
		fdrop(fp, td);
		break;

	case F_SETOWN:
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		tmp = arg;
		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
		fdrop(fp, td);
		break;

	case F_SETLKW:
		flg |= F_WAIT;
		/* FALLTHROUGH F_SETLK */

	case F_SETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}

		/* "arg" is a pointer to an in-kernel struct flock here. */
		flp = (struct flock *)arg;
		if (flp->l_whence == SEEK_CUR) {
			/* Refuse SEEK_CUR offsets that would overflow. */
			if (fp->f_offset < 0 ||
			    (flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}

		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;

		switch (flp->l_type) {
		case F_RDLCK:
			if ((fp->f_flag & FREAD) == 0) {
				error = EBADF;
				break;
			}
			/* Locks belong to the process group leader. */
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_WRLCK:
			if ((fp->f_flag & FWRITE) == 0) {
				error = EBADF;
				break;
			}
			PROC_LOCK(p->p_leader);
			p->p_leader->p_flag |= P_ADVLOCK;
			PROC_UNLOCK(p->p_leader);
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK,
			    flp, flg);
			break;
		case F_UNLCK:
			error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK,
			    flp, F_POSIX);
			break;
		default:
			error = EINVAL;
			break;
		}
		/* Check for race with close */
		FILEDESC_LOCK(fdp);
		if ((unsigned) fd >= fdp->fd_nfiles ||
		    fp != fdp->fd_ofiles[fd]) {
			FILEDESC_UNLOCK(fdp);
			/*
			 * The descriptor was closed or replaced while we
			 * slept in VOP_ADVLOCK(): undo the lock just taken.
			 */
			flp->l_whence = SEEK_SET;
			flp->l_start = 0;
			flp->l_len = 0;
			flp->l_type = F_UNLCK;
			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
			    F_UNLCK, flp, F_POSIX);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		break;

	case F_GETLK:
		if (fp->f_type != DTYPE_VNODE) {
			FILEDESC_UNLOCK(fdp);
			error = EBADF;
			break;
		}
		flp = (struct flock *)arg;
		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
		    flp->l_type != F_UNLCK) {
			FILEDESC_UNLOCK(fdp);
			error = EINVAL;
			break;
		}
		if (flp->l_whence == SEEK_CUR) {
			/* Refuse SEEK_CUR offsets that would overflow. */
			if ((flp->l_start > 0 &&
			     fp->f_offset > OFF_MAX - flp->l_start) ||
			    (flp->l_start < 0 &&
			     fp->f_offset < OFF_MIN - flp->l_start)) {
				FILEDESC_UNLOCK(fdp);
				error = EOVERFLOW;
				break;
			}
			flp->l_start += fp->f_offset;
		}
		/*
		 * VOP_ADVLOCK() may block.
		 */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
		vp = fp->f_data;
		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
		    F_POSIX);
		fdrop(fp, td);
		break;
	default:
		FILEDESC_UNLOCK(fdp);
		error = EINVAL;
		break;
	}
done2:
	mtx_unlock(&Giant);
	return (error);
}
444
/*
 * Common code for dup, dup2, and fcntl(F_DUPFD).
 *
 * DUP_FIXED places the duplicate exactly at descriptor "new" (dup2
 * semantics); DUP_VARIABLE picks the lowest free descriptor >= "new"
 * (dup/F_DUPFD semantics).  The resulting descriptor is stored through
 * "retval".  Returns 0 or an errno value.
 */
static int
do_dup(td, type, old, new, retval)
	enum dup_type type;
	int old, new;
	register_t *retval;
	struct thread *td;
{
	struct filedesc *fdp;
	struct proc *p;
	struct file *fp;
	struct file *delfp;
	int error, newfd;

	p = td->td_proc;
	fdp = p->p_fd;

	/*
	 * Verify we have a valid descriptor to dup from and possibly to
	 * dup to.
	 */
	if (old < 0 || new < 0 || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur ||
	    new >= maxfilesperproc)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	/* dup2(fd, fd) is a no-op that just reports the descriptor. */
	if (type == DUP_FIXED && old == new) {
		*retval = new;
		FILEDESC_UNLOCK(fdp);
		return (0);
	}
	fp = fdp->fd_ofiles[old];
	fhold(fp);

	/*
	 * Expand the table for the new descriptor if needed. This may
	 * block and drop and reacquire the filedesc lock.
	 */
	if (type == DUP_VARIABLE || new >= fdp->fd_nfiles) {
		error = fdalloc(td, new, &newfd);
		if (error) {
			FILEDESC_UNLOCK(fdp);
			fdrop(fp, td);
			return (error);
		}
	}
	if (type == DUP_VARIABLE)
		new = newfd;

	/*
	 * If the old file changed out from under us then treat it as a
	 * bad file descriptor. Userland should do its own locking to
	 * avoid this case.
	 */
	if (fdp->fd_ofiles[old] != fp) {
		/* Give back the slot fdalloc() may have reserved for us. */
		if (fdp->fd_ofiles[new] == NULL) {
			if (new < fdp->fd_freefile)
				fdp->fd_freefile = new;
			while (fdp->fd_lastfile > 0 &&
			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
				fdp->fd_lastfile--;
		}
		FILEDESC_UNLOCK(fdp);
		fdrop(fp, td);
		return (EBADF);
	}
	KASSERT(old != new, ("new fd is same as old"));

	/*
	 * Save info on the descriptor being overwritten. We have
	 * to do the unmap now, but we cannot close it without
	 * introducing an ownership race for the slot.
	 */
	delfp = fdp->fd_ofiles[new];
	KASSERT(delfp == NULL || type == DUP_FIXED,
	    ("dup() picked an open file"));
#if 0
	if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED))
		(void) munmapfd(td, new);
#endif

	/*
	 * Duplicate the source descriptor, update lastfile
	 */
	fdp->fd_ofiles[new] = fp;
	/* The duplicate does not inherit close-on-exec (POSIX). */
	fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE;
	if (new > fdp->fd_lastfile)
		fdp->fd_lastfile = new;
	FILEDESC_UNLOCK(fdp);
	*retval = new;

	/*
	 * If we dup'd over a valid file, we now own the reference to it
	 * and must dispose of it using closef() semantics (as if a
	 * close() were performed on it).
	 */
	if (delfp) {
		mtx_lock(&Giant);
		(void) closef(delfp, td);
		mtx_unlock(&Giant);
	}
	return (0);
}
553
/*
 * If sigio is on the list associated with a process or process group,
 * disable signalling from the device, remove sigio from the list and
 * free sigio.
 */
void
funsetown(sigiop)
	struct sigio **sigiop;
{
	struct sigio *sigio;

	SIGIO_LOCK();
	sigio = *sigiop;
	if (sigio == NULL) {
		SIGIO_UNLOCK();
		return;
	}
	/* Clear the owner's back-pointer first so fgetown() sees NULL. */
	*(sigio->sio_myref) = NULL;
	if ((sigio)->sio_pgid < 0) {
		/* Negative sio_pgid: the owner is a process group. */
		struct pgrp *pg = (sigio)->sio_pgrp;
		PGRP_LOCK(pg);
		SLIST_REMOVE(&sigio->sio_pgrp->pg_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PGRP_UNLOCK(pg);
	} else {
		/* Positive sio_pgid: the owner is a single process. */
		struct proc *p = (sigio)->sio_proc;
		PROC_LOCK(p);
		SLIST_REMOVE(&sigio->sio_proc->p_sigiolst, sigio,
			     sigio, sio_pgsigio);
		PROC_UNLOCK(p);
	}
	SIGIO_UNLOCK();
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
}
589
/*
 * Free a list of sigio structures.
 * We only need to lock the SIGIO_LOCK because we have made ourselves
 * inaccessable to callers of fsetown and therefore do not need to lock
 * the proc or pgrp struct for the list manipulation.
 */
void
funsetownlst(sigiolst)
	struct sigiolst *sigiolst;
{
	struct proc *p;
	struct pgrp *pg;
	struct sigio *sigio;

	sigio = SLIST_FIRST(sigiolst);
	if (sigio == NULL)
		return;
	p = NULL;
	pg = NULL;

	/*
	 * Every entry of the list should belong
	 * to a single proc or pgrp.
	 */
	if (sigio->sio_pgid < 0) {
		pg = sigio->sio_pgrp;
		PGRP_LOCK_ASSERT(pg, MA_NOTOWNED);
	} else /* if (sigio->sio_pgid > 0) */ {
		p = sigio->sio_proc;
		PROC_LOCK_ASSERT(p, MA_NOTOWNED);
	}

	SIGIO_LOCK();
	while ((sigio = SLIST_FIRST(sigiolst)) != NULL) {
		/* Clear the owner's back-pointer before unlinking. */
		*(sigio->sio_myref) = NULL;
		if (pg != NULL) {
			KASSERT(sigio->sio_pgid < 0,
			    ("Proc sigio in pgrp sigio list"));
			KASSERT(sigio->sio_pgrp == pg,
			    ("Bogus pgrp in sigio list"));
			PGRP_LOCK(pg);
			SLIST_REMOVE(&pg->pg_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PGRP_UNLOCK(pg);
		} else /* if (p != NULL) */ {
			KASSERT(sigio->sio_pgid > 0,
			    ("Pgrp sigio in proc sigio list"));
			KASSERT(sigio->sio_proc == p,
			    ("Bogus proc in sigio list"));
			PROC_LOCK(p);
			SLIST_REMOVE(&p->p_sigiolst, sigio, sigio,
			    sio_pgsigio);
			PROC_UNLOCK(p);
		}
		/* Drop SIGIO lock around crfree/FREE, which may block. */
		SIGIO_UNLOCK();
		crfree(sigio->sio_ucred);
		FREE(sigio, M_SIGIO);
		SIGIO_LOCK();
	}
	SIGIO_UNLOCK();
}
651
/*
 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
 *
 * After permission checking, add a sigio structure to the sigio list for
 * the process or process group.
 *
 * pgid > 0 names a process, pgid < 0 names a process group (-pgid),
 * and pgid == 0 clears any existing ownership.  Returns 0 or an errno.
 */
int
fsetown(pgid, sigiop)
	pid_t pgid;
	struct sigio **sigiop;
{
	struct proc *proc;
	struct pgrp *pgrp;
	struct sigio *sigio;
	int ret;

	if (pgid == 0) {
		funsetown(sigiop);
		return (0);
	}

	ret = 0;

	/* Allocate and fill in the new sigio out of locks. */
	MALLOC(sigio, struct sigio *, sizeof(struct sigio), M_SIGIO, M_WAITOK);
	sigio->sio_pgid = pgid;
	sigio->sio_ucred = crhold(curthread->td_ucred);
	sigio->sio_myref = sigiop;

	sx_slock(&proctree_lock);
	if (pgid > 0) {
		proc = pfind(pgid);
		if (proc == NULL) {
			ret = ESRCH;
			goto fail;
		}

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		PROC_UNLOCK(proc);
		/*
		 * NOTE(review): p_session is read after PROC_UNLOCK();
		 * presumably the shared proctree_lock held above keeps
		 * the session association stable -- confirm.
		 */
		if (proc->p_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		pgrp = NULL;
	} else /* if (pgid < 0) */ {
		pgrp = pgfind(-pgid);
		if (pgrp == NULL) {
			ret = ESRCH;
			goto fail;
		}
		PGRP_UNLOCK(pgrp);

		/*
		 * Policy - Don't allow a process to FSETOWN a process
		 * in another session.
		 *
		 * Remove this test to allow maximum flexibility or
		 * restrict FSETOWN to the current process or process
		 * group for maximum safety.
		 */
		if (pgrp->pg_session != curthread->td_proc->p_session) {
			ret = EPERM;
			goto fail;
		}

		proc = NULL;
	}
	/* Detach any previous owner before installing the new one. */
	funsetown(sigiop);
	if (pgid > 0) {
		PROC_LOCK(proc);
		/*
		 * Since funsetownlst() is called without the proctree
		 * locked, we need to check for P_WEXIT.
		 * XXX: is ESRCH correct?
		 */
		if ((proc->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(proc);
			ret = ESRCH;
			goto fail;
		}
		SLIST_INSERT_HEAD(&proc->p_sigiolst, sigio, sio_pgsigio);
		sigio->sio_proc = proc;
		PROC_UNLOCK(proc);
	} else {
		PGRP_LOCK(pgrp);
		SLIST_INSERT_HEAD(&pgrp->pg_sigiolst, sigio, sio_pgsigio);
		sigio->sio_pgrp = pgrp;
		PGRP_UNLOCK(pgrp);
	}
	sx_sunlock(&proctree_lock);
	/* Publish the new sigio through the caller's pointer. */
	SIGIO_LOCK();
	*sigiop = sigio;
	SIGIO_UNLOCK();
	return (0);

fail:
	sx_sunlock(&proctree_lock);
	crfree(sigio->sio_ucred);
	FREE(sigio, M_SIGIO);
	return (ret);
}
761
762/*
763 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
764 */
765pid_t
766fgetown(sigiop)
767 struct sigio **sigiop;
768{
769 pid_t pgid;
770
771 SIGIO_LOCK();
772 pgid = (*sigiop != NULL) ? (*sigiop)->sio_pgid : 0;
773 SIGIO_UNLOCK();
774 return (pgid);
775}
776
/*
 * Close a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct close_args {
	int	fd;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
close(td, uap)
	struct thread *td;
	struct close_args *uap;
{
	struct filedesc *fdp;
	struct file *fp;
	int fd, error;

	fd = uap->fd;
	error = 0;
	fdp = td->td_proc->p_fd;
	mtx_lock(&Giant);
	FILEDESC_LOCK(fdp);
	if ((unsigned)fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[fd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		error = EBADF;
		goto done2;
	}
#if 0
	if (fdp->fd_ofileflags[fd] & UF_MAPPED)
		(void) munmapfd(td, fd);
#endif
	/* Clear the slot before closef() so no one else can find it. */
	fdp->fd_ofiles[fd] = NULL;
	fdp->fd_ofileflags[fd] = 0;

	/*
	 * we now hold the fp reference that used to be owned by the descriptor
	 * array.
	 */
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	if (fd < fdp->fd_freefile)
		fdp->fd_freefile = fd;
	/* Detach any knotes on the descriptor; must drop the lock first. */
	if (fd < fdp->fd_knlistsize) {
		FILEDESC_UNLOCK(fdp);
		knote_fdclose(td, fd);
	} else
		FILEDESC_UNLOCK(fdp);

	error = closef(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}
835
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Return status information about a file descriptor.
 */
#ifndef _SYS_SYSPROTO_H_
struct ofstat_args {
	int	fd;
	struct	ostat *sb;
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
ofstat(struct thread *td, struct ofstat_args *uap)
{
	struct ostat oub;
	struct stat ub;
	struct file *fp;
	int error;

	mtx_lock(&Giant);
	error = fget(td, uap->fd, &fp);
	if (error != 0)
		goto done2;
	/* Stat into the native struct, then convert to the old layout. */
	error = fo_stat(fp, &ub, td->td_ucred, td);
	if (error == 0) {
		cvtstat(&ub, &oub);
		error = copyout(&oub, uap->sb, sizeof(oub));
	}
	fdrop(fp, td);
done2:
	mtx_unlock(&Giant);
	return (error);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
874
875/*
876 * Return status information about a file descriptor.
877 */
878#ifndef _SYS_SYSPROTO_H_
879struct fstat_args {
880 int fd;
881 struct stat *sb;
882};
883#endif
884/*
885 * MPSAFE
886 */
887/* ARGSUSED */
888int
889fstat(td, uap)
890 struct thread *td;
891 struct fstat_args *uap;
892{
893 struct file *fp;
894 struct stat ub;
895 int error;
896
897 mtx_lock(&Giant);
898 if ((error = fget(td, uap->fd, &fp)) != 0)
899 goto done2;
900 error = fo_stat(fp, &ub, td->td_ucred, td);
901 if (error == 0)
902 error = copyout(&ub, uap->sb, sizeof(ub));
903 fdrop(fp, td);
904done2:
905 mtx_unlock(&Giant);
906 return (error);
907}
908
909/*
910 * Return status information about a file descriptor.
911 */
912#ifndef _SYS_SYSPROTO_H_
913struct nfstat_args {
914 int fd;
915 struct nstat *sb;
916};
917#endif
918/*
919 * MPSAFE
920 */
921/* ARGSUSED */
922int
923nfstat(td, uap)
924 struct thread *td;
925 struct nfstat_args *uap;
926{
927 struct file *fp;
928 struct stat ub;
929 struct nstat nub;
930 int error;
931
932 mtx_lock(&Giant);
933 if ((error = fget(td, uap->fd, &fp)) != 0)
934 goto done2;
935 error = fo_stat(fp, &ub, td->td_ucred, td);
936 if (error == 0) {
937 cvtnstat(&ub, &nub);
938 error = copyout(&nub, uap->sb, sizeof(nub));
939 }
940 fdrop(fp, td);
941done2:
942 mtx_unlock(&Giant);
943 return (error);
944}
945
946/*
947 * Return pathconf information about a file descriptor.
948 */
949#ifndef _SYS_SYSPROTO_H_
950struct fpathconf_args {
951 int fd;
952 int name;
953};
954#endif
955/*
956 * MPSAFE
957 */
958/* ARGSUSED */
959int
960fpathconf(td, uap)
961 struct thread *td;
962 struct fpathconf_args *uap;
963{
964 struct file *fp;
965 struct vnode *vp;
966 int error;
967
968 if ((error = fget(td, uap->fd, &fp)) != 0)
969 return (error);
970
971 /* If asynchronous I/O is available, it works for all descriptors. */
972 if (uap->name == _PC_ASYNC_IO) {
973 td->td_retval[0] = async_io_version;
974 goto out;
975 }
976 switch (fp->f_type) {
977 case DTYPE_PIPE:
978 case DTYPE_SOCKET:
979 if (uap->name != _PC_PIPE_BUF) {
980 error = EINVAL;
981 } else {
982 td->td_retval[0] = PIPE_BUF;
983 error = 0;
984 }
985 break;
986 case DTYPE_FIFO:
987 case DTYPE_VNODE:
988 vp = fp->f_data;
989 mtx_lock(&Giant);
990 error = VOP_PATHCONF(vp, uap->name, td->td_retval);
991 mtx_unlock(&Giant);
992 break;
993 default:
994 error = EOPNOTSUPP;
995 break;
996 }
997out:
998 fdrop(fp, td);
999 return (error);
1000}
1001
/*
 * Allocate a file descriptor for the process.
 */
/* Statistics: number of times the ofile table has been enlarged. */
static int fdexpand;
SYSCTL_INT(_debug, OID_AUTO, fdexpand, CTLFLAG_RD, &fdexpand, 0, "");

/*
 * Find a free descriptor >= "want" for the current process and return
 * it through "result", growing the ofile table if necessary.  Called
 * and returns with the filedesc lock held, but may drop and reacquire
 * it while allocating a larger table.  Returns 0 or EMFILE.
 */
int
fdalloc(td, want, result)
	struct thread *td;
	int want;
	int *result;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	int lim, last, nfiles;
	struct file **newofile, **oldofile;
	char *newofileflags;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	/*
	 * Search for a free descriptor starting at the higher
	 * of want or fd_freefile. If that fails, consider
	 * expanding the ofile array.
	 */
	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	for (;;) {
		last = min(fdp->fd_nfiles, lim);
		i = max(want, fdp->fd_freefile);
		for (; i < last; i++) {
			if (fdp->fd_ofiles[i] == NULL) {
				fdp->fd_ofileflags[i] = 0;
				if (i > fdp->fd_lastfile)
					fdp->fd_lastfile = i;
				if (want <= fdp->fd_freefile)
					fdp->fd_freefile = i;
				*result = i;
				return (0);
			}
		}

		/*
		 * No space in current array. Expand?
		 */
		if (i >= lim)
			return (EMFILE);
		/* Double the table (starting at NDEXTENT) until it fits. */
		if (fdp->fd_nfiles < NDEXTENT)
			nfiles = NDEXTENT;
		else
			nfiles = 2 * fdp->fd_nfiles;
		while (nfiles < want)
			nfiles <<= 1;
		FILEDESC_UNLOCK(fdp);
		/*
		 * XXX malloc() calls uma_large_malloc() for sizes larger
		 * than KMEM_ZMAX bytes. uma_large_malloc() requires Giant.
		 */
		mtx_lock(&Giant);
		newofile = malloc(nfiles * OFILESIZE, M_FILEDESC, M_WAITOK);
		mtx_unlock(&Giant);

		/*
		 * Deal with file-table extend race that might have
		 * occurred while filedesc was unlocked.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_nfiles >= nfiles) {
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(newofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
			continue;
		}
		/* The flag bytes live directly after the pointer array. */
		newofileflags = (char *) &newofile[nfiles];
		/*
		 * Copy the existing ofile and ofileflags arrays
		 * and zero the new portion of each array.
		 */
		i = fdp->fd_nfiles * sizeof(struct file *);
		bcopy(fdp->fd_ofiles, newofile, i);
		bzero((char *)newofile + i,
		    nfiles * sizeof(struct file *) - i);
		i = fdp->fd_nfiles * sizeof(char);
		bcopy(fdp->fd_ofileflags, newofileflags, i);
		bzero(newofileflags + i, nfiles * sizeof(char) - i);
		/* Tables of <= NDFILE entries are inline, never freed. */
		if (fdp->fd_nfiles > NDFILE)
			oldofile = fdp->fd_ofiles;
		else
			oldofile = NULL;
		fdp->fd_ofiles = newofile;
		fdp->fd_ofileflags = newofileflags;
		fdp->fd_nfiles = nfiles;
		fdexpand++;
		if (oldofile != NULL) {
			/* XXX uma_large_free() needs Giant. */
			FILEDESC_UNLOCK(fdp);
			mtx_lock(&Giant);
			free(oldofile, M_FILEDESC);
			mtx_unlock(&Giant);
			FILEDESC_LOCK(fdp);
		}
	}
	return (0);
}
1109
/*
 * Check to see whether n user file descriptors
 * are available to the process p.
 * Returns 1 if at least n descriptors can still be opened, else 0.
 * Called with the filedesc lock held.
 */
int
fdavail(td, n)
	struct thread *td;
	int n;
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file **fpp;
	int i, lim, last;

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc);
	/* Slots the table could still grow into count as free. */
	if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0)
		return (1);
	/* Count empty slots in the existing table, from fd_freefile up. */
	last = min(fdp->fd_nfiles, lim);
	fpp = &fdp->fd_ofiles[fdp->fd_freefile];
	for (i = last - fdp->fd_freefile; --i >= 0; fpp++) {
		if (*fpp == NULL && --n <= 0)
			return (1);
	}
	return (0);
}
1137
/*
 * Create a new open file structure and allocate
 * a file descriptor for the process that refers to it.
 * On success the new file (f_count == 1, badfileops) is returned via
 * *resultfp and its descriptor via *resultfd; returns 0 or an errno.
 */
int
falloc(td, resultfp, resultfd)
	struct thread *td;
	struct file **resultfp;
	int *resultfd;
{
	struct proc *p = td->td_proc;
	struct file *fp, *fq;
	int error, i;

	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
	/* Enforce the global open-file limit under filelist_lock. */
	sx_xlock(&filelist_lock);
	if (nfiles >= maxfiles) {
		sx_xunlock(&filelist_lock);
		uma_zfree(file_zone, fp);
		tablefull("file");
		return (ENFILE);
	}
	nfiles++;

	/*
	 * If the process has file descriptor zero open, add the new file
	 * descriptor to the list of open files at that point, otherwise
	 * put it at the front of the list of open files.
	 */
	fp->f_mtxp = mtx_pool_alloc();
	fp->f_gcflag = 0;
	fp->f_count = 1;
	fp->f_cred = crhold(td->td_ucred);
	fp->f_ops = &badfileops;
	fp->f_seqcount = 1;
	FILEDESC_LOCK(p->p_fd);
	if ((fq = p->p_fd->fd_ofiles[0])) {
		LIST_INSERT_AFTER(fq, fp, f_list);
	} else {
		LIST_INSERT_HEAD(&filehead, fp, f_list);
	}
	sx_xunlock(&filelist_lock);
	/* Reserve a descriptor slot; fdalloc() may drop the filedesc lock. */
	if ((error = fdalloc(td, 0, &i))) {
		FILEDESC_UNLOCK(p->p_fd);
		fdrop(fp, td);
		return (error);
	}
	p->p_fd->fd_ofiles[i] = fp;
	FILEDESC_UNLOCK(p->p_fd);
	if (resultfp)
		*resultfp = fp;
	if (resultfd)
		*resultfd = i;
	return (0);
}
1193
1194/*
1195 * Free a file descriptor.
1196 */
1197void
1198ffree(fp)
1199 struct file *fp;
1200{
1201
1202 KASSERT(fp->f_count == 0, ("ffree: fp_fcount not 0!"));
1203 sx_xlock(&filelist_lock);
1204 LIST_REMOVE(fp, f_list);
1205 nfiles--;
1206 sx_xunlock(&filelist_lock);
1207 crfree(fp->f_cred);
1208 uma_zfree(file_zone, fp);
1209}
1210
1211/*
1212 * Build a new filedesc structure from another.
1213 * Copy the current, root, and jail root vnode references.
1214 */
1215struct filedesc *
1216fdinit(fdp)
1217 struct filedesc *fdp;
1218{
1219 struct filedesc0 *newfdp;
1220
1221 MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0),
1222 M_FILEDESC, M_WAITOK | M_ZERO);
1223 mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
1224 newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
1225 if (newfdp->fd_fd.fd_cdir)
1226 VREF(newfdp->fd_fd.fd_cdir);
1227 newfdp->fd_fd.fd_rdir = fdp->fd_rdir;
1228 if (newfdp->fd_fd.fd_rdir)
1229 VREF(newfdp->fd_fd.fd_rdir);
1230 newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
1231 if (newfdp->fd_fd.fd_jdir)
1232 VREF(newfdp->fd_fd.fd_jdir);
1233
1234 /* Create the file descriptor table. */
1235 newfdp->fd_fd.fd_refcnt = 1;
1236 newfdp->fd_fd.fd_cmask = cmask;
1237 newfdp->fd_fd.fd_ofiles = newfdp->fd_dfiles;
1238 newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags;
1239 newfdp->fd_fd.fd_nfiles = NDFILE;
1240 newfdp->fd_fd.fd_knlistsize = -1;
1241 return (&newfdp->fd_fd);
1242}
1243
1244/*
1245 * Share a filedesc structure.
1246 */
1247struct filedesc *
1248fdshare(fdp)
1249 struct filedesc *fdp;
1250{
1251 FILEDESC_LOCK(fdp);
1252 fdp->fd_refcnt++;
1253 FILEDESC_UNLOCK(fdp);
1254 return (fdp);
1255}
1256
/*
 * Copy a filedesc structure.
 * A NULL pointer in returns a NULL reference, this is to ease callers,
 * not catch errors.
 * Called with fdp locked; drops and reacquires that lock around the
 * allocations, retrying if the source table is resized meanwhile.
 * kqueue descriptors are not inherited by the copy.
 */
struct filedesc *
fdcopy(fdp)
	struct filedesc *fdp;
{
	struct filedesc *newfdp;
	struct file **fpp;
	int i, j;

	/* Certain daemons might not have file descriptors. */
	if (fdp == NULL)
		return (NULL);

	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);

	FILEDESC_UNLOCK(fdp);
	MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0),
	    M_FILEDESC, M_WAITOK);
	FILEDESC_LOCK(fdp);
	bcopy(fdp, newfdp, sizeof(struct filedesc));
	FILEDESC_UNLOCK(fdp);
	/* The copied mutex is not valid; reinitialize it. */
	bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx));
	mtx_init(&newfdp->fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
	if (newfdp->fd_cdir)
		VREF(newfdp->fd_cdir);
	if (newfdp->fd_rdir)
		VREF(newfdp->fd_rdir);
	if (newfdp->fd_jdir)
		VREF(newfdp->fd_jdir);
	newfdp->fd_refcnt = 1;

	/*
	 * If the number of open files fits in the internal arrays
	 * of the open file structure, use them, otherwise allocate
	 * additional memory for the number of descriptors currently
	 * in use.
	 */
	FILEDESC_LOCK(fdp);
	newfdp->fd_lastfile = fdp->fd_lastfile;
	newfdp->fd_nfiles = fdp->fd_nfiles;
	if (newfdp->fd_lastfile < NDFILE) {
		newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles;
		newfdp->fd_ofileflags =
		    ((struct filedesc0 *) newfdp)->fd_dfileflags;
		i = NDFILE;
	} else {
		/*
		 * Compute the smallest multiple of NDEXTENT needed
		 * for the file descriptors currently in use,
		 * allowing the table to shrink.
		 */
retry:
		i = newfdp->fd_nfiles;
		while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2)
			i /= 2;
		FILEDESC_UNLOCK(fdp);
		MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE,
		    M_FILEDESC, M_WAITOK);
		FILEDESC_LOCK(fdp);
		newfdp->fd_lastfile = fdp->fd_lastfile;
		newfdp->fd_nfiles = fdp->fd_nfiles;
		j = newfdp->fd_nfiles;
		while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2)
			j /= 2;
		if (i != j) {
			/*
			 * The size of the original table has changed.
			 * Go over once again.
			 */
			FILEDESC_UNLOCK(fdp);
			FREE(newfdp->fd_ofiles, M_FILEDESC);
			FILEDESC_LOCK(fdp);
			newfdp->fd_lastfile = fdp->fd_lastfile;
			newfdp->fd_nfiles = fdp->fd_nfiles;
			goto retry;
		}
		newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i];
	}
	newfdp->fd_nfiles = i;
	bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
	bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));

	/*
	 * kq descriptors cannot be copied.
	 */
	if (newfdp->fd_knlistsize != -1) {
		fpp = &newfdp->fd_ofiles[newfdp->fd_lastfile];
		for (i = newfdp->fd_lastfile; i >= 0; i--, fpp--) {
			if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) {
				*fpp = NULL;
				if (i < newfdp->fd_freefile)
					newfdp->fd_freefile = i;
			}
			if (*fpp == NULL && i == newfdp->fd_lastfile && i > 0)
				newfdp->fd_lastfile--;
		}
		newfdp->fd_knlist = NULL;
		newfdp->fd_knlistsize = -1;
		newfdp->fd_knhash = NULL;
		newfdp->fd_knhashmask = 0;
	}

	/* Take a reference for every file the copy now points at. */
	fpp = newfdp->fd_ofiles;
	for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp != NULL)
			fhold(*fpp);
	}
	return (newfdp);
}
1370
/*
 * A mutex to protect the association between a proc and filedesc.
 * (Held around the p_fd pointer clear in fdfree().)
 */
struct mtx fdesc_mtx;
MTX_SYSINIT(fdesc, &fdesc_mtx, "fdesc", MTX_DEF);
1374
/*
 * Release a filedesc structure.
 * Drops one reference; on the last reference every remaining open file
 * is closed and the structure itself is freed.
 */
void
fdfree(td)
	struct thread *td;
{
	struct filedesc *fdp;
	struct file **fpp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_LOCK(fdp);
	if (--fdp->fd_refcnt > 0) {
		FILEDESC_UNLOCK(fdp);
		return;
	}

	/*
	 * We are the last reference to the structure, so we can
	 * safely assume it will not change out from under us.
	 */
	FILEDESC_UNLOCK(fdp);
	fpp = fdp->fd_ofiles;
	for (i = fdp->fd_lastfile; i-- >= 0; fpp++) {
		if (*fpp)
			(void) closef(*fpp, td);
	}

	/* XXX This should happen earlier. */
	mtx_lock(&fdesc_mtx);
	td->td_proc->p_fd = NULL;
	mtx_unlock(&fdesc_mtx);

	/* Tables of <= NDFILE entries are inline in struct filedesc0. */
	if (fdp->fd_nfiles > NDFILE)
		FREE(fdp->fd_ofiles, M_FILEDESC);
	if (fdp->fd_cdir)
		vrele(fdp->fd_cdir);
	if (fdp->fd_rdir)
		vrele(fdp->fd_rdir);
	if (fdp->fd_jdir)
		vrele(fdp->fd_jdir);
	if (fdp->fd_knlist)
		FREE(fdp->fd_knlist, M_KQUEUE);
	if (fdp->fd_knhash)
		FREE(fdp->fd_knhash, M_KQUEUE);
	mtx_destroy(&fdp->fd_mtx);
	FREE(fdp, M_FILEDESC);
}
1428
1429/*
1430 * For setugid programs, we don't want to people to use that setugidness
1431 * to generate error messages which write to a file which otherwise would
1432 * otherwise be off-limits to the process. We check for filesystems where
1433 * the vnode can change out from under us after execve (like [lin]procfs).
1434 *
1435 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1436 * sufficient. We also don't for check setugidness since we know we are.
1437 */
1438static int
1439is_unsafe(struct file *fp)
1440{
1441 if (fp->f_type == DTYPE_VNODE) {
1442 struct vnode *vp = fp->f_data;
1443
1444 if ((vp->v_vflag & VV_PROCDEP) != 0)
1445 return (1);
1446 }
1447 return (0);
1448}
1449
/*
 * Make this setguid thing safe, if at all possible.
 * Close descriptors 0..2 that reference process-dependent vnodes
 * (see is_unsafe() above).
 */
void
setugidsafety(td)
	struct thread *td;
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	/*
	 * Note: fdp->fd_ofiles may be reallocated out from under us while
	 * we are blocked in a close. Be careful!
	 */
	FILEDESC_LOCK(fdp);
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		/* Only stdin/stdout/stderr are of interest. */
		if (i > 2)
			break;
		if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) {
			struct file *fp;

#if 0
			if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0)
				(void) munmapfd(td, i);
#endif
			/* knote_fdclose() must run without the lock. */
			if (i < fdp->fd_knlistsize) {
				FILEDESC_UNLOCK(fdp);
				knote_fdclose(td, i);
				FILEDESC_LOCK(fdp);
			}
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			if (i < fdp->fd_freefile)
				fdp->fd_freefile = i;
			FILEDESC_UNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_LOCK(fdp);
		}
	}
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	FILEDESC_UNLOCK(fdp);
}
1503
/*
 * Close any files on exec?
 * Closes every descriptor marked UF_EXCLOSE (close-on-exec).
 */
void
fdcloseexec(td)
	struct thread *td;
{
	struct filedesc *fdp;
	int i;

	/* Certain daemons might not have file descriptors. */
	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return;

	FILEDESC_LOCK(fdp);

	/*
	 * We cannot cache fd_ofiles or fd_ofileflags since operations
	 * may block and rip them out from under us.
	 */
	for (i = 0; i <= fdp->fd_lastfile; i++) {
		if (fdp->fd_ofiles[i] != NULL &&
		    (fdp->fd_ofileflags[i] & UF_EXCLOSE)) {
			struct file *fp;

#if 0
			if (fdp->fd_ofileflags[i] & UF_MAPPED)
				(void) munmapfd(td, i);
#endif
			/* knote_fdclose() must run without the lock. */
			if (i < fdp->fd_knlistsize) {
				FILEDESC_UNLOCK(fdp);
				knote_fdclose(td, i);
				FILEDESC_LOCK(fdp);
			}
			/*
			 * NULL-out descriptor prior to close to avoid
			 * a race while close blocks.
			 */
			fp = fdp->fd_ofiles[i];
			fdp->fd_ofiles[i] = NULL;
			fdp->fd_ofileflags[i] = 0;
			if (i < fdp->fd_freefile)
				fdp->fd_freefile = i;
			FILEDESC_UNLOCK(fdp);
			(void) closef(fp, td);
			FILEDESC_LOCK(fdp);
		}
	}
	while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL)
		fdp->fd_lastfile--;
	FILEDESC_UNLOCK(fdp);
}
1557
/*
 * It is unsafe for set[ug]id processes to be started with file
 * descriptors 0..2 closed, as these descriptors are given implicit
 * significance in the Standard C library. fdcheckstd() will create a
 * descriptor referencing /dev/null for each of stdin, stdout, and
 * stderr that is not already open.
 */
int
fdcheckstd(td)
	struct thread *td;
{
	struct nameidata nd;
	struct filedesc *fdp;
	struct file *fp;
	register_t retval;
	int fd, i, error, flags, devnull;

	fdp = td->td_proc->p_fd;
	if (fdp == NULL)
		return (0);
	devnull = -1;
	error = 0;
	for (i = 0; i < 3; i++) {
		if (fdp->fd_ofiles[i] != NULL)
			continue;
		if (devnull < 0) {
			/* First missing slot: open /dev/null onto it. */
			error = falloc(td, &fp, &fd);
			if (error != 0)
				break;
			/* falloc() allocates lowest free fd, i.e. this one. */
			KASSERT(fd == i, ("oof, we didn't get our fd"));
			NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/dev/null",
			    td);
			flags = FREAD | FWRITE;
			error = vn_open(&nd, &flags, 0);
			if (error != 0) {
				/* Undo falloc(): clear slot, drop file. */
				FILEDESC_LOCK(fdp);
				fdp->fd_ofiles[fd] = NULL;
				FILEDESC_UNLOCK(fdp);
				fdrop(fp, td);
				break;
			}
			NDFREE(&nd, NDF_ONLY_PNBUF);
			fp->f_data = nd.ni_vp;
			fp->f_flag = flags;
			fp->f_ops = &vnops;
			fp->f_type = DTYPE_VNODE;
			VOP_UNLOCK(nd.ni_vp, 0, td);
			devnull = fd;
		} else {
			/* Subsequent missing slots dup the first one. */
			error = do_dup(td, DUP_FIXED, devnull, i, &retval);
			if (error != 0)
				break;
		}
	}
	return (error);
}
1614
/*
 * Internal form of close.
 * Decrement reference count on file structure.
 * Note: td may be NULL when closing a file
 * that was being passed in a message.
 *
 * Returns 0 for a NULL fp, otherwise the result of fdrop().
 */
int
closef(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct vnode *vp;
	struct flock lf;

	if (fp == NULL)
		return (0);
	/*
	 * POSIX record locking dictates that any close releases ALL
	 * locks owned by this process. This is handled by setting
	 * a flag in the unlock to free ONLY locks obeying POSIX
	 * semantics, and not to free BSD-style file locks.
	 * If the descriptor was in a message, POSIX-style locks
	 * aren't passed with the descriptor.
	 */
	if (td != NULL && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0 &&
	    fp->f_type == DTYPE_VNODE) {
		/* Whole-file range: offset 0, zero (i.e. unbounded) length. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		lf.l_type = F_UNLCK;
		vp = fp->f_data;
		/* POSIX locks are owned by the process leader. */
		(void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader,
		    F_UNLCK, &lf, F_POSIX);
	}
	return (fdrop(fp, td));
}
1651
/*
 * Drop a reference on the struct file passed in.  This is the unlocked
 * wrapper around fdrop_locked(), which releases the file (and may close
 * it) when the reference count hits zero.
 */
int
fdrop(fp, td)
	struct file *fp;
	struct thread *td;
{

	FILE_LOCK(fp);
	return (fdrop_locked(fp, td));
}
1665
/*
 * Extract the file pointer associated with the specified descriptor for
 * the current user process.
 *
 * If the descriptor doesn't exist, EBADF is returned.
 *
 * If the descriptor exists but doesn't match 'flags' then
 * return EBADF for read attempts and EINVAL for write attempts.
 *
 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
 * It should be dropped with fdrop().
 * If it is not set, then the refcount will not be bumped however the
 * thread's filedesc struct will be returned locked (for fgetsock).
 *
 * If an error occurred the non-zero error is returned and *fpp is set to NULL.
 * Otherwise *fpp is set and zero is returned.
 */
static __inline int
_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
{
	struct filedesc *fdp;
	struct file *fp;

	*fpp = NULL;
	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
		return (EBADF);
	FILEDESC_LOCK(fdp);
	/* Treat a descriptor whose ops are badfileops like a closed one. */
	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * Note: FREAD failures return EBADF to maintain backwards
	 * compatibility with what routines returned before.
	 *
	 * Only one flag, or 0, may be specified.
	 */
	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}
	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
		FILEDESC_UNLOCK(fdp);
		return (EINVAL);
	}
	if (hold) {
		/* Take a reference for the caller, then drop the lock. */
		fhold(fp);
		FILEDESC_UNLOCK(fdp);
	}
	*fpp = fp;
	return (0);
}
1719
/*
 * Public wrapper around _fget(): no access-mode filtering, and the
 * returned file has its reference count bumped (release with fdrop()).
 */
int
fget(struct thread *td, int fd, struct file **fpp)
{
	int error;

	error = _fget(td, fd, fpp, 0, 1);
	return (error);
}
1726
1727int
1728fget_read(struct thread *td, int fd, struct file **fpp)
1729{
1730
1731 return(_fget(td, fd, fpp, FREAD, 1));
1732}
1733
1734int
1735fget_write(struct thread *td, int fd, struct file **fpp)
1736{
1737
1738 return(_fget(td, fd, fpp, FWRITE, 1));
1739}
1740
1741/*
1742 * Like fget() but loads the underlying vnode, or returns an error if
1743 * the descriptor does not represent a vnode. Note that pipes use vnodes
1744 * but never have VM objects (so VOP_GETVOBJECT() calls will return an
1745 * error). The returned vnode will be vref()d.
1746 */
1747static __inline int
1748_fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
1749{
1750 struct file *fp;
1751 int error;
1752
1753 *vpp = NULL;
1754 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1755 return (error);
1756 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) {
1757 error = EINVAL;
1758 } else {
1759 *vpp = fp->f_data;
1760 vref(*vpp);
1761 }
1762 FILEDESC_UNLOCK(td->td_proc->p_fd);
1763 return (error);
1764}
1765
/*
 * Fetch the vref()d vnode behind descriptor fd, with no access-mode
 * flag requested.
 */
int
fgetvp(struct thread *td, int fd, struct vnode **vpp)
{
	int error;

	error = _fgetvp(td, fd, vpp, 0);
	return (error);
}
1772
1773int
1774fgetvp_read(struct thread *td, int fd, struct vnode **vpp)
1775{
1776
1777 return (_fgetvp(td, fd, vpp, FREAD));
1778}
1779
1780int
1781fgetvp_write(struct thread *td, int fd, struct vnode **vpp)
1782{
1783
1784 return (_fgetvp(td, fd, vpp, FWRITE));
1785}
1786
1787/*
1788 * Like fget() but loads the underlying socket, or returns an error if
1789 * the descriptor does not represent a socket.
1790 *
1791 * We bump the ref count on the returned socket. XXX Also obtain the SX
1792 * lock in the future.
1793 */
1794int
1795fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
1796{
1797 struct file *fp;
1798 int error;
1799
1800 *spp = NULL;
1801 if (fflagp != NULL)
1802 *fflagp = 0;
1803 if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
1804 return (error);
1805 if (fp->f_type != DTYPE_SOCKET) {
1806 error = ENOTSOCK;
1807 } else {
1808 *spp = fp->f_data;
1809 if (fflagp)
1810 *fflagp = fp->f_flag;
1811 soref(*spp);
1812 }
1813 FILEDESC_UNLOCK(td->td_proc->p_fd);
1814 return (error);
1815}
1816
/*
 * Drop the reference count on the socket and XXX release the SX lock in
 * the future. The last reference closes the socket.
 */
void
fputsock(struct socket *so)
{

	sorele(so);
}
1827
/*
 * Drop reference on struct file passed in, may call closef if the
 * reference hits zero.
 * Expects struct file locked, and will unlock it.
 */
int
fdrop_locked(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct flock lf;
	struct vnode *vp;
	int error;

	FILE_LOCK_ASSERT(fp, MA_OWNED);

	if (--fp->f_count > 0) {
		/* Other references remain; nothing more to do. */
		FILE_UNLOCK(fp);
		return (0);
	}
	/* Last reference: tear the file down under Giant. */
	mtx_lock(&Giant);
	if (fp->f_count < 0)
		panic("fdrop: count < 0");
	/* Release any flock()-style lock held through this file. */
	if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;	/* zero length == whole file */
		lf.l_type = F_UNLCK;
		vp = fp->f_data;
		FILE_UNLOCK(fp);
		(void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
	} else
		FILE_UNLOCK(fp);
	/*
	 * f_ops == &badfileops means no real backing object was ever
	 * attached (cf. _fget()); skip fo_close() in that case.
	 */
	if (fp->f_ops != &badfileops)
		error = fo_close(fp, td);
	else
		error = 0;
	ffree(fp);
	mtx_unlock(&Giant);
	return (error);
}
1869
1870/*
1871 * Apply an advisory lock on a file descriptor.
1872 *
1873 * Just attempt to get a record lock of the requested type on
1874 * the entire file (l_whence = SEEK_SET, l_start = 0, l_len = 0).
1875 */
#ifndef _SYS_SYSPROTO_H_
struct flock_args {
	int fd;		/* descriptor to operate on */
	int how;	/* LOCK_SH/LOCK_EX/LOCK_UN, may be or'd with LOCK_NB */
};
#endif
/*
 * MPSAFE
 */
/* ARGSUSED */
int
flock(td, uap)
	struct thread *td;
	struct flock_args *uap;
{
	struct file *fp;
	struct vnode *vp;
	struct flock lf;
	int error;

	if ((error = fget(td, uap->fd, &fp)) != 0)
		return (error);
	/* flock() only works on vnode-backed descriptors. */
	if (fp->f_type != DTYPE_VNODE) {
		fdrop(fp, td);
		return (EOPNOTSUPP);
	}

	mtx_lock(&Giant);
	vp = fp->f_data;
	/* Whole-file range: offset 0, zero (i.e. unbounded) length. */
	lf.l_whence = SEEK_SET;
	lf.l_start = 0;
	lf.l_len = 0;
	if (uap->how & LOCK_UN) {
		lf.l_type = F_UNLCK;
		FILE_LOCK(fp);
		fp->f_flag &= ~FHASLOCK;
		FILE_UNLOCK(fp);
		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
		goto done2;
	}
	if (uap->how & LOCK_EX)
		lf.l_type = F_WRLCK;
	else if (uap->how & LOCK_SH)
		lf.l_type = F_RDLCK;
	else {
		/*
		 * NOTE(review): historic BSD returns EBADF for an invalid
		 * operation here; POSIX suggests EINVAL -- confirm before
		 * changing, existing callers may depend on EBADF.
		 */
		error = EBADF;
		goto done2;
	}
	/* Mark the file so fdrop_locked() releases the lock on last close. */
	FILE_LOCK(fp);
	fp->f_flag |= FHASLOCK;
	FILE_UNLOCK(fp);
	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
done2:
	fdrop(fp, td);
	mtx_unlock(&Giant);
	return (error);
}
1934
1935/*
1936 * File Descriptor pseudo-device driver (/dev/fd/).
1937 *
1938 * Opening minor device N dup()s the file (if any) connected to file
1939 * descriptor N belonging to the calling process. Note that this driver
1940 * consists of only the ``open()'' routine, because all subsequent
1941 * references to this file will be direct to the other driver.
1942 */
/* ARGSUSED */
static int
fdopen(dev, mode, type, td)
	dev_t dev;
	int mode, type;
	struct thread *td;
{

	/*
	 * XXX Kludge: set curthread->td_dupfd to contain the value of
	 * the file descriptor being sought for duplication. The error
	 * return ensures that the vnode for this device will be released
	 * by vn_open. Open will detect this special error and take the
	 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
	 * will simply report the error.
	 */
	td->td_dupfd = dev2unit(dev);
	return (ENODEV);
}
1962
/*
 * Duplicate the specified descriptor to a free descriptor.
 *
 * 'error' is the error a prior open attempt produced (set up via
 * fdopen() above); only ENODEV and ENXIO get special handling, any
 * other value is returned unchanged.  Returns 0 on success, EBADF
 * when dfd is invalid/closed, or EACCES on an access-mode mismatch.
 */
int
dupfdopen(td, fdp, indx, dfd, mode, error)
	struct thread *td;
	struct filedesc *fdp;
	int indx, dfd;
	int mode;
	int error;
{
	struct file *wfp;
	struct file *fp;

	/*
	 * If the to-be-dup'd fd number is greater than the allowed number
	 * of file descriptors, or the fd to be dup'd has already been
	 * closed, then reject.
	 */
	FILEDESC_LOCK(fdp);
	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
		FILEDESC_UNLOCK(fdp);
		return (EBADF);
	}

	/*
	 * There are two cases of interest here.
	 *
	 * For ENODEV simply dup (dfd) to file descriptor
	 * (indx) and return.
	 *
	 * For ENXIO steal away the file structure from (dfd) and
	 * store it in (indx). (dfd) is effectively closed by
	 * this operation.
	 *
	 * Any other error code is just returned.
	 */
	switch (error) {
	case ENODEV:
		/*
		 * Check that the mode the file is being opened for is a
		 * subset of the mode of the existing descriptor.
		 */
		FILE_LOCK(wfp);
		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
			FILE_UNLOCK(wfp);
			FILEDESC_UNLOCK(fdp);
			return (EACCES);
		}
		/* Remember whatever was previously installed at indx. */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = wfp;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fhold_locked(wfp);	/* reference for the new slot */
		FILE_UNLOCK(wfp);
		if (indx > fdp->fd_lastfile)
			fdp->fd_lastfile = indx;
		/* fdrop_locked() below expects the file locked. */
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);
		/*
		 * We now own the reference to fp that the ofiles[] array
		 * used to own. Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	case ENXIO:
		/*
		 * Steal away the file pointer from dfd and stuff it into indx.
		 */
		fp = fdp->fd_ofiles[indx];
#if 0
		if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED)
			(void) munmapfd(td, indx);
#endif
		fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd];
		fdp->fd_ofiles[dfd] = NULL;
		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
		fdp->fd_ofileflags[dfd] = 0;

		/*
		 * Complete the clean up of the filedesc structure by
		 * recomputing the various hints.
		 */
		if (indx > fdp->fd_lastfile) {
			fdp->fd_lastfile = indx;
		} else {
			while (fdp->fd_lastfile > 0 &&
			    fdp->fd_ofiles[fdp->fd_lastfile] == NULL) {
				fdp->fd_lastfile--;
			}
			if (dfd < fdp->fd_freefile)
				fdp->fd_freefile = dfd;
		}
		/* fdrop_locked() below expects the file locked. */
		if (fp != NULL)
			FILE_LOCK(fp);
		FILEDESC_UNLOCK(fdp);

		/*
		 * we now own the reference to fp that the ofiles[] array
		 * used to own. Release it.
		 */
		if (fp != NULL)
			fdrop_locked(fp, td);
		return (0);

	default:
		FILEDESC_UNLOCK(fdp);
		return (error);
	}
	/* NOTREACHED */
}
2081
/*
 * Get file structures.
 *
 * Sysctl handler that copies out one struct xfile for every open file
 * of every process; with a NULL oldptr it only reports a size estimate.
 */
static int
sysctl_kern_file(SYSCTL_HANDLER_ARGS)
{
	struct xfile xf;
	struct filedesc *fdp;
	struct file *fp;
	struct proc *p;
	int error, n;

	sysctl_wire_old_buffer(req, 0);
	if (req->oldptr == NULL) {
		/* Size probe: sum per-file reference counts plus slack. */
		n = 16;		/* A slight overestimate. */
		sx_slock(&filelist_lock);
		LIST_FOREACH(fp, &filehead, f_list) {
			/*
			 * We should grab the lock, but this is an
			 * estimate, so does it really matter?
			 */
			/* mtx_lock(fp->f_mtxp); */
			n += fp->f_count;
			/* mtx_unlock(fp->f_mtxp); */
		}
		sx_sunlock(&filelist_lock);
		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
	}
	error = 0;
	bzero(&xf, sizeof(xf));
	xf.xf_size = sizeof(xf);
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		PROC_LOCK(p);
		xf.xf_pid = p->p_pid;
		xf.xf_uid = p->p_ucred->cr_uid;
		PROC_UNLOCK(p);
		/* fdesc_mtx is held around the p_fd dereference below --
		 * presumably it serializes against p_fd teardown. */
		mtx_lock(&fdesc_mtx);
		if ((fdp = p->p_fd) == NULL) {
			mtx_unlock(&fdesc_mtx);
			continue;
		}
		FILEDESC_LOCK(fdp);
		for (n = 0; n < fdp->fd_nfiles; ++n) {
			if ((fp = fdp->fd_ofiles[n]) == NULL)
				continue;
			xf.xf_fd = n;
			xf.xf_file = fp;
			xf.xf_data = fp->f_data;
			xf.xf_type = fp->f_type;
			xf.xf_count = fp->f_count;
			xf.xf_msgcount = fp->f_msgcount;
			xf.xf_offset = fp->f_offset;
			xf.xf_flag = fp->f_flag;
			error = SYSCTL_OUT(req, &xf, sizeof(xf));
			if (error)
				break;
		}
		FILEDESC_UNLOCK(fdp);
		mtx_unlock(&fdesc_mtx);
		if (error)
			break;
	}
	sx_sunlock(&allproc_lock);
	return (error);
}
2148
/* Sysctl knobs: the open-file table dump and the file-count limits. */
SYSCTL_PROC(_kern, KERN_FILE, file, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, 0, sysctl_kern_file, "S,xfile", "Entire file table");

SYSCTL_INT(_kern, KERN_MAXFILESPERPROC, maxfilesperproc, CTLFLAG_RW,
    &maxfilesperproc, 0, "Maximum files allowed open per process");

SYSCTL_INT(_kern, KERN_MAXFILES, maxfiles, CTLFLAG_RW,
    &maxfiles, 0, "Maximum number of files");

SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
    &nfiles, 0, "System-wide number of open files");
2160
2161static void
2162fildesc_drvinit(void *unused)
2163{
2164 dev_t dev;
2165
2166 dev = make_dev(&fildesc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0666, "fd/0");
2167 make_dev_alias(dev, "stdin");
2168 dev = make_dev(&fildesc_cdevsw, 1, UID_ROOT, GID_WHEEL, 0666, "fd/1");
2169 make_dev_alias(dev, "stdout");
2170 dev = make_dev(&fildesc_cdevsw, 2, UID_ROOT, GID_WHEEL, 0666, "fd/2");
2171 make_dev_alias(dev, "stderr");
2172}
2173
/*
 * Fallback fileops: read/write/ioctl/stat/close fail with EBADF,
 * poll/kqfilter report nothing (return 0).  See the definitions below.
 */
static fo_rdwr_t badfo_readwrite;
static fo_ioctl_t badfo_ioctl;
static fo_poll_t badfo_poll;
static fo_kqfilter_t badfo_kqfilter;
static fo_stat_t badfo_stat;
static fo_close_t badfo_close;

struct fileops badfileops = {
	badfo_readwrite,	/* read */
	badfo_readwrite,	/* write */
	badfo_ioctl,
	badfo_poll,
	badfo_kqfilter,
	badfo_stat,
	badfo_close,
	0			/* flags */
};
2191
2192static int
2193badfo_readwrite(fp, uio, active_cred, flags, td)
2194 struct file *fp;
2195 struct uio *uio;
2196 struct ucred *active_cred;
2197 struct thread *td;
2198 int flags;
2199{
2200
2201 return (EBADF);
2202}
2203
2204static int
2205badfo_ioctl(fp, com, data, active_cred, td)
2206 struct file *fp;
2207 u_long com;
2208 void *data;
2209 struct ucred *active_cred;
2210 struct thread *td;
2211{
2212
2213 return (EBADF);
2214}
2215
/*
 * badfileops poll method: reports no events ready (returns 0).
 */
static int
badfo_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (0);
}
2226
/*
 * badfileops kqfilter method: attaches nothing, returns 0.
 */
static int
badfo_kqfilter(struct file *fp, struct knote *kn)
{

	return (0);
}
2235
2236static int
2237badfo_stat(fp, sb, active_cred, td)
2238 struct file *fp;
2239 struct stat *sb;
2240 struct ucred *active_cred;
2241 struct thread *td;
2242{
2243
2244 return (EBADF);
2245}
2246
2247static int
2248badfo_close(fp, td)
2249 struct file *fp;
2250 struct thread *td;
2251{
2252
2253 return (EBADF);
2254}
2255
/* Create the /dev/fd device nodes at driver-initialization time. */
SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,
	fildesc_drvinit,NULL)

static void filelistinit(void *);
/* Set up the file zone and global locks early (SI_SUB_LOCK). */
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL)
2261
2262/* ARGSUSED*/
2263static void
2264filelistinit(dummy)
2265 void *dummy;
2266{
2267
2268 file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
2269 NULL, NULL, UMA_ALIGN_PTR, 0);
2270 sx_init(&filelist_lock, "filelist lock");
2271 mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
2272}