Deleted Added
full compact
vfs_vnops.c (59794) vfs_vnops.c (60041)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
1/*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
39 * $FreeBSD: head/sys/kern/vfs_vnops.c 59794 2000-04-30 18:52:11Z phk $
39 * $FreeBSD: head/sys/kern/vfs_vnops.c 60041 2000-05-05 09:59:14Z phk $
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/fcntl.h>
45#include <sys/file.h>
46#include <sys/stat.h>
47#include <sys/proc.h>
48#include <sys/mount.h>
49#include <sys/namei.h>
50#include <sys/vnode.h>
40 */
41
42#include <sys/param.h>
43#include <sys/systm.h>
44#include <sys/fcntl.h>
45#include <sys/file.h>
46#include <sys/stat.h>
47#include <sys/proc.h>
48#include <sys/mount.h>
49#include <sys/namei.h>
50#include <sys/vnode.h>
51#include <sys/bio.h>
51#include <sys/buf.h>
52#include <sys/filio.h>
53#include <sys/ttycom.h>
54#include <sys/conf.h>
55
56#include <ufs/ufs/quota.h>
57#include <ufs/ufs/inode.h>
58
59static int vn_closefile __P((struct file *fp, struct proc *p));
60static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
61 struct proc *p));
62static int vn_read __P((struct file *fp, struct uio *uio,
63 struct ucred *cred, int flags, struct proc *p));
64static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
65 struct proc *p));
66static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
67static int vn_write __P((struct file *fp, struct uio *uio,
68 struct ucred *cred, int flags, struct proc *p));
69
70struct fileops vnops =
71 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile };
72
73static int filt_nullattach(struct knote *kn);
74static int filt_vnattach(struct knote *kn);
75static void filt_vndetach(struct knote *kn);
76static int filt_vnode(struct knote *kn, long hint);
77static int filt_vnread(struct knote *kn, long hint);
78
79struct filterops vn_filtops =
80 { 1, filt_vnattach, filt_vndetach, filt_vnode };
81
82/*
83 * XXX
84 * filt_vnread is ufs-specific, so the attach routine should really
85 * switch out to different filterops based on the vn filetype
86 */
87struct filterops vn_rwfiltops[] = {
88 { 1, filt_vnattach, filt_vndetach, filt_vnread },
89 { 1, filt_nullattach, NULL, NULL },
90};
91
92/*
93 * Common code for vnode open operations.
94 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
95 *
96 * Note that this do NOT free nameidata for the successful case,
97 * due to the NDINIT being done elsewhere.
98 */
99int
100vn_open(ndp, fmode, cmode)
101 register struct nameidata *ndp;
102 int fmode, cmode;
103{
104 register struct vnode *vp;
105 register struct proc *p = ndp->ni_cnd.cn_proc;
106 register struct ucred *cred = p->p_ucred;
107 struct vattr vat;
108 struct vattr *vap = &vat;
109 int mode, error;
110
111 if (fmode & O_CREAT) {
112 ndp->ni_cnd.cn_nameiop = CREATE;
113 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
114 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
115 ndp->ni_cnd.cn_flags |= FOLLOW;
116 bwillwrite();
117 error = namei(ndp);
118 if (error)
119 return (error);
120 if (ndp->ni_vp == NULL) {
121 VATTR_NULL(vap);
122 vap->va_type = VREG;
123 vap->va_mode = cmode;
124 if (fmode & O_EXCL)
125 vap->va_vaflags |= VA_EXCLUSIVE;
126 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
127 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
128 &ndp->ni_cnd, vap);
129 if (error) {
130 NDFREE(ndp, NDF_ONLY_PNBUF);
131 vput(ndp->ni_dvp);
132 return (error);
133 }
134 vput(ndp->ni_dvp);
135 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
136 ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
137 fmode &= ~O_TRUNC;
138 vp = ndp->ni_vp;
139 } else {
140 if (ndp->ni_dvp == ndp->ni_vp)
141 vrele(ndp->ni_dvp);
142 else
143 vput(ndp->ni_dvp);
144 ndp->ni_dvp = NULL;
145 vp = ndp->ni_vp;
146 if (fmode & O_EXCL) {
147 error = EEXIST;
148 goto bad;
149 }
150 fmode &= ~O_CREAT;
151 }
152 } else {
153 ndp->ni_cnd.cn_nameiop = LOOKUP;
154 ndp->ni_cnd.cn_flags =
155 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
156 error = namei(ndp);
157 if (error)
158 return (error);
159 vp = ndp->ni_vp;
160 }
161 if (vp->v_type == VLNK) {
162 error = EMLINK;
163 goto bad;
164 }
165 if (vp->v_type == VSOCK) {
166 error = EOPNOTSUPP;
167 goto bad;
168 }
169 if ((fmode & O_CREAT) == 0) {
170 mode = 0;
171 if (fmode & (FWRITE | O_TRUNC)) {
172 if (vp->v_type == VDIR) {
173 error = EISDIR;
174 goto bad;
175 }
176 error = vn_writechk(vp);
177 if (error)
178 goto bad;
179 mode |= VWRITE;
180 }
181 if (fmode & FREAD)
182 mode |= VREAD;
183 if (mode) {
184 error = VOP_ACCESS(vp, mode, cred, p);
185 if (error)
186 goto bad;
187 }
188 }
189 if (fmode & O_TRUNC) {
190 VOP_UNLOCK(vp, 0, p); /* XXX */
191 VOP_LEASE(vp, p, cred, LEASE_WRITE);
192 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
193 VATTR_NULL(vap);
194 vap->va_size = 0;
195 error = VOP_SETATTR(vp, vap, cred, p);
196 if (error)
197 goto bad;
198 }
199 error = VOP_OPEN(vp, fmode, cred, p);
200 if (error)
201 goto bad;
202 /*
203 * Make sure that a VM object is created for VMIO support.
204 */
205 if (vn_canvmio(vp) == TRUE) {
206 if ((error = vfs_object_create(vp, p, cred)) != 0)
207 goto bad;
208 }
209
210 if (fmode & FWRITE)
211 vp->v_writecount++;
212 return (0);
213bad:
214 NDFREE(ndp, NDF_ONLY_PNBUF);
215 vput(vp);
216 return (error);
217}
218
219/*
220 * Check for write permissions on the specified vnode.
221 * Prototype text segments cannot be written.
222 */
223int
224vn_writechk(vp)
225 register struct vnode *vp;
226{
227
228 /*
229 * If there's shared text associated with
230 * the vnode, try to free it up once. If
231 * we fail, we can't allow writing.
232 */
233 if (vp->v_flag & VTEXT)
234 return (ETXTBSY);
235 return (0);
236}
237
238/*
239 * Vnode close call
240 */
241int
242vn_close(vp, flags, cred, p)
243 register struct vnode *vp;
244 int flags;
245 struct ucred *cred;
246 struct proc *p;
247{
248 int error;
249
250 if (flags & FWRITE)
251 vp->v_writecount--;
252 error = VOP_CLOSE(vp, flags, cred, p);
253 vrele(vp);
254 return (error);
255}
256
257static __inline
258int
259sequential_heuristic(struct uio *uio, struct file *fp)
260{
261 /*
262 * Sequential heuristic - detect sequential operation
263 */
264 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
265 uio->uio_offset == fp->f_nextoff) {
266 /*
267 * XXX we assume that the filesystem block size is
268 * the default. Not true, but still gives us a pretty
269 * good indicator of how sequential the read operations
270 * are.
271 */
272 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
273 if (fp->f_seqcount >= 127)
274 fp->f_seqcount = 127;
275 return(fp->f_seqcount << 16);
276 }
277
278 /*
279 * Not sequential, quick draw-down of seqcount
280 */
281 if (fp->f_seqcount > 1)
282 fp->f_seqcount = 1;
283 else
284 fp->f_seqcount = 0;
285 return(0);
286}
287
288/*
289 * Package up an I/O request on a vnode into a uio and do it.
290 */
291int
292vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
293 enum uio_rw rw;
294 struct vnode *vp;
295 caddr_t base;
296 int len;
297 off_t offset;
298 enum uio_seg segflg;
299 int ioflg;
300 struct ucred *cred;
301 int *aresid;
302 struct proc *p;
303{
304 struct uio auio;
305 struct iovec aiov;
306 int error;
307
308 if ((ioflg & IO_NODELOCKED) == 0)
309 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
310 auio.uio_iov = &aiov;
311 auio.uio_iovcnt = 1;
312 aiov.iov_base = base;
313 aiov.iov_len = len;
314 auio.uio_resid = len;
315 auio.uio_offset = offset;
316 auio.uio_segflg = segflg;
317 auio.uio_rw = rw;
318 auio.uio_procp = p;
319 if (rw == UIO_READ) {
320 error = VOP_READ(vp, &auio, ioflg, cred);
321 } else {
322 error = VOP_WRITE(vp, &auio, ioflg, cred);
323 }
324 if (aresid)
325 *aresid = auio.uio_resid;
326 else
327 if (auio.uio_resid && error == 0)
328 error = EIO;
329 if ((ioflg & IO_NODELOCKED) == 0)
330 VOP_UNLOCK(vp, 0, p);
331 return (error);
332}
333
334/*
335 * File table vnode read routine.
336 */
337static int
338vn_read(fp, uio, cred, flags, p)
339 struct file *fp;
340 struct uio *uio;
341 struct ucred *cred;
342 struct proc *p;
343 int flags;
344{
345 struct vnode *vp;
346 int error, ioflag;
347
348 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
349 uio->uio_procp, p));
350 vp = (struct vnode *)fp->f_data;
351 ioflag = 0;
352 if (fp->f_flag & FNONBLOCK)
353 ioflag |= IO_NDELAY;
354 VOP_LEASE(vp, p, cred, LEASE_READ);
355 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
356 if ((flags & FOF_OFFSET) == 0)
357 uio->uio_offset = fp->f_offset;
358
359 ioflag |= sequential_heuristic(uio, fp);
360
361 error = VOP_READ(vp, uio, ioflag, cred);
362 if ((flags & FOF_OFFSET) == 0)
363 fp->f_offset = uio->uio_offset;
364 fp->f_nextoff = uio->uio_offset;
365 VOP_UNLOCK(vp, 0, p);
366 return (error);
367}
368
369/*
370 * File table vnode write routine.
371 */
372static int
373vn_write(fp, uio, cred, flags, p)
374 struct file *fp;
375 struct uio *uio;
376 struct ucred *cred;
377 struct proc *p;
378 int flags;
379{
380 struct vnode *vp;
381 int error, ioflag;
382
383 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
384 uio->uio_procp, p));
385 vp = (struct vnode *)fp->f_data;
386 if (vp->v_type == VREG)
387 bwillwrite();
388 vp = (struct vnode *)fp->f_data; /* XXX needed? */
389 ioflag = IO_UNIT;
390 if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
391 ioflag |= IO_APPEND;
392 if (fp->f_flag & FNONBLOCK)
393 ioflag |= IO_NDELAY;
394 if ((fp->f_flag & O_FSYNC) ||
395 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
396 ioflag |= IO_SYNC;
397 VOP_LEASE(vp, p, cred, LEASE_WRITE);
398 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
399 if ((flags & FOF_OFFSET) == 0)
400 uio->uio_offset = fp->f_offset;
401 ioflag |= sequential_heuristic(uio, fp);
402 error = VOP_WRITE(vp, uio, ioflag, cred);
403 if ((flags & FOF_OFFSET) == 0)
404 fp->f_offset = uio->uio_offset;
405 fp->f_nextoff = uio->uio_offset;
406 VOP_UNLOCK(vp, 0, p);
407 return (error);
408}
409
410/*
411 * File table vnode stat routine.
412 */
413static int
414vn_statfile(fp, sb, p)
415 struct file *fp;
416 struct stat *sb;
417 struct proc *p;
418{
419 struct vnode *vp = (struct vnode *)fp->f_data;
420
421 return vn_stat(vp, sb, p);
422}
423
424int
425vn_stat(vp, sb, p)
426 struct vnode *vp;
427 register struct stat *sb;
428 struct proc *p;
429{
430 struct vattr vattr;
431 register struct vattr *vap;
432 int error;
433 u_short mode;
434
435 vap = &vattr;
436 error = VOP_GETATTR(vp, vap, p->p_ucred, p);
437 if (error)
438 return (error);
439
440 /*
441 * Zero the spare stat fields
442 */
443 sb->st_lspare = 0;
444 sb->st_qspare[0] = 0;
445 sb->st_qspare[1] = 0;
446
447 /*
448 * Copy from vattr table
449 */
450 if (vap->va_fsid != VNOVAL)
451 sb->st_dev = vap->va_fsid;
452 else
453 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
454 sb->st_ino = vap->va_fileid;
455 mode = vap->va_mode;
456 switch (vap->va_type) {
457 case VREG:
458 mode |= S_IFREG;
459 break;
460 case VDIR:
461 mode |= S_IFDIR;
462 break;
463 case VBLK:
464 mode |= S_IFBLK;
465 break;
466 case VCHR:
467 mode |= S_IFCHR;
468 break;
469 case VLNK:
470 mode |= S_IFLNK;
471 /* This is a cosmetic change, symlinks do not have a mode. */
472 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
473 sb->st_mode &= ~ACCESSPERMS; /* 0000 */
474 else
475 sb->st_mode |= ACCESSPERMS; /* 0777 */
476 break;
477 case VSOCK:
478 mode |= S_IFSOCK;
479 break;
480 case VFIFO:
481 mode |= S_IFIFO;
482 break;
483 default:
484 return (EBADF);
485 };
486 sb->st_mode = mode;
487 sb->st_nlink = vap->va_nlink;
488 sb->st_uid = vap->va_uid;
489 sb->st_gid = vap->va_gid;
490 sb->st_rdev = vap->va_rdev;
491 sb->st_size = vap->va_size;
492 sb->st_atimespec = vap->va_atime;
493 sb->st_mtimespec = vap->va_mtime;
494 sb->st_ctimespec = vap->va_ctime;
495
496 /*
497 * According to www.opengroup.org, the meaning of st_blksize is
498 * "a filesystem-specific preferred I/O block size for this
499 * object. In some filesystem types, this may vary from file
500 * to file"
501 * Default to zero to catch bogus uses of this field.
502 */
503
504 if (vap->va_type == VREG) {
505 sb->st_blksize = vap->va_blocksize;
506 } else if (vn_isdisk(vp, NULL)) {
507 sb->st_blksize = vp->v_rdev->si_bsize_best;
508 if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
509 sb->st_blksize = vp->v_rdev->si_bsize_phys;
510 if (sb->st_blksize < BLKDEV_IOSIZE)
511 sb->st_blksize = BLKDEV_IOSIZE;
512 } else {
513 sb->st_blksize = 0;
514 }
515
516 sb->st_flags = vap->va_flags;
517 if (suser_xxx(p->p_ucred, 0, 0))
518 sb->st_gen = 0;
519 else
520 sb->st_gen = vap->va_gen;
521
522#if (S_BLKSIZE == 512)
523 /* Optimize this case */
524 sb->st_blocks = vap->va_bytes >> 9;
525#else
526 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
527#endif
528 return (0);
529}
530
531/*
532 * File table vnode ioctl routine.
533 */
534static int
535vn_ioctl(fp, com, data, p)
536 struct file *fp;
537 u_long com;
538 caddr_t data;
539 struct proc *p;
540{
541 register struct vnode *vp = ((struct vnode *)fp->f_data);
542 struct vattr vattr;
543 int error;
544
545 switch (vp->v_type) {
546
547 case VREG:
548 case VDIR:
549 if (com == FIONREAD) {
550 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
551 if (error)
552 return (error);
553 *(int *)data = vattr.va_size - fp->f_offset;
554 return (0);
555 }
556 if (com == FIONBIO || com == FIOASYNC) /* XXX */
557 return (0); /* XXX */
558 /* fall into ... */
559
560 default:
561#if 0
562 return (ENOTTY);
563#endif
564 case VFIFO:
565 case VCHR:
566 case VBLK:
567 if (com == FIODTYPE) {
568 if (vp->v_type != VCHR && vp->v_type != VBLK)
569 return (ENOTTY);
570 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
571 return (0);
572 }
573 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
574 if (error == 0 && com == TIOCSCTTY) {
575
576 /* Do nothing if reassigning same control tty */
577 if (p->p_session->s_ttyvp == vp)
578 return (0);
579
580 /* Get rid of reference to old control tty */
581 if (p->p_session->s_ttyvp)
582 vrele(p->p_session->s_ttyvp);
583
584 p->p_session->s_ttyvp = vp;
585 VREF(vp);
586 }
587 return (error);
588 }
589}
590
591/*
592 * File table vnode poll routine.
593 */
594static int
595vn_poll(fp, events, cred, p)
596 struct file *fp;
597 int events;
598 struct ucred *cred;
599 struct proc *p;
600{
601
602 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p));
603}
604
605/*
606 * Check that the vnode is still valid, and if so
607 * acquire requested lock.
608 */
609int
610#ifndef DEBUG_LOCKS
611vn_lock(vp, flags, p)
612#else
613debug_vn_lock(vp, flags, p, filename, line)
614#endif
615 struct vnode *vp;
616 int flags;
617 struct proc *p;
618#ifdef DEBUG_LOCKS
619 const char *filename;
620 int line;
621#endif
622{
623 int error;
624
625 do {
626 if ((flags & LK_INTERLOCK) == 0)
627 simple_lock(&vp->v_interlock);
628 if (vp->v_flag & VXLOCK) {
629 vp->v_flag |= VXWANT;
630 simple_unlock(&vp->v_interlock);
631 tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
632 error = ENOENT;
633 } else {
634#ifdef DEBUG_LOCKS
635 vp->filename = filename;
636 vp->line = line;
637#endif
638 error = VOP_LOCK(vp,
639 flags | LK_NOPAUSE | LK_INTERLOCK, p);
640 if (error == 0)
641 return (error);
642 }
643 flags &= ~LK_INTERLOCK;
644 } while (flags & LK_RETRY);
645 return (error);
646}
647
648/*
649 * File table vnode close routine.
650 */
651static int
652vn_closefile(fp, p)
653 struct file *fp;
654 struct proc *p;
655{
656
657 fp->f_ops = &badfileops;
658 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
659 fp->f_cred, p));
660}
661
662static int
663filt_vnattach(struct knote *kn)
664{
665 struct vnode *vp;
666
667 if (kn->kn_fp->f_type != DTYPE_VNODE &&
668 kn->kn_fp->f_type != DTYPE_FIFO)
669 return (EBADF);
670
671 vp = (struct vnode *)kn->kn_fp->f_data;
672
673 simple_lock(&vp->v_pollinfo.vpi_lock);
674 SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
675 simple_unlock(&vp->v_pollinfo.vpi_lock);
676
677 return (0);
678}
679
680static void
681filt_vndetach(struct knote *kn)
682{
683 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
684
685 simple_lock(&vp->v_pollinfo.vpi_lock);
686 SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
687 kn, knote, kn_selnext);
688 simple_unlock(&vp->v_pollinfo.vpi_lock);
689}
690
691static int
692filt_vnode(struct knote *kn, long hint)
693{
694
695 if (kn->kn_sfflags & hint)
696 kn->kn_fflags |= hint;
697 return (kn->kn_fflags != 0);
698}
699
700static int
701filt_nullattach(struct knote *kn)
702{
703 return (ENXIO);
704}
705
706/*ARGSUSED*/
707static int
708filt_vnread(struct knote *kn, long hint)
709{
710 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
711 struct inode *ip = VTOI(vp);
712
713 kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
714 return (kn->kn_data != 0);
715}
52#include <sys/buf.h>
53#include <sys/filio.h>
54#include <sys/ttycom.h>
55#include <sys/conf.h>
56
57#include <ufs/ufs/quota.h>
58#include <ufs/ufs/inode.h>
59
60static int vn_closefile __P((struct file *fp, struct proc *p));
61static int vn_ioctl __P((struct file *fp, u_long com, caddr_t data,
62 struct proc *p));
63static int vn_read __P((struct file *fp, struct uio *uio,
64 struct ucred *cred, int flags, struct proc *p));
65static int vn_poll __P((struct file *fp, int events, struct ucred *cred,
66 struct proc *p));
67static int vn_statfile __P((struct file *fp, struct stat *sb, struct proc *p));
68static int vn_write __P((struct file *fp, struct uio *uio,
69 struct ucred *cred, int flags, struct proc *p));
70
71struct fileops vnops =
72 { vn_read, vn_write, vn_ioctl, vn_poll, vn_statfile, vn_closefile };
73
74static int filt_nullattach(struct knote *kn);
75static int filt_vnattach(struct knote *kn);
76static void filt_vndetach(struct knote *kn);
77static int filt_vnode(struct knote *kn, long hint);
78static int filt_vnread(struct knote *kn, long hint);
79
80struct filterops vn_filtops =
81 { 1, filt_vnattach, filt_vndetach, filt_vnode };
82
83/*
84 * XXX
85 * filt_vnread is ufs-specific, so the attach routine should really
86 * switch out to different filterops based on the vn filetype
87 */
88struct filterops vn_rwfiltops[] = {
89 { 1, filt_vnattach, filt_vndetach, filt_vnread },
90 { 1, filt_nullattach, NULL, NULL },
91};
92
93/*
94 * Common code for vnode open operations.
95 * Check permissions, and call the VOP_OPEN or VOP_CREATE routine.
96 *
97 * Note that this do NOT free nameidata for the successful case,
98 * due to the NDINIT being done elsewhere.
99 */
100int
101vn_open(ndp, fmode, cmode)
102 register struct nameidata *ndp;
103 int fmode, cmode;
104{
105 register struct vnode *vp;
106 register struct proc *p = ndp->ni_cnd.cn_proc;
107 register struct ucred *cred = p->p_ucred;
108 struct vattr vat;
109 struct vattr *vap = &vat;
110 int mode, error;
111
112 if (fmode & O_CREAT) {
113 ndp->ni_cnd.cn_nameiop = CREATE;
114 ndp->ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
115 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
116 ndp->ni_cnd.cn_flags |= FOLLOW;
117 bwillwrite();
118 error = namei(ndp);
119 if (error)
120 return (error);
121 if (ndp->ni_vp == NULL) {
122 VATTR_NULL(vap);
123 vap->va_type = VREG;
124 vap->va_mode = cmode;
125 if (fmode & O_EXCL)
126 vap->va_vaflags |= VA_EXCLUSIVE;
127 VOP_LEASE(ndp->ni_dvp, p, cred, LEASE_WRITE);
128 error = VOP_CREATE(ndp->ni_dvp, &ndp->ni_vp,
129 &ndp->ni_cnd, vap);
130 if (error) {
131 NDFREE(ndp, NDF_ONLY_PNBUF);
132 vput(ndp->ni_dvp);
133 return (error);
134 }
135 vput(ndp->ni_dvp);
136 ASSERT_VOP_UNLOCKED(ndp->ni_dvp, "create");
137 ASSERT_VOP_LOCKED(ndp->ni_vp, "create");
138 fmode &= ~O_TRUNC;
139 vp = ndp->ni_vp;
140 } else {
141 if (ndp->ni_dvp == ndp->ni_vp)
142 vrele(ndp->ni_dvp);
143 else
144 vput(ndp->ni_dvp);
145 ndp->ni_dvp = NULL;
146 vp = ndp->ni_vp;
147 if (fmode & O_EXCL) {
148 error = EEXIST;
149 goto bad;
150 }
151 fmode &= ~O_CREAT;
152 }
153 } else {
154 ndp->ni_cnd.cn_nameiop = LOOKUP;
155 ndp->ni_cnd.cn_flags =
156 ((fmode & O_NOFOLLOW) ? NOFOLLOW : FOLLOW) | LOCKLEAF;
157 error = namei(ndp);
158 if (error)
159 return (error);
160 vp = ndp->ni_vp;
161 }
162 if (vp->v_type == VLNK) {
163 error = EMLINK;
164 goto bad;
165 }
166 if (vp->v_type == VSOCK) {
167 error = EOPNOTSUPP;
168 goto bad;
169 }
170 if ((fmode & O_CREAT) == 0) {
171 mode = 0;
172 if (fmode & (FWRITE | O_TRUNC)) {
173 if (vp->v_type == VDIR) {
174 error = EISDIR;
175 goto bad;
176 }
177 error = vn_writechk(vp);
178 if (error)
179 goto bad;
180 mode |= VWRITE;
181 }
182 if (fmode & FREAD)
183 mode |= VREAD;
184 if (mode) {
185 error = VOP_ACCESS(vp, mode, cred, p);
186 if (error)
187 goto bad;
188 }
189 }
190 if (fmode & O_TRUNC) {
191 VOP_UNLOCK(vp, 0, p); /* XXX */
192 VOP_LEASE(vp, p, cred, LEASE_WRITE);
193 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
194 VATTR_NULL(vap);
195 vap->va_size = 0;
196 error = VOP_SETATTR(vp, vap, cred, p);
197 if (error)
198 goto bad;
199 }
200 error = VOP_OPEN(vp, fmode, cred, p);
201 if (error)
202 goto bad;
203 /*
204 * Make sure that a VM object is created for VMIO support.
205 */
206 if (vn_canvmio(vp) == TRUE) {
207 if ((error = vfs_object_create(vp, p, cred)) != 0)
208 goto bad;
209 }
210
211 if (fmode & FWRITE)
212 vp->v_writecount++;
213 return (0);
214bad:
215 NDFREE(ndp, NDF_ONLY_PNBUF);
216 vput(vp);
217 return (error);
218}
219
220/*
221 * Check for write permissions on the specified vnode.
222 * Prototype text segments cannot be written.
223 */
224int
225vn_writechk(vp)
226 register struct vnode *vp;
227{
228
229 /*
230 * If there's shared text associated with
231 * the vnode, try to free it up once. If
232 * we fail, we can't allow writing.
233 */
234 if (vp->v_flag & VTEXT)
235 return (ETXTBSY);
236 return (0);
237}
238
239/*
240 * Vnode close call
241 */
242int
243vn_close(vp, flags, cred, p)
244 register struct vnode *vp;
245 int flags;
246 struct ucred *cred;
247 struct proc *p;
248{
249 int error;
250
251 if (flags & FWRITE)
252 vp->v_writecount--;
253 error = VOP_CLOSE(vp, flags, cred, p);
254 vrele(vp);
255 return (error);
256}
257
258static __inline
259int
260sequential_heuristic(struct uio *uio, struct file *fp)
261{
262 /*
263 * Sequential heuristic - detect sequential operation
264 */
265 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
266 uio->uio_offset == fp->f_nextoff) {
267 /*
268 * XXX we assume that the filesystem block size is
269 * the default. Not true, but still gives us a pretty
270 * good indicator of how sequential the read operations
271 * are.
272 */
273 fp->f_seqcount += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
274 if (fp->f_seqcount >= 127)
275 fp->f_seqcount = 127;
276 return(fp->f_seqcount << 16);
277 }
278
279 /*
280 * Not sequential, quick draw-down of seqcount
281 */
282 if (fp->f_seqcount > 1)
283 fp->f_seqcount = 1;
284 else
285 fp->f_seqcount = 0;
286 return(0);
287}
288
289/*
290 * Package up an I/O request on a vnode into a uio and do it.
291 */
292int
293vn_rdwr(rw, vp, base, len, offset, segflg, ioflg, cred, aresid, p)
294 enum uio_rw rw;
295 struct vnode *vp;
296 caddr_t base;
297 int len;
298 off_t offset;
299 enum uio_seg segflg;
300 int ioflg;
301 struct ucred *cred;
302 int *aresid;
303 struct proc *p;
304{
305 struct uio auio;
306 struct iovec aiov;
307 int error;
308
309 if ((ioflg & IO_NODELOCKED) == 0)
310 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
311 auio.uio_iov = &aiov;
312 auio.uio_iovcnt = 1;
313 aiov.iov_base = base;
314 aiov.iov_len = len;
315 auio.uio_resid = len;
316 auio.uio_offset = offset;
317 auio.uio_segflg = segflg;
318 auio.uio_rw = rw;
319 auio.uio_procp = p;
320 if (rw == UIO_READ) {
321 error = VOP_READ(vp, &auio, ioflg, cred);
322 } else {
323 error = VOP_WRITE(vp, &auio, ioflg, cred);
324 }
325 if (aresid)
326 *aresid = auio.uio_resid;
327 else
328 if (auio.uio_resid && error == 0)
329 error = EIO;
330 if ((ioflg & IO_NODELOCKED) == 0)
331 VOP_UNLOCK(vp, 0, p);
332 return (error);
333}
334
335/*
336 * File table vnode read routine.
337 */
338static int
339vn_read(fp, uio, cred, flags, p)
340 struct file *fp;
341 struct uio *uio;
342 struct ucred *cred;
343 struct proc *p;
344 int flags;
345{
346 struct vnode *vp;
347 int error, ioflag;
348
349 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
350 uio->uio_procp, p));
351 vp = (struct vnode *)fp->f_data;
352 ioflag = 0;
353 if (fp->f_flag & FNONBLOCK)
354 ioflag |= IO_NDELAY;
355 VOP_LEASE(vp, p, cred, LEASE_READ);
356 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
357 if ((flags & FOF_OFFSET) == 0)
358 uio->uio_offset = fp->f_offset;
359
360 ioflag |= sequential_heuristic(uio, fp);
361
362 error = VOP_READ(vp, uio, ioflag, cred);
363 if ((flags & FOF_OFFSET) == 0)
364 fp->f_offset = uio->uio_offset;
365 fp->f_nextoff = uio->uio_offset;
366 VOP_UNLOCK(vp, 0, p);
367 return (error);
368}
369
370/*
371 * File table vnode write routine.
372 */
373static int
374vn_write(fp, uio, cred, flags, p)
375 struct file *fp;
376 struct uio *uio;
377 struct ucred *cred;
378 struct proc *p;
379 int flags;
380{
381 struct vnode *vp;
382 int error, ioflag;
383
384 KASSERT(uio->uio_procp == p, ("uio_procp %p is not p %p",
385 uio->uio_procp, p));
386 vp = (struct vnode *)fp->f_data;
387 if (vp->v_type == VREG)
388 bwillwrite();
389 vp = (struct vnode *)fp->f_data; /* XXX needed? */
390 ioflag = IO_UNIT;
391 if (vp->v_type == VREG && (fp->f_flag & O_APPEND))
392 ioflag |= IO_APPEND;
393 if (fp->f_flag & FNONBLOCK)
394 ioflag |= IO_NDELAY;
395 if ((fp->f_flag & O_FSYNC) ||
396 (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
397 ioflag |= IO_SYNC;
398 VOP_LEASE(vp, p, cred, LEASE_WRITE);
399 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
400 if ((flags & FOF_OFFSET) == 0)
401 uio->uio_offset = fp->f_offset;
402 ioflag |= sequential_heuristic(uio, fp);
403 error = VOP_WRITE(vp, uio, ioflag, cred);
404 if ((flags & FOF_OFFSET) == 0)
405 fp->f_offset = uio->uio_offset;
406 fp->f_nextoff = uio->uio_offset;
407 VOP_UNLOCK(vp, 0, p);
408 return (error);
409}
410
411/*
412 * File table vnode stat routine.
413 */
414static int
415vn_statfile(fp, sb, p)
416 struct file *fp;
417 struct stat *sb;
418 struct proc *p;
419{
420 struct vnode *vp = (struct vnode *)fp->f_data;
421
422 return vn_stat(vp, sb, p);
423}
424
425int
426vn_stat(vp, sb, p)
427 struct vnode *vp;
428 register struct stat *sb;
429 struct proc *p;
430{
431 struct vattr vattr;
432 register struct vattr *vap;
433 int error;
434 u_short mode;
435
436 vap = &vattr;
437 error = VOP_GETATTR(vp, vap, p->p_ucred, p);
438 if (error)
439 return (error);
440
441 /*
442 * Zero the spare stat fields
443 */
444 sb->st_lspare = 0;
445 sb->st_qspare[0] = 0;
446 sb->st_qspare[1] = 0;
447
448 /*
449 * Copy from vattr table
450 */
451 if (vap->va_fsid != VNOVAL)
452 sb->st_dev = vap->va_fsid;
453 else
454 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
455 sb->st_ino = vap->va_fileid;
456 mode = vap->va_mode;
457 switch (vap->va_type) {
458 case VREG:
459 mode |= S_IFREG;
460 break;
461 case VDIR:
462 mode |= S_IFDIR;
463 break;
464 case VBLK:
465 mode |= S_IFBLK;
466 break;
467 case VCHR:
468 mode |= S_IFCHR;
469 break;
470 case VLNK:
471 mode |= S_IFLNK;
472 /* This is a cosmetic change, symlinks do not have a mode. */
473 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
474 sb->st_mode &= ~ACCESSPERMS; /* 0000 */
475 else
476 sb->st_mode |= ACCESSPERMS; /* 0777 */
477 break;
478 case VSOCK:
479 mode |= S_IFSOCK;
480 break;
481 case VFIFO:
482 mode |= S_IFIFO;
483 break;
484 default:
485 return (EBADF);
486 };
487 sb->st_mode = mode;
488 sb->st_nlink = vap->va_nlink;
489 sb->st_uid = vap->va_uid;
490 sb->st_gid = vap->va_gid;
491 sb->st_rdev = vap->va_rdev;
492 sb->st_size = vap->va_size;
493 sb->st_atimespec = vap->va_atime;
494 sb->st_mtimespec = vap->va_mtime;
495 sb->st_ctimespec = vap->va_ctime;
496
497 /*
498 * According to www.opengroup.org, the meaning of st_blksize is
499 * "a filesystem-specific preferred I/O block size for this
500 * object. In some filesystem types, this may vary from file
501 * to file"
502 * Default to zero to catch bogus uses of this field.
503 */
504
505 if (vap->va_type == VREG) {
506 sb->st_blksize = vap->va_blocksize;
507 } else if (vn_isdisk(vp, NULL)) {
508 sb->st_blksize = vp->v_rdev->si_bsize_best;
509 if (sb->st_blksize < vp->v_rdev->si_bsize_phys)
510 sb->st_blksize = vp->v_rdev->si_bsize_phys;
511 if (sb->st_blksize < BLKDEV_IOSIZE)
512 sb->st_blksize = BLKDEV_IOSIZE;
513 } else {
514 sb->st_blksize = 0;
515 }
516
517 sb->st_flags = vap->va_flags;
518 if (suser_xxx(p->p_ucred, 0, 0))
519 sb->st_gen = 0;
520 else
521 sb->st_gen = vap->va_gen;
522
523#if (S_BLKSIZE == 512)
524 /* Optimize this case */
525 sb->st_blocks = vap->va_bytes >> 9;
526#else
527 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
528#endif
529 return (0);
530}
531
532/*
533 * File table vnode ioctl routine.
534 */
535static int
536vn_ioctl(fp, com, data, p)
537 struct file *fp;
538 u_long com;
539 caddr_t data;
540 struct proc *p;
541{
542 register struct vnode *vp = ((struct vnode *)fp->f_data);
543 struct vattr vattr;
544 int error;
545
546 switch (vp->v_type) {
547
548 case VREG:
549 case VDIR:
550 if (com == FIONREAD) {
551 error = VOP_GETATTR(vp, &vattr, p->p_ucred, p);
552 if (error)
553 return (error);
554 *(int *)data = vattr.va_size - fp->f_offset;
555 return (0);
556 }
557 if (com == FIONBIO || com == FIOASYNC) /* XXX */
558 return (0); /* XXX */
559 /* fall into ... */
560
561 default:
562#if 0
563 return (ENOTTY);
564#endif
565 case VFIFO:
566 case VCHR:
567 case VBLK:
568 if (com == FIODTYPE) {
569 if (vp->v_type != VCHR && vp->v_type != VBLK)
570 return (ENOTTY);
571 *(int *)data = devsw(vp->v_rdev)->d_flags & D_TYPEMASK;
572 return (0);
573 }
574 error = VOP_IOCTL(vp, com, data, fp->f_flag, p->p_ucred, p);
575 if (error == 0 && com == TIOCSCTTY) {
576
577 /* Do nothing if reassigning same control tty */
578 if (p->p_session->s_ttyvp == vp)
579 return (0);
580
581 /* Get rid of reference to old control tty */
582 if (p->p_session->s_ttyvp)
583 vrele(p->p_session->s_ttyvp);
584
585 p->p_session->s_ttyvp = vp;
586 VREF(vp);
587 }
588 return (error);
589 }
590}
591
592/*
593 * File table vnode poll routine.
594 */
595static int
596vn_poll(fp, events, cred, p)
597 struct file *fp;
598 int events;
599 struct ucred *cred;
600 struct proc *p;
601{
602
603 return (VOP_POLL(((struct vnode *)fp->f_data), events, cred, p));
604}
605
606/*
607 * Check that the vnode is still valid, and if so
608 * acquire requested lock.
609 */
610int
611#ifndef DEBUG_LOCKS
612vn_lock(vp, flags, p)
613#else
614debug_vn_lock(vp, flags, p, filename, line)
615#endif
616 struct vnode *vp;
617 int flags;
618 struct proc *p;
619#ifdef DEBUG_LOCKS
620 const char *filename;
621 int line;
622#endif
623{
624 int error;
625
626 do {
627 if ((flags & LK_INTERLOCK) == 0)
628 simple_lock(&vp->v_interlock);
629 if (vp->v_flag & VXLOCK) {
630 vp->v_flag |= VXWANT;
631 simple_unlock(&vp->v_interlock);
632 tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
633 error = ENOENT;
634 } else {
635#ifdef DEBUG_LOCKS
636 vp->filename = filename;
637 vp->line = line;
638#endif
639 error = VOP_LOCK(vp,
640 flags | LK_NOPAUSE | LK_INTERLOCK, p);
641 if (error == 0)
642 return (error);
643 }
644 flags &= ~LK_INTERLOCK;
645 } while (flags & LK_RETRY);
646 return (error);
647}
648
649/*
650 * File table vnode close routine.
651 */
652static int
653vn_closefile(fp, p)
654 struct file *fp;
655 struct proc *p;
656{
657
658 fp->f_ops = &badfileops;
659 return (vn_close(((struct vnode *)fp->f_data), fp->f_flag,
660 fp->f_cred, p));
661}
662
663static int
664filt_vnattach(struct knote *kn)
665{
666 struct vnode *vp;
667
668 if (kn->kn_fp->f_type != DTYPE_VNODE &&
669 kn->kn_fp->f_type != DTYPE_FIFO)
670 return (EBADF);
671
672 vp = (struct vnode *)kn->kn_fp->f_data;
673
674 simple_lock(&vp->v_pollinfo.vpi_lock);
675 SLIST_INSERT_HEAD(&vp->v_pollinfo.vpi_selinfo.si_note, kn, kn_selnext);
676 simple_unlock(&vp->v_pollinfo.vpi_lock);
677
678 return (0);
679}
680
681static void
682filt_vndetach(struct knote *kn)
683{
684 struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
685
686 simple_lock(&vp->v_pollinfo.vpi_lock);
687 SLIST_REMOVE(&vp->v_pollinfo.vpi_selinfo.si_note,
688 kn, knote, kn_selnext);
689 simple_unlock(&vp->v_pollinfo.vpi_lock);
690}
691
692static int
693filt_vnode(struct knote *kn, long hint)
694{
695
696 if (kn->kn_sfflags & hint)
697 kn->kn_fflags |= hint;
698 return (kn->kn_fflags != 0);
699}
700
701static int
702filt_nullattach(struct knote *kn)
703{
704 return (ENXIO);
705}
706
/*ARGSUSED*/
/*
 * Read filter: report how much data is readable past the file's
 * current offset.
 *
 * NOTE(review): VTOI() is a filesystem-private (inode-based) accessor,
 * so this reaches below the VFS layer; presumably only valid for
 * vnodes backed by such a filesystem -- confirm.
 */
static int
filt_vnread(struct knote *kn, long hint)
{
	struct vnode *vp = (struct vnode *)kn->kn_fp->f_data;
	struct inode *ip = VTOI(vp);

	/* Bytes between the current file offset and end of file. */
	kn->kn_data = ip->i_size - kn->kn_fp->f_offset;
	return (kn->kn_data != 0);
}