/*-
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed
 * to Berkeley by John Heidemann of the UCLA Ficus project.
 *
 * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/vfs_default.c 227697 2011-11-19 07:50:49Z kib $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/event.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/lockf.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/fcntl.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/dirent.h>
#include <sys/poll.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>

static int	vop_nolookup(struct vop_lookup_args *);
static int	vop_norename(struct vop_rename_args *);
static int	vop_nostrategy(struct vop_strategy_args *);
static int	get_next_dirent(struct vnode *vp, struct dirent **dpp,
				char *dirbuf, int dirbuflen, off_t *off,
				char **cpos, int *len, int *eofflag,
				struct thread *td);
static int	dirent_exists(struct vnode *vp, const char *dirname,
			      struct thread *td);

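/*
 * The smallest record we will accept from VOP_READDIR(): the fixed
 * dirent header (sizeof(struct dirent) minus the d_name storage) plus
 * four bytes of name.
 */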
#define DIRENT_MINSIZE (sizeof(struct dirent) - (MAXNAMLEN+1) + 4)

/*
 * This vnode table stores what we want to do if the filesystem doesn't
 * implement a particular VOP.
 *
 * If there is no specific entry here, we will return EOPNOTSUPP.
 *
 * Note that every filesystem has to implement either vop_access
 * or vop_accessx; failing to do so will result in an immediate crash
 * due to stack overflow, as vop_stdaccess() calls vop_stdaccessx(),
 * which calls vop_stdaccess() etc.
 */

struct vop_vector default_vnodeops = {
	.vop_default =		NULL,
	.vop_bypass =		VOP_EOPNOTSUPP,

	.vop_access =		vop_stdaccess,
	.vop_accessx =		vop_stdaccessx,
	.vop_advise =		vop_stdadvise,
	.vop_advlock =		vop_stdadvlock,
	.vop_advlockasync =	vop_stdadvlockasync,
	.vop_advlockpurge =	vop_stdadvlockpurge,
	.vop_allocate =		vop_stdallocate,
	.vop_bmap =		vop_stdbmap,
	.vop_close =		VOP_NULL,
	.vop_fsync =		VOP_NULL,
	.vop_getpages =		vop_stdgetpages,
	.vop_getwritemount = 	vop_stdgetwritemount,
	.vop_inactive =		VOP_NULL,
	.vop_ioctl =		VOP_ENOTTY,
	.vop_kqfilter =		vop_stdkqfilter,
	.vop_islocked =		vop_stdislocked,
	.vop_lock1 =		vop_stdlock,
	.vop_lookup =		vop_nolookup,
	.vop_open =		VOP_NULL,
	.vop_pathconf =		VOP_EINVAL,
	.vop_poll =		vop_nopoll,
	.vop_putpages =		vop_stdputpages,
	.vop_readlink =		VOP_EINVAL,
	.vop_rename =		vop_norename,
	.vop_revoke =		VOP_PANIC,
	.vop_strategy =		vop_nostrategy,
	.vop_unlock =		vop_stdunlock,
	.vop_vptocnp =		vop_stdvptocnp,
	.vop_vptofh =		vop_stdvptofh,
};
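
/*
 * A sketch of typical usage (the myfs_* names are hypothetical): a
 * filesystem chains to this table through vop_default, so any VOP it
 * leaves unset falls back to the entries above:
 *
 *	struct vop_vector myfs_vnodeops = {
 *		.vop_default =	&default_vnodeops,
 *		.vop_lookup =	myfs_lookup,
 *		.vop_read =	myfs_read,
 *	};
 */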

/*
 * Series of placeholder functions for various error returns for
 * VOPs.
 */

int
vop_eopnotsupp(struct vop_generic_args *ap)
{
	/*
	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
	*/

	return (EOPNOTSUPP);
}

int
vop_ebadf(struct vop_generic_args *ap)
{

	return (EBADF);
}

int
vop_enotty(struct vop_generic_args *ap)
{

	return (ENOTTY);
}

int
vop_einval(struct vop_generic_args *ap)
{

	return (EINVAL);
}

int
vop_enoent(struct vop_generic_args *ap)
{

	return (ENOENT);
}

int
vop_null(struct vop_generic_args *ap)
{

	return (0);
}

/*
 * Helper function to panic on some bad VOPs in some filesystems.
 */
int
vop_panic(struct vop_generic_args *ap)
{

	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
}

/*
 * vop_std<something> and vop_no<something> are default functions for use by
 * filesystems that need the "default reasonable" implementation for a
 * particular operation.
 *
 * The documentation for the operations they implement, where it exists, is
 * found in the VOP_<SOMETHING>(9) manpage (all uppercase).
 */

/*
 * Default vop for filesystems that do not support name lookup
 */
static int
vop_nolookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * vop_norename:
 *
 * Handle unlock and reference counting for arguments of vop_rename
 * for filesystems that do not implement the rename operation.
 */
static int
vop_norename(struct vop_rename_args *ap)
{

	vop_rename_fail(ap);
	return (EOPNOTSUPP);
}

/*
 * vop_nostrategy:
 *
 * Strategy routine for VFS devices that have none.
 *
 * BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
 * routine.  Typically this is done for a BIO_READ strategy call.
 * Typically B_INVAL is assumed to already be clear prior to a write
 * and should not be cleared manually unless you just made the buffer
 * invalid.  BIO_ERROR should be cleared either way.
 */

static int
vop_nostrategy (struct vop_strategy_args *ap)
{
	printf("No strategy for buffer at %p\n", ap->a_bp);
	vprint("vnode", ap->a_vp);
	ap->a_bp->b_ioflags |= BIO_ERROR;
	ap->a_bp->b_error = EOPNOTSUPP;
	bufdone(ap->a_bp);
	return (EOPNOTSUPP);
}

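/*
 * Return the directory entry at *cpos, refilling dirbuf via
 * VOP_READDIR() at offset *off whenever the *len bytes left over from
 * the previous read have been consumed.  On success *dpp points at the
 * entry, *cpos and *len are advanced past it, and *off tracks the next
 * VOP_READDIR() offset.
 */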
static int
get_next_dirent(struct vnode *vp, struct dirent **dpp, char *dirbuf,
		int dirbuflen, off_t *off, char **cpos, int *len,
		int *eofflag, struct thread *td)
{
	int error, reclen;
	struct uio uio;
	struct iovec iov;
	struct dirent *dp;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	if (*len == 0) {
		iov.iov_base = dirbuf;
		iov.iov_len = dirbuflen;

		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = *off;
		uio.uio_resid = dirbuflen;
		uio.uio_segflg = UIO_SYSSPACE;
		uio.uio_rw = UIO_READ;
		uio.uio_td = td;

		*eofflag = 0;

#ifdef MAC
		error = mac_vnode_check_readdir(td->td_ucred, vp);
		if (error == 0)
#endif
			error = VOP_READDIR(vp, &uio, td->td_ucred, eofflag,
			    NULL, NULL);
		if (error)
			return (error);

		*off = uio.uio_offset;

		*cpos = dirbuf;
		*len = (dirbuflen - uio.uio_resid);

		if (*len == 0)
			return (ENOENT);
	}

	dp = (struct dirent *)(*cpos);
	reclen = dp->d_reclen;
	*dpp = dp;

	/* Check for a malformed directory entry. */
	if (reclen < DIRENT_MINSIZE)
		return (EINVAL);

	*cpos += reclen;
	*len -= reclen;

	return (0);
}

/*
 * Check if a named file exists in a given directory vnode.
 */
static int
dirent_exists(struct vnode *vp, const char *dirname, struct thread *td)
{
	char *dirbuf, *cpos;
	int error, eofflag, dirbuflen, len, found;
	off_t off;
	struct dirent *dp;
	struct vattr va;

	KASSERT(VOP_ISLOCKED(vp), ("vp %p is not locked", vp));
	KASSERT(vp->v_type == VDIR, ("vp %p is not a directory", vp));

	found = 0;

	error = VOP_GETATTR(vp, &va, td->td_ucred);
	if (error)
		return (found);

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	off = 0;
	len = 0;
	do {
		error = get_next_dirent(vp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    !strcmp(dp->d_name, dirname)) {
			found = 1;
			goto out;
		}
	} while (len > 0 || !eofflag);

out:
	free(dirbuf, M_TEMP);
	return (found);
}

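/*
 * Default VOP_ACCESS(): forward the request to VOP_ACCESSX(), which
 * understands a superset of the classic access bits.  A filesystem
 * must supply at least one of the two (see the table comment above).
 */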
int
vop_stdaccess(struct vop_access_args *ap)
{

	KASSERT((ap->a_accmode & ~(VEXEC | VWRITE | VREAD | VADMIN |
	    VAPPEND)) == 0, ("invalid bit in accmode"));

	return (VOP_ACCESSX(ap->a_vp, ap->a_accmode, ap->a_cred, ap->a_td));
}

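/*
 * Default VOP_ACCESSX(): reduce the extended access bits to the
 * classic VREAD/VWRITE/VEXEC style set with vfs_unixify_accmode() and
 * forward the result to VOP_ACCESS().
 */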
int
vop_stdaccessx(struct vop_accessx_args *ap)
{
	int error;
	accmode_t accmode = ap->a_accmode;

	error = vfs_unixify_accmode(&accmode);
	if (error != 0)
		return (error);

	if (accmode == 0)
		return (0);

	return (VOP_ACCESS(ap->a_vp, accmode, ap->a_cred, ap->a_td));
}

/*
 * Advisory record locking support
 */
int
vop_stdadvlock(struct vop_advlock_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlock(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockasync(struct vop_advlockasync_args *ap)
{
	struct vnode *vp;
	struct ucred *cred;
	struct vattr vattr;
	int error;

	vp = ap->a_vp;
	cred = curthread->td_ucred;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	if (error)
		return (error);

	return (lf_advlockasync(ap, &(vp->v_lockf), vattr.va_size));
}

int
vop_stdadvlockpurge(struct vop_advlockpurge_args *ap)
{
	struct vnode *vp;

	vp = ap->a_vp;
	lf_purgelocks(vp, &vp->v_lockf);
	return (0);
}

/*
 * vop_stdpathconf:
 *
 * Standard implementation of POSIX pathconf, to get information about limits
 * for a filesystem.
 * Override per filesystem for the case where the filesystem has smaller
 * limits.
 */
int
vop_stdpathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
		case _PC_NAME_MAX:
			*ap->a_retval = NAME_MAX;
			return (0);
		case _PC_PATH_MAX:
			*ap->a_retval = PATH_MAX;
			return (0);
		case _PC_LINK_MAX:
			*ap->a_retval = LINK_MAX;
			return (0);
		case _PC_MAX_CANON:
			*ap->a_retval = MAX_CANON;
			return (0);
		case _PC_MAX_INPUT:
			*ap->a_retval = MAX_INPUT;
			return (0);
		case _PC_PIPE_BUF:
			*ap->a_retval = PIPE_BUF;
			return (0);
		case _PC_CHOWN_RESTRICTED:
			*ap->a_retval = 1;
			return (0);
		case _PC_VDISABLE:
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		default:
			return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Standard lock, unlock and islocked functions.
 */
int
vop_stdlock(ap)
	struct vop_lock1_args /* {
		struct vnode *a_vp;
		int a_flags;
		char *file;
		int line;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (_lockmgr_args(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
	    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT, ap->a_file,
	    ap->a_line));
}

/* See above. */
int
vop_stdunlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	return (lockmgr(vp->v_vnlock, ap->a_flags | LK_RELEASE, VI_MTX(vp)));
}

/* See above. */
int
vop_stdislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (lockstatus(ap->a_vp->v_vnlock));
}

/*
 * Return true for select/poll.
 */
int
vop_nopoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{

	return (poll_no_poll(ap->a_events));
}

/*
 * Implement poll for local filesystems that support it.
 */
int
vop_stdpoll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int a_events;
		struct ucred *a_cred;
		struct thread *a_td;
	} */ *ap;
{
	if (ap->a_events & ~POLLSTANDARD)
		return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
	return (ap->a_events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Return our mount point, as we will take charge of the writes.
 */
int
vop_stdgetwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct mount *mp;

	/*
	 * XXX Since this is called unlocked we may be recycled while
	 * attempting to ref the mount.  If this is the case our mountpoint
	 * will be set to NULL.  We only have to prevent this call from
	 * returning with a ref to an incorrect mountpoint.  It is not
	 * harmful to return with a ref to our previous mountpoint.
	 */
	mp = ap->a_vp->v_mount;
	if (mp != NULL) {
		vfs_ref(mp);
		if (mp != ap->a_vp->v_mount) {
			vfs_rel(mp);
			mp = NULL;
		}
	}
	*(ap->a_mpp) = mp;
	return (0);
}

/*
 * Default VOP_BMAP() (see VOP_BMAP(9)): assume a trivial linear mapping,
 * translating the logical block number (in f_iosize units) to DEV_BSIZE
 * units, and report no read-ahead or read-behind runs.
 */
int
vop_stdbmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_bop != NULL)
		*ap->a_bop = &ap->a_vp->v_bufobj;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn * btodb(ap->a_vp->v_mount->mnt_stat.f_iosize);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

int
vop_stdfsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct bufobj *bo;
	struct buf *nbp;
	int error = 0;
	int maxretry = 1000;	/* large, arbitrarily chosen */

	bo = &vp->v_bufobj;
	BO_LOCK(bo);
loop1:
	/*
	 * MARK/SCAN initialization to avoid infinite loops.
	 */
	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
		bp->b_vflags &= ~BV_SCANNED;
		bp->b_error = 0;
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
loop2:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if ((bp->b_vflags & BV_SCANNED) != 0)
			continue;
		bp->b_vflags |= BV_SCANNED;
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			continue;
		BO_UNLOCK(bo);
		KASSERT(bp->b_bufobj == bo,
		    ("bp %p wrong b_bufobj %p should be %p",
		    bp, bp->b_bufobj, bo));
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("fsync: not dirty");
		if ((vp->v_object != NULL) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
		} else {
			bremfree(bp);
			bawrite(bp);
		}
		BO_LOCK(bo);
		goto loop2;
	}

	/*
	 * If synchronous the caller expects us to completely resolve all
	 * dirty buffers in the system.  Wait for in-progress I/O to
	 * complete (which could include background bitmap writes), then
	 * retry if dirty blocks still exist.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		bufobj_wwait(bo, 0, 0);
		if (bo->bo_dirty.bv_cnt > 0) {
			/*
			 * If we are unable to write any of these buffers
			 * then we fail now rather than trying endlessly
			 * to write them out.
			 */
			TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
				if ((error = bp->b_error) == 0)
					continue;
			if (error == 0 && --maxretry >= 0)
				goto loop1;
			error = EAGAIN;
		}
	}
	BO_UNLOCK(bo);
	if (error == EAGAIN)
		vprint("fsync: giving up on dirty", vp);

	return (error);
}

/*
 * Default VOP_GETPAGES() (see VOP_GETPAGES(9)): hand the request to the
 * generic vnode pager.
 */
int
vop_stdgetpages(ap)
	struct vop_getpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_reqpage;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m,
	    ap->a_count, ap->a_reqpage);
}

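/* Default VOP_KQFILTER(): hand the request to the generic vfs filter. */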
int
vop_stdkqfilter(struct vop_kqfilter_args *ap)
{
	return vfs_kqfilter(ap);
}

/*
 * Default VOP_PUTPAGES() (see VOP_PUTPAGES(9)): hand the request to the
 * generic vnode pager.
 */
int
vop_stdputpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
		vm_ooffset_t a_offset;
	} */ *ap;
{

	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
	     ap->a_sync, ap->a_rtvals);
}

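/*
 * File handle contents are filesystem-specific, so there is no useful
 * default translation; filesystems that can be NFS-exported must
 * provide their own vop_vptofh.
 */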
int
vop_stdvptofh(struct vop_vptofh_args *ap)
{
	return (EOPNOTSUPP);
}

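/*
 * Default VOP_VPTOCNP(): recover our name by opening ".." and scanning
 * it for an entry whose d_fileno matches our va_fileid.  On success the
 * name is copied to the tail of the caller's buffer and *buflen is
 * updated to its offset.  For union mounts the covered directory is
 * scanned instead, rejecting names shadowed by the upper layer.
 */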
int
vop_stdvptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode **dvp = ap->a_vpp;
	struct ucred *cred = ap->a_cred;
	char *buf = ap->a_buf;
	int *buflen = ap->a_buflen;
	char *dirbuf, *cpos;
	int i, error, eofflag, dirbuflen, flags, locked, len, covered;
	off_t off;
	ino_t fileno;
	struct vattr va;
	struct nameidata nd;
	struct thread *td;
	struct dirent *dp;
	struct vnode *mvp;

	i = *buflen;
	error = 0;
	covered = 0;
	td = curthread;

	if (vp->v_type != VDIR)
		return (ENOENT);

	error = VOP_GETATTR(vp, &va, cred);
	if (error)
		return (error);

	VREF(vp);
	locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	NDINIT_ATVP(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
	    "..", vp, td);
	flags = FREAD;
	error = vn_open_cred(&nd, &flags, 0, VN_OPEN_NOAUDIT, cred, NULL);
	if (error) {
		vn_lock(vp, locked | LK_RETRY);
		return (error);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	mvp = *dvp = nd.ni_vp;

	if (vp->v_mount != (*dvp)->v_mount &&
	    ((*dvp)->v_vflag & VV_ROOT) &&
	    ((*dvp)->v_mount->mnt_flag & MNT_UNION)) {
		*dvp = (*dvp)->v_mount->mnt_vnodecovered;
		VREF(mvp);
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
		VREF(*dvp);
		vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
		covered = 1;
	}

	fileno = va.va_fileid;

	dirbuflen = DEV_BSIZE;
	if (dirbuflen < va.va_blocksize)
		dirbuflen = va.va_blocksize;
	dirbuf = (char *)malloc(dirbuflen, M_TEMP, M_WAITOK);

	if ((*dvp)->v_type != VDIR) {
		error = ENOENT;
		goto out;
	}

	off = 0;
	len = 0;
	do {
		/* call VOP_READDIR of parent */
		error = get_next_dirent(*dvp, &dp, dirbuf, dirbuflen, &off,
					&cpos, &len, &eofflag, td);
		if (error)
			goto out;

		if ((dp->d_type != DT_WHT) &&
		    (dp->d_fileno == fileno)) {
			if (covered) {
				VOP_UNLOCK(*dvp, 0);
				vn_lock(mvp, LK_EXCLUSIVE | LK_RETRY);
				if (dirent_exists(mvp, dp->d_name, td)) {
					error = ENOENT;
					VOP_UNLOCK(mvp, 0);
					vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
					goto out;
				}
				VOP_UNLOCK(mvp, 0);
				vn_lock(*dvp, LK_EXCLUSIVE | LK_RETRY);
			}
			i -= dp->d_namlen;

			if (i < 0) {
				error = ENOMEM;
				goto out;
			}
			bcopy(dp->d_name, buf + i, dp->d_namlen);
			error = 0;
			goto out;
		}
	} while (len > 0 || !eofflag);
	error = ENOENT;

out:
	free(dirbuf, M_TEMP);
	if (!error) {
		*buflen = i;
		vref(*dvp);
	}
	if (covered) {
		vput(*dvp);
		vrele(mvp);
	} else {
		VOP_UNLOCK(mvp, 0);
		vn_close(mvp, FREAD, cred, td);
	}
	vn_lock(vp, locked | LK_RETRY);
	return (error);
}

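/*
 * Default VOP_ALLOCATE(), backing posix_fallocate(2): force block
 * allocation by reading each blocksize-sized chunk of the range and
 * writing it back, zero-filling beyond EOF.  The loop may stop early
 * (e.g. when should_yield() fires); the unprocessed len/offset are
 * handed back so the caller can restart the operation.
 */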
int
vop_stdallocate(struct vop_allocate_args *ap)
{
#ifdef __notyet__
	struct statfs sfs;
#endif
	struct iovec aiov;
	struct vattr vattr, *vap;
	struct uio auio;
	off_t fsize, len, cur, offset;
	uint8_t *buf;
	struct thread *td;
	struct vnode *vp;
	size_t iosize;
	int error;

	buf = NULL;
	error = 0;
	td = curthread;
	vap = &vattr;
	vp = ap->a_vp;
	len = *ap->a_len;
	offset = *ap->a_offset;

	error = VOP_GETATTR(vp, vap, td->td_ucred);
	if (error != 0)
		goto out;
	fsize = vap->va_size;
	iosize = vap->va_blocksize;
	if (iosize == 0)
		iosize = BLKDEV_IOSIZE;
	if (iosize > MAXPHYS)
		iosize = MAXPHYS;
	buf = malloc(iosize, M_TEMP, M_WAITOK);

#ifdef __notyet__
	/*
	 * Check if the filesystem sets f_maxfilesize; if not use
	 * VOP_SETATTR to perform the check.
	 */
	error = VFS_STATFS(vp->v_mount, &sfs, td);
	if (error != 0)
		goto out;
	if (sfs.f_maxfilesize) {
		if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
		    offset + len > sfs.f_maxfilesize) {
			error = EFBIG;
			goto out;
		}
	} else
#endif
	if (offset + len > vap->va_size) {
		/*
		 * Test offset + len against the filesystem's maxfilesize.
		 */
		VATTR_NULL(vap);
		vap->va_size = offset + len;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
		VATTR_NULL(vap);
		vap->va_size = fsize;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error != 0)
			goto out;
	}

	for (;;) {
		/*
		 * Read and write back anything below the nominal file
		 * size.  There's currently no way outside the filesystem
		 * to know whether this area is sparse or not.
		 */
		cur = iosize;
		if ((offset % iosize) != 0)
			cur -= (offset % iosize);
		if (cur > len)
			cur = len;
		if (offset < fsize) {
			aiov.iov_base = buf;
			aiov.iov_len = cur;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = offset;
			auio.uio_resid = cur;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_td = td;
			error = VOP_READ(vp, &auio, 0, td->td_ucred);
			if (error != 0)
				break;
			if (auio.uio_resid > 0) {
				bzero(buf + cur - auio.uio_resid,
				    auio.uio_resid);
			}
		} else {
			bzero(buf, cur);
		}

		aiov.iov_base = buf;
		aiov.iov_len = cur;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = offset;
		auio.uio_resid = cur;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_td = td;

		error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
		if (error != 0)
			break;

		len -= cur;
		offset += cur;
		if (len == 0)
			break;
		if (should_yield())
			break;
	}

 out:
	*ap->a_len = len;
	*ap->a_offset = offset;
	free(buf, M_TEMP);
	return (error);
}

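/*
 * Default VOP_ADVISE(), backing posix_fadvise(2): POSIX_FADV_WILLNEED
 * is accepted but does nothing, while POSIX_FADV_DONTNEED invalidates
 * the vnode's clean buffers and moves the backing VM pages to the
 * cache queue.
 */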
int
vop_stdadvise(struct vop_advise_args *ap)
{
	struct vnode *vp;
	off_t start, end;
	int error, vfslocked;

	vp = ap->a_vp;
	switch (ap->a_advice) {
	case POSIX_FADV_WILLNEED:
		/*
		 * Do nothing for now.  Filesystems should provide a
		 * custom method which starts an asynchronous read of
		 * the requested region.
		 */
		error = 0;
		break;
	case POSIX_FADV_DONTNEED:
		/*
		 * Flush any open FS buffers and then remove pages
		 * from the backing VM object.  Using vinvalbuf() here
		 * is a bit heavy-handed as it flushes all buffers for
		 * the given vnode, not just the buffers covering the
		 * requested range.
		 */
		error = 0;
		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		if (vp->v_iflag & VI_DOOMED) {
			VOP_UNLOCK(vp, 0);
			VFS_UNLOCK_GIANT(vfslocked);
			break;
		}
		vinvalbuf(vp, V_CLEANONLY, 0, 0);
		if (vp->v_object != NULL) {
			start = trunc_page(ap->a_start);
			end = round_page(ap->a_end);
			VM_OBJECT_LOCK(vp->v_object);
			vm_object_page_cache(vp->v_object, OFF_TO_IDX(start),
			    OFF_TO_IDX(end));
			VM_OBJECT_UNLOCK(vp->v_object);
		}
		VOP_UNLOCK(vp, 0);
		VFS_UNLOCK_GIANT(vfslocked);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * vfs default ops
 * used to fill the vfs function table to get reasonable default return values.
 */
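
/*
 * A sketch of typical consumption (the myfs_* names are hypothetical):
 * a filesystem may reference these directly in its vfsops, and
 * vfs_register() is expected to install them for any mandatory entries
 * left NULL:
 *
 *	static struct vfsops myfs_vfsops = {
 *		.vfs_mount =	myfs_mount,
 *		.vfs_root =	myfs_root,
 *		.vfs_statfs =	myfs_statfs,
 *		.vfs_vget =	vfs_stdvget,
 *	};
 */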
int
vfs_stdroot (mp, flags, vpp)
	struct mount *mp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdstatfs (mp, sbp)
	struct mount *mp;
	struct statfs *sbp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdquotactl (mp, cmds, uid, arg)
	struct mount *mp;
	int cmds;
	uid_t uid;
	void *arg;
{

	return (EOPNOTSUPP);
}

int
vfs_stdsync(mp, waitfor)
	struct mount *mp;
	int waitfor;
{
	struct vnode *vp, *mvp;
	struct thread *td;
	int error, lockreq, allerror = 0;

	td = curthread;
	lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
	if (waitfor != MNT_WAIT)
		lockreq |= LK_NOWAIT;
	/*
	 * Force stale buffer cache information to be flushed.
	 */
	MNT_ILOCK(mp);
loop:
	MNT_VNODE_FOREACH(vp, mp, mvp) {
		/* bv_cnt is an acceptable race here. */
		if (vp->v_bufobj.bo_dirty.bv_cnt == 0)
			continue;
		VI_LOCK(vp);
		MNT_IUNLOCK(mp);
		if ((error = vget(vp, lockreq, td)) != 0) {
			MNT_ILOCK(mp);
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp);
				goto loop;
			}
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		vput(vp);
		MNT_ILOCK(mp);
	}
	MNT_IUNLOCK(mp);
	return (allerror);
}

int
vfs_stdnosync (mp, waitfor)
	struct mount *mp;
	int waitfor;
{

	return (0);
}

int
vfs_stdvget (mp, ino, flags, vpp)
	struct mount *mp;
	ino_t ino;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdfhtovp (mp, fhp, flags, vpp)
	struct mount *mp;
	struct fid *fhp;
	int flags;
	struct vnode **vpp;
{

	return (EOPNOTSUPP);
}

int
vfs_stdinit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stduninit (vfsp)
	struct vfsconf *vfsp;
{

	return (0);
}

int
vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)
	struct mount *mp;
	int cmd;
	struct vnode *filename_vp;
	int attrnamespace;
	const char *attrname;
{

	if (filename_vp != NULL)
		VOP_UNLOCK(filename_vp, 0);
	return (EOPNOTSUPP);
}

int
vfs_stdsysctl(mp, op, req)
	struct mount *mp;
	fsctlop_t op;
	struct sysctl_req *req;
{

	return (EOPNOTSUPP);
}

/* end of vfs default ops */