Deleted Added
full compact
null_vnops.c (50477) null_vnops.c (50616)
1/*
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * John Heidemann of the UCLA Ficus project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95
37 *
38 * Ancestors:
39 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
1/*
2 * Copyright (c) 1992, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * John Heidemann of the UCLA Ficus project.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95
37 *
38 * Ancestors:
39 * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
40 * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 50477 1999-08-28 01:08:13Z peter $
40 * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 50616 1999-08-30 07:08:04Z bde $
41 * ...and...
42 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
43 *
41 * ...and...
42 * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
43 *
44 * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 50477 1999-08-28 01:08:13Z peter $
44 * $FreeBSD: head/sys/fs/nullfs/null_vnops.c 50616 1999-08-30 07:08:04Z bde $
45 */
46
47/*
48 * Null Layer
49 *
50 * (See mount_null(8) for more information.)
51 *
52 * The null layer duplicates a portion of the file system
53 * name space under a new name. In this respect, it is
54 * similar to the loopback file system. It differs from
55 * the loopback fs in two respects: it is implemented using
56 * a stackable layers techniques, and its "null-node"s stack above
57 * all lower-layer vnodes, not just over directory vnodes.
58 *
59 * The null layer has two purposes. First, it serves as a demonstration
60 * of layering by providing a layer which does nothing. (It actually
61 * does everything the loopback file system does, which is slightly
62 * more than nothing.) Second, the null layer can serve as a prototype
63 * layer. Since it provides all necessary layer framework,
64 * new file system layers can be created very easily by starting
65 * with a null layer.
66 *
67 * The remainder of this man page examines the null layer as a basis
68 * for constructing new layers.
69 *
70 *
71 * INSTANTIATING NEW NULL LAYERS
72 *
73 * New null layers are created with mount_null(8).
74 * Mount_null(8) takes two arguments, the pathname
75 * of the lower vfs (target-pn) and the pathname where the null
76 * layer will appear in the namespace (alias-pn). After
77 * the null layer is put into place, the contents
78 * of target-pn subtree will be aliased under alias-pn.
79 *
80 *
81 * OPERATION OF A NULL LAYER
82 *
83 * The null layer is the minimum file system layer,
84 * simply bypassing all possible operations to the lower layer
85 * for processing there. The majority of its activity centers
86 * on the bypass routine, through which nearly all vnode operations
87 * pass.
88 *
89 * The bypass routine accepts arbitrary vnode operations for
90 * handling by the lower layer. It begins by examining vnode
91 * operation arguments and replacing any null-nodes by their
92 * lower-layer equivalents. It then invokes the operation
93 * on the lower layer. Finally, it replaces the null-nodes
94 * in the arguments and, if a vnode is returned by the operation,
95 * stacks a null-node on top of the returned vnode.
96 *
97 * Although bypass handles most operations, vop_getattr, vop_lock,
98 * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
99 * bypassed. Vop_getattr must change the fsid being returned.
100 * Vop_lock and vop_unlock must handle any locking for the
101 * current vnode as well as pass the lock request down.
102 * Vop_inactive and vop_reclaim are not bypassed so that
103 * they can handle freeing null-layer specific data. Vop_print
104 * is not bypassed to avoid excessive debugging information.
105 * Also, certain vnode operations change the locking state within
106 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
107 * and symlink). Ideally these operations should not change the
108 * lock state, but should be changed to let the caller of the
109 * function unlock them. Otherwise all intermediate vnode layers
110 * (such as union, umapfs, etc) must catch these functions to do
111 * the necessary locking at their layer.
112 *
113 *
114 * INSTANTIATING VNODE STACKS
115 *
116 * Mounting associates the null layer with a lower layer,
117 * in effect stacking two VFSes. Vnode stacks are instead
118 * created on demand as files are accessed.
119 *
120 * The initial mount creates a single vnode stack for the
121 * root of the new null layer. All other vnode stacks
122 * are created as a result of vnode operations on
123 * this or other null vnode stacks.
124 *
125 * New vnode stacks come into existence as a result of
126 * an operation which returns a vnode.
127 * The bypass routine stacks a null-node above the new
128 * vnode before returning it to the caller.
129 *
130 * For example, imagine mounting a null layer with
131 * "mount_null /usr/include /dev/layer/null".
132 * Changing directory to /dev/layer/null will assign
133 * the root null-node (which was created when the null layer was mounted).
134 * Now consider opening "sys". A vop_lookup would be
135 * done on the root null-node. This operation would bypass through
136 * to the lower layer which would return a vnode representing
137 * the UFS "sys". Null_bypass then builds a null-node
138 * aliasing the UFS "sys" and returns this to the caller.
139 * Later operations on the null-node "sys" will repeat this
140 * process when constructing other vnode stacks.
141 *
142 *
143 * CREATING OTHER FILE SYSTEM LAYERS
144 *
145 * One of the easiest ways to construct new file system layers is to make
146 * a copy of the null layer, rename all files and variables, and
147 * then begin modifying the copy. Sed can be used to easily rename
148 * all variables.
149 *
150 * The umap layer is an example of a layer descended from the
151 * null layer.
152 *
153 *
154 * INVOKING OPERATIONS ON LOWER LAYERS
155 *
156 * There are two techniques to invoke operations on a lower layer
157 * when the operation cannot be completely bypassed. Each method
158 * is appropriate in different situations. In both cases,
159 * it is the responsibility of the aliasing layer to make
160 * the operation arguments "correct" for the lower layer
161 * by mapping any vnode arguments to the lower layer.
162 *
163 * The first approach is to call the aliasing layer's bypass routine.
164 * This method is most suitable when you wish to invoke the operation
165 * currently being handled on the lower layer. It has the advantage
166 * that the bypass routine already must do argument mapping.
167 * An example of this is null_getattr in the null layer.
168 *
169 * A second approach is to directly invoke vnode operations on
170 * the lower layer with the VOP_OPERATIONNAME interface.
171 * The advantage of this method is that it is easy to invoke
172 * arbitrary operations on the lower layer. The disadvantage
173 * is that vnode arguments must be manually mapped.
174 *
175 */
176
177#include "opt_debug_nullfs.h"
178
179#include <sys/param.h>
180#include <sys/systm.h>
181#include <sys/kernel.h>
182#include <sys/sysctl.h>
183#include <sys/vnode.h>
184#include <sys/mount.h>
185#include <sys/namei.h>
186#include <sys/malloc.h>
187#include <sys/buf.h>
188#include <miscfs/nullfs/null.h>
189
190static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
191SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
192 &null_bug_bypass, 0, "");
193
194static int null_access __P((struct vop_access_args *ap));
195static int null_getattr __P((struct vop_getattr_args *ap));
196static int null_inactive __P((struct vop_inactive_args *ap));
197static int null_lock __P((struct vop_lock_args *ap));
198static int null_lookup __P((struct vop_lookup_args *ap));
199static int null_print __P((struct vop_print_args *ap));
200static int null_reclaim __P((struct vop_reclaim_args *ap));
201static int null_setattr __P((struct vop_setattr_args *ap));
202static int null_unlock __P((struct vop_unlock_args *ap));
203
204/*
205 * This is the 10-Apr-92 bypass routine.
206 * This version has been optimized for speed, throwing away some
207 * safety checks. It should still always work, but it's not as
208 * robust to programmer errors.
45 */
46
47/*
48 * Null Layer
49 *
50 * (See mount_null(8) for more information.)
51 *
52 * The null layer duplicates a portion of the file system
53 * name space under a new name. In this respect, it is
54 * similar to the loopback file system. It differs from
55 * the loopback fs in two respects: it is implemented using
56 * a stackable layers techniques, and its "null-node"s stack above
57 * all lower-layer vnodes, not just over directory vnodes.
58 *
59 * The null layer has two purposes. First, it serves as a demonstration
60 * of layering by providing a layer which does nothing. (It actually
61 * does everything the loopback file system does, which is slightly
62 * more than nothing.) Second, the null layer can serve as a prototype
63 * layer. Since it provides all necessary layer framework,
64 * new file system layers can be created very easily by starting
65 * with a null layer.
66 *
67 * The remainder of this man page examines the null layer as a basis
68 * for constructing new layers.
69 *
70 *
71 * INSTANTIATING NEW NULL LAYERS
72 *
73 * New null layers are created with mount_null(8).
74 * Mount_null(8) takes two arguments, the pathname
75 * of the lower vfs (target-pn) and the pathname where the null
76 * layer will appear in the namespace (alias-pn). After
77 * the null layer is put into place, the contents
78 * of target-pn subtree will be aliased under alias-pn.
79 *
80 *
81 * OPERATION OF A NULL LAYER
82 *
83 * The null layer is the minimum file system layer,
84 * simply bypassing all possible operations to the lower layer
85 * for processing there. The majority of its activity centers
86 * on the bypass routine, through which nearly all vnode operations
87 * pass.
88 *
89 * The bypass routine accepts arbitrary vnode operations for
90 * handling by the lower layer. It begins by examining vnode
91 * operation arguments and replacing any null-nodes by their
92 * lower-layer equivalents. It then invokes the operation
93 * on the lower layer. Finally, it replaces the null-nodes
94 * in the arguments and, if a vnode is returned by the operation,
95 * stacks a null-node on top of the returned vnode.
96 *
97 * Although bypass handles most operations, vop_getattr, vop_lock,
98 * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
99 * bypassed. Vop_getattr must change the fsid being returned.
100 * Vop_lock and vop_unlock must handle any locking for the
101 * current vnode as well as pass the lock request down.
102 * Vop_inactive and vop_reclaim are not bypassed so that
103 * they can handle freeing null-layer specific data. Vop_print
104 * is not bypassed to avoid excessive debugging information.
105 * Also, certain vnode operations change the locking state within
106 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
107 * and symlink). Ideally these operations should not change the
108 * lock state, but should be changed to let the caller of the
109 * function unlock them. Otherwise all intermediate vnode layers
110 * (such as union, umapfs, etc) must catch these functions to do
111 * the necessary locking at their layer.
112 *
113 *
114 * INSTANTIATING VNODE STACKS
115 *
116 * Mounting associates the null layer with a lower layer,
117 * in effect stacking two VFSes. Vnode stacks are instead
118 * created on demand as files are accessed.
119 *
120 * The initial mount creates a single vnode stack for the
121 * root of the new null layer. All other vnode stacks
122 * are created as a result of vnode operations on
123 * this or other null vnode stacks.
124 *
125 * New vnode stacks come into existence as a result of
126 * an operation which returns a vnode.
127 * The bypass routine stacks a null-node above the new
128 * vnode before returning it to the caller.
129 *
130 * For example, imagine mounting a null layer with
131 * "mount_null /usr/include /dev/layer/null".
132 * Changing directory to /dev/layer/null will assign
133 * the root null-node (which was created when the null layer was mounted).
134 * Now consider opening "sys". A vop_lookup would be
135 * done on the root null-node. This operation would bypass through
136 * to the lower layer which would return a vnode representing
137 * the UFS "sys". Null_bypass then builds a null-node
138 * aliasing the UFS "sys" and returns this to the caller.
139 * Later operations on the null-node "sys" will repeat this
140 * process when constructing other vnode stacks.
141 *
142 *
143 * CREATING OTHER FILE SYSTEM LAYERS
144 *
145 * One of the easiest ways to construct new file system layers is to make
146 * a copy of the null layer, rename all files and variables, and
147 * then begin modifying the copy. Sed can be used to easily rename
148 * all variables.
149 *
150 * The umap layer is an example of a layer descended from the
151 * null layer.
152 *
153 *
154 * INVOKING OPERATIONS ON LOWER LAYERS
155 *
156 * There are two techniques to invoke operations on a lower layer
157 * when the operation cannot be completely bypassed. Each method
158 * is appropriate in different situations. In both cases,
159 * it is the responsibility of the aliasing layer to make
160 * the operation arguments "correct" for the lower layer
161 * by mapping any vnode arguments to the lower layer.
162 *
163 * The first approach is to call the aliasing layer's bypass routine.
164 * This method is most suitable when you wish to invoke the operation
165 * currently being handled on the lower layer. It has the advantage
166 * that the bypass routine already must do argument mapping.
167 * An example of this is null_getattr in the null layer.
168 *
169 * A second approach is to directly invoke vnode operations on
170 * the lower layer with the VOP_OPERATIONNAME interface.
171 * The advantage of this method is that it is easy to invoke
172 * arbitrary operations on the lower layer. The disadvantage
173 * is that vnode arguments must be manually mapped.
174 *
175 */
176
177#include "opt_debug_nullfs.h"
178
179#include <sys/param.h>
180#include <sys/systm.h>
181#include <sys/kernel.h>
182#include <sys/sysctl.h>
183#include <sys/vnode.h>
184#include <sys/mount.h>
185#include <sys/namei.h>
186#include <sys/malloc.h>
187#include <sys/buf.h>
188#include <miscfs/nullfs/null.h>
189
190static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
191SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
192 &null_bug_bypass, 0, "");
193
194static int null_access __P((struct vop_access_args *ap));
195static int null_getattr __P((struct vop_getattr_args *ap));
196static int null_inactive __P((struct vop_inactive_args *ap));
197static int null_lock __P((struct vop_lock_args *ap));
198static int null_lookup __P((struct vop_lookup_args *ap));
199static int null_print __P((struct vop_print_args *ap));
200static int null_reclaim __P((struct vop_reclaim_args *ap));
201static int null_setattr __P((struct vop_setattr_args *ap));
202static int null_unlock __P((struct vop_unlock_args *ap));
203
204/*
205 * This is the 10-Apr-92 bypass routine.
206 * This version has been optimized for speed, throwing away some
207 * safety checks. It should still always work, but it's not as
208 * robust to programmer errors.
209 * Define SAFETY to include some error checking code.
210 *
211 * In general, we map all vnodes going down and unmap them on the way back.
212 * As an exception to this, vnodes can be marked "unmapped" by setting
213 * the Nth bit in operation's vdesc_flags.
214 *
215 * Also, some BSD vnode operations have the side effect of vrele'ing
216 * their arguments. With stacking, the reference counts are held
217 * by the upper node, not the lower one, so we must handle these
218 * side-effects here. This is not of concern in Sun-derived systems
219 * since there are no such side-effects.
220 *
221 * This makes the following assumptions:
222 * - only one returned vpp
223 * - no INOUT vpp's (Sun's vop_open has one of these)
224 * - the vnode operation vector of the first vnode should be used
225 * to determine what implementation of the op should be invoked
226 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
227 * problems on rmdir'ing mount points and renaming?)
228 */
229int
230null_bypass(ap)
231 struct vop_generic_args /* {
232 struct vnodeop_desc *a_desc;
233 <other random data follows, presumably>
234 } */ *ap;
235{
236 register struct vnode **this_vp_p;
237 int error;
238 struct vnode *old_vps[VDESC_MAX_VPS];
239 struct vnode **vps_p[VDESC_MAX_VPS];
240 struct vnode ***vppp;
241 struct vnodeop_desc *descp = ap->a_desc;
242 int reles, i;
243
244 if (null_bug_bypass)
245 printf ("null_bypass: %s\n", descp->vdesc_name);
246
209 *
210 * In general, we map all vnodes going down and unmap them on the way back.
211 * As an exception to this, vnodes can be marked "unmapped" by setting
212 * the Nth bit in operation's vdesc_flags.
213 *
214 * Also, some BSD vnode operations have the side effect of vrele'ing
215 * their arguments. With stacking, the reference counts are held
216 * by the upper node, not the lower one, so we must handle these
217 * side-effects here. This is not of concern in Sun-derived systems
218 * since there are no such side-effects.
219 *
220 * This makes the following assumptions:
221 * - only one returned vpp
222 * - no INOUT vpp's (Sun's vop_open has one of these)
223 * - the vnode operation vector of the first vnode should be used
224 * to determine what implementation of the op should be invoked
225 * - all mapped vnodes are of our vnode-type (NEEDSWORK:
226 * problems on rmdir'ing mount points and renaming?)
227 */
228int
229null_bypass(ap)
230 struct vop_generic_args /* {
231 struct vnodeop_desc *a_desc;
232 <other random data follows, presumably>
233 } */ *ap;
234{
235 register struct vnode **this_vp_p;
236 int error;
237 struct vnode *old_vps[VDESC_MAX_VPS];
238 struct vnode **vps_p[VDESC_MAX_VPS];
239 struct vnode ***vppp;
240 struct vnodeop_desc *descp = ap->a_desc;
241 int reles, i;
242
243 if (null_bug_bypass)
244 printf ("null_bypass: %s\n", descp->vdesc_name);
245
247#ifdef SAFETY
246#ifdef DIAGNOSTIC
248 /*
249 * We require at least one vp.
250 */
251 if (descp->vdesc_vp_offsets == NULL ||
252 descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
247 /*
248 * We require at least one vp.
249 */
250 if (descp->vdesc_vp_offsets == NULL ||
251 descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
253 panic ("null_bypass: no vp's in map.");
252 panic ("null_bypass: no vp's in map");
254#endif
255
256 /*
257 * Map the vnodes going in.
258 * Later, we'll invoke the operation based on
259 * the first mapped vnode's operation vector.
260 */
261 reles = descp->vdesc_flags;
262 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
263 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
264 break; /* bail out at end of list */
265 vps_p[i] = this_vp_p =
266 VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
267 /*
268 * We're not guaranteed that any but the first vnode
269 * are of our type. Check for and don't map any
270 * that aren't. (We must always map first vp or vclean fails.)
271 */
272 if (i && (*this_vp_p == NULLVP ||
273 (*this_vp_p)->v_op != null_vnodeop_p)) {
274 old_vps[i] = NULLVP;
275 } else {
276 old_vps[i] = *this_vp_p;
277 *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
278 /*
279 * XXX - Several operations have the side effect
280 * of vrele'ing their vp's. We must account for
281 * that. (This should go away in the future.)
282 */
283 if (reles & 1)
284 VREF(*this_vp_p);
285 }
286
287 }
288
289 /*
290 * Call the operation on the lower layer
291 * with the modified argument structure.
292 */
293 error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
294
295 /*
296 * Maintain the illusion of call-by-value
297 * by restoring vnodes in the argument structure
298 * to their original value.
299 */
300 reles = descp->vdesc_flags;
301 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
302 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
303 break; /* bail out at end of list */
304 if (old_vps[i]) {
305 *(vps_p[i]) = old_vps[i];
306 if (reles & 1)
307 vrele(*(vps_p[i]));
308 }
309 }
310
311 /*
312 * Map the possible out-going vpp
313 * (Assumes that the lower layer always returns
314 * a VREF'ed vpp unless it gets an error.)
315 */
316 if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
317 !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
318 !error) {
319 /*
320 * XXX - even though some ops have vpp returned vp's,
321 * several ops actually vrele this before returning.
322 * We must avoid these ops.
323 * (This should go away when these ops are regularized.)
324 */
325 if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
326 goto out;
327 vppp = VOPARG_OFFSETTO(struct vnode***,
328 descp->vdesc_vpp_offset,ap);
329 if (*vppp)
330 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
331 }
332
333 out:
334 return (error);
335}
336
337/*
338 * We have to carry on the locking protocol on the null layer vnodes
339 * as we progress through the tree. We also have to enforce read-only
340 * if this layer is mounted read-only.
 *
 * Visible behaviour, in order:
 *  - DELETE or RENAME of the last path component on a read-only mount
 *    fails with EROFS before the lower layer is consulted.
 *  - Otherwise the lookup is forwarded through null_bypass().
 *  - An EJUSTRETURN result for CREATE or RENAME of the last component on
 *    a read-only mount is converted to EROFS.
 *  - Unless a_dvp and *a_vpp are the same vnode, the lock state of each
 *    is then adjusted based on what VOP_ISLOCKED() reports for it.
 *
 * Returns the (possibly rewritten) error from null_bypass(), or EROFS.
 *
 * NOTE(review): *ap->a_vpp is read even when null_bypass() returned an
 * error; confirm the lower layer always leaves it in a defined state.
341 */
342static int
343null_lookup(ap)
344 struct vop_lookup_args /* {
345 struct vnode * a_dvp;
346 struct vnode ** a_vpp;
347 struct componentname * a_cnp;
348 } */ *ap;
349{
350 struct componentname *cnp = ap->a_cnp;
351 struct proc *p = cnp->cn_proc;
352 int flags = cnp->cn_flags; /* snapshot taken before the bypass call */
353 struct vop_lock_args lockargs;
354 struct vop_unlock_args unlockargs;
355 struct vnode *dvp, *vp;
356 int error;
357
/* Read-only layer: reject name removal/renaming up front. */
358 if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
359 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
360 return (EROFS);
361 error = null_bypass((struct vop_generic_args *)ap);
/* Read-only layer: the name may not be created here either. */
362 if (error == EJUSTRETURN && (flags & ISLASTCN) &&
363 (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
364 (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
365 error = EROFS;
366 /*
367 * We must do the same locking and unlocking at this layer as
368 * is done in the layers below us. We could figure this out
369 * based on the error return and the LASTCN, LOCKPARENT, and
370 * LOCKLEAF flags. However, it is more expedient to just find
371 * out the state of the lower level vnodes and set ours to the
372 * same state.
373 */
374 dvp = ap->a_dvp;
375 vp = *ap->a_vpp;
/* Same vnode for parent and result: no lock adjustment is made. */
376 if (dvp == vp)
377 return (error);
/* Parent reported as not locked: run the generic unlock on it. */
378 if (!VOP_ISLOCKED(dvp)) {
379 unlockargs.a_vp = dvp;
380 unlockargs.a_flags = 0;
381 unlockargs.a_p = p;
382 vop_nounlock(&unlockargs);
383 }
/* Result reported as locked: run the generic lock on it (LK_SHARED). */
384 if (vp != NULLVP && VOP_ISLOCKED(vp)) {
385 lockargs.a_vp = vp;
386 lockargs.a_flags = LK_SHARED;
387 lockargs.a_p = p;
388 vop_nolock(&lockargs);
389 }
390 return (error);
391}
392
393/*
394 * Setattr call. Disallow write attempts if the layer is mounted read-only.
 *
 * Returns EROFS when flags, uid, gid, atime, mtime or mode would be set
 * on a read-only mount. When a size change is requested the vnode type
 * decides: directories get EISDIR; character/block devices, sockets and
 * FIFOs return without calling the lower layer (EOPNOTSUPP if va_flags is
 * also being set, otherwise 0); every other type gets EROFS on a
 * read-only mount. Requests that pass these checks are forwarded through
 * null_bypass() and its result is returned.
 *
 * NOTE(review): the forward declaration near the top of the file is
 * "static int null_setattr", but this definition omits "static"; the two
 * should probably agree.
395 */
396int
397null_setattr(ap)
398 struct vop_setattr_args /* {
399 struct vnodeop_desc *a_desc;
400 struct vnode *a_vp;
401 struct vattr *a_vap;
402 struct ucred *a_cred;
403 struct proc *a_p;
404 } */ *ap;
405{
406 struct vnode *vp = ap->a_vp;
407 struct vattr *vap = ap->a_vap;
408
/* VNOVAL marks "attribute not being set"; any other value is a change. */
409 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
410 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
411 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
412 (vp->v_mount->mnt_flag & MNT_RDONLY))
413 return (EROFS);
/* Size change requested: outcome depends on the vnode type. */
414 if (vap->va_size != VNOVAL) {
415 switch (vp->v_type) {
416 case VDIR:
417 return (EISDIR);
418 case VCHR:
419 case VBLK:
420 case VSOCK:
421 case VFIFO:
/* These types return here; the lower layer is not called. */
422 if (vap->va_flags != VNOVAL)
423 return (EOPNOTSUPP);
424 return (0);
425 case VREG:
426 case VLNK:
427 default:
428 /*
429 * Disallow write attempts if the filesystem is
430 * mounted read-only.
431 */
432 if (vp->v_mount->mnt_flag & MNT_RDONLY)
433 return (EROFS);
434 }
435 }
436 return (null_bypass((struct vop_generic_args *)ap));
437}
438
439/*
440 * We handle getattr only to change the fsid.
 *
 * NOTE(review): as written, the body only forwards the request through
 * null_bypass() and returns its error code (0 on success); nothing here
 * modifies the fsid in *a_vap. Confirm whether the fsid rewrite was
 * dropped intentionally, in which case this comment is stale.
441 */
442static int
443null_getattr(ap)
444 struct vop_getattr_args /* {
445 struct vnode *a_vp;
446 struct vattr *a_vap;
447 struct ucred *a_cred;
448 struct proc *a_p;
449 } */ *ap;
450{
451 int error;
452
453 if ((error = null_bypass((struct vop_generic_args *)ap)) != 0)
454 return (error);
455 return (0);
456}
457
/*
 * Access check. A VWRITE request on a directory, symlink or regular file
 * fails with EROFS when this layer's mount is read-only; every other
 * request is forwarded through null_bypass() and its result returned.
 */
458static int
459null_access(ap)
460 struct vop_access_args /* {
461 struct vnode *a_vp;
462 int a_mode;
463 struct ucred *a_cred;
464 struct proc *a_p;
465 } */ *ap;
466{
467 struct vnode *vp = ap->a_vp;
468 mode_t mode = ap->a_mode;
469
470 /*
471 * Disallow write attempts on read-only layers;
472 * unless the file is a socket, fifo, or a block or
473 * character device resident on the file system.
474 */
475 if (mode & VWRITE) {
476 switch (vp->v_type) {
477 case VDIR:
478 case VLNK:
479 case VREG:
480 if (vp->v_mount->mnt_flag & MNT_RDONLY)
481 return (EROFS);
482 break;
483 default:
/* Other vnode types are not subject to the read-only check here. */
484 break;
485 }
486 }
487 return (null_bypass((struct vop_generic_args *)ap));
488}
489
490/*
491 * We need to process our own vnode lock and then clear the
492 * interlock flag as it applies only to our vnode, not the
493 * vnodes below us on the stack.
 *
 * The return value of vop_nolock() is not examined. An LK_DRAIN request
 * returns 0 without being passed down. For every other lock type
 * LK_INTERLOCK is cleared in ap->a_flags (the caller's argument structure
 * is modified in place) and the request is forwarded through
 * null_bypass(), whose result is returned.
494 */
495static int
496null_lock(ap)
497 struct vop_lock_args /* {
498 struct vnode *a_vp;
499 int a_flags;
500 struct proc *a_p;
501 } */ *ap;
502{
503
504 vop_nolock(ap);
505 if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
506 return (0);
507 ap->a_flags &= ~LK_INTERLOCK;
508 return (null_bypass((struct vop_generic_args *)ap));
509}
510
511/*
512 * We need to process our own vnode unlock and then clear the
513 * interlock flag as it applies only to our vnode, not the
514 * vnodes below us on the stack.
 *
 * The return value of vop_nounlock() is not examined. LK_INTERLOCK is
 * cleared in ap->a_flags (the caller's argument structure is modified in
 * place) and the request is forwarded through null_bypass(), whose result
 * is returned.
515 */
516static int
517null_unlock(ap)
518 struct vop_unlock_args /* {
519 struct vnode *a_vp;
520 int a_flags;
521 struct proc *a_p;
522 } */ *ap;
523{
524 vop_nounlock(ap);
525 ap->a_flags &= ~LK_INTERLOCK;
526 return (null_bypass((struct vop_generic_args *)ap));
527}
528
/*
 * Inactive call. Not bypassed: invokes VOP_INACTIVE() on the lower vnode
 * recorded in this null_node, then VOP_UNLOCK()s the null-layer vnode
 * itself. The results of both calls are ignored; always returns 0.
 *
 * NOTE(review): the comment in the body opens with "Do nothing", yet the
 * code does call VOP_INACTIVE() and VOP_UNLOCK(); the wording looks
 * older than the code.
 */
529static int
530null_inactive(ap)
531 struct vop_inactive_args /* {
532 struct vnode *a_vp;
533 struct proc *a_p;
534 } */ *ap;
535{
536 struct vnode *vp = ap->a_vp;
537 struct null_node *xp = VTONULL(vp);
538 struct vnode *lowervp = xp->null_lowervp;
539 /*
540 * Do nothing (and _don't_ bypass).
541 * Wait to vrele lowervp until reclaim,
542 * so that until then our null_node is in the
543 * cache and reusable.
544 * We still have to tell the lower layer the vnode
545 * is now inactive though.
546 *
547 * NEEDSWORK: Someday, consider inactive'ing
548 * the lowervp and then trying to reactivate it
549 * with capabilities (v_id)
550 * like they do in the name lookup cache code.
551 * That's too much work for now.
552 */
553 VOP_INACTIVE(lowervp, ap->a_p);
554 VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
555 return (0);
556}
557
/*
 * Reclaim call. Tears down the null-layer state attached to the vnode:
 * clears the null_node's lower-vnode pointer, unlinks the null_node from
 * its hash list, frees vp->v_data, and drops the reference on the lower
 * vnode with vrele(). Always returns 0.
 */
558static int
559null_reclaim(ap)
560 struct vop_reclaim_args /* {
561 struct vnode *a_vp;
562 struct proc *a_p;
563 } */ *ap;
564{
565 struct vnode *vp = ap->a_vp;
566 struct null_node *xp = VTONULL(vp);
567 struct vnode *lowervp = xp->null_lowervp; /* saved: cleared below, released last */
568
569 /*
570 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
571 * so we can't call VOPs on ourself.
572 */
573 /* After this assignment, this node will not be re-used. */
574 xp->null_lowervp = NULLVP;
575 LIST_REMOVE(xp, null_hash);
/* presumably xp (from VTONULL) is the same object as vp->v_data - TODO confirm */
576 FREE(vp->v_data, M_TEMP);
577 vp->v_data = NULL;
578 vrele (lowervp);
579 return (0);
580}
581
/*
 * Print call. Not bypassed: emits a single line with the address of the
 * null-layer vnode and of its lower vnode. Always returns 0.
 */
582static int
583null_print(ap)
584 struct vop_print_args /* {
585 struct vnode *a_vp;
586 } */ *ap;
587{
588 register struct vnode *vp = ap->a_vp;
589 printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp));
590 return (0);
591}
592
593/*
594 * Global vfs data structures
 *
 * null_vnodeop_p is the operations vector that null_bypass() compares
 * against v_op to recognise vnodes belonging to this layer.
 * null_vnodeop_entries pairs each operation descriptor with the routine
 * in this file that handles it; the vop_default_desc entry names
 * null_bypass() (presumably used for every operation not listed
 * explicitly). The table is terminated by a { NULL, NULL } entry.
595 */
596vop_t **null_vnodeop_p;
597static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
598 { &vop_default_desc, (vop_t *) null_bypass },
599 { &vop_access_desc, (vop_t *) null_access },
600 { &vop_getattr_desc, (vop_t *) null_getattr },
601 { &vop_inactive_desc, (vop_t *) null_inactive },
602 { &vop_lock_desc, (vop_t *) null_lock },
603 { &vop_lookup_desc, (vop_t *) null_lookup },
604 { &vop_print_desc, (vop_t *) null_print },
605 { &vop_reclaim_desc, (vop_t *) null_reclaim },
606 { &vop_setattr_desc, (vop_t *) null_setattr },
607 { &vop_unlock_desc, (vop_t *) null_unlock },
608 { NULL, NULL }
609};
/* Ties the vector pointer above to its entry table. */
610static struct vnodeopv_desc null_vnodeop_opv_desc =
611 { &null_vnodeop_p, null_vnodeop_entries };
612
613VNODEOP_SET(null_vnodeop_opv_desc);
253#endif
254
255 /*
256 * Map the vnodes going in.
257 * Later, we'll invoke the operation based on
258 * the first mapped vnode's operation vector.
259 */
260 reles = descp->vdesc_flags;
261 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
262 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
263 break; /* bail out at end of list */
264 vps_p[i] = this_vp_p =
265 VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
266 /*
267 * We're not guaranteed that any but the first vnode
268 * are of our type. Check for and don't map any
269 * that aren't. (We must always map first vp or vclean fails.)
270 */
271 if (i && (*this_vp_p == NULLVP ||
272 (*this_vp_p)->v_op != null_vnodeop_p)) {
273 old_vps[i] = NULLVP;
274 } else {
275 old_vps[i] = *this_vp_p;
276 *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
277 /*
278 * XXX - Several operations have the side effect
279 * of vrele'ing their vp's. We must account for
280 * that. (This should go away in the future.)
281 */
282 if (reles & 1)
283 VREF(*this_vp_p);
284 }
285
286 }
287
288 /*
289 * Call the operation on the lower layer
290 * with the modified argument structure.
291 */
292 error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
293
294 /*
295 * Maintain the illusion of call-by-value
296 * by restoring vnodes in the argument structure
297 * to their original value.
298 */
299 reles = descp->vdesc_flags;
300 for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
301 if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
302 break; /* bail out at end of list */
303 if (old_vps[i]) {
304 *(vps_p[i]) = old_vps[i];
305 if (reles & 1)
306 vrele(*(vps_p[i]));
307 }
308 }
309
310 /*
311 * Map the possible out-going vpp
312 * (Assumes that the lower layer always returns
313 * a VREF'ed vpp unless it gets an error.)
314 */
315 if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
316 !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
317 !error) {
318 /*
319 * XXX - even though some ops have vpp returned vp's,
320 * several ops actually vrele this before returning.
321 * We must avoid these ops.
322 * (This should go away when these ops are regularized.)
323 */
324 if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
325 goto out;
326 vppp = VOPARG_OFFSETTO(struct vnode***,
327 descp->vdesc_vpp_offset,ap);
328 if (*vppp)
329 error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
330 }
331
332 out:
333 return (error);
334}
335
336/*
337 * We have to carry on the locking protocol on the null layer vnodes
338 * as we progress through the tree. We also have to enforce read-only
339 * if this layer is mounted read-only.
340 */
341static int
342null_lookup(ap)
343 struct vop_lookup_args /* {
344 struct vnode * a_dvp;
345 struct vnode ** a_vpp;
346 struct componentname * a_cnp;
347 } */ *ap;
348{
349 struct componentname *cnp = ap->a_cnp;
350 struct proc *p = cnp->cn_proc;
351 int flags = cnp->cn_flags;
352 struct vop_lock_args lockargs;
353 struct vop_unlock_args unlockargs;
354 struct vnode *dvp, *vp;
355 int error;
356
357 if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
358 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
359 return (EROFS);
360 error = null_bypass((struct vop_generic_args *)ap);
361 if (error == EJUSTRETURN && (flags & ISLASTCN) &&
362 (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
363 (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
364 error = EROFS;
365 /*
366 * We must do the same locking and unlocking at this layer as
367 * is done in the layers below us. We could figure this out
368 * based on the error return and the LASTCN, LOCKPARENT, and
369 * LOCKLEAF flags. However, it is more expidient to just find
370 * out the state of the lower level vnodes and set ours to the
371 * same state.
372 */
373 dvp = ap->a_dvp;
374 vp = *ap->a_vpp;
375 if (dvp == vp)
376 return (error);
377 if (!VOP_ISLOCKED(dvp)) {
378 unlockargs.a_vp = dvp;
379 unlockargs.a_flags = 0;
380 unlockargs.a_p = p;
381 vop_nounlock(&unlockargs);
382 }
383 if (vp != NULLVP && VOP_ISLOCKED(vp)) {
384 lockargs.a_vp = vp;
385 lockargs.a_flags = LK_SHARED;
386 lockargs.a_p = p;
387 vop_nolock(&lockargs);
388 }
389 return (error);
390}
391
392/*
393 * Setattr call. Disallow write attempts if the layer is mounted read-only.
394 */
395int
396null_setattr(ap)
397 struct vop_setattr_args /* {
398 struct vnodeop_desc *a_desc;
399 struct vnode *a_vp;
400 struct vattr *a_vap;
401 struct ucred *a_cred;
402 struct proc *a_p;
403 } */ *ap;
404{
405 struct vnode *vp = ap->a_vp;
406 struct vattr *vap = ap->a_vap;
407
408 if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
409 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
410 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
411 (vp->v_mount->mnt_flag & MNT_RDONLY))
412 return (EROFS);
413 if (vap->va_size != VNOVAL) {
414 switch (vp->v_type) {
415 case VDIR:
416 return (EISDIR);
417 case VCHR:
418 case VBLK:
419 case VSOCK:
420 case VFIFO:
421 if (vap->va_flags != VNOVAL)
422 return (EOPNOTSUPP);
423 return (0);
424 case VREG:
425 case VLNK:
426 default:
427 /*
428 * Disallow write attempts if the filesystem is
429 * mounted read-only.
430 */
431 if (vp->v_mount->mnt_flag & MNT_RDONLY)
432 return (EROFS);
433 }
434 }
435 return (null_bypass((struct vop_generic_args *)ap));
436}
437
/*
 * Getattr call.  The request is passed to the lower layer through
 * null_bypass and whatever that returns (0 or an error) is returned
 * to the caller; the attributes are not modified here.
 */
static int
null_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{

	return (null_bypass((struct vop_generic_args *)ap));
}
456
457static int
458null_access(ap)
459 struct vop_access_args /* {
460 struct vnode *a_vp;
461 int a_mode;
462 struct ucred *a_cred;
463 struct proc *a_p;
464 } */ *ap;
465{
466 struct vnode *vp = ap->a_vp;
467 mode_t mode = ap->a_mode;
468
469 /*
470 * Disallow write attempts on read-only layers;
471 * unless the file is a socket, fifo, or a block or
472 * character device resident on the file system.
473 */
474 if (mode & VWRITE) {
475 switch (vp->v_type) {
476 case VDIR:
477 case VLNK:
478 case VREG:
479 if (vp->v_mount->mnt_flag & MNT_RDONLY)
480 return (EROFS);
481 break;
482 default:
483 break;
484 }
485 }
486 return (null_bypass((struct vop_generic_args *)ap));
487}
488
489/*
490 * We need to process our own vnode lock and then clear the
491 * interlock flag as it applies only to our vnode, not the
492 * vnodes below us on the stack.
493 */
494static int
495null_lock(ap)
496 struct vop_lock_args /* {
497 struct vnode *a_vp;
498 int a_flags;
499 struct proc *a_p;
500 } */ *ap;
501{
502
503 vop_nolock(ap);
504 if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
505 return (0);
506 ap->a_flags &= ~LK_INTERLOCK;
507 return (null_bypass((struct vop_generic_args *)ap));
508}
509
510/*
511 * We need to process our own vnode unlock and then clear the
512 * interlock flag as it applies only to our vnode, not the
513 * vnodes below us on the stack.
514 */
515static int
516null_unlock(ap)
517 struct vop_unlock_args /* {
518 struct vnode *a_vp;
519 int a_flags;
520 struct proc *a_p;
521 } */ *ap;
522{
523 vop_nounlock(ap);
524 ap->a_flags &= ~LK_INTERLOCK;
525 return (null_bypass((struct vop_generic_args *)ap));
526}
527
528static int
529null_inactive(ap)
530 struct vop_inactive_args /* {
531 struct vnode *a_vp;
532 struct proc *a_p;
533 } */ *ap;
534{
535 struct vnode *vp = ap->a_vp;
536 struct null_node *xp = VTONULL(vp);
537 struct vnode *lowervp = xp->null_lowervp;
538 /*
539 * Do nothing (and _don't_ bypass).
540 * Wait to vrele lowervp until reclaim,
541 * so that until then our null_node is in the
542 * cache and reusable.
543 * We still have to tell the lower layer the vnode
544 * is now inactive though.
545 *
546 * NEEDSWORK: Someday, consider inactive'ing
547 * the lowervp and then trying to reactivate it
548 * with capabilities (v_id)
549 * like they do in the name lookup cache code.
550 * That's too much work for now.
551 */
552 VOP_INACTIVE(lowervp, ap->a_p);
553 VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
554 return (0);
555}
556
557static int
558null_reclaim(ap)
559 struct vop_reclaim_args /* {
560 struct vnode *a_vp;
561 struct proc *a_p;
562 } */ *ap;
563{
564 struct vnode *vp = ap->a_vp;
565 struct null_node *xp = VTONULL(vp);
566 struct vnode *lowervp = xp->null_lowervp;
567
568 /*
569 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
570 * so we can't call VOPs on ourself.
571 */
572 /* After this assignment, this node will not be re-used. */
573 xp->null_lowervp = NULLVP;
574 LIST_REMOVE(xp, null_hash);
575 FREE(vp->v_data, M_TEMP);
576 vp->v_data = NULL;
577 vrele (lowervp);
578 return (0);
579}
580
581static int
582null_print(ap)
583 struct vop_print_args /* {
584 struct vnode *a_vp;
585 } */ *ap;
586{
587 register struct vnode *vp = ap->a_vp;
588 printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp));
589 return (0);
590}
591
592/*
593 * Global vfs data structures
594 */
595vop_t **null_vnodeop_p;
596static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
597 { &vop_default_desc, (vop_t *) null_bypass },
598 { &vop_access_desc, (vop_t *) null_access },
599 { &vop_getattr_desc, (vop_t *) null_getattr },
600 { &vop_inactive_desc, (vop_t *) null_inactive },
601 { &vop_lock_desc, (vop_t *) null_lock },
602 { &vop_lookup_desc, (vop_t *) null_lookup },
603 { &vop_print_desc, (vop_t *) null_print },
604 { &vop_reclaim_desc, (vop_t *) null_reclaim },
605 { &vop_setattr_desc, (vop_t *) null_setattr },
606 { &vop_unlock_desc, (vop_t *) null_unlock },
607 { NULL, NULL }
608};
609static struct vnodeopv_desc null_vnodeop_opv_desc =
610 { &null_vnodeop_p, null_vnodeop_entries };
611
612VNODEOP_SET(null_vnodeop_opv_desc);