1/*	$NetBSD: puffs_vnops.c,v 1.163.2.3 2012/08/12 12:59:49 martin Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007  Antti Kantee.  All Rights Reserved.
5 *
6 * Development of this software was supported by the
7 * Google Summer of Code program and the Ulla Tuominen Foundation.
8 * The Google SoC project was mentored by Bill Studenmund.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
20 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__KERNEL_RCSID(0, "$NetBSD: puffs_vnops.c,v 1.163.2.3 2012/08/12 12:59:49 martin Exp $");
34
35#include <sys/param.h>
36#include <sys/buf.h>
37#include <sys/lockf.h>
38#include <sys/malloc.h>
39#include <sys/mount.h>
40#include <sys/namei.h>
41#include <sys/vnode.h>
42#include <sys/proc.h>
43#include <sys/kernel.h> /* For hz, hardclock_ticks */
44
45#include <uvm/uvm.h>
46
47#include <fs/puffs/puffs_msgif.h>
48#include <fs/puffs/puffs_sys.h>
49
50#include <miscfs/fifofs/fifo.h>
51#include <miscfs/genfs/genfs.h>
52#include <miscfs/specfs/specdev.h>
53
/*
 * Vnode operation entry points implemented by puffs.  Each one receives
 * the operation's argument structure as an opaque pointer and casts it
 * to the matching struct vop_*_args internally.
 */
int	puffs_vnop_lookup(void *);
int	puffs_vnop_create(void *);
int	puffs_vnop_access(void *);
int	puffs_vnop_mknod(void *);
int	puffs_vnop_open(void *);
int	puffs_vnop_close(void *);
int	puffs_vnop_getattr(void *);
int	puffs_vnop_setattr(void *);
int	puffs_vnop_reclaim(void *);
int	puffs_vnop_readdir(void *);
int	puffs_vnop_poll(void *);
int	puffs_vnop_fsync(void *);
int	puffs_vnop_seek(void *);
int	puffs_vnop_remove(void *);
int	puffs_vnop_mkdir(void *);
int	puffs_vnop_rmdir(void *);
int	puffs_vnop_link(void *);
int	puffs_vnop_readlink(void *);
int	puffs_vnop_symlink(void *);
int	puffs_vnop_rename(void *);
int	puffs_vnop_read(void *);
int	puffs_vnop_write(void *);
int	puffs_vnop_fcntl(void *);
int	puffs_vnop_ioctl(void *);
int	puffs_vnop_inactive(void *);
int	puffs_vnop_print(void *);
int	puffs_vnop_pathconf(void *);
int	puffs_vnop_advlock(void *);
int	puffs_vnop_strategy(void *);
int	puffs_vnop_bmap(void *);
int	puffs_vnop_mmap(void *);
int	puffs_vnop_getpages(void *);
int	puffs_vnop_abortop(void *);
int	puffs_vnop_getextattr(void *);
int	puffs_vnop_setextattr(void *);
int	puffs_vnop_listextattr(void *);
int	puffs_vnop_deleteextattr(void *);

/* read/write wrappers used by the device (spec) and fifo vectors */
int	puffs_vnop_spec_read(void *);
int	puffs_vnop_spec_write(void *);
int	puffs_vnop_fifo_read(void *);
int	puffs_vnop_fifo_write(void *);

/* generic "is this op supported by the server?" dispatcher */
int	puffs_vnop_checkop(void *);

/* vnode locking is handed straight to the genfs implementations */
#define puffs_vnop_lock genfs_lock
#define puffs_vnop_unlock genfs_unlock
#define puffs_vnop_islocked genfs_islocked
102
/*
 * Operations vector for ordinary puffs vnodes.
 *
 * Entries whose handler is puffs_vnop_checkop are not executed directly:
 * checkop first consults the mount's operation mask and only then
 * forwards the call through puffs_msgop_p.  Entries tagged "REAL" in the
 * trailing comment name the final handler and bypass checkop.
 * Operations not listed end up in vn_default_error via vop_default_desc.
 */
int (**puffs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc puffs_vnodeop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, puffs_vnop_lookup },	/* REAL lookup */
	{ &vop_create_desc, puffs_vnop_checkop },	/* create */
        { &vop_mknod_desc, puffs_vnop_checkop },	/* mknod */
        { &vop_open_desc, puffs_vnop_open },		/* REAL open */
        { &vop_close_desc, puffs_vnop_checkop },	/* close */
        { &vop_access_desc, puffs_vnop_access },	/* REAL access */
        { &vop_getattr_desc, puffs_vnop_checkop },	/* getattr */
        { &vop_setattr_desc, puffs_vnop_checkop },	/* setattr */
        { &vop_read_desc, puffs_vnop_checkop },		/* read */
        { &vop_write_desc, puffs_vnop_checkop },	/* write */
        { &vop_fsync_desc, puffs_vnop_fsync },		/* REAL fsync */
        { &vop_seek_desc, puffs_vnop_checkop },		/* seek */
        { &vop_remove_desc, puffs_vnop_checkop },	/* remove */
        { &vop_link_desc, puffs_vnop_checkop },		/* link */
        { &vop_rename_desc, puffs_vnop_checkop },	/* rename */
        { &vop_mkdir_desc, puffs_vnop_checkop },	/* mkdir */
        { &vop_rmdir_desc, puffs_vnop_checkop },	/* rmdir */
        { &vop_symlink_desc, puffs_vnop_checkop },	/* symlink */
        { &vop_readdir_desc, puffs_vnop_checkop },	/* readdir */
        { &vop_readlink_desc, puffs_vnop_checkop },	/* readlink */
        { &vop_getpages_desc, puffs_vnop_checkop },	/* getpages */
        { &vop_putpages_desc, genfs_putpages },		/* REAL putpages */
        { &vop_pathconf_desc, puffs_vnop_checkop },	/* pathconf */
        { &vop_advlock_desc, puffs_vnop_advlock },	/* advlock */
        { &vop_strategy_desc, puffs_vnop_strategy },	/* REAL strategy */
        { &vop_revoke_desc, genfs_revoke },		/* REAL revoke */
        { &vop_abortop_desc, puffs_vnop_abortop },	/* REAL abortop */
        { &vop_inactive_desc, puffs_vnop_inactive },	/* REAL inactive */
        { &vop_reclaim_desc, puffs_vnop_reclaim },	/* REAL reclaim */
        { &vop_lock_desc, puffs_vnop_lock },		/* REAL lock */
        { &vop_unlock_desc, puffs_vnop_unlock },	/* REAL unlock */
        { &vop_bmap_desc, puffs_vnop_bmap },		/* REAL bmap */
        { &vop_print_desc, puffs_vnop_print },		/* REAL print */
        { &vop_islocked_desc, puffs_vnop_islocked },	/* REAL islocked */
        { &vop_bwrite_desc, genfs_nullop },		/* REAL bwrite */
        { &vop_mmap_desc, puffs_vnop_mmap },		/* REAL mmap */
        { &vop_poll_desc, puffs_vnop_poll },		/* REAL poll */
	/* the extattr handlers are wired in directly, without checkop */
	{ &vop_getextattr_desc, puffs_vnop_getextattr },	/* REAL getextattr */
	{ &vop_setextattr_desc, puffs_vnop_setextattr },	/* REAL setextattr */
	{ &vop_listextattr_desc, puffs_vnop_listextattr },	/* REAL listextattr */
	{ &vop_deleteextattr_desc, puffs_vnop_deleteextattr },/* REAL deleteextattr */
#if 0
	{ &vop_openextattr_desc, puffs_vnop_checkop },	/* openextattr */
	{ &vop_closeextattr_desc, puffs_vnop_checkop },	/* closeextattr */
#endif
        { &vop_kqfilter_desc, genfs_eopnotsupp },	/* kqfilter XXX */
	{ NULL, NULL }
};
/* binds the entry table above to the puffs_vnodeop_p vector pointer */
const struct vnodeopv_desc puffs_vnodeop_opv_desc =
	{ &puffs_vnodeop_p, puffs_vnodeop_entries };
156
157
/*
 * Operations vector for puffs vnodes that represent device nodes.
 *
 * Most operations go to the spec_* handlers.  Attribute-type operations
 * (access, getattr, setattr, extattr) are routed through
 * puffs_vnop_checkop, read/write go through the puffs spec wrappers, and
 * vnode lifecycle/locking/print use the puffs handlers.
 */
int (**puffs_specop_p)(void *);
const struct vnodeopv_entry_desc puffs_specop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, spec_lookup },		/* lookup, ENOTDIR */
	{ &vop_create_desc, spec_create },		/* genfs_badop */
	{ &vop_mknod_desc, spec_mknod },		/* genfs_badop */
	{ &vop_open_desc, spec_open },			/* spec_open */
	{ &vop_close_desc, spec_close },		/* spec_close */
	{ &vop_access_desc, puffs_vnop_checkop },	/* access */
	{ &vop_getattr_desc, puffs_vnop_checkop },	/* getattr */
	{ &vop_setattr_desc, puffs_vnop_checkop },	/* setattr */
	{ &vop_read_desc, puffs_vnop_spec_read },	/* update, read */
	{ &vop_write_desc, puffs_vnop_spec_write },	/* update, write */
	{ &vop_ioctl_desc, spec_ioctl },		/* spec_ioctl */
	{ &vop_fcntl_desc, genfs_fcntl },		/* dummy */
	{ &vop_poll_desc, spec_poll },			/* spec_poll */
	{ &vop_kqfilter_desc, spec_kqfilter },		/* spec_kqfilter */
	{ &vop_revoke_desc, spec_revoke },		/* genfs_revoke */
	{ &vop_mmap_desc, spec_mmap },			/* spec_mmap */
	{ &vop_fsync_desc, spec_fsync },		/* vflushbuf */
	{ &vop_seek_desc, spec_seek },			/* genfs_nullop */
	{ &vop_remove_desc, spec_remove },		/* genfs_badop */
	{ &vop_link_desc, spec_link },			/* genfs_badop */
	{ &vop_rename_desc, spec_rename },		/* genfs_badop */
	{ &vop_mkdir_desc, spec_mkdir },		/* genfs_badop */
	{ &vop_rmdir_desc, spec_rmdir },		/* genfs_badop */
	{ &vop_symlink_desc, spec_symlink },		/* genfs_badop */
	{ &vop_readdir_desc, spec_readdir },		/* genfs_badop */
	{ &vop_readlink_desc, spec_readlink },		/* genfs_badop */
	{ &vop_abortop_desc, spec_abortop },		/* genfs_badop */
	{ &vop_inactive_desc, puffs_vnop_inactive },	/* REAL inactive */
	{ &vop_reclaim_desc, puffs_vnop_reclaim },	/* REAL reclaim */
	{ &vop_lock_desc, puffs_vnop_lock },		/* REAL lock */
	{ &vop_unlock_desc, puffs_vnop_unlock },	/* REAL unlock */
	{ &vop_bmap_desc, spec_bmap },			/* dummy */
	{ &vop_strategy_desc, spec_strategy },		/* dev strategy */
	{ &vop_print_desc, puffs_vnop_print },		/* REAL print */
	{ &vop_islocked_desc, puffs_vnop_islocked },	/* REAL islocked */
	{ &vop_pathconf_desc, spec_pathconf },		/* pathconf */
	{ &vop_advlock_desc, spec_advlock },		/* lf_advlock */
	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
	{ &vop_getpages_desc, spec_getpages },		/* genfs_getpages */
	{ &vop_putpages_desc, spec_putpages },		/* genfs_putpages */
	/* extattr ops go through checkop and from there to puffs_msgop_p */
	{ &vop_getextattr_desc, puffs_vnop_checkop },	/* getextattr */
	{ &vop_setextattr_desc, puffs_vnop_checkop },	/* setextattr */
	{ &vop_listextattr_desc, puffs_vnop_checkop },	/* listextattr */
	{ &vop_deleteextattr_desc, puffs_vnop_checkop },/* deleteextattr */
#if 0
	{ &vop_openextattr_desc, _openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, _closeextattr },	/* closeextattr */
#endif
	{ NULL, NULL }
};
/* binds the entry table above to the puffs_specop_p vector pointer */
const struct vnodeopv_desc puffs_specop_opv_desc =
	{ &puffs_specop_p, puffs_specop_entries };
213
214
/*
 * Operations vector for puffs vnodes that represent fifos.
 *
 * Most operations are handed to vn_fifo_bypass.  Attribute-type
 * operations (access, getattr, setattr, extattr) are routed through
 * puffs_vnop_checkop, read/write go through the puffs fifo wrappers, and
 * vnode lifecycle/locking/print use the puffs handlers.
 */
int (**puffs_fifoop_p)(void *);
const struct vnodeopv_entry_desc puffs_fifoop_entries[] = {
	{ &vop_default_desc, vn_default_error },
	{ &vop_lookup_desc, vn_fifo_bypass },		/* lookup, ENOTDIR */
	{ &vop_create_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_mknod_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_open_desc, vn_fifo_bypass },		/* open */
	{ &vop_close_desc, vn_fifo_bypass },		/* close */
	{ &vop_access_desc, puffs_vnop_checkop },	/* access */
	{ &vop_getattr_desc, puffs_vnop_checkop },	/* getattr */
	{ &vop_setattr_desc, puffs_vnop_checkop },	/* setattr */
	{ &vop_read_desc, puffs_vnop_fifo_read },	/* read, update */
	{ &vop_write_desc, puffs_vnop_fifo_write },	/* write, update */
	{ &vop_ioctl_desc, vn_fifo_bypass },		/* ioctl */
	{ &vop_fcntl_desc, genfs_fcntl },		/* dummy */
	{ &vop_poll_desc, vn_fifo_bypass },		/* poll */
	{ &vop_kqfilter_desc, vn_fifo_bypass },		/* kqfilter */
	{ &vop_revoke_desc, vn_fifo_bypass },		/* genfs_revoke */
	{ &vop_mmap_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_fsync_desc, vn_fifo_bypass },		/* genfs_nullop*/
	{ &vop_seek_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_remove_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_link_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_rename_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_mkdir_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_rmdir_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_symlink_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_readdir_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_readlink_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_abortop_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_inactive_desc, puffs_vnop_inactive },	/* REAL inactive */
	{ &vop_reclaim_desc, puffs_vnop_reclaim },	/* REAL reclaim */
	{ &vop_lock_desc, puffs_vnop_lock },		/* REAL lock */
	{ &vop_unlock_desc, puffs_vnop_unlock },	/* REAL unlock */
	{ &vop_bmap_desc, vn_fifo_bypass },		/* dummy */
	{ &vop_strategy_desc, vn_fifo_bypass },		/* genfs_badop */
	{ &vop_print_desc, puffs_vnop_print },		/* REAL print */
	{ &vop_islocked_desc, puffs_vnop_islocked },	/* REAL islocked */
	{ &vop_pathconf_desc, vn_fifo_bypass },		/* pathconf */
	{ &vop_advlock_desc, vn_fifo_bypass },		/* genfs_einval */
	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
	{ &vop_putpages_desc, vn_fifo_bypass }, 	/* genfs_null_putpages*/
#if 0
	{ &vop_openextattr_desc, _openextattr },	/* openextattr */
	{ &vop_closeextattr_desc, _closeextattr },	/* closeextattr */
#endif
	/* extattr ops go through checkop and from there to puffs_msgop_p */
	{ &vop_getextattr_desc, puffs_vnop_checkop },		/* getextattr */
	{ &vop_setextattr_desc, puffs_vnop_checkop },		/* setextattr */
	{ &vop_listextattr_desc, puffs_vnop_checkop },	/* listextattr */
	{ &vop_deleteextattr_desc, puffs_vnop_checkop },	/* deleteextattr */
	{ NULL, NULL }
};
/* binds the entry table above to the puffs_fifoop_p vector pointer */
const struct vnodeopv_desc puffs_fifoop_opv_desc =
	{ &puffs_fifoop_p, puffs_fifoop_entries };
269
270
271/* "real" vnode operations */
272int (**puffs_msgop_p)(void *);
273const struct vnodeopv_entry_desc puffs_msgop_entries[] = {
274	{ &vop_default_desc, vn_default_error },
275	{ &vop_create_desc, puffs_vnop_create },	/* create */
276        { &vop_mknod_desc, puffs_vnop_mknod },		/* mknod */
277        { &vop_open_desc, puffs_vnop_open },		/* open */
278        { &vop_close_desc, puffs_vnop_close },		/* close */
279        { &vop_access_desc, puffs_vnop_access },	/* access */
280        { &vop_getattr_desc, puffs_vnop_getattr },	/* getattr */
281        { &vop_setattr_desc, puffs_vnop_setattr },	/* setattr */
282        { &vop_read_desc, puffs_vnop_read },		/* read */
283        { &vop_write_desc, puffs_vnop_write },		/* write */
284        { &vop_seek_desc, puffs_vnop_seek },		/* seek */
285        { &vop_remove_desc, puffs_vnop_remove },	/* remove */
286        { &vop_link_desc, puffs_vnop_link },		/* link */
287        { &vop_rename_desc, puffs_vnop_rename },	/* rename */
288        { &vop_mkdir_desc, puffs_vnop_mkdir },		/* mkdir */
289        { &vop_rmdir_desc, puffs_vnop_rmdir },		/* rmdir */
290        { &vop_symlink_desc, puffs_vnop_symlink },	/* symlink */
291        { &vop_readdir_desc, puffs_vnop_readdir },	/* readdir */
292        { &vop_readlink_desc, puffs_vnop_readlink },	/* readlink */
293        { &vop_print_desc, puffs_vnop_print },		/* print */
294        { &vop_islocked_desc, puffs_vnop_islocked },	/* islocked */
295        { &vop_pathconf_desc, puffs_vnop_pathconf },	/* pathconf */
296        { &vop_getpages_desc, puffs_vnop_getpages },	/* getpages */
297	{ NULL, NULL }
298};
299const struct vnodeopv_desc puffs_msgop_opv_desc =
300	{ &puffs_msgop_p, puffs_msgop_entries };
301
/*
 * Flag bits accepted by dosetattr / update_va.
 */
#define SETATTR_CHSIZE	0x01
#define SETATTR_ASYNC	0x02

/*
 * Bail-out helper: record err in the enclosing function's local
 * "error" variable and jump to its "out" label.  Both names must exist
 * in the function that uses the macro.
 */
#define ERROUT(err)							\
do {									\
	error = (err);							\
	goto out;							\
} while (/*CONSTCOND*/0)
313
/*
 * This is a generic vnode operation handler.  It checks if the necessary
 * operations for the called vnode operation are implemented by userspace
 * and either returns a dummy return value or proceeds to call the real
 * vnode operation from puffs_msgop_p.
 *
 * XXX: this should be described elsewhere and autogenerated; the complexity
 * of the vnode operations vectors and their interrelationships is also
 * getting a bit out of hand.  Another problem is that we need this same
 * information in the fs server code, so keeping the two in sync manually
 * is not a viable (long term) plan.
 */
326
/*
 * Both macros expand to a complete "case" arm for the switch in
 * puffs_vnop_checkop() and rely on that function's locals "pmp" and "v".
 * When the mount's operation mask has the operation enabled the arm
 * simply breaks out so that the real handler gets called.
 */

/* operation missing: report not supported, handle locking protocol */
#define CHECKOP_NOTSUPP(op)						\
case VOP_##op##_DESCOFFSET:						\
	if (!pmp->pmp_vnopmask[PUFFS_VN_##op])				\
		return genfs_eopnotsupp(v);				\
	break

/* operation missing: always succeed, no locking */
#define CHECKOP_SUCCESS(op)						\
case VOP_##op##_DESCOFFSET:						\
	if (!pmp->pmp_vnopmask[PUFFS_VN_##op])				\
		return 0;						\
	break
340
341int
342puffs_vnop_checkop(void *v)
343{
344	struct vop_generic_args /* {
345		struct vnodeop_desc *a_desc;
346		spooky mystery contents;
347	} */ *ap = v;
348	struct vnodeop_desc *desc = ap->a_desc;
349	struct puffs_mount *pmp;
350	struct vnode *vp;
351	int offset, rv;
352
353	offset = ap->a_desc->vdesc_vp_offsets[0];
354#ifdef DIAGNOSTIC
355	if (offset == VDESC_NO_OFFSET)
356		panic("puffs_checkop: no vnode, why did you call me?");
357#endif
358	vp = *VOPARG_OFFSETTO(struct vnode **, offset, ap);
359	pmp = MPTOPUFFSMP(vp->v_mount);
360
361	DPRINTF_VERBOSE(("checkop call %s (%d), vp %p\n",
362	    ap->a_desc->vdesc_name, ap->a_desc->vdesc_offset, vp));
363
364	if (!ALLOPS(pmp)) {
365		switch (desc->vdesc_offset) {
366			CHECKOP_NOTSUPP(CREATE);
367			CHECKOP_NOTSUPP(MKNOD);
368			CHECKOP_NOTSUPP(GETATTR);
369			CHECKOP_NOTSUPP(SETATTR);
370			CHECKOP_NOTSUPP(READ);
371			CHECKOP_NOTSUPP(WRITE);
372			CHECKOP_NOTSUPP(FCNTL);
373			CHECKOP_NOTSUPP(IOCTL);
374			CHECKOP_NOTSUPP(REMOVE);
375			CHECKOP_NOTSUPP(LINK);
376			CHECKOP_NOTSUPP(RENAME);
377			CHECKOP_NOTSUPP(MKDIR);
378			CHECKOP_NOTSUPP(RMDIR);
379			CHECKOP_NOTSUPP(SYMLINK);
380			CHECKOP_NOTSUPP(READDIR);
381			CHECKOP_NOTSUPP(READLINK);
382			CHECKOP_NOTSUPP(PRINT);
383			CHECKOP_NOTSUPP(PATHCONF);
384			CHECKOP_NOTSUPP(GETEXTATTR);
385			CHECKOP_NOTSUPP(SETEXTATTR);
386			CHECKOP_NOTSUPP(LISTEXTATTR);
387			CHECKOP_NOTSUPP(DELETEEXTATTR);
388
389			CHECKOP_SUCCESS(ACCESS);
390			CHECKOP_SUCCESS(CLOSE);
391			CHECKOP_SUCCESS(SEEK);
392
393		case VOP_GETPAGES_DESCOFFSET:
394			if (!EXISTSOP(pmp, READ))
395				return genfs_eopnotsupp(v);
396			break;
397
398		default:
399			panic("puffs_checkop: unhandled vnop %d",
400			    desc->vdesc_offset);
401		}
402	}
403
404	rv = VOCALL(puffs_msgop_p, ap->a_desc->vdesc_offset, v);
405
406	DPRINTF_VERBOSE(("checkop return %s (%d), vp %p: %d\n",
407	    ap->a_desc->vdesc_name, ap->a_desc->vdesc_offset, vp, rv));
408
409	return rv;
410}
411
/*
 * Forward declarations of file-local helpers used before their
 * definitions.
 */
static int callremove(struct puffs_mount *, puffs_cookie_t, puffs_cookie_t,
			    struct componentname *);
static int callrmdir(struct puffs_mount *, puffs_cookie_t, puffs_cookie_t,
			   struct componentname *);
static void callinactive(struct puffs_mount *, puffs_cookie_t, int);
static void callreclaim(struct puffs_mount *, puffs_cookie_t, int);
static int  flushvncache(struct vnode *, off_t, off_t, bool);
static void update_va(struct vnode *, struct vattr *, struct vattr *,
		      struct timespec *, struct timespec *, int);
static void update_parent(struct vnode *, struct vnode *);
422
423
424#define PUFFS_ABORT_LOOKUP	1
425#define PUFFS_ABORT_CREATE	2
426#define PUFFS_ABORT_MKNOD	3
427#define PUFFS_ABORT_MKDIR	4
428#define PUFFS_ABORT_SYMLINK	5
429
430/*
431 * Press the pani^Wabort button!  Kernel resource allocation failed.
432 */
433static void
434puffs_abortbutton(struct puffs_mount *pmp, int what,
435	puffs_cookie_t dck, puffs_cookie_t ck, struct componentname *cnp)
436{
437
438	switch (what) {
439	case PUFFS_ABORT_CREATE:
440	case PUFFS_ABORT_MKNOD:
441	case PUFFS_ABORT_SYMLINK:
442		callremove(pmp, dck, ck, cnp);
443		break;
444	case PUFFS_ABORT_MKDIR:
445		callrmdir(pmp, dck, ck, cnp);
446		break;
447	}
448
449	callinactive(pmp, ck, 0);
450	callreclaim(pmp, ck, 1);
451}
452
453/*
454 * Begin vnode operations.
455 *
456 * A word from the keymaster about locks: generally we don't want
457 * to use the vnode locks at all: it creates an ugly dependency between
458 * the userlandia file server and the kernel.  But we'll play along with
459 * the kernel vnode locks for now.  However, even currently we attempt
460 * to release locks as early as possible.  This is possible for some
461 * operations which a) don't need a locked vnode after the userspace op
462 * and b) return with the vnode unlocked.  Theoretically we could
463 * unlock-do op-lock for others and order the graph in userspace, but I
464 * don't want to think of the consequences for the time being.
465 */
466
/*
 * TTL helpers.  "ts" is a pointer to a struct timespec holding a
 * relative time-to-live; "expire" is an absolute deadline expressed in
 * hardclock ticks.
 *
 * TTL_TO_TIMEOUT converts a TTL into an absolute tick deadline.  The
 * nanosecond part is widened before multiplying by hz so the
 * intermediate product cannot overflow where long is 32 bits.
 * TTL_VALID is true for a non-NULL, non-zero TTL.
 * TIMED_OUT compares through an unsigned difference so that the result
 * stays correct when the tick counter wraps around.
 *
 * Every macro argument is parenthesized so that arbitrary expressions
 * (e.g. "&msg->ttl") may be passed.
 */
#define TTL_TO_TIMEOUT(ts) \
    (hardclock_ticks + ((ts)->tv_sec * hz) + \
     ((long long)(ts)->tv_nsec * hz / 1000000000))
#define TTL_VALID(ts) \
    (((ts) != NULL) && !(((ts)->tv_sec == 0) && ((ts)->tv_nsec == 0)))
#define TIMED_OUT(expire) \
    ((int)((unsigned int)hardclock_ticks - (unsigned int)(expire)) > 0)
/*
 * Look up the name component a_cnp in directory a_dvp and hand the
 * resulting vnode back through *a_vpp (which is set to NULL first).
 *
 * The answer is searched for in this order:
 *  1. reject DELETE/RENAME of the last component on a read-only mount,
 *  2. the ".." cache (the parent pointer kept in the puffs node),
 *  3. the name cache, optionally bounded by the TTL kept in the node,
 *  4. "." (returns the directory itself with an extra reference),
 *  5. a PUFFS_VN_LOOKUP request to the file server.
 *
 * Returns 0 on success.  Errors visible here: EROFS, EISDIR (RENAME of
 * "." as last component), EJUSTRETURN (last component does not exist
 * for CREATE/RENAME), EPROTO (the server answered with the parent's own
 * cookie), or whatever checkerr(), puffs_cookie2vnode() or
 * puffs_getvnode() report.
 */
int
puffs_vnop_lookup(void *v)
{
        struct vop_lookup_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
        } */ *ap = v;
	PUFFS_MSG_VARS(vn, lookup);
	struct puffs_mount *pmp;
	struct componentname *cnp;
	struct vnode *vp, *dvp, *cvp;
	struct puffs_node *dpn, *cpn;
	int isdot;
	int error;

	pmp = MPTOPUFFSMP(ap->a_dvp->v_mount);
	cnp = ap->a_cnp;
	dvp = ap->a_dvp;
	/* cvp/cpn are only set when an expired name cache entry was found */
	cvp = NULL;
	cpn = NULL;
	*ap->a_vpp = NULL;

	/* r/o fs?  we check create later to handle EEXIST */
	if ((cnp->cn_flags & ISLASTCN)
	    && (dvp->v_mount->mnt_flag & MNT_RDONLY)
	    && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
		return EROFS;

	isdot = cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.';

	DPRINTF(("puffs_lookup: \"%s\", parent vnode %p, op: %x\n",
	    cnp->cn_nameptr, dvp, cnp->cn_nameiop));

	/*
	 * If dotdot cache is enabled, unlock parent, lock ..
	 * (grand-parent) and relock parent.
	 */
	if (PUFFS_USE_DOTDOTCACHE(pmp) && (cnp->cn_flags & ISDOTDOT)) {
		VOP_UNLOCK(dvp);

		vp = VPTOPP(ap->a_dvp)->pn_parent;
		vref(vp);

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);

		*ap->a_vpp = vp;
		return 0;
	}

	/*
	 * Check if someone fed it into the cache
	 */
	if (!isdot && PUFFS_USE_NAMECACHE(pmp)) {
		error = cache_lookup(dvp, ap->a_vpp, cnp);

		if ((error == 0) && PUFFS_USE_FS_TTL(pmp)) {
			cvp = *ap->a_vpp;
			cpn = VPTOPP(cvp);

			if (TIMED_OUT(cpn->pn_cn_timeout)) {
				cache_purge(cvp);
				/*
				 * cached vnode (cvp) is still locked
				 * so that we can reuse it upon a new
				 * successful lookup.
				 */
				*ap->a_vpp = NULL;
				/* negative: fall through and ask the server */
				error = -1;
			}
		}

		/*
		 * Do not use negative caching, since the filesystem
		 * provides no TTL for it.
		 */
		if ((error == ENOENT) && PUFFS_USE_FS_TTL(pmp))
			error = -1;

		/* any non-negative value is a final answer from the cache */
		if (error >= 0)
			return error;
	}

	if (isdot) {
		/* deal with rename lookup semantics */
		if (cnp->cn_nameiop == RENAME && (cnp->cn_flags & ISLASTCN))
			return EISDIR;

		vp = ap->a_dvp;
		vref(vp);
		*ap->a_vpp = vp;
		return 0;
	}

	/*
	 * An expired cache entry was kept: hold its size mutex for the
	 * duration of the server request; it is released at "out".
	 */
	if (cvp != NULL)
		mutex_enter(&cpn->pn_sizemtx);

	PUFFS_MSG_ALLOC(vn, lookup);
	puffs_makecn(&lookup_msg->pvnr_cn, &lookup_msg->pvnr_cn_cred,
	    cnp, PUFFS_USE_FULLPNBUF(pmp));

	/* for "..", the directory stays unlocked until "out" relocks it */
	if (cnp->cn_flags & ISDOTDOT)
		VOP_UNLOCK(dvp);

	puffs_msg_setinfo(park_lookup, PUFFSOP_VN,
	    PUFFS_VN_LOOKUP, VPTOPNC(dvp));
	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_lookup, dvp->v_data, NULL, error);
	DPRINTF(("puffs_lookup: return of the userspace, part %d\n", error));

	/*
	 * In case of error, there is no new vnode to play with, so be
	 * happy with the NULL value given to vpp in the beginning.
	 * Also, check if this really was an error or the target was not
	 * present.  Either treat it as a non-error for CREATE/RENAME or
	 * enter the component into the negative name cache (if desired).
	 */
	if (error) {
		error = checkerr(pmp, error, __func__);
		if (error == ENOENT) {
			/* don't allow to create files on r/o fs */
			if ((dvp->v_mount->mnt_flag & MNT_RDONLY)
			    && cnp->cn_nameiop == CREATE) {
				error = EROFS;

			/* adjust values if we are creating */
			} else if ((cnp->cn_flags & ISLASTCN)
			    && (cnp->cn_nameiop == CREATE
			      || cnp->cn_nameiop == RENAME)) {
				error = EJUSTRETURN;

			/* save negative cache entry */
			} else {
				if (PUFFS_USE_NAMECACHE(pmp) &&
				    !PUFFS_USE_FS_TTL(pmp))
					cache_enter(dvp, NULL, cnp);
			}
		}
		goto out;
	}

	/*
	 * Check that we don't get our parent node back, that would cause
	 * a pretty obvious deadlock.
	 */
	dpn = dvp->v_data;
	if (lookup_msg->pvnr_newnode == dpn->pn_cookie) {
		puffs_senderr(pmp, PUFFS_ERR_LOOKUP, EINVAL,
		    "lookup produced parent cookie", lookup_msg->pvnr_newnode);
		error = EPROTO;
		goto out;
	}

	/*
	 * Check if we looked up the cached vnode
	 */
	vp = NULL;
	if (cvp && (VPTOPP(cvp)->pn_cookie == lookup_msg->pvnr_newnode)) {
		int grace;

		/*
		 * Bump grace time of this node so that it does not get
		 * reclaimed too fast. We try to increase a bit more the
		 * lifetime of busiest nodes - with some limits.
		 */
		grace = 10 * puffs_sopreq_expire_timeout;
		cpn->pn_cn_grace = hardclock_ticks + grace;
		vp = cvp;
	}

	/*
	 * No cached vnode available, or the cached vnode does not
	 * match the userland cookie anymore: is the node known?
	 */
	if (vp == NULL) {
		error = puffs_cookie2vnode(pmp, lookup_msg->pvnr_newnode,
					   1, 1, &vp);
	}

	/* unknown cookie: create a fresh vnode for it and lock it */
	if (error == PUFFS_NOSUCHCOOKIE) {
		error = puffs_getvnode(dvp->v_mount,
		    lookup_msg->pvnr_newnode, lookup_msg->pvnr_vtype,
		    lookup_msg->pvnr_size, lookup_msg->pvnr_rdev, &vp);
		if (error) {
			puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP,
			    VPTOPNC(dvp), lookup_msg->pvnr_newnode,
			    ap->a_cnp);
			goto out;
		}

		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	} else if (error) {
		puffs_abortbutton(pmp, PUFFS_ABORT_LOOKUP, VPTOPNC(dvp),
		    lookup_msg->pvnr_newnode, ap->a_cnp);
		goto out;
	}

	/*
	 * Update cache and TTL
	 */
	if (PUFFS_USE_FS_TTL(pmp)) {
		struct timespec *va_ttl = &lookup_msg->pvnr_va_ttl;
		struct timespec *cn_ttl = &lookup_msg->pvnr_cn_ttl;
		update_va(vp, NULL, &lookup_msg->pvnr_va,
			  va_ttl, cn_ttl, SETATTR_CHSIZE);
	}

	KASSERT(lookup_msg->pvnr_newnode == VPTOPP(vp)->pn_cookie);
	*ap->a_vpp = vp;

	if (PUFFS_USE_NAMECACHE(pmp))
		cache_enter(dvp, vp, cnp);

	/* XXX */
	if ((lookup_msg->pvnr_cn.pkcn_flags & REQUIREDIR) == 0)
		cnp->cn_flags &= ~REQUIREDIR;
	/* never consume more than what is left of the path after this name */
	if (lookup_msg->pvnr_cn.pkcn_consume)
		cnp->cn_consume = MIN(lookup_msg->pvnr_cn.pkcn_consume,
		    strlen(cnp->cn_nameptr) - cnp->cn_namelen);

	VPTOPP(vp)->pn_nlookup++;

	if (PUFFS_USE_DOTDOTCACHE(pmp) &&
	    (VPTOPP(vp)->pn_parent != dvp))
		update_parent(vp, dvp);

 out:
	/*
	 * Let go of the expired cache vnode unless it is the very vnode
	 * being returned.  (vp is only examined when error is 0.)
	 */
	if (cvp != NULL) {
		mutex_exit(&cpn->pn_sizemtx);

		if (error || (cvp != vp))
			vput(cvp);
	}

	/* retake the directory lock dropped before the server request */
	if (cnp->cn_flags & ISDOTDOT)
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);

	DPRINTF(("puffs_lookup: returning %d %p\n", error, *ap->a_vpp));
	PUFFS_MSG_RELEASE(lookup);
	return error;
}
715
/*
 * Reference/release helpers.  "a" is a vnode pointer, "b" the matching
 * puffs node pointer.  The arguments are parenthesized where they are
 * dereferenced so that any pointer expression may be passed; note that
 * "b" is evaluated more than once.
 */

/* take a reference on puffs node b under its mutex, then unlock vnode a */
#define REFPN_AND_UNLOCKVP(a, b)					\
do {									\
	mutex_enter(&(b)->pn_mtx);					\
	puffs_referencenode(b);						\
	mutex_exit(&(b)->pn_mtx);					\
	VOP_UNLOCK(a);							\
} while (/*CONSTCOND*/0)

/* take a reference on puffs node b under its mutex */
#define REFPN(b)							\
do {									\
	mutex_enter(&(b)->pn_mtx);					\
	puffs_referencenode(b);						\
	mutex_exit(&(b)->pn_mtx);					\
} while (/*CONSTCOND*/0)

/* release puffs node b, then drop a reference on vnode a */
#define RELEPN_AND_VP(a, b)						\
do {									\
	puffs_releasenode(b);						\
	vrele(a);							\
} while (/*CONSTCOND*/0)
736
737int
738puffs_vnop_create(void *v)
739{
740	struct vop_create_args /* {
741		const struct vnodeop_desc *a_desc;
742		struct vnode *a_dvp;
743		struct vnode **a_vpp;
744		struct componentname *a_cnp;
745		struct vattr *a_vap;
746	} */ *ap = v;
747	PUFFS_MSG_VARS(vn, create);
748	struct vnode *dvp = ap->a_dvp;
749	struct puffs_node *dpn = VPTOPP(dvp);
750	struct componentname *cnp = ap->a_cnp;
751	struct mount *mp = dvp->v_mount;
752	struct puffs_mount *pmp = MPTOPUFFSMP(mp);
753	int error;
754
755	DPRINTF(("puffs_create: dvp %p, cnp: %s\n",
756	    dvp, ap->a_cnp->cn_nameptr));
757
758	PUFFS_MSG_ALLOC(vn, create);
759	puffs_makecn(&create_msg->pvnr_cn, &create_msg->pvnr_cn_cred,
760	    cnp, PUFFS_USE_FULLPNBUF(pmp));
761	create_msg->pvnr_va = *ap->a_vap;
762	puffs_msg_setinfo(park_create, PUFFSOP_VN,
763	    PUFFS_VN_CREATE, VPTOPNC(dvp));
764	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_create, dvp->v_data, NULL, error);
765
766	error = checkerr(pmp, error, __func__);
767	if (error)
768		goto out;
769
770	error = puffs_newnode(mp, dvp, ap->a_vpp,
771	    create_msg->pvnr_newnode, cnp, ap->a_vap->va_type, 0);
772	if (error) {
773		puffs_abortbutton(pmp, PUFFS_ABORT_CREATE, dpn->pn_cookie,
774		    create_msg->pvnr_newnode, cnp);
775		goto out;
776	}
777
778	if (PUFFS_USE_FS_TTL(pmp)) {
779		struct timespec *va_ttl = &create_msg->pvnr_va_ttl;
780		struct timespec *cn_ttl = &create_msg->pvnr_cn_ttl;
781		struct vattr *rvap = &create_msg->pvnr_va;
782
783		update_va(*ap->a_vpp, NULL, rvap,
784			  va_ttl, cn_ttl, SETATTR_CHSIZE);
785	}
786
787	VPTOPP(*ap->a_vpp)->pn_nlookup++;
788
789	if (PUFFS_USE_DOTDOTCACHE(pmp) &&
790	    (VPTOPP(*ap->a_vpp)->pn_parent != dvp))
791		update_parent(*ap->a_vpp, dvp);
792
793 out:
794	vput(dvp);
795
796	DPRINTF(("puffs_create: return %d\n", error));
797	PUFFS_MSG_RELEASE(create);
798	return error;
799}
800
801int
802puffs_vnop_mknod(void *v)
803{
804	struct vop_mknod_args /* {
805		const struct vnodeop_desc *a_desc;
806		struct vnode *a_dvp;
807		struct vnode **a_vpp;
808		struct componentname *a_cnp;
809		struct vattr *a_vap;
810	} */ *ap = v;
811	PUFFS_MSG_VARS(vn, mknod);
812	struct vnode *dvp = ap->a_dvp;
813	struct puffs_node *dpn = VPTOPP(dvp);
814	struct componentname *cnp = ap->a_cnp;
815	struct mount *mp = dvp->v_mount;
816	struct puffs_mount *pmp = MPTOPUFFSMP(mp);
817	int error;
818
819	PUFFS_MSG_ALLOC(vn, mknod);
820	puffs_makecn(&mknod_msg->pvnr_cn, &mknod_msg->pvnr_cn_cred,
821	    cnp, PUFFS_USE_FULLPNBUF(pmp));
822	mknod_msg->pvnr_va = *ap->a_vap;
823	puffs_msg_setinfo(park_mknod, PUFFSOP_VN,
824	    PUFFS_VN_MKNOD, VPTOPNC(dvp));
825
826	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_mknod, dvp->v_data, NULL, error);
827
828	error = checkerr(pmp, error, __func__);
829	if (error)
830		goto out;
831
832	error = puffs_newnode(mp, dvp, ap->a_vpp,
833	    mknod_msg->pvnr_newnode, cnp, ap->a_vap->va_type,
834	    ap->a_vap->va_rdev);
835	if (error) {
836		puffs_abortbutton(pmp, PUFFS_ABORT_MKNOD, dpn->pn_cookie,
837		    mknod_msg->pvnr_newnode, cnp);
838		goto out;
839	}
840
841	if (PUFFS_USE_FS_TTL(pmp)) {
842		struct timespec *va_ttl = &mknod_msg->pvnr_va_ttl;
843		struct timespec *cn_ttl = &mknod_msg->pvnr_cn_ttl;
844		struct vattr *rvap = &mknod_msg->pvnr_va;
845
846		update_va(*ap->a_vpp, NULL, rvap,
847			   va_ttl, cn_ttl, SETATTR_CHSIZE);
848	}
849
850	VPTOPP(*ap->a_vpp)->pn_nlookup++;
851
852	if (PUFFS_USE_DOTDOTCACHE(pmp) &&
853	    (VPTOPP(*ap->a_vpp)->pn_parent != dvp))
854		update_parent(*ap->a_vpp, dvp);
855
856 out:
857	vput(dvp);
858	PUFFS_MSG_RELEASE(mknod);
859	return error;
860}
861
862int
863puffs_vnop_open(void *v)
864{
865	struct vop_open_args /* {
866		const struct vnodeop_desc *a_desc;
867		struct vnode *a_vp;
868		int a_mode;
869		kauth_cred_t a_cred;
870	} */ *ap = v;
871	PUFFS_MSG_VARS(vn, open);
872	struct vnode *vp = ap->a_vp;
873	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
874	int mode = ap->a_mode;
875	int error;
876
877	DPRINTF(("puffs_open: vp %p, mode 0x%x\n", vp, mode));
878
879	if (vp->v_type == VREG && mode & FWRITE && !EXISTSOP(pmp, WRITE))
880		ERROUT(EROFS);
881
882	if (!EXISTSOP(pmp, OPEN))
883		ERROUT(0);
884
885	PUFFS_MSG_ALLOC(vn, open);
886	open_msg->pvnr_mode = mode;
887	puffs_credcvt(&open_msg->pvnr_cred, ap->a_cred);
888	puffs_msg_setinfo(park_open, PUFFSOP_VN,
889	    PUFFS_VN_OPEN, VPTOPNC(vp));
890
891	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_open, vp->v_data, NULL, error);
892	error = checkerr(pmp, error, __func__);
893
894 out:
895	DPRINTF(("puffs_open: returning %d\n", error));
896	PUFFS_MSG_RELEASE(open);
897	return error;
898}
899
900int
901puffs_vnop_close(void *v)
902{
903	struct vop_close_args /* {
904		const struct vnodeop_desc *a_desc;
905		struct vnode *a_vp;
906		int a_fflag;
907		kauth_cred_t a_cred;
908	} */ *ap = v;
909	PUFFS_MSG_VARS(vn, close);
910	struct vnode *vp = ap->a_vp;
911	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
912
913	PUFFS_MSG_ALLOC(vn, close);
914	puffs_msg_setfaf(park_close);
915	close_msg->pvnr_fflag = ap->a_fflag;
916	puffs_credcvt(&close_msg->pvnr_cred, ap->a_cred);
917	puffs_msg_setinfo(park_close, PUFFSOP_VN,
918	    PUFFS_VN_CLOSE, VPTOPNC(vp));
919
920	puffs_msg_enqueue(pmp, park_close);
921	PUFFS_MSG_RELEASE(close);
922	return 0;
923}
924
925int
926puffs_vnop_access(void *v)
927{
928	struct vop_access_args /* {
929		const struct vnodeop_desc *a_desc;
930		struct vnode *a_vp;
931		int a_mode;
932		kauth_cred_t a_cred;
933	} */ *ap = v;
934	PUFFS_MSG_VARS(vn, access);
935	struct vnode *vp = ap->a_vp;
936	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
937	int mode = ap->a_mode;
938	int error;
939
940	if (mode & VWRITE) {
941		switch (vp->v_type) {
942		case VDIR:
943		case VLNK:
944		case VREG:
945			if ((vp->v_mount->mnt_flag & MNT_RDONLY)
946			    || !EXISTSOP(pmp, WRITE))
947				return EROFS;
948			break;
949		default:
950			break;
951		}
952	}
953
954	if (!EXISTSOP(pmp, ACCESS))
955		return 0;
956
957	PUFFS_MSG_ALLOC(vn, access);
958	access_msg->pvnr_mode = ap->a_mode;
959	puffs_credcvt(&access_msg->pvnr_cred, ap->a_cred);
960	puffs_msg_setinfo(park_access, PUFFSOP_VN,
961	    PUFFS_VN_ACCESS, VPTOPNC(vp));
962
963	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_access, vp->v_data, NULL, error);
964	error = checkerr(pmp, error, __func__);
965	PUFFS_MSG_RELEASE(access);
966
967	return error;
968}
969
970static void
971update_va(struct vnode *vp, struct vattr *vap, struct vattr *rvap,
972	  struct timespec *va_ttl, struct timespec *cn_ttl, int flags)
973{
974	struct puffs_node *pn = VPTOPP(vp);
975
976	if (TTL_VALID(cn_ttl)) {
977		pn->pn_cn_timeout = TTL_TO_TIMEOUT(cn_ttl);
978		pn->pn_cn_grace = MAX(pn->pn_cn_timeout, pn->pn_cn_grace);
979	}
980
981	/*
982	 * Don't listen to the file server regarding special device
983	 * size info, the file server doesn't know anything about them.
984	 */
985	if (vp->v_type == VBLK || vp->v_type == VCHR)
986		rvap->va_size = vp->v_size;
987
988	/* Ditto for blocksize (ufs comment: this doesn't belong here) */
989	if (vp->v_type == VBLK)
990		rvap->va_blocksize = BLKDEV_IOSIZE;
991	else if (vp->v_type == VCHR)
992		rvap->va_blocksize = MAXBSIZE;
993
994	if (vap != NULL) {
995		(void) memcpy(vap, rvap, sizeof(struct vattr));
996		vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
997
998		if (pn->pn_stat & PNODE_METACACHE_ATIME)
999			vap->va_atime = pn->pn_mc_atime;
1000		if (pn->pn_stat & PNODE_METACACHE_CTIME)
1001			vap->va_ctime = pn->pn_mc_ctime;
1002		if (pn->pn_stat & PNODE_METACACHE_MTIME)
1003			vap->va_mtime = pn->pn_mc_mtime;
1004		if (pn->pn_stat & PNODE_METACACHE_SIZE)
1005			vap->va_size = pn->pn_mc_size;
1006	}
1007
1008	if (!(pn->pn_stat & PNODE_METACACHE_SIZE) && (flags & SETATTR_CHSIZE)) {
1009		if (rvap->va_size != VNOVAL
1010		    && vp->v_type != VBLK && vp->v_type != VCHR) {
1011			uvm_vnp_setsize(vp, rvap->va_size);
1012			pn->pn_serversize = rvap->va_size;
1013		}
1014	}
1015
1016	if ((va_ttl != NULL) && TTL_VALID(va_ttl)) {
1017		if (pn->pn_va_cache == NULL)
1018			pn->pn_va_cache = pool_get(&puffs_vapool, PR_WAITOK);
1019
1020		(void)memcpy(pn->pn_va_cache, rvap, sizeof(*rvap));
1021
1022		pn->pn_va_timeout = TTL_TO_TIMEOUT(va_ttl);
1023	}
1024}
1025
1026static void
1027update_parent(struct vnode *vp, struct vnode *dvp)
1028{
1029	struct puffs_node *pn = VPTOPP(vp);
1030
1031	if (pn->pn_parent != NULL) {
1032		KASSERT(pn->pn_parent != dvp);
1033		vrele(pn->pn_parent);
1034	}
1035
1036	vref(dvp);
1037	pn->pn_parent = dvp;
1038}
1039
1040int
1041puffs_vnop_getattr(void *v)
1042{
1043	struct vop_getattr_args /* {
1044		const struct vnodeop_desc *a_desc;
1045		struct vnode *a_vp;
1046		struct vattr *a_vap;
1047		kauth_cred_t a_cred;
1048	} */ *ap = v;
1049	PUFFS_MSG_VARS(vn, getattr);
1050	struct vnode *vp = ap->a_vp;
1051	struct mount *mp = vp->v_mount;
1052	struct puffs_mount *pmp = MPTOPUFFSMP(mp);
1053	struct vattr *vap, *rvap;
1054	struct puffs_node *pn = VPTOPP(vp);
1055	struct timespec *va_ttl = NULL;
1056	int error = 0;
1057
1058	/*
1059	 * A lock is required so that we do not race with
1060	 * setattr, write and fsync when changing vp->v_size.
1061	 * This is critical, since setting a stall smaler value
1062	 * triggers a file truncate in uvm_vnp_setsize(), which
1063	 * most of the time means data corruption (a chunk of
1064	 * data is replaced by zeroes). This can be removed if
1065	 * we decide one day that VOP_GETATTR must operate on
1066	 * a locked vnode.
1067	 *
1068	 * XXX Should be useless now that VOP_GETATTR has been
1069	 *     fixed to always require a shared lock at least.
1070	 */
1071	mutex_enter(&pn->pn_sizemtx);
1072
1073	REFPN(pn);
1074	vap = ap->a_vap;
1075
1076	if (PUFFS_USE_FS_TTL(pmp)) {
1077		if (!TIMED_OUT(pn->pn_va_timeout)) {
1078			update_va(vp, vap, pn->pn_va_cache,
1079				  NULL, NULL, SETATTR_CHSIZE);
1080			goto out2;
1081		}
1082	}
1083
1084	PUFFS_MSG_ALLOC(vn, getattr);
1085	vattr_null(&getattr_msg->pvnr_va);
1086	puffs_credcvt(&getattr_msg->pvnr_cred, ap->a_cred);
1087	puffs_msg_setinfo(park_getattr, PUFFSOP_VN,
1088	    PUFFS_VN_GETATTR, VPTOPNC(vp));
1089
1090	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_getattr, vp->v_data, NULL, error);
1091	error = checkerr(pmp, error, __func__);
1092	if (error)
1093		goto out;
1094
1095	rvap = &getattr_msg->pvnr_va;
1096
1097	if (PUFFS_USE_FS_TTL(pmp))
1098		va_ttl = &getattr_msg->pvnr_va_ttl;
1099
1100	update_va(vp, vap, rvap, va_ttl, NULL, SETATTR_CHSIZE);
1101
1102 out:
1103	PUFFS_MSG_RELEASE(getattr);
1104
1105 out2:
1106	puffs_releasenode(pn);
1107
1108	mutex_exit(&pn->pn_sizemtx);
1109
1110	return error;
1111}
1112
1113static int
1114dosetattr(struct vnode *vp, struct vattr *vap, kauth_cred_t cred, int flags)
1115{
1116	PUFFS_MSG_VARS(vn, setattr);
1117	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
1118	struct puffs_node *pn = vp->v_data;
1119	int error = 0;
1120
1121	KASSERT(!(flags & SETATTR_CHSIZE) || mutex_owned(&pn->pn_sizemtx));
1122
1123	if ((vp->v_mount->mnt_flag & MNT_RDONLY) &&
1124	    (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL
1125	    || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL
1126	    || vap->va_mode != (mode_t)VNOVAL))
1127		return EROFS;
1128
1129	if ((vp->v_mount->mnt_flag & MNT_RDONLY)
1130	    && vp->v_type == VREG && vap->va_size != VNOVAL)
1131		return EROFS;
1132
1133	/*
1134	 * Flush metacache first.  If we are called with some explicit
1135	 * parameters, treat them as information overriding metacache
1136	 * information.
1137	 */
1138	if (pn->pn_stat & PNODE_METACACHE_MASK) {
1139		if ((pn->pn_stat & PNODE_METACACHE_ATIME)
1140		    && vap->va_atime.tv_sec == VNOVAL)
1141			vap->va_atime = pn->pn_mc_atime;
1142		if ((pn->pn_stat & PNODE_METACACHE_CTIME)
1143		    && vap->va_ctime.tv_sec == VNOVAL)
1144			vap->va_ctime = pn->pn_mc_ctime;
1145		if ((pn->pn_stat & PNODE_METACACHE_MTIME)
1146		    && vap->va_mtime.tv_sec == VNOVAL)
1147			vap->va_mtime = pn->pn_mc_mtime;
1148		if ((pn->pn_stat & PNODE_METACACHE_SIZE)
1149		    && vap->va_size == VNOVAL)
1150			vap->va_size = pn->pn_mc_size;
1151
1152		pn->pn_stat &= ~PNODE_METACACHE_MASK;
1153	}
1154
1155	/*
1156	 * Flush attribute cache so that another thread do
1157	 * not get a stale value during the operation.
1158	 */
1159	if (PUFFS_USE_FS_TTL(pmp))
1160		pn->pn_va_timeout = 0;
1161
1162	PUFFS_MSG_ALLOC(vn, setattr);
1163	(void)memcpy(&setattr_msg->pvnr_va, vap, sizeof(struct vattr));
1164	puffs_credcvt(&setattr_msg->pvnr_cred, cred);
1165	puffs_msg_setinfo(park_setattr, PUFFSOP_VN,
1166	    PUFFS_VN_SETATTR, VPTOPNC(vp));
1167	if (flags & SETATTR_ASYNC)
1168		puffs_msg_setfaf(park_setattr);
1169
1170	puffs_msg_enqueue(pmp, park_setattr);
1171	if ((flags & SETATTR_ASYNC) == 0)
1172		error = puffs_msg_wait2(pmp, park_setattr, vp->v_data, NULL);
1173
1174	if ((error == 0) && PUFFS_USE_FS_TTL(pmp)) {
1175		struct timespec *va_ttl = &setattr_msg->pvnr_va_ttl;
1176		struct vattr *rvap = &setattr_msg->pvnr_va;
1177
1178		update_va(vp, NULL, rvap, va_ttl, NULL, flags);
1179	}
1180
1181	PUFFS_MSG_RELEASE(setattr);
1182	if ((flags & SETATTR_ASYNC) == 0) {
1183		error = checkerr(pmp, error, __func__);
1184		if (error)
1185			return error;
1186	} else {
1187		error = 0;
1188	}
1189
1190	if (vap->va_size != VNOVAL) {
1191		pn->pn_serversize = vap->va_size;
1192		if (flags & SETATTR_CHSIZE)
1193			uvm_vnp_setsize(vp, vap->va_size);
1194	}
1195
1196	return 0;
1197}
1198
1199int
1200puffs_vnop_setattr(void *v)
1201{
1202	struct vop_getattr_args /* {
1203		const struct vnodeop_desc *a_desc;
1204		struct vnode *a_vp;
1205		struct vattr *a_vap;
1206		kauth_cred_t a_cred;
1207	} */ *ap = v;
1208	struct puffs_node *pn = ap->a_vp->v_data;
1209	int error;
1210
1211	mutex_enter(&pn->pn_sizemtx);
1212	error = dosetattr(ap->a_vp, ap->a_vap, ap->a_cred, SETATTR_CHSIZE);
1213	mutex_exit(&pn->pn_sizemtx);
1214
1215	return error;
1216}
1217
1218static __inline int
1219doinact(struct puffs_mount *pmp, int iaflag)
1220{
1221
1222	if (EXISTSOP(pmp, INACTIVE))
1223		if (pmp->pmp_flags & PUFFS_KFLAG_IAONDEMAND)
1224			if (iaflag || ALLOPS(pmp))
1225				return 1;
1226			else
1227				return 0;
1228		else
1229			return 1;
1230	else
1231		return 0;
1232}
1233
1234static void
1235callinactive(struct puffs_mount *pmp, puffs_cookie_t ck, int iaflag)
1236{
1237	int error;
1238	PUFFS_MSG_VARS(vn, inactive);
1239
1240	if (doinact(pmp, iaflag)) {
1241		PUFFS_MSG_ALLOC(vn, inactive);
1242		puffs_msg_setinfo(park_inactive, PUFFSOP_VN,
1243		    PUFFS_VN_INACTIVE, ck);
1244
1245		PUFFS_MSG_ENQUEUEWAIT(pmp, park_inactive, error);
1246		PUFFS_MSG_RELEASE(inactive);
1247	}
1248}
1249
/* XXX: callinactive can't setback */
/*
 * VOP_INACTIVE: called when the vnode is no longer actively used.
 *
 * If doinact() asks for it, the vnode cache is flushed (asynchronously,
 * result ignored) and an inactive message is sent to the file server;
 * the server's reply code is not examined.  The function then decides
 * whether the vnode should be recycled right away and stores that
 * verdict in *ap->a_recycle:
 *  - if the node is marked PNODE_NOREFS, it is marked PNODE_DYING and
 *    recycled;
 *  - with file-server TTLs, a non-root node whose name timeout and
 *    grace have both expired (and which has no expiration queued) is
 *    also marked PNODE_DYING and recycled;
 *  - otherwise, with file-server TTLs, an expiration request is queued
 *    on pmp_sopnodereqs unless one is already queued.
 *
 * The vnode is unlocked before returning.  Always returns 0.
 */
int
puffs_vnop_inactive(void *v)
{
	struct vop_inactive_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		bool *a_recycle;
	} */ *ap = v;
	PUFFS_MSG_VARS(vn, inactive);
	struct vnode *vp = ap->a_vp;
	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
	struct puffs_node *pnode;
	bool recycle = false;
	int error;

	pnode = vp->v_data;
	/* held for the whole function; flushvncache() may do a setattr */
	mutex_enter(&pnode->pn_sizemtx);

	if (doinact(pmp, pnode->pn_stat & PNODE_DOINACT)) {
		flushvncache(vp, 0, 0, false);
		PUFFS_MSG_ALLOC(vn, inactive);
		puffs_msg_setinfo(park_inactive, PUFFSOP_VN,
		    PUFFS_VN_INACTIVE, VPTOPNC(vp));

		PUFFS_MSG_ENQUEUEWAIT2(pmp, park_inactive, vp->v_data,
		    NULL, error);
		PUFFS_MSG_RELEASE(inactive);
	}
	pnode->pn_stat &= ~PNODE_DOINACT;

	/*
	 * The file server thinks the node is gone?  Then there is no
	 * need to be careful with it: the node's life was already all
	 * it would ever be.
	 */
	if (pnode->pn_stat & PNODE_NOREFS) {
		pnode->pn_stat |= PNODE_DYING;
		recycle = true;
	}

	/*
	 * Handle node TTL.
	 * If grace has already timed out, make it reclaimed.
	 * Otherwise, we queue its expiration by sop thread, so
	 * that it does not remain for ages in the freelist,
	 * holding memory in userspace, while we will have
	 * to look it up again anyway.
	 */
	if (PUFFS_USE_FS_TTL(pmp) && !(vp->v_vflag & VV_ROOT) && !recycle) {
		bool incache = !TIMED_OUT(pnode->pn_cn_timeout);
		bool ingrace = !TIMED_OUT(pnode->pn_cn_grace);
		bool reclaimqueued = pnode->pn_stat & PNODE_SOPEXP;

		if (!incache && !ingrace && !reclaimqueued) {
			pnode->pn_stat |= PNODE_DYING;
			recycle = true;
		}

		if (!recycle && !reclaimqueued) {
			struct puffs_sopreq *psopr;
			/* expire at whichever of the two deadlines is later */
			int at = MAX(pnode->pn_cn_grace, pnode->pn_cn_timeout);

			KASSERT(curlwp != uvm.pagedaemon_lwp);
			psopr = kmem_alloc(sizeof(*psopr), KM_SLEEP);
			psopr->psopr_ck = VPTOPNC(pnode->pn_vp);
			psopr->psopr_sopreq = PUFFS_SOPREQ_EXPIRE;
			psopr->psopr_at = at;

			mutex_enter(&pmp->pmp_sopmtx);

			/*
			 * If the thread has disappeared, just give up.  The
			 * fs is being unmounted and the node will be
			 * reclaimed anyway.
			 *
			 * Otherwise, we queue the request but do not
			 * immediately signal the thread, as the node
			 * has not expired yet.
			 */
			if (pmp->pmp_sopthrcount == 0) {
				kmem_free(psopr, sizeof(*psopr));
			} else {
				TAILQ_INSERT_TAIL(&pmp->pmp_sopnodereqs,
				    psopr, psopr_entries);
				pnode->pn_stat |= PNODE_SOPEXP;
			}

			mutex_exit(&pmp->pmp_sopmtx);
		}
	}

	/* report the verdict to the caller */
	*ap->a_recycle = recycle;

	mutex_exit(&pnode->pn_sizemtx);
	VOP_UNLOCK(vp);

	return 0;
}
1347
1348static void
1349callreclaim(struct puffs_mount *pmp, puffs_cookie_t ck, int nlookup)
1350{
1351	PUFFS_MSG_VARS(vn, reclaim);
1352
1353	if (!EXISTSOP(pmp, RECLAIM))
1354		return;
1355
1356	PUFFS_MSG_ALLOC(vn, reclaim);
1357	reclaim_msg->pvnr_nlookup = nlookup;
1358	puffs_msg_setfaf(park_reclaim);
1359	puffs_msg_setinfo(park_reclaim, PUFFSOP_VN, PUFFS_VN_RECLAIM, ck);
1360
1361	puffs_msg_enqueue(pmp, park_reclaim);
1362	PUFFS_MSG_RELEASE(reclaim);
1363	return;
1364}
1365
1366/*
1367 * always FAF, we don't really care if the server wants to fail to
1368 * reclaim the node or not
1369 */
1370int
1371puffs_vnop_reclaim(void *v)
1372{
1373	struct vop_reclaim_args /* {
1374		const struct vnodeop_desc *a_desc;
1375		struct vnode *a_vp;
1376	} */ *ap = v;
1377	struct vnode *vp = ap->a_vp;
1378	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
1379	struct puffs_node *pnode = vp->v_data;
1380	bool notifyserver = true;
1381
1382	/*
1383	 * first things first: check if someone is trying to reclaim the
1384	 * root vnode.  do not allow that to travel to userspace.
1385	 * Note that we don't need to take the lock similarly to
1386	 * puffs_root(), since there is only one of us.
1387	 */
1388	if (vp->v_vflag & VV_ROOT) {
1389		mutex_enter(&pmp->pmp_lock);
1390		KASSERT(pmp->pmp_root != NULL);
1391		pmp->pmp_root = NULL;
1392		mutex_exit(&pmp->pmp_lock);
1393		notifyserver = false;
1394	}
1395
1396	/*
1397	 * purge info from kernel before issueing FAF, since we
1398	 * don't really know when we'll get around to it after
1399	 * that and someone might race us into node creation
1400	 */
1401	mutex_enter(&pmp->pmp_lock);
1402	LIST_REMOVE(pnode, pn_hashent);
1403	if (PUFFS_USE_NAMECACHE(pmp))
1404		cache_purge(vp);
1405	mutex_exit(&pmp->pmp_lock);
1406
1407	if (notifyserver) {
1408		int nlookup = VPTOPP(vp)->pn_nlookup;
1409
1410		callreclaim(MPTOPUFFSMP(vp->v_mount), VPTOPNC(vp), nlookup);
1411	}
1412
1413	if (PUFFS_USE_DOTDOTCACHE(pmp)) {
1414		if (__predict_true(VPTOPP(vp)->pn_parent != NULL))
1415			vrele(VPTOPP(vp)->pn_parent);
1416		else
1417			KASSERT(vp->v_vflag & VV_ROOT);
1418	}
1419
1420	puffs_putvnode(vp);
1421	vp->v_data = NULL;
1422
1423	return 0;
1424}
1425
/* size of a single directory cookie as handed back through a_cookies */
#define CSIZE sizeof(**ap->a_cookies)
/*
 * VOP_READDIR: read directory entries from the file server.
 *
 * A message with room for the directory entries (and, if the caller
 * wants cookies, for the cookies in front of them) is sent to the
 * server.  The reply is sanity-checked, the entries are copied out via
 * uiomove(), cookies are copied into a freshly allocated array, and
 * uio_offset is advanced to the offset returned by the server.
 *
 * Returns 0 on success, EOPNOTSUPP if cookies are requested but the
 * mount was set up with a file handle size of 0, EPROTO if the server
 * returned more data or cookies than requested, or an error from the
 * server / uiomove().
 */
int
puffs_vnop_readdir(void *v)
{
	struct vop_readdir_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		struct uio *a_uio;
		kauth_cred_t a_cred;
		int *a_eofflag;
		off_t **a_cookies;
		int *a_ncookies;
	} */ *ap = v;
	PUFFS_MSG_VARS(vn, readdir);
	struct vnode *vp = ap->a_vp;
	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
	size_t argsize, tomove, cookiemem, cookiesmax;
	struct uio *uio = ap->a_uio;
	size_t howmuch, resid;
	int error;

	/*
	 * ok, so we need: resid + cookiemem = maxreq
	 * => resid + cookiesize * (resid/minsize) = maxreq
	 * => resid + cookiesize/minsize * resid = maxreq
	 * => (cookiesize/minsize + 1) * resid = maxreq
	 * => resid = maxreq / (cookiesize/minsize + 1)
	 *
	 * Since cookiesize <= minsize and we're not very big on floats,
	 * we approximate that to be 1.  Therefore:
	 *
	 * resid = maxreq / 2;
	 *
	 * Well, at least we didn't have to use differential equations
	 * or the Gram-Schmidt process.
	 *
	 * (yes, I'm very afraid of this)
	 */
	KASSERT(CSIZE <= _DIRENT_MINSIZE((struct dirent *)0));

	if (ap->a_cookies) {
		KASSERT(ap->a_ncookies != NULL);
		if (pmp->pmp_args.pa_fhsize == 0)
			return EOPNOTSUPP;
		/* half of the budget for entries, the rest for cookies */
		resid = PUFFS_TOMOVE(uio->uio_resid, pmp) / 2;
		cookiesmax = resid/_DIRENT_MINSIZE((struct dirent *)0);
		cookiemem = ALIGN(cookiesmax*CSIZE); /* play safe */
	} else {
		resid = PUFFS_TOMOVE(uio->uio_resid, pmp);
		cookiesmax = 0;
		cookiemem = 0;
	}

	/* variable-sized message: fixed header plus cookie and entry space */
	argsize = sizeof(struct puffs_vnmsg_readdir);
	tomove = resid + cookiemem;
	puffs_msgmem_alloc(argsize + tomove, &park_readdir,
	    (void *)&readdir_msg, 1);

	puffs_credcvt(&readdir_msg->pvnr_cred, ap->a_cred);
	readdir_msg->pvnr_offset = uio->uio_offset;
	readdir_msg->pvnr_resid = resid;
	readdir_msg->pvnr_ncookies = cookiesmax;
	readdir_msg->pvnr_eofflag = 0;
	/* directory entries start right after the cookie area */
	readdir_msg->pvnr_dentoff = cookiemem;
	puffs_msg_setinfo(park_readdir, PUFFSOP_VN,
	    PUFFS_VN_READDIR, VPTOPNC(vp));
	puffs_msg_setdelta(park_readdir, tomove);

	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_readdir, vp->v_data, NULL, error);
	error = checkerr(pmp, error, __func__);
	if (error)
		goto out;

	/* userspace is cheating? */
	if (readdir_msg->pvnr_resid > resid) {
		puffs_senderr(pmp, PUFFS_ERR_READDIR, E2BIG,
		    "resid grew", VPTOPNC(vp));
		ERROUT(EPROTO);
	}
	if (readdir_msg->pvnr_ncookies > cookiesmax) {
		puffs_senderr(pmp, PUFFS_ERR_READDIR, E2BIG,
		    "too many cookies", VPTOPNC(vp));
		ERROUT(EPROTO);
	}

	/* check eof */
	if (readdir_msg->pvnr_eofflag)
		*ap->a_eofflag = 1;

	/* bouncy-wouncy with the directory data */
	/* the server decremented pvnr_resid by the amount it produced */
	howmuch = resid - readdir_msg->pvnr_resid;

	/* force eof if no data was returned (getcwd() needs this) */
	if (howmuch == 0) {
		*ap->a_eofflag = 1;
		goto out;
	}

	error = uiomove(readdir_msg->pvnr_data + cookiemem, howmuch, uio);
	if (error)
		goto out;

	/* provide cookies to caller if so desired */
	if (ap->a_cookies) {
		KASSERT(curlwp != uvm.pagedaemon_lwp);
		/* NOTE(review): presumably the caller frees this array */
		*ap->a_cookies = malloc(readdir_msg->pvnr_ncookies*CSIZE,
		    M_TEMP, M_WAITOK);
		*ap->a_ncookies = readdir_msg->pvnr_ncookies;
		memcpy(*ap->a_cookies, readdir_msg->pvnr_data,
		    *ap->a_ncookies*CSIZE);
	}

	/* next readdir starts here */
	uio->uio_offset = readdir_msg->pvnr_offset;

 out:
	puffs_msgmem_release(park_readdir);
	return error;
}
#undef CSIZE
1546
1547/*
1548 * poll works by consuming the bitmask in pn_revents.  If there are
1549 * events available, poll returns immediately.  If not, it issues a
1550 * poll to userspace, selrecords itself and returns with no available
1551 * events.  When the file server returns, it executes puffs_parkdone_poll(),
1552 * where available events are added to the bitmask.  selnotify() is
1553 * then also executed by that function causing us to enter here again
1554 * and hopefully find the missing bits (unless someone got them first,
1555 * in which case it starts all over again).
1556 */
1557int
1558puffs_vnop_poll(void *v)
1559{
1560	struct vop_poll_args /* {
1561		const struct vnodeop_desc *a_desc;
1562		struct vnode *a_vp;
1563		int a_events;
1564	} */ *ap = v;
1565	PUFFS_MSG_VARS(vn, poll);
1566	struct vnode *vp = ap->a_vp;
1567	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
1568	struct puffs_node *pn = vp->v_data;
1569	int events, error;
1570
1571	if (EXISTSOP(pmp, POLL)) {
1572		mutex_enter(&pn->pn_mtx);
1573		events = pn->pn_revents & ap->a_events;
1574		if (events & ap->a_events) {
1575			pn->pn_revents &= ~ap->a_events;
1576			mutex_exit(&pn->pn_mtx);
1577
1578			return events;
1579		} else {
1580			puffs_referencenode(pn);
1581			mutex_exit(&pn->pn_mtx);
1582
1583			PUFFS_MSG_ALLOC(vn, poll);
1584			poll_msg->pvnr_events = ap->a_events;
1585			puffs_msg_setinfo(park_poll, PUFFSOP_VN,
1586			    PUFFS_VN_POLL, VPTOPNC(vp));
1587			puffs_msg_setcall(park_poll, puffs_parkdone_poll, pn);
1588			selrecord(curlwp, &pn->pn_sel);
1589
1590			PUFFS_MSG_ENQUEUEWAIT2(pmp, park_poll, vp->v_data,
1591			    NULL, error);
1592			PUFFS_MSG_RELEASE(poll);
1593
1594			return 0;
1595		}
1596	} else {
1597		return genfs_poll(v);
1598	}
1599}
1600
1601static int
1602flushvncache(struct vnode *vp, off_t offlo, off_t offhi, bool wait)
1603{
1604	struct puffs_node *pn = VPTOPP(vp);
1605	struct vattr va;
1606	int pflags, error;
1607
1608	/* flush out information from our metacache, see vop_setattr */
1609	if (pn->pn_stat & PNODE_METACACHE_MASK
1610	    && (pn->pn_stat & PNODE_DYING) == 0) {
1611		vattr_null(&va);
1612		error = dosetattr(vp, &va, FSCRED,
1613		    SETATTR_CHSIZE | (wait ? 0 : SETATTR_ASYNC));
1614		if (error)
1615			return error;
1616	}
1617
1618	/*
1619	 * flush pages to avoid being overly dirty
1620	 */
1621	pflags = PGO_CLEANIT;
1622	if (wait)
1623		pflags |= PGO_SYNCIO;
1624
1625	mutex_enter(vp->v_interlock);
1626	return VOP_PUTPAGES(vp, trunc_page(offlo), round_page(offhi), pflags);
1627}
1628
/*
 * VOP_FSYNC: flush the vnode's cached state and, if the file server
 * supports it, forward the fsync request.
 *
 * The node's size mutex is held for the duration.  Without FSYNC_WAIT
 * the mutex is only tried, and EDEADLK is returned if it cannot be
 * taken immediately.  Returns the error from flushvncache(), or the
 * checkerr()-filtered result of the fsync message; 0 if no message
 * needs to be sent.
 */
int
puffs_vnop_fsync(void *v)
{
	struct vop_fsync_args /* {
		const struct vnodeop_desc *a_desc;
		struct vnode *a_vp;
		kauth_cred_t a_cred;
		int a_flags;
		off_t a_offlo;
		off_t a_offhi;
	} */ *ap = v;
	PUFFS_MSG_VARS(vn, fsync);
	struct vnode *vp;
	struct puffs_node *pn;
	struct puffs_mount *pmp;
	int error, dofaf;

	vp = ap->a_vp;
	KASSERT(vp != NULL);
	pn = VPTOPP(vp);
	KASSERT(pn != NULL);
	pmp = MPTOPUFFSMP(vp->v_mount);
	/* a non-waiting fsync must not block on the size mutex */
	if (ap->a_flags & FSYNC_WAIT) {
		mutex_enter(&pn->pn_sizemtx);
	} else {
		if (mutex_tryenter(&pn->pn_sizemtx) == 0)
			return EDEADLK;
	}

	/* first push out metacache and dirty pages */
	error = flushvncache(vp, ap->a_offlo, ap->a_offhi,
	    (ap->a_flags & FSYNC_WAIT) == FSYNC_WAIT);
	if (error)
		goto out;

	/*
	 * HELLO!  We exit already here if the user server does not
	 * support fsync OR if we should call fsync for a node which
	 * has references neither in the kernel or the fs server.
	 * Otherwise we continue to issue fsync() forward.
	 */
	error = 0;
	if (!EXISTSOP(pmp, FSYNC) || (pn->pn_stat & PNODE_DYING))
		goto out;

	/* do not wait for a reply unless the caller asked us to wait */
	dofaf = (ap->a_flags & FSYNC_WAIT) == 0 || ap->a_flags == FSYNC_LAZY;
	/*
	 * We abuse VXLOCK to mean "vnode is going to die", so we issue
	 * only FAFs for those.  Otherwise there's a danger of deadlock,
	 * since the execution context here might be the user server
	 * doing some operation on another fs, which in turn caused a
	 * vnode to be reclaimed from the freelist for this fs.
	 */
	if (dofaf == 0) {
		mutex_enter(vp->v_interlock);
		if (vp->v_iflag & VI_XLOCK)
			dofaf = 1;
		mutex_exit(vp->v_interlock);
	}

	PUFFS_MSG_ALLOC(vn, fsync);
	if (dofaf)
		puffs_msg_setfaf(park_fsync);

	/* pass the caller's request through to the server unchanged */
	puffs_credcvt(&fsync_msg->pvnr_cred, ap->a_cred);
	fsync_msg->pvnr_flags = ap->a_flags;
	fsync_msg->pvnr_offlo = ap->a_offlo;
	fsync_msg->pvnr_offhi = ap->a_offhi;
	puffs_msg_setinfo(park_fsync, PUFFSOP_VN,
	    PUFFS_VN_FSYNC, VPTOPNC(vp));

	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_fsync, vp->v_data, NULL, error);
	PUFFS_MSG_RELEASE(fsync);

	error = checkerr(pmp, error, __func__);

out:
	mutex_exit(&pn->pn_sizemtx);
	return error;
}
1708
1709int
1710puffs_vnop_seek(void *v)
1711{
1712	struct vop_seek_args /* {
1713		const struct vnodeop_desc *a_desc;
1714		struct vnode *a_vp;
1715		off_t a_oldoff;
1716		off_t a_newoff;
1717		kauth_cred_t a_cred;
1718	} */ *ap = v;
1719	PUFFS_MSG_VARS(vn, seek);
1720	struct vnode *vp = ap->a_vp;
1721	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
1722	int error;
1723
1724	PUFFS_MSG_ALLOC(vn, seek);
1725	seek_msg->pvnr_oldoff = ap->a_oldoff;
1726	seek_msg->pvnr_newoff = ap->a_newoff;
1727	puffs_credcvt(&seek_msg->pvnr_cred, ap->a_cred);
1728	puffs_msg_setinfo(park_seek, PUFFSOP_VN,
1729	    PUFFS_VN_SEEK, VPTOPNC(vp));
1730
1731	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_seek, vp->v_data, NULL, error);
1732	PUFFS_MSG_RELEASE(seek);
1733	return checkerr(pmp, error, __func__);
1734}
1735
1736static int
1737callremove(struct puffs_mount *pmp, puffs_cookie_t dck, puffs_cookie_t ck,
1738	struct componentname *cnp)
1739{
1740	PUFFS_MSG_VARS(vn, remove);
1741	int error;
1742
1743	PUFFS_MSG_ALLOC(vn, remove);
1744	remove_msg->pvnr_cookie_targ = ck;
1745	puffs_makecn(&remove_msg->pvnr_cn, &remove_msg->pvnr_cn_cred,
1746	    cnp, PUFFS_USE_FULLPNBUF(pmp));
1747	puffs_msg_setinfo(park_remove, PUFFSOP_VN, PUFFS_VN_REMOVE, dck);
1748
1749	PUFFS_MSG_ENQUEUEWAIT(pmp, park_remove, error);
1750	PUFFS_MSG_RELEASE(remove);
1751
1752	return checkerr(pmp, error, __func__);
1753}
1754
1755/*
1756 * XXX: can't use callremove now because can't catch setbacks with
1757 * it due to lack of a pnode argument.
1758 */
1759int
1760puffs_vnop_remove(void *v)
1761{
1762	struct vop_remove_args /* {
1763		const struct vnodeop_desc *a_desc;
1764		struct vnode *a_dvp;
1765		struct vnode *a_vp;
1766		struct componentname *a_cnp;
1767	} */ *ap = v;
1768	PUFFS_MSG_VARS(vn, remove);
1769	struct vnode *dvp = ap->a_dvp;
1770	struct vnode *vp = ap->a_vp;
1771	struct puffs_node *dpn = VPTOPP(dvp);
1772	struct puffs_node *pn = VPTOPP(vp);
1773	struct componentname *cnp = ap->a_cnp;
1774	struct mount *mp = dvp->v_mount;
1775	struct puffs_mount *pmp = MPTOPUFFSMP(mp);
1776	int error;
1777
1778	PUFFS_MSG_ALLOC(vn, remove);
1779	remove_msg->pvnr_cookie_targ = VPTOPNC(vp);
1780	puffs_makecn(&remove_msg->pvnr_cn, &remove_msg->pvnr_cn_cred,
1781	    cnp, PUFFS_USE_FULLPNBUF(pmp));
1782	puffs_msg_setinfo(park_remove, PUFFSOP_VN,
1783	    PUFFS_VN_REMOVE, VPTOPNC(dvp));
1784
1785	puffs_msg_enqueue(pmp, park_remove);
1786	REFPN_AND_UNLOCKVP(dvp, dpn);
1787	if (dvp == vp)
1788		REFPN(pn);
1789	else
1790		REFPN_AND_UNLOCKVP(vp, pn);
1791	error = puffs_msg_wait2(pmp, park_remove, dpn, pn);
1792
1793	PUFFS_MSG_RELEASE(remove);
1794
1795	RELEPN_AND_VP(dvp, dpn);
1796	RELEPN_AND_VP(vp, pn);
1797
1798	error = checkerr(pmp, error, __func__);
1799	return error;
1800}
1801
1802int
1803puffs_vnop_mkdir(void *v)
1804{
1805	struct vop_mkdir_args /* {
1806		const struct vnodeop_desc *a_desc;
1807		struct vnode *a_dvp;
1808		struct vnode **a_vpp;
1809		struct componentname *a_cnp;
1810		struct vattr *a_vap;
1811	} */ *ap = v;
1812	PUFFS_MSG_VARS(vn, mkdir);
1813	struct vnode *dvp = ap->a_dvp;
1814	struct puffs_node *dpn = VPTOPP(dvp);
1815	struct componentname *cnp = ap->a_cnp;
1816	struct mount *mp = dvp->v_mount;
1817	struct puffs_mount *pmp = MPTOPUFFSMP(mp);
1818	int error;
1819
1820	PUFFS_MSG_ALLOC(vn, mkdir);
1821	puffs_makecn(&mkdir_msg->pvnr_cn, &mkdir_msg->pvnr_cn_cred,
1822	    cnp, PUFFS_USE_FULLPNBUF(pmp));
1823	mkdir_msg->pvnr_va = *ap->a_vap;
1824	puffs_msg_setinfo(park_mkdir, PUFFSOP_VN,
1825	    PUFFS_VN_MKDIR, VPTOPNC(dvp));
1826
1827	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_mkdir, dvp->v_data, NULL, error);
1828
1829	error = checkerr(pmp, error, __func__);
1830	if (error)
1831		goto out;
1832
1833	error = puffs_newnode(mp, dvp, ap->a_vpp,
1834	    mkdir_msg->pvnr_newnode, cnp, VDIR, 0);
1835	if (error) {
1836		puffs_abortbutton(pmp, PUFFS_ABORT_MKDIR, dpn->pn_cookie,
1837		    mkdir_msg->pvnr_newnode, cnp);
1838		goto out;
1839	}
1840
1841	if (PUFFS_USE_FS_TTL(pmp)) {
1842		struct timespec *va_ttl = &mkdir_msg->pvnr_va_ttl;
1843		struct timespec *cn_ttl = &mkdir_msg->pvnr_cn_ttl;
1844		struct vattr *rvap = &mkdir_msg->pvnr_va;
1845
1846		update_va(*ap->a_vpp, NULL, rvap,
1847			  va_ttl, cn_ttl, SETATTR_CHSIZE);
1848	}
1849
1850	VPTOPP(*ap->a_vpp)->pn_nlookup++;
1851
1852	if (PUFFS_USE_DOTDOTCACHE(pmp) &&
1853	    (VPTOPP(*ap->a_vpp)->pn_parent != dvp))
1854		update_parent(*ap->a_vpp, dvp);
1855
1856 out:
1857	vput(dvp);
1858	PUFFS_MSG_RELEASE(mkdir);
1859	return error;
1860}
1861
1862static int
1863callrmdir(struct puffs_mount *pmp, puffs_cookie_t dck, puffs_cookie_t ck,
1864	struct componentname *cnp)
1865{
1866	PUFFS_MSG_VARS(vn, rmdir);
1867	int error;
1868
1869	PUFFS_MSG_ALLOC(vn, rmdir);
1870	rmdir_msg->pvnr_cookie_targ = ck;
1871	puffs_makecn(&rmdir_msg->pvnr_cn, &rmdir_msg->pvnr_cn_cred,
1872	    cnp, PUFFS_USE_FULLPNBUF(pmp));
1873	puffs_msg_setinfo(park_rmdir, PUFFSOP_VN, PUFFS_VN_RMDIR, dck);
1874
1875	PUFFS_MSG_ENQUEUEWAIT(pmp, park_rmdir, error);
1876	PUFFS_MSG_RELEASE(rmdir);
1877
1878	return checkerr(pmp, error, __func__);
1879}
1880
1881int
1882puffs_vnop_rmdir(void *v)
1883{
1884	struct vop_rmdir_args /* {
1885		const struct vnodeop_desc *a_desc;
1886		struct vnode *a_dvp;
1887		struct vnode *a_vp;
1888		struct componentname *a_cnp;
1889	} */ *ap = v;
1890	PUFFS_MSG_VARS(vn, rmdir);
1891	struct vnode *dvp = ap->a_dvp;
1892	struct vnode *vp = ap->a_vp;
1893	struct puffs_node *dpn = VPTOPP(dvp);
1894	struct puffs_node *pn = VPTOPP(vp);
1895	struct puffs_mount *pmp = MPTOPUFFSMP(dvp->v_mount);
1896	struct componentname *cnp = ap->a_cnp;
1897	int error;
1898
1899	PUFFS_MSG_ALLOC(vn, rmdir);
1900	rmdir_msg->pvnr_cookie_targ = VPTOPNC(vp);
1901	puffs_makecn(&rmdir_msg->pvnr_cn, &rmdir_msg->pvnr_cn_cred,
1902	    cnp, PUFFS_USE_FULLPNBUF(pmp));
1903	puffs_msg_setinfo(park_rmdir, PUFFSOP_VN,
1904	    PUFFS_VN_RMDIR, VPTOPNC(dvp));
1905
1906	puffs_msg_enqueue(pmp, park_rmdir);
1907	REFPN_AND_UNLOCKVP(dvp, dpn);
1908	REFPN_AND_UNLOCKVP(vp, pn);
1909	error = puffs_msg_wait2(pmp, park_rmdir, dpn, pn);
1910
1911	PUFFS_MSG_RELEASE(rmdir);
1912
1913	/* XXX: some call cache_purge() *for both vnodes* here, investigate */
1914	RELEPN_AND_VP(dvp, dpn);
1915	RELEPN_AND_VP(vp, pn);
1916
1917	return error;
1918}
1919
1920int
1921puffs_vnop_link(void *v)
1922{
1923	struct vop_link_args /* {
1924		const struct vnodeop_desc *a_desc;
1925		struct vnode *a_dvp;
1926		struct vnode *a_vp;
1927		struct componentname *a_cnp;
1928	} */ *ap = v;
1929	PUFFS_MSG_VARS(vn, link);
1930	struct vnode *dvp = ap->a_dvp;
1931	struct vnode *vp = ap->a_vp;
1932	struct puffs_node *dpn = VPTOPP(dvp);
1933	struct puffs_node *pn = VPTOPP(vp);
1934	struct puffs_mount *pmp = MPTOPUFFSMP(dvp->v_mount);
1935	struct componentname *cnp = ap->a_cnp;
1936	int error;
1937
1938	PUFFS_MSG_ALLOC(vn, link);
1939	link_msg->pvnr_cookie_targ = VPTOPNC(vp);
1940	puffs_makecn(&link_msg->pvnr_cn, &link_msg->pvnr_cn_cred,
1941	    cnp, PUFFS_USE_FULLPNBUF(pmp));
1942	puffs_msg_setinfo(park_link, PUFFSOP_VN,
1943	    PUFFS_VN_LINK, VPTOPNC(dvp));
1944
1945	puffs_msg_enqueue(pmp, park_link);
1946	REFPN_AND_UNLOCKVP(dvp, dpn);
1947	REFPN(pn);
1948	error = puffs_msg_wait2(pmp, park_link, dpn, pn);
1949
1950	PUFFS_MSG_RELEASE(link);
1951
1952	error = checkerr(pmp, error, __func__);
1953
1954	/*
1955	 * XXX: stay in touch with the cache.  I don't like this, but
1956	 * don't have a better solution either.  See also puffs_rename().
1957	 */
1958	if (error == 0)
1959		puffs_updatenode(pn, PUFFS_UPDATECTIME, 0);
1960
1961	RELEPN_AND_VP(dvp, dpn);
1962	puffs_releasenode(pn);
1963
1964	return error;
1965}
1966
1967int
1968puffs_vnop_symlink(void *v)
1969{
1970	struct vop_symlink_args /* {
1971		const struct vnodeop_desc *a_desc;
1972		struct vnode *a_dvp;
1973		struct vnode **a_vpp;
1974		struct componentname *a_cnp;
1975		struct vattr *a_vap;
1976		char *a_target;
1977	} */ *ap = v;
1978	PUFFS_MSG_VARS(vn, symlink);
1979	struct vnode *dvp = ap->a_dvp;
1980	struct puffs_node *dpn = VPTOPP(dvp);
1981	struct mount *mp = dvp->v_mount;
1982	struct puffs_mount *pmp = MPTOPUFFSMP(dvp->v_mount);
1983	struct componentname *cnp = ap->a_cnp;
1984	int error;
1985
1986	*ap->a_vpp = NULL;
1987
1988	PUFFS_MSG_ALLOC(vn, symlink);
1989	puffs_makecn(&symlink_msg->pvnr_cn, &symlink_msg->pvnr_cn_cred,
1990		cnp, PUFFS_USE_FULLPNBUF(pmp));
1991	symlink_msg->pvnr_va = *ap->a_vap;
1992	(void)strlcpy(symlink_msg->pvnr_link, ap->a_target,
1993	    sizeof(symlink_msg->pvnr_link));
1994	puffs_msg_setinfo(park_symlink, PUFFSOP_VN,
1995	    PUFFS_VN_SYMLINK, VPTOPNC(dvp));
1996
1997	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_symlink, dvp->v_data, NULL, error);
1998
1999	error = checkerr(pmp, error, __func__);
2000	if (error)
2001		goto out;
2002
2003	error = puffs_newnode(mp, dvp, ap->a_vpp,
2004	    symlink_msg->pvnr_newnode, cnp, VLNK, 0);
2005	if (error) {
2006		puffs_abortbutton(pmp, PUFFS_ABORT_SYMLINK, dpn->pn_cookie,
2007		    symlink_msg->pvnr_newnode, cnp);
2008		goto out;
2009	}
2010
2011	if (PUFFS_USE_FS_TTL(pmp)) {
2012		struct timespec *va_ttl = &symlink_msg->pvnr_va_ttl;
2013		struct timespec *cn_ttl = &symlink_msg->pvnr_cn_ttl;
2014		struct vattr *rvap = &symlink_msg->pvnr_va;
2015
2016		update_va(*ap->a_vpp, NULL, rvap,
2017			  va_ttl, cn_ttl, SETATTR_CHSIZE);
2018	}
2019
2020	VPTOPP(*ap->a_vpp)->pn_nlookup++;
2021
2022	if (PUFFS_USE_DOTDOTCACHE(pmp) &&
2023	    (VPTOPP(*ap->a_vpp)->pn_parent != dvp))
2024		update_parent(*ap->a_vpp, dvp);
2025
2026 out:
2027	vput(dvp);
2028	PUFFS_MSG_RELEASE(symlink);
2029
2030	return error;
2031}
2032
2033int
2034puffs_vnop_readlink(void *v)
2035{
2036	struct vop_readlink_args /* {
2037		const struct vnodeop_desc *a_desc;
2038		struct vnode *a_vp;
2039		struct uio *a_uio;
2040		kauth_cred_t a_cred;
2041	} */ *ap = v;
2042	PUFFS_MSG_VARS(vn, readlink);
2043	struct vnode *vp = ap->a_vp;
2044	struct puffs_mount *pmp = MPTOPUFFSMP(ap->a_vp->v_mount);
2045	size_t linklen;
2046	int error;
2047
2048	PUFFS_MSG_ALLOC(vn, readlink);
2049	puffs_credcvt(&readlink_msg->pvnr_cred, ap->a_cred);
2050	linklen = sizeof(readlink_msg->pvnr_link);
2051	readlink_msg->pvnr_linklen = linklen;
2052	puffs_msg_setinfo(park_readlink, PUFFSOP_VN,
2053	    PUFFS_VN_READLINK, VPTOPNC(vp));
2054
2055	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_readlink, vp->v_data, NULL, error);
2056	error = checkerr(pmp, error, __func__);
2057	if (error)
2058		goto out;
2059
2060	/* bad bad user file server */
2061	if (readlink_msg->pvnr_linklen > linklen) {
2062		puffs_senderr(pmp, PUFFS_ERR_READLINK, E2BIG,
2063		    "linklen too big", VPTOPNC(ap->a_vp));
2064		error = EPROTO;
2065		goto out;
2066	}
2067
2068	error = uiomove(&readlink_msg->pvnr_link, readlink_msg->pvnr_linklen,
2069	    ap->a_uio);
2070 out:
2071	PUFFS_MSG_RELEASE(readlink);
2072	return error;
2073}
2074
2075int
2076puffs_vnop_rename(void *v)
2077{
2078	struct vop_rename_args /* {
2079		const struct vnodeop_desc *a_desc;
2080		struct vnode *a_fdvp;
2081		struct vnode *a_fvp;
2082		struct componentname *a_fcnp;
2083		struct vnode *a_tdvp;
2084		struct vnode *a_tvp;
2085		struct componentname *a_tcnp;
2086	} */ *ap = v;
2087	PUFFS_MSG_VARS(vn, rename);
2088	struct vnode *fdvp = ap->a_fdvp, *fvp = ap->a_fvp;
2089	struct vnode *tdvp = ap->a_tdvp, *tvp = ap->a_tvp;
2090	struct puffs_node *fpn = ap->a_fvp->v_data;
2091	struct puffs_mount *pmp = MPTOPUFFSMP(fdvp->v_mount);
2092	int error;
2093	bool doabort = true;
2094
2095	if ((fvp->v_mount != tdvp->v_mount) ||
2096	    (tvp && (fvp->v_mount != tvp->v_mount))) {
2097		ERROUT(EXDEV);
2098	}
2099
2100	PUFFS_MSG_ALLOC(vn, rename);
2101	rename_msg->pvnr_cookie_src = VPTOPNC(fvp);
2102	rename_msg->pvnr_cookie_targdir = VPTOPNC(tdvp);
2103	if (tvp)
2104		rename_msg->pvnr_cookie_targ = VPTOPNC(tvp);
2105	else
2106		rename_msg->pvnr_cookie_targ = NULL;
2107	puffs_makecn(&rename_msg->pvnr_cn_src, &rename_msg->pvnr_cn_src_cred,
2108	    ap->a_fcnp, PUFFS_USE_FULLPNBUF(pmp));
2109	puffs_makecn(&rename_msg->pvnr_cn_targ, &rename_msg->pvnr_cn_targ_cred,
2110	    ap->a_tcnp, PUFFS_USE_FULLPNBUF(pmp));
2111	puffs_msg_setinfo(park_rename, PUFFSOP_VN,
2112	    PUFFS_VN_RENAME, VPTOPNC(fdvp));
2113
2114	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_rename, fdvp->v_data, NULL, error);
2115	doabort = false;
2116	PUFFS_MSG_RELEASE(rename);
2117	error = checkerr(pmp, error, __func__);
2118
2119	/*
2120	 * XXX: stay in touch with the cache.  I don't like this, but
2121	 * don't have a better solution either.  See also puffs_link().
2122	 */
2123	if (error == 0) {
2124		puffs_updatenode(fpn, PUFFS_UPDATECTIME, 0);
2125
2126		if (PUFFS_USE_DOTDOTCACHE(pmp) &&
2127		    (VPTOPP(fvp)->pn_parent != tdvp))
2128			update_parent(fvp, tdvp);
2129	}
2130
2131
2132 out:
2133	if (doabort)
2134		VOP_ABORTOP(tdvp, ap->a_tcnp);
2135	if (tvp != NULL)
2136		vput(tvp);
2137	if (tdvp == tvp)
2138		vrele(tdvp);
2139	else
2140		vput(tdvp);
2141
2142	if (doabort)
2143		VOP_ABORTOP(fdvp, ap->a_fcnp);
2144	vrele(fdvp);
2145	vrele(fvp);
2146
2147	return error;
2148}
2149
/*
 * Fill in the fields shared by read and write messages: I/O flags,
 * number of bytes to move (stored as the initial resid), file offset
 * and the credentials converted with puffs_credcvt().
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays correct as the body of an unbraced if/else/loop.
 */
#define RWARGS(cont, iofl, move, offset, creds)				\
do {									\
	(cont)->pvnr_ioflag = (iofl);					\
	(cont)->pvnr_resid = (move);					\
	(cont)->pvnr_offset = (offset);					\
	puffs_credcvt(&(cont)->pvnr_cred, creds);			\
} while (/*CONSTCOND*/0)
2155
2156int
2157puffs_vnop_read(void *v)
2158{
2159	struct vop_read_args /* {
2160		const struct vnodeop_desc *a_desc;
2161		struct vnode *a_vp;
2162		struct uio *a_uio;
2163		int a_ioflag;
2164		kauth_cred_t a_cred;
2165	} */ *ap = v;
2166	PUFFS_MSG_VARS(vn, read);
2167	struct vnode *vp = ap->a_vp;
2168	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2169	struct uio *uio = ap->a_uio;
2170	size_t tomove, argsize;
2171	vsize_t bytelen;
2172	int error;
2173
2174	read_msg = NULL;
2175	error = 0;
2176
2177	/* std sanity */
2178	if (uio->uio_resid == 0)
2179		return 0;
2180	if (uio->uio_offset < 0)
2181		return EINVAL;
2182
2183	if (vp->v_type == VREG && PUFFS_USE_PAGECACHE(pmp)) {
2184		const int advice = IO_ADV_DECODE(ap->a_ioflag);
2185
2186		while (uio->uio_resid > 0) {
2187			if (vp->v_size <= uio->uio_offset) {
2188				break;
2189			}
2190			bytelen = MIN(uio->uio_resid,
2191			    vp->v_size - uio->uio_offset);
2192			if (bytelen == 0)
2193				break;
2194
2195			error = ubc_uiomove(&vp->v_uobj, uio, bytelen, advice,
2196			    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
2197			if (error)
2198				break;
2199		}
2200
2201		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
2202			puffs_updatenode(VPTOPP(vp), PUFFS_UPDATEATIME, 0);
2203	} else {
2204		/*
2205		 * in case it's not a regular file or we're operating
2206		 * uncached, do read in the old-fashioned style,
2207		 * i.e. explicit read operations
2208		 */
2209
2210		tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
2211		argsize = sizeof(struct puffs_vnmsg_read);
2212		puffs_msgmem_alloc(argsize + tomove, &park_read,
2213		    (void *)&read_msg, 1);
2214
2215		error = 0;
2216		while (uio->uio_resid > 0) {
2217			tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
2218			memset(read_msg, 0, argsize); /* XXX: touser KASSERT */
2219			RWARGS(read_msg, ap->a_ioflag, tomove,
2220			    uio->uio_offset, ap->a_cred);
2221			puffs_msg_setinfo(park_read, PUFFSOP_VN,
2222			    PUFFS_VN_READ, VPTOPNC(vp));
2223			puffs_msg_setdelta(park_read, tomove);
2224
2225			PUFFS_MSG_ENQUEUEWAIT2(pmp, park_read, vp->v_data,
2226			    NULL, error);
2227			error = checkerr(pmp, error, __func__);
2228			if (error)
2229				break;
2230
2231			if (read_msg->pvnr_resid > tomove) {
2232				puffs_senderr(pmp, PUFFS_ERR_READ,
2233				    E2BIG, "resid grew", VPTOPNC(ap->a_vp));
2234				error = EPROTO;
2235				break;
2236			}
2237
2238			error = uiomove(read_msg->pvnr_data,
2239			    tomove - read_msg->pvnr_resid, uio);
2240
2241			/*
2242			 * in case the file is out of juice, resid from
2243			 * userspace is != 0.  and the error-case is
2244			 * quite obvious
2245			 */
2246			if (error || read_msg->pvnr_resid)
2247				break;
2248		}
2249
2250		puffs_msgmem_release(park_read);
2251	}
2252
2253	return error;
2254}
2255
2256/*
2257 * XXX: in case of a failure, this leaves uio in a bad state.
2258 * We could theoretically copy the uio and iovecs and "replay"
2259 * them the right amount after the userspace trip, but don't
2260 * bother for now.
2261 */
2262int
2263puffs_vnop_write(void *v)
2264{
2265	struct vop_write_args /* {
2266		const struct vnodeop_desc *a_desc;
2267		struct vnode *a_vp;
2268		struct uio *a_uio;
2269		int a_ioflag;
2270		kauth_cred_t a_cred;
2271	} */ *ap = v;
2272	PUFFS_MSG_VARS(vn, write);
2273	struct vnode *vp = ap->a_vp;
2274	struct puffs_node *pn = VPTOPP(vp);
2275	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2276	struct uio *uio = ap->a_uio;
2277	size_t tomove, argsize;
2278	off_t oldoff, newoff, origoff;
2279	vsize_t bytelen;
2280	int error, uflags;
2281	int ubcflags;
2282
2283	error = uflags = 0;
2284	write_msg = NULL;
2285
2286	mutex_enter(&pn->pn_sizemtx);
2287
2288	if (vp->v_type == VREG && PUFFS_USE_PAGECACHE(pmp)) {
2289		ubcflags = UBC_WRITE | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp);
2290
2291		/*
2292		 * userspace *should* be allowed to control this,
2293		 * but with UBC it's a bit unclear how to handle it
2294		 */
2295		if (ap->a_ioflag & IO_APPEND)
2296			uio->uio_offset = vp->v_size;
2297
2298		origoff = uio->uio_offset;
2299		while (uio->uio_resid > 0) {
2300			if (vp->v_mount->mnt_flag & MNT_RELATIME)
2301				uflags |= PUFFS_UPDATEATIME;
2302			uflags |= PUFFS_UPDATECTIME;
2303			uflags |= PUFFS_UPDATEMTIME;
2304			oldoff = uio->uio_offset;
2305			bytelen = uio->uio_resid;
2306
2307			newoff = oldoff + bytelen;
2308			if (vp->v_size < newoff) {
2309				uvm_vnp_setwritesize(vp, newoff);
2310			}
2311			error = ubc_uiomove(&vp->v_uobj, uio, bytelen,
2312			    UVM_ADV_RANDOM, ubcflags);
2313
2314			/*
2315			 * In case of a ubc_uiomove() error,
2316			 * opt to not extend the file at all and
2317			 * return an error.  Otherwise, if we attempt
2318			 * to clear the memory we couldn't fault to,
2319			 * we might generate a kernel page fault.
2320			 */
2321			if (vp->v_size < newoff) {
2322				if (error == 0) {
2323					uflags |= PUFFS_UPDATESIZE;
2324					uvm_vnp_setsize(vp, newoff);
2325				} else {
2326					uvm_vnp_setwritesize(vp, vp->v_size);
2327				}
2328			}
2329			if (error)
2330				break;
2331
2332			/*
2333			 * If we're writing large files, flush to file server
2334			 * every 64k.  Otherwise we can very easily exhaust
2335			 * kernel and user memory, as the file server cannot
2336			 * really keep up with our writing speed.
2337			 *
2338			 * Note: this does *NOT* honor MNT_ASYNC, because
2339			 * that gives userland too much say in the kernel.
2340			 */
2341			if (oldoff >> 16 != uio->uio_offset >> 16) {
2342				mutex_enter(vp->v_interlock);
2343				error = VOP_PUTPAGES(vp, oldoff & ~0xffff,
2344				    uio->uio_offset & ~0xffff,
2345				    PGO_CLEANIT | PGO_SYNCIO);
2346				if (error)
2347					break;
2348			}
2349		}
2350
2351		/* synchronous I/O? */
2352		if (error == 0 && ap->a_ioflag & IO_SYNC) {
2353			mutex_enter(vp->v_interlock);
2354			error = VOP_PUTPAGES(vp, trunc_page(origoff),
2355			    round_page(uio->uio_offset),
2356			    PGO_CLEANIT | PGO_SYNCIO);
2357
2358		/* write through page cache? */
2359		} else if (error == 0 && pmp->pmp_flags & PUFFS_KFLAG_WTCACHE) {
2360			mutex_enter(vp->v_interlock);
2361			error = VOP_PUTPAGES(vp, trunc_page(origoff),
2362			    round_page(uio->uio_offset), PGO_CLEANIT);
2363		}
2364
2365		puffs_updatenode(VPTOPP(vp), uflags, vp->v_size);
2366	} else {
2367		/* tomove is non-increasing */
2368		tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
2369		argsize = sizeof(struct puffs_vnmsg_write) + tomove;
2370		puffs_msgmem_alloc(argsize, &park_write, (void *)&write_msg,1);
2371
2372		while (uio->uio_resid > 0) {
2373			/* move data to buffer */
2374			tomove = PUFFS_TOMOVE(uio->uio_resid, pmp);
2375			memset(write_msg, 0, argsize); /* XXX: touser KASSERT */
2376			RWARGS(write_msg, ap->a_ioflag, tomove,
2377			    uio->uio_offset, ap->a_cred);
2378			error = uiomove(write_msg->pvnr_data, tomove, uio);
2379			if (error)
2380				break;
2381
2382			/* move buffer to userspace */
2383			puffs_msg_setinfo(park_write, PUFFSOP_VN,
2384			    PUFFS_VN_WRITE, VPTOPNC(vp));
2385			PUFFS_MSG_ENQUEUEWAIT2(pmp, park_write, vp->v_data,
2386			    NULL, error);
2387			error = checkerr(pmp, error, __func__);
2388			if (error)
2389				break;
2390
2391			if (write_msg->pvnr_resid > tomove) {
2392				puffs_senderr(pmp, PUFFS_ERR_WRITE,
2393				    E2BIG, "resid grew", VPTOPNC(ap->a_vp));
2394				error = EPROTO;
2395				break;
2396			}
2397
2398			/* adjust file size */
2399			if (vp->v_size < uio->uio_offset)
2400				uvm_vnp_setsize(vp, uio->uio_offset);
2401
2402			/* didn't move everything?  bad userspace.  bail */
2403			if (write_msg->pvnr_resid != 0) {
2404				error = EIO;
2405				break;
2406			}
2407		}
2408		puffs_msgmem_release(park_write);
2409	}
2410
2411	mutex_exit(&pn->pn_sizemtx);
2412	return error;
2413}
2414
2415int
2416puffs_vnop_print(void *v)
2417{
2418	struct vop_print_args /* {
2419		struct vnode *a_vp;
2420	} */ *ap = v;
2421	PUFFS_MSG_VARS(vn, print);
2422	struct vnode *vp = ap->a_vp;
2423	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2424	struct puffs_node *pn = vp->v_data;
2425	int error;
2426
2427	/* kernel portion */
2428	printf("tag VT_PUFFS, vnode %p, puffs node: %p,\n"
2429	    "\tuserspace cookie: %p", vp, pn, pn->pn_cookie);
2430	if (vp->v_type == VFIFO)
2431		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
2432	printf("\n");
2433
2434	/* userspace portion */
2435	if (EXISTSOP(pmp, PRINT)) {
2436		PUFFS_MSG_ALLOC(vn, print);
2437		puffs_msg_setinfo(park_print, PUFFSOP_VN,
2438		    PUFFS_VN_PRINT, VPTOPNC(vp));
2439		PUFFS_MSG_ENQUEUEWAIT2(pmp, park_print, vp->v_data,
2440		    NULL, error);
2441		PUFFS_MSG_RELEASE(print);
2442	}
2443
2444	return 0;
2445}
2446
2447int
2448puffs_vnop_pathconf(void *v)
2449{
2450	struct vop_pathconf_args /* {
2451		const struct vnodeop_desc *a_desc;
2452		struct vnode *a_vp;
2453		int a_name;
2454		register_t *a_retval;
2455	} */ *ap = v;
2456	PUFFS_MSG_VARS(vn, pathconf);
2457	struct vnode *vp = ap->a_vp;
2458	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2459	int error;
2460
2461	PUFFS_MSG_ALLOC(vn, pathconf);
2462	pathconf_msg->pvnr_name = ap->a_name;
2463	puffs_msg_setinfo(park_pathconf, PUFFSOP_VN,
2464	    PUFFS_VN_PATHCONF, VPTOPNC(vp));
2465	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_pathconf, vp->v_data, NULL, error);
2466	error = checkerr(pmp, error, __func__);
2467	if (!error)
2468		*ap->a_retval = pathconf_msg->pvnr_retval;
2469	PUFFS_MSG_RELEASE(pathconf);
2470
2471	return error;
2472}
2473
2474int
2475puffs_vnop_advlock(void *v)
2476{
2477	struct vop_advlock_args /* {
2478		const struct vnodeop_desc *a_desc;
2479		struct vnode *a_vp;
2480		void *a_id;
2481		int a_op;
2482		struct flock *a_fl;
2483		int a_flags;
2484	} */ *ap = v;
2485	PUFFS_MSG_VARS(vn, advlock);
2486	struct vnode *vp = ap->a_vp;
2487	struct puffs_node *pn = VPTOPP(vp);
2488	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2489	int error;
2490
2491	if (!EXISTSOP(pmp, ADVLOCK))
2492		return lf_advlock(ap, &pn->pn_lockf, vp->v_size);
2493
2494	PUFFS_MSG_ALLOC(vn, advlock);
2495	(void)memcpy(&advlock_msg->pvnr_fl, ap->a_fl,
2496		     sizeof(advlock_msg->pvnr_fl));
2497	advlock_msg->pvnr_id = ap->a_id;
2498	advlock_msg->pvnr_op = ap->a_op;
2499	advlock_msg->pvnr_flags = ap->a_flags;
2500	puffs_msg_setinfo(park_advlock, PUFFSOP_VN,
2501	    PUFFS_VN_ADVLOCK, VPTOPNC(vp));
2502	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_advlock, vp->v_data, NULL, error);
2503	error = checkerr(pmp, error, __func__);
2504	PUFFS_MSG_RELEASE(advlock);
2505
2506	return error;
2507}
2508
2509int
2510puffs_vnop_abortop(void *v)
2511{
2512	struct vop_abortop_args /* {
2513		struct vnode *a_dvp;
2514		struct componentname *a_cnp;
2515	}; */ *ap = v;
2516	PUFFS_MSG_VARS(vn, abortop);
2517	struct vnode *dvp = ap->a_dvp;
2518	struct puffs_mount *pmp = MPTOPUFFSMP(dvp->v_mount);
2519	struct componentname *cnp = ap->a_cnp;
2520
2521	if (EXISTSOP(pmp, ABORTOP)) {
2522		PUFFS_MSG_ALLOC(vn, abortop);
2523		puffs_makecn(&abortop_msg->pvnr_cn, &abortop_msg->pvnr_cn_cred,
2524		    cnp, PUFFS_USE_FULLPNBUF(pmp));
2525		puffs_msg_setfaf(park_abortop);
2526		puffs_msg_setinfo(park_abortop, PUFFSOP_VN,
2527		    PUFFS_VN_ABORTOP, VPTOPNC(dvp));
2528
2529		puffs_msg_enqueue(pmp, park_abortop);
2530		PUFFS_MSG_RELEASE(abortop);
2531	}
2532
2533	return genfs_abortop(v);
2534}
2535
/*
 * Nonzero if the B_ASYNC flag is set on buffer bp.  The argument is
 * parenthesized so that any pointer expression may be passed.
 */
#define BIOASYNC(bp) ((bp)->b_flags & B_ASYNC)
2537
2538/*
2539 * This maps itself to PUFFS_VN_READ/WRITE for data transfer.
2540 */
2541int
2542puffs_vnop_strategy(void *v)
2543{
2544	struct vop_strategy_args /* {
2545		const struct vnodeop_desc *a_desc;
2546		struct vnode *a_vp;
2547		struct buf *a_bp;
2548	} */ *ap = v;
2549	PUFFS_MSG_VARS(vn, rw);
2550	struct vnode *vp = ap->a_vp;
2551	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2552	struct puffs_node *pn;
2553	struct buf *bp;
2554	size_t argsize;
2555	size_t tomove, moved;
2556	int error, dofaf, cansleep, dobiodone;
2557
2558	pmp = MPTOPUFFSMP(vp->v_mount);
2559	bp = ap->a_bp;
2560	error = 0;
2561	dofaf = 0;
2562	cansleep = 0;
2563	pn = VPTOPP(vp);
2564	park_rw = NULL; /* explicit */
2565	dobiodone = 1;
2566
2567	if ((BUF_ISREAD(bp) && !EXISTSOP(pmp, READ))
2568	    || (BUF_ISWRITE(bp) && !EXISTSOP(pmp, WRITE)))
2569		ERROUT(EOPNOTSUPP);
2570
2571	/*
2572	 * Short-circuit optimization: don't flush buffer in between
2573	 * VOP_INACTIVE and VOP_RECLAIM in case the node has no references.
2574	 */
2575	if (pn->pn_stat & PNODE_DYING) {
2576		KASSERT(BUF_ISWRITE(bp));
2577		bp->b_resid = 0;
2578		goto out;
2579	}
2580
2581#ifdef DIAGNOSTIC
2582	if (bp->b_bcount > pmp->pmp_msg_maxsize - PUFFS_MSGSTRUCT_MAX)
2583		panic("puffs_strategy: wildly inappropriate buf bcount %d",
2584		    bp->b_bcount);
2585#endif
2586
2587	/*
2588	 * See explanation for the necessity of a FAF in puffs_fsync.
2589	 *
2590	 * Also, do FAF in case we're suspending.
2591	 * See puffs_vfsops.c:pageflush()
2592	 */
2593	if (BUF_ISWRITE(bp)) {
2594		mutex_enter(vp->v_interlock);
2595		if (vp->v_iflag & VI_XLOCK)
2596			dofaf = 1;
2597		if (pn->pn_stat & PNODE_FAF)
2598			dofaf = 1;
2599		mutex_exit(vp->v_interlock);
2600	}
2601
2602	cansleep = (curlwp == uvm.pagedaemon_lwp || dofaf) ? 0 : 1;
2603
2604	KASSERT(curlwp != uvm.pagedaemon_lwp || dofaf || BIOASYNC(bp));
2605
2606	/* allocate transport structure */
2607	tomove = PUFFS_TOMOVE(bp->b_bcount, pmp);
2608	argsize = sizeof(struct puffs_vnmsg_rw);
2609	error = puffs_msgmem_alloc(argsize + tomove, &park_rw,
2610	    (void *)&rw_msg, cansleep);
2611	if (error)
2612		goto out;
2613	RWARGS(rw_msg, 0, tomove, bp->b_blkno << DEV_BSHIFT, FSCRED);
2614
2615	/* 2x2 cases: read/write, faf/nofaf */
2616	if (BUF_ISREAD(bp)) {
2617		puffs_msg_setinfo(park_rw, PUFFSOP_VN,
2618		    PUFFS_VN_READ, VPTOPNC(vp));
2619		puffs_msg_setdelta(park_rw, tomove);
2620		if (BIOASYNC(bp)) {
2621			puffs_msg_setcall(park_rw,
2622			    puffs_parkdone_asyncbioread, bp);
2623			puffs_msg_enqueue(pmp, park_rw);
2624			dobiodone = 0;
2625		} else {
2626			PUFFS_MSG_ENQUEUEWAIT2(pmp, park_rw, vp->v_data,
2627			    NULL, error);
2628			error = checkerr(pmp, error, __func__);
2629			if (error)
2630				goto out;
2631
2632			if (rw_msg->pvnr_resid > tomove) {
2633				puffs_senderr(pmp, PUFFS_ERR_READ,
2634				    E2BIG, "resid grew", VPTOPNC(vp));
2635				ERROUT(EPROTO);
2636			}
2637
2638			moved = tomove - rw_msg->pvnr_resid;
2639
2640			(void)memcpy(bp->b_data, rw_msg->pvnr_data, moved);
2641			bp->b_resid = bp->b_bcount - moved;
2642		}
2643	} else {
2644		puffs_msg_setinfo(park_rw, PUFFSOP_VN,
2645		    PUFFS_VN_WRITE, VPTOPNC(vp));
2646		/*
2647		 * make pages read-only before we write them if we want
2648		 * write caching info
2649		 */
2650		if (PUFFS_WCACHEINFO(pmp)) {
2651			struct uvm_object *uobj = &vp->v_uobj;
2652			int npages = (bp->b_bcount + PAGE_SIZE-1) >> PAGE_SHIFT;
2653			struct vm_page *vmp;
2654			int i;
2655
2656			for (i = 0; i < npages; i++) {
2657				vmp= uvm_pageratop((vaddr_t)bp->b_data
2658				    + (i << PAGE_SHIFT));
2659				DPRINTF(("puffs_strategy: write-protecting "
2660				    "vp %p page %p, offset %" PRId64"\n",
2661				    vp, vmp, vmp->offset));
2662				mutex_enter(uobj->vmobjlock);
2663				vmp->flags |= PG_RDONLY;
2664				pmap_page_protect(vmp, VM_PROT_READ);
2665				mutex_exit(uobj->vmobjlock);
2666			}
2667		}
2668
2669		(void)memcpy(&rw_msg->pvnr_data, bp->b_data, tomove);
2670		if (dofaf) {
2671			puffs_msg_setfaf(park_rw);
2672		} else if (BIOASYNC(bp)) {
2673			puffs_msg_setcall(park_rw,
2674			    puffs_parkdone_asyncbiowrite, bp);
2675			dobiodone = 0;
2676		}
2677
2678		PUFFS_MSG_ENQUEUEWAIT2(pmp, park_rw, vp->v_data, NULL, error);
2679
2680		if (dobiodone == 0)
2681			goto out;
2682
2683		/*
2684		 * XXXXXXXX: wrong, but kernel can't survive strategy
2685		 * failure currently.  Here, have one more X: X.
2686		 */
2687		if (error != ENOMEM)
2688			error = 0;
2689
2690		error = checkerr(pmp, error, __func__);
2691		if (error)
2692			goto out;
2693
2694		if (rw_msg->pvnr_resid > tomove) {
2695			puffs_senderr(pmp, PUFFS_ERR_WRITE,
2696			    E2BIG, "resid grew", VPTOPNC(vp));
2697			ERROUT(EPROTO);
2698		}
2699
2700		/*
2701		 * FAF moved everything.  Frankly, we don't
2702		 * really have a choice.
2703		 */
2704		if (dofaf && error == 0)
2705			moved = tomove;
2706		else
2707			moved = tomove - rw_msg->pvnr_resid;
2708
2709		bp->b_resid = bp->b_bcount - moved;
2710		if (bp->b_resid != 0) {
2711			ERROUT(EIO);
2712		}
2713	}
2714
2715 out:
2716	if (park_rw)
2717		puffs_msgmem_release(park_rw);
2718
2719	if (error)
2720		bp->b_error = error;
2721
2722	if (error || dobiodone)
2723		biodone(bp);
2724
2725	return error;
2726}
2727
2728int
2729puffs_vnop_mmap(void *v)
2730{
2731	struct vop_mmap_args /* {
2732		const struct vnodeop_desc *a_desc;
2733		struct vnode *a_vp;
2734		vm_prot_t a_prot;
2735		kauth_cred_t a_cred;
2736	} */ *ap = v;
2737	PUFFS_MSG_VARS(vn, mmap);
2738	struct vnode *vp = ap->a_vp;
2739	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2740	int error;
2741
2742	if (!PUFFS_USE_PAGECACHE(pmp))
2743		return genfs_eopnotsupp(v);
2744
2745	if (EXISTSOP(pmp, MMAP)) {
2746		PUFFS_MSG_ALLOC(vn, mmap);
2747		mmap_msg->pvnr_prot = ap->a_prot;
2748		puffs_credcvt(&mmap_msg->pvnr_cred, ap->a_cred);
2749		puffs_msg_setinfo(park_mmap, PUFFSOP_VN,
2750		    PUFFS_VN_MMAP, VPTOPNC(vp));
2751
2752		PUFFS_MSG_ENQUEUEWAIT2(pmp, park_mmap, vp->v_data, NULL, error);
2753		error = checkerr(pmp, error, __func__);
2754		PUFFS_MSG_RELEASE(mmap);
2755	} else {
2756		error = genfs_mmap(v);
2757	}
2758
2759	return error;
2760}
2761
2762
2763/*
2764 * The rest don't get a free trip to userspace and back, they
2765 * have to stay within the kernel.
2766 */
2767
2768/*
2769 * bmap doesn't really make any sense for puffs, so just 1:1 map it.
2770 * well, maybe somehow, somewhere, some day ....
2771 */
2772int
2773puffs_vnop_bmap(void *v)
2774{
2775	struct vop_bmap_args /* {
2776		const struct vnodeop_desc *a_desc;
2777		struct vnode *a_vp;
2778		daddr_t a_bn;
2779		struct vnode **a_vpp;
2780		daddr_t *a_bnp;
2781		int *a_runp;
2782	} */ *ap = v;
2783	struct puffs_mount *pmp;
2784
2785	pmp = MPTOPUFFSMP(ap->a_vp->v_mount);
2786
2787	if (ap->a_vpp)
2788		*ap->a_vpp = ap->a_vp;
2789	if (ap->a_bnp)
2790		*ap->a_bnp = ap->a_bn;
2791	if (ap->a_runp)
2792		*ap->a_runp
2793		    = (PUFFS_TOMOVE(pmp->pmp_msg_maxsize, pmp)>>DEV_BSHIFT) - 1;
2794
2795	return 0;
2796}
2797
2798/*
2799 * Handle getpages faults in puffs.  We let genfs_getpages() do most
2800 * of the dirty work, but we come in this route to do accounting tasks.
2801 * If the user server has specified functions for cache notifications
2802 * about reads and/or writes, we record which type of operation we got,
2803 * for which page range, and proceed to issue a FAF notification to the
2804 * server about it.
2805 */
2806int
2807puffs_vnop_getpages(void *v)
2808{
2809	struct vop_getpages_args /* {
2810		const struct vnodeop_desc *a_desc;
2811		struct vnode *a_vp;
2812		voff_t a_offset;
2813		struct vm_page **a_m;
2814		int *a_count;
2815		int a_centeridx;
2816		vm_prot_t a_access_type;
2817		int a_advice;
2818		int a_flags;
2819	} */ *ap = v;
2820	struct puffs_mount *pmp;
2821	struct puffs_node *pn;
2822	struct vnode *vp;
2823	struct vm_page **pgs;
2824	struct puffs_cacheinfo *pcinfo = NULL;
2825	struct puffs_cacherun *pcrun;
2826	void *parkmem = NULL;
2827	size_t runsizes;
2828	int i, npages, si, streakon;
2829	int error, locked, write;
2830
2831	pmp = MPTOPUFFSMP(ap->a_vp->v_mount);
2832	npages = *ap->a_count;
2833	pgs = ap->a_m;
2834	vp = ap->a_vp;
2835	pn = vp->v_data;
2836	locked = (ap->a_flags & PGO_LOCKED) != 0;
2837	write = (ap->a_access_type & VM_PROT_WRITE) != 0;
2838
2839	/* ccg xnaht - gets Wuninitialized wrong */
2840	pcrun = NULL;
2841	runsizes = 0;
2842
2843	/*
2844	 * Check that we aren't trying to fault in pages which our file
2845	 * server doesn't know about.  This happens if we extend a file by
2846	 * skipping some pages and later try to fault in pages which
2847	 * are between pn_serversize and vp_size.  This check optimizes
2848	 * away the common case where a file is being extended.
2849	 */
2850	if (ap->a_offset >= pn->pn_serversize && ap->a_offset < vp->v_size) {
2851		struct vattr va;
2852
2853		/* try again later when we can block */
2854		if (locked)
2855			ERROUT(EBUSY);
2856
2857		mutex_exit(vp->v_interlock);
2858		vattr_null(&va);
2859		va.va_size = vp->v_size;
2860		error = dosetattr(vp, &va, FSCRED, 0);
2861		if (error)
2862			ERROUT(error);
2863		mutex_enter(vp->v_interlock);
2864	}
2865
2866	if (write && PUFFS_WCACHEINFO(pmp)) {
2867#ifdef notnowjohn
2868		/* allocate worst-case memory */
2869		runsizes = ((npages / 2) + 1) * sizeof(struct puffs_cacherun);
2870		KASSERT(curlwp != uvm.pagedaemon_lwp || locked);
2871		pcinfo = kmem_zalloc(sizeof(struct puffs_cacheinfo) + runsize,
2872		    locked ? KM_NOSLEEP : KM_SLEEP);
2873
2874		/*
2875		 * can't block if we're locked and can't mess up caching
2876		 * information for fs server.  so come back later, please
2877		 */
2878		if (pcinfo == NULL)
2879			ERROUT(ENOMEM);
2880
2881		parkmem = puffs_park_alloc(locked == 0);
2882		if (parkmem == NULL)
2883			ERROUT(ENOMEM);
2884
2885		pcrun = pcinfo->pcache_runs;
2886#else
2887		(void)parkmem;
2888#endif
2889	}
2890
2891	error = genfs_getpages(v);
2892	if (error)
2893		goto out;
2894
2895	if (PUFFS_WCACHEINFO(pmp) == 0)
2896		goto out;
2897
2898	/*
2899	 * Let's see whose fault it was and inform the user server of
2900	 * possibly read/written pages.  Map pages from read faults
2901	 * strictly read-only, since otherwise we might miss info on
2902	 * when the page is actually write-faulted to.
2903	 */
2904	if (!locked)
2905		mutex_enter(vp->v_uobj.vmobjlock);
2906	for (i = 0, si = 0, streakon = 0; i < npages; i++) {
2907		if (pgs[i] == NULL || pgs[i] == PGO_DONTCARE) {
2908			if (streakon && write) {
2909				streakon = 0;
2910				pcrun[si].pcache_runend
2911				    = trunc_page(pgs[i]->offset) + PAGE_MASK;
2912				si++;
2913			}
2914			continue;
2915		}
2916		if (streakon == 0 && write) {
2917			streakon = 1;
2918			pcrun[si].pcache_runstart = pgs[i]->offset;
2919		}
2920
2921		if (!write)
2922			pgs[i]->flags |= PG_RDONLY;
2923	}
2924	/* was the last page part of our streak? */
2925	if (streakon) {
2926		pcrun[si].pcache_runend
2927		    = trunc_page(pgs[i-1]->offset) + PAGE_MASK;
2928		si++;
2929	}
2930	if (!locked)
2931		mutex_exit(vp->v_uobj.vmobjlock);
2932
2933	KASSERT(si <= (npages / 2) + 1);
2934
2935#ifdef notnowjohn
2936	/* send results to userspace */
2937	if (write)
2938		puffs_cacheop(pmp, parkmem, pcinfo,
2939		    sizeof(struct puffs_cacheinfo) + runsizes, VPTOPNC(vp));
2940#endif
2941
2942 out:
2943	if (error) {
2944		if (pcinfo != NULL)
2945			kmem_free(pcinfo,
2946			    sizeof(struct puffs_cacheinfo) + runsizes);
2947#ifdef notnowjohn
2948		if (parkmem != NULL)
2949			puffs_park_release(parkmem, 1);
2950#endif
2951	}
2952
2953	return error;
2954}
2955
2956/*
2957 * Extended attribute support.
2958 */
2959
2960int
2961puffs_vnop_getextattr(void *v)
2962{
2963	struct vop_getextattr_args /*
2964		struct vnode *a_vp;
2965		int a_attrnamespace;
2966		const char *a_name;
2967		struct uio *a_uio;
2968		size_t *a_size;
2969		kauth_cred_t a_cred;
2970	}; */ *ap = v;
2971	PUFFS_MSG_VARS(vn, getextattr);
2972	struct vnode *vp = ap->a_vp;
2973	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
2974	int attrnamespace = ap->a_attrnamespace;
2975	const char *name = ap->a_name;
2976	struct uio *uio = ap->a_uio;
2977	size_t *sizep = ap->a_size;
2978	size_t tomove, resid;
2979	int error;
2980
2981	if (uio)
2982		resid = uio->uio_resid;
2983	else
2984		resid = 0;
2985
2986	tomove = PUFFS_TOMOVE(resid, pmp);
2987	if (tomove != resid) {
2988		error = E2BIG;
2989		goto out;
2990	}
2991
2992	puffs_msgmem_alloc(sizeof(struct puffs_vnmsg_getextattr) + tomove,
2993	    &park_getextattr, (void *)&getextattr_msg, 1);
2994
2995	getextattr_msg->pvnr_attrnamespace = attrnamespace;
2996	strlcpy(getextattr_msg->pvnr_attrname, name,
2997	    sizeof(getextattr_msg->pvnr_attrname));
2998	puffs_credcvt(&getextattr_msg->pvnr_cred, ap->a_cred);
2999	if (sizep)
3000		getextattr_msg->pvnr_datasize = 1;
3001	getextattr_msg->pvnr_resid = tomove;
3002
3003	puffs_msg_setinfo(park_getextattr,
3004	    PUFFSOP_VN, PUFFS_VN_GETEXTATTR, VPTOPNC(vp));
3005	puffs_msg_setdelta(park_getextattr, tomove);
3006	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_getextattr, vp->v_data, NULL, error);
3007
3008	error = checkerr(pmp, error, __func__);
3009	if (error)
3010		goto out;
3011
3012	resid = getextattr_msg->pvnr_resid;
3013	if (resid > tomove) {
3014		puffs_senderr(pmp, PUFFS_ERR_GETEXTATTR, E2BIG,
3015		    "resid grew", VPTOPNC(vp));
3016		error = EPROTO;
3017		goto out;
3018	}
3019
3020	if (sizep)
3021		*sizep = getextattr_msg->pvnr_datasize;
3022	if (uio)
3023		error = uiomove(getextattr_msg->pvnr_data, tomove - resid, uio);
3024
3025 out:
3026	PUFFS_MSG_RELEASE(getextattr);
3027	return error;
3028}
3029
3030int
3031puffs_vnop_setextattr(void *v)
3032{
3033	struct vop_setextattr_args /* {
3034		struct vnode *a_vp;
3035		int a_attrnamespace;
3036		const char *a_name;
3037		struct uio *a_uio;
3038		kauth_cred_t a_cred;
3039	}; */ *ap = v;
3040	PUFFS_MSG_VARS(vn, setextattr);
3041	struct vnode *vp = ap->a_vp;
3042	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
3043	int attrnamespace = ap->a_attrnamespace;
3044	const char *name = ap->a_name;
3045	struct uio *uio = ap->a_uio;
3046	size_t tomove, resid;
3047	int error;
3048
3049	if (uio)
3050		resid = uio->uio_resid;
3051	else
3052		resid = 0;
3053
3054	tomove = PUFFS_TOMOVE(resid, pmp);
3055	if (tomove != resid) {
3056		error = E2BIG;
3057		goto out;
3058	}
3059
3060	puffs_msgmem_alloc(sizeof(struct puffs_vnmsg_setextattr) + tomove,
3061	    &park_setextattr, (void *)&setextattr_msg, 1);
3062
3063	setextattr_msg->pvnr_attrnamespace = attrnamespace;
3064	strlcpy(setextattr_msg->pvnr_attrname, name,
3065	    sizeof(setextattr_msg->pvnr_attrname));
3066	puffs_credcvt(&setextattr_msg->pvnr_cred, ap->a_cred);
3067	setextattr_msg->pvnr_resid = tomove;
3068
3069	if (uio) {
3070		error = uiomove(setextattr_msg->pvnr_data, tomove, uio);
3071		if (error)
3072			goto out;
3073	}
3074
3075	puffs_msg_setinfo(park_setextattr,
3076	    PUFFSOP_VN, PUFFS_VN_SETEXTATTR, VPTOPNC(vp));
3077	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_setextattr, vp->v_data, NULL, error);
3078
3079	error = checkerr(pmp, error, __func__);
3080	if (error)
3081		goto out;
3082
3083	if (setextattr_msg->pvnr_resid != 0)
3084		error = EIO;
3085
3086 out:
3087	PUFFS_MSG_RELEASE(setextattr);
3088
3089	return error;
3090}
3091
3092int
3093puffs_vnop_listextattr(void *v)
3094{
3095	struct vop_listextattr_args /* {
3096		struct vnode *a_vp;
3097		int a_attrnamespace;
3098		struct uio *a_uio;
3099		size_t *a_size;
3100		int a_flag,
3101		kauth_cred_t a_cred;
3102	}; */ *ap = v;
3103	PUFFS_MSG_VARS(vn, listextattr);
3104	struct vnode *vp = ap->a_vp;
3105	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
3106	int attrnamespace = ap->a_attrnamespace;
3107	struct uio *uio = ap->a_uio;
3108	size_t *sizep = ap->a_size;
3109	int flag = ap->a_flag;
3110	size_t tomove, resid;
3111	int error;
3112
3113	if (uio)
3114		resid = uio->uio_resid;
3115	else
3116		resid = 0;
3117
3118	tomove = PUFFS_TOMOVE(resid, pmp);
3119	if (tomove != resid) {
3120		error = E2BIG;
3121		goto out;
3122	}
3123
3124	puffs_msgmem_alloc(sizeof(struct puffs_vnmsg_listextattr) + tomove,
3125	    &park_listextattr, (void *)&listextattr_msg, 1);
3126
3127	listextattr_msg->pvnr_attrnamespace = attrnamespace;
3128	listextattr_msg->pvnr_flag = flag;
3129	puffs_credcvt(&listextattr_msg->pvnr_cred, ap->a_cred);
3130	listextattr_msg->pvnr_resid = tomove;
3131	if (sizep)
3132		listextattr_msg->pvnr_datasize = 1;
3133
3134	puffs_msg_setinfo(park_listextattr,
3135	    PUFFSOP_VN, PUFFS_VN_LISTEXTATTR, VPTOPNC(vp));
3136	puffs_msg_setdelta(park_listextattr, tomove);
3137	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_listextattr, vp->v_data, NULL, error);
3138
3139	error = checkerr(pmp, error, __func__);
3140	if (error)
3141		goto out;
3142
3143	resid = listextattr_msg->pvnr_resid;
3144	if (resid > tomove) {
3145		puffs_senderr(pmp, PUFFS_ERR_LISTEXTATTR, E2BIG,
3146		    "resid grew", VPTOPNC(vp));
3147		error = EPROTO;
3148		goto out;
3149	}
3150
3151	if (sizep)
3152		*sizep = listextattr_msg->pvnr_datasize;
3153	if (uio)
3154		error = uiomove(listextattr_msg->pvnr_data, tomove-resid, uio);
3155
3156 out:
3157	PUFFS_MSG_RELEASE(listextattr);
3158	return error;
3159}
3160
3161int
3162puffs_vnop_deleteextattr(void *v)
3163{
3164	struct vop_deleteextattr_args /* {
3165		struct vnode *a_vp;
3166		int a_attrnamespace;
3167		const char *a_name;
3168		kauth_cred_t a_cred;
3169	}; */ *ap = v;
3170	PUFFS_MSG_VARS(vn, deleteextattr);
3171	struct vnode *vp = ap->a_vp;
3172	struct puffs_mount *pmp = MPTOPUFFSMP(vp->v_mount);
3173	int attrnamespace = ap->a_attrnamespace;
3174	const char *name = ap->a_name;
3175	int error;
3176
3177	PUFFS_MSG_ALLOC(vn, deleteextattr);
3178	deleteextattr_msg->pvnr_attrnamespace = attrnamespace;
3179	strlcpy(deleteextattr_msg->pvnr_attrname, name,
3180	    sizeof(deleteextattr_msg->pvnr_attrname));
3181	puffs_credcvt(&deleteextattr_msg->pvnr_cred, ap->a_cred);
3182
3183	puffs_msg_setinfo(park_deleteextattr,
3184	    PUFFSOP_VN, PUFFS_VN_DELETEEXTATTR, VPTOPNC(vp));
3185	PUFFS_MSG_ENQUEUEWAIT2(pmp, park_deleteextattr,
3186	    vp->v_data, NULL, error);
3187
3188	error = checkerr(pmp, error, __func__);
3189
3190	PUFFS_MSG_RELEASE(deleteextattr);
3191	return error;
3192}
3193
3194/*
3195 * spec & fifo.  These call the miscfs spec and fifo vectors, but issue
3196 * FAF update information for the puffs node first.
3197 */
3198int
3199puffs_vnop_spec_read(void *v)
3200{
3201	struct vop_read_args /* {
3202		const struct vnodeop_desc *a_desc;
3203		struct vnode *a_vp;
3204		struct uio *a_uio;
3205		int a_ioflag;
3206		kauth_cred_t a_cred;
3207	} */ *ap = v;
3208
3209	puffs_updatenode(VPTOPP(ap->a_vp), PUFFS_UPDATEATIME, 0);
3210	return VOCALL(spec_vnodeop_p, VOFFSET(vop_read), v);
3211}
3212
3213int
3214puffs_vnop_spec_write(void *v)
3215{
3216	struct vop_write_args /* {
3217		const struct vnodeop_desc *a_desc;
3218		struct vnode *a_vp;
3219		struct uio *a_uio;
3220		int a_ioflag;
3221		kauth_cred_t a_cred;
3222	} */ *ap = v;
3223
3224	puffs_updatenode(VPTOPP(ap->a_vp), PUFFS_UPDATEMTIME, 0);
3225	return VOCALL(spec_vnodeop_p, VOFFSET(vop_write), v);
3226}
3227
3228int
3229puffs_vnop_fifo_read(void *v)
3230{
3231	struct vop_read_args /* {
3232		const struct vnodeop_desc *a_desc;
3233		struct vnode *a_vp;
3234		struct uio *a_uio;
3235		int a_ioflag;
3236		kauth_cred_t a_cred;
3237	} */ *ap = v;
3238
3239	puffs_updatenode(VPTOPP(ap->a_vp), PUFFS_UPDATEATIME, 0);
3240	return VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), v);
3241}
3242
3243int
3244puffs_vnop_fifo_write(void *v)
3245{
3246	struct vop_write_args /* {
3247		const struct vnodeop_desc *a_desc;
3248		struct vnode *a_vp;
3249		struct uio *a_uio;
3250		int a_ioflag;
3251		kauth_cred_t a_cred;
3252	} */ *ap = v;
3253
3254	puffs_updatenode(VPTOPP(ap->a_vp), PUFFS_UPDATEMTIME, 0);
3255	return VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), v);
3256}
3257