1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <sys/param.h>
26#include <sys/types.h>
27#include <sys/systm.h>
28#include <sys/cred.h>
29#include <sys/proc.h>
30#include <sys/user.h>
31#include <sys/time.h>
32#include <sys/vnode.h>
33#include <sys/vfs.h>
34#include <sys/vfs_opreg.h>
35#include <sys/file.h>
36#include <sys/filio.h>
37#include <sys/uio.h>
38#include <sys/buf.h>
39#include <sys/mman.h>
40#include <sys/tiuser.h>
41#include <sys/pathname.h>
42#include <sys/dirent.h>
43#include <sys/conf.h>
44#include <sys/debug.h>
45#include <sys/vmsystm.h>
46#include <sys/fcntl.h>
47#include <sys/flock.h>
48#include <sys/swap.h>
49#include <sys/errno.h>
50#include <sys/sysmacros.h>
51#include <sys/disp.h>
52#include <sys/kmem.h>
53#include <sys/cmn_err.h>
54#include <sys/vtrace.h>
55#include <sys/mount.h>
56#include <sys/bootconf.h>
57#include <sys/dnlc.h>
58#include <sys/stat.h>
59#include <sys/acl.h>
60#include <sys/policy.h>
61#include <rpc/types.h>
62
63#include <vm/hat.h>
64#include <vm/as.h>
65#include <vm/page.h>
66#include <vm/pvn.h>
67#include <vm/seg.h>
68#include <vm/seg_map.h>
69#include <vm/seg_vn.h>
70#include <vm/rm.h>
71#include <sys/fs/cachefs_fs.h>
72#include <sys/fs/cachefs_dir.h>
73#include <sys/fs/cachefs_dlog.h>
74#include <sys/fs/cachefs_ioctl.h>
75#include <sys/fs/cachefs_log.h>
76#include <fs/fs_subr.h>
77
78int cachefs_dnlc;	/* use dnlc, debugging */
79
80static void cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp,
81    cred_t *cr);
82static void cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap,
83    cred_t *cr);
84static void cachefs_createacl(cnode_t *dcp, cnode_t *newcp);
85static int cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec);
86static int cachefs_getacldirvp(cnode_t *cp);
87static void cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec);
88static int cachefs_access_local(void *cp, int mode, cred_t *cr);
89static int cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr);
90static int cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
91    u_offset_t iooff, cred_t *cr);
92static int cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
93    u_offset_t iooff, cred_t *cr);
94static int cachefs_setattr_connected(vnode_t *vp, vattr_t *vap, int flags,
95    cred_t *cr, caller_context_t *ct);
96static int cachefs_setattr_disconnected(vnode_t *vp, vattr_t *vap,
97    int flags, cred_t *cr, caller_context_t *ct);
98static int cachefs_access_connected(struct vnode *vp, int mode,
99    int flags, cred_t *cr);
100static int cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
101    cred_t *cr);
102static int cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
103    char *tnm, cred_t *cr);
104static int cachefs_symlink_disconnected(vnode_t *dvp, char *lnm,
105    vattr_t *tva, char *tnm, cred_t *cr);
106static int cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
107    cred_t *cr);
108static int cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp,
109    char *tnm, cred_t *cr);
110static int cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
111    vnode_t **vpp, cred_t *cr);
112static int cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
113    vnode_t **vpp, cred_t *cr);
114static int cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr);
115static int cachefs_rmdir_connected(vnode_t *dvp, char *nm,
116    vnode_t *cdir, cred_t *cr, vnode_t *vp);
117static int cachefs_rmdir_disconnected(vnode_t *dvp, char *nm,
118    vnode_t *cdir, cred_t *cr, vnode_t *vp);
119static char *cachefs_newname(void);
120static int cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm,
121    cred_t *cr);
122static int cachefs_rename_connected(vnode_t *odvp, char *onm,
123    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
124static int cachefs_rename_disconnected(vnode_t *odvp, char *onm,
125    vnode_t *ndvp, char *nnm, cred_t *cr, vnode_t *delvp);
126static int cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr,
127    int *eofp);
128static int cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop,
129    cred_t *cr, int *eofp);
130static int cachefs_readback_translate(cnode_t *cp, uio_t *uiop,
131	cred_t *cr, int *eofp);
132
133static int cachefs_setattr_common(vnode_t *vp, vattr_t *vap, int flags,
134    cred_t *cr, caller_context_t *ct);
135
136static	int	cachefs_open(struct vnode **, int, cred_t *,
137			caller_context_t *);
138static	int	cachefs_close(struct vnode *, int, int, offset_t,
139			cred_t *, caller_context_t *);
140static	int	cachefs_read(struct vnode *, struct uio *, int, cred_t *,
141			caller_context_t *);
142static	int	cachefs_write(struct vnode *, struct uio *, int, cred_t *,
143			caller_context_t *);
144static	int	cachefs_ioctl(struct vnode *, int, intptr_t, int, cred_t *,
145			int *, caller_context_t *);
146static	int	cachefs_getattr(struct vnode *, struct vattr *, int,
147			cred_t *, caller_context_t *);
148static	int	cachefs_setattr(struct vnode *, struct vattr *,
149			int, cred_t *, caller_context_t *);
150static	int	cachefs_access(struct vnode *, int, int, cred_t *,
151			caller_context_t *);
152static	int	cachefs_lookup(struct vnode *, char *, struct vnode **,
153			struct pathname *, int, struct vnode *, cred_t *,
154			caller_context_t *, int *, pathname_t *);
155static	int	cachefs_create(struct vnode *, char *, struct vattr *,
156			enum vcexcl, int, struct vnode **, cred_t *, int,
157			caller_context_t *, vsecattr_t *);
158static	int	cachefs_create_connected(vnode_t *dvp, char *nm,
159			vattr_t *vap, enum vcexcl exclusive, int mode,
160			vnode_t **vpp, cred_t *cr);
161static	int	cachefs_create_disconnected(vnode_t *dvp, char *nm,
162			vattr_t *vap, enum vcexcl exclusive, int mode,
163			vnode_t **vpp, cred_t *cr);
164static	int	cachefs_remove(struct vnode *, char *, cred_t *,
165			caller_context_t *, int);
166static	int	cachefs_link(struct vnode *, struct vnode *, char *,
167			cred_t *, caller_context_t *, int);
168static	int	cachefs_rename(struct vnode *, char *, struct vnode *,
169			char *, cred_t *, caller_context_t *, int);
170static	int	cachefs_mkdir(struct vnode *, char *, struct
171			vattr *, struct vnode **, cred_t *, caller_context_t *,
172			int, vsecattr_t *);
173static	int	cachefs_rmdir(struct vnode *, char *, struct vnode *,
174			cred_t *, caller_context_t *, int);
175static	int	cachefs_readdir(struct vnode *, struct uio *,
176			cred_t *, int *, caller_context_t *, int);
177static	int	cachefs_symlink(struct vnode *, char *, struct vattr *,
178			char *, cred_t *, caller_context_t *, int);
179static	int	cachefs_readlink(struct vnode *, struct uio *, cred_t *,
180			caller_context_t *);
181static int cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr);
182static int cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop);
183static	int	cachefs_fsync(struct vnode *, int, cred_t *,
184			caller_context_t *);
185static	void	cachefs_inactive(struct vnode *, cred_t *, caller_context_t *);
186static	int	cachefs_fid(struct vnode *, struct fid *, caller_context_t *);
187static	int	cachefs_rwlock(struct vnode *, int, caller_context_t *);
188static	void	cachefs_rwunlock(struct vnode *, int, caller_context_t *);
189static	int	cachefs_seek(struct vnode *, offset_t, offset_t *,
190			caller_context_t *);
191static	int	cachefs_frlock(struct vnode *, int, struct flock64 *,
192			int, offset_t, struct flk_callback *, cred_t *,
193			caller_context_t *);
194static	int	cachefs_space(struct vnode *, int, struct flock64 *, int,
195			offset_t, cred_t *, caller_context_t *);
196static	int	cachefs_realvp(struct vnode *, struct vnode **,
197			caller_context_t *);
198static	int	cachefs_getpage(struct vnode *, offset_t, size_t, uint_t *,
199			struct page *[], size_t, struct seg *, caddr_t,
200			enum seg_rw, cred_t *, caller_context_t *);
201static	int	cachefs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
202			struct page *[], size_t, struct seg *, caddr_t,
203			enum seg_rw, cred_t *);
204static	int	cachefs_getapage_back(struct vnode *, u_offset_t, size_t,
205		uint_t *, struct page *[], size_t, struct seg *, caddr_t,
206			enum seg_rw, cred_t *);
207static	int	cachefs_putpage(struct vnode *, offset_t, size_t, int,
208			cred_t *, caller_context_t *);
209static	int	cachefs_map(struct vnode *, offset_t, struct as *,
210			caddr_t *, size_t, uchar_t, uchar_t, uint_t, cred_t *,
211			caller_context_t *);
212static	int	cachefs_addmap(struct vnode *, offset_t, struct as *,
213			caddr_t, size_t, uchar_t, uchar_t, uint_t, cred_t *,
214			caller_context_t *);
215static	int	cachefs_delmap(struct vnode *, offset_t, struct as *,
216			caddr_t, size_t, uint_t, uint_t, uint_t, cred_t *,
217			caller_context_t *);
218static int	cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec,
219			int flag, cred_t *cr, caller_context_t *);
220static int	cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec,
221			int flag, cred_t *cr, caller_context_t *);
222static	int	cachefs_shrlock(vnode_t *, int, struct shrlock *, int,
223			cred_t *, caller_context_t *);
224static int cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
225    cred_t *cr);
226static int cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec,
227    int flag, cred_t *cr);
228
229static int	cachefs_dump(struct vnode *, caddr_t, offset_t, offset_t,
230			caller_context_t *);
231static int	cachefs_pageio(struct vnode *, page_t *,
232		    u_offset_t, size_t, int, cred_t *, caller_context_t *);
233static int	cachefs_writepage(struct vnode *vp, caddr_t base,
234		    int tcount, struct uio *uiop);
235static int	cachefs_pathconf(vnode_t *, int, ulong_t *, cred_t *,
236			caller_context_t *);
237
238static int	cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
239			cred_t *cr, caller_context_t *ct);
240static int	cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag,
241			cred_t *cr, caller_context_t *ct);
242static int	cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
243			int flags, cred_t *cr, caller_context_t *ct);
244static int	cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr,
245			vnode_t *vp);
246static int	cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off,
247			size_t len, uint_t *protp, struct page *pl[],
248			size_t plsz, struct seg *seg, caddr_t addr,
249			enum seg_rw rw, cred_t *cr);
250static int	cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off,
251			size_t len, int flags, cred_t *cr);
252static int	cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off,
253			struct as *as, caddr_t *addrp, size_t len, uchar_t prot,
254			uchar_t maxprot, uint_t flags, cred_t *cr);
255static int	cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd,
256			struct flock64 *bfp, int flag, offset_t offset,
257			cred_t *cr, caller_context_t *ct);
258
259struct vnodeops *cachefs_vnodeops;
260
/*
 * Template used to construct the cachefs vnode operations vector
 * (see cachefs_init_vnops() / vn_make_ops()).  Each entry maps a
 * VOPNAME_* operation to its cachefs implementation; the table is
 * NULL-terminated.
 */
static const fs_operation_def_t cachefs_vnodeops_template[] = {
	VOPNAME_OPEN,		{ .vop_open = cachefs_open },
	VOPNAME_CLOSE,		{ .vop_close = cachefs_close },
	VOPNAME_READ,		{ .vop_read = cachefs_read },
	VOPNAME_WRITE,		{ .vop_write = cachefs_write },
	VOPNAME_IOCTL,		{ .vop_ioctl = cachefs_ioctl },
	VOPNAME_GETATTR,	{ .vop_getattr = cachefs_getattr },
	VOPNAME_SETATTR,	{ .vop_setattr = cachefs_setattr },
	VOPNAME_ACCESS,		{ .vop_access = cachefs_access },
	VOPNAME_LOOKUP,		{ .vop_lookup = cachefs_lookup },
	VOPNAME_CREATE,		{ .vop_create = cachefs_create },
	VOPNAME_REMOVE,		{ .vop_remove = cachefs_remove },
	VOPNAME_LINK,		{ .vop_link = cachefs_link },
	VOPNAME_RENAME,		{ .vop_rename = cachefs_rename },
	VOPNAME_MKDIR,		{ .vop_mkdir = cachefs_mkdir },
	VOPNAME_RMDIR,		{ .vop_rmdir = cachefs_rmdir },
	VOPNAME_READDIR,	{ .vop_readdir = cachefs_readdir },
	VOPNAME_SYMLINK,	{ .vop_symlink = cachefs_symlink },
	VOPNAME_READLINK,	{ .vop_readlink = cachefs_readlink },
	VOPNAME_FSYNC,		{ .vop_fsync = cachefs_fsync },
	VOPNAME_INACTIVE,	{ .vop_inactive = cachefs_inactive },
	VOPNAME_FID,		{ .vop_fid = cachefs_fid },
	VOPNAME_RWLOCK,		{ .vop_rwlock = cachefs_rwlock },
	VOPNAME_RWUNLOCK,	{ .vop_rwunlock = cachefs_rwunlock },
	VOPNAME_SEEK,		{ .vop_seek = cachefs_seek },
	VOPNAME_FRLOCK,		{ .vop_frlock = cachefs_frlock },
	VOPNAME_SPACE,		{ .vop_space = cachefs_space },
	VOPNAME_REALVP,		{ .vop_realvp = cachefs_realvp },
	VOPNAME_GETPAGE,	{ .vop_getpage = cachefs_getpage },
	VOPNAME_PUTPAGE,	{ .vop_putpage = cachefs_putpage },
	VOPNAME_MAP,		{ .vop_map = cachefs_map },
	VOPNAME_ADDMAP,		{ .vop_addmap = cachefs_addmap },
	VOPNAME_DELMAP,		{ .vop_delmap = cachefs_delmap },
	VOPNAME_DUMP,		{ .vop_dump = cachefs_dump },
	VOPNAME_PATHCONF,	{ .vop_pathconf = cachefs_pathconf },
	VOPNAME_PAGEIO,		{ .vop_pageio = cachefs_pageio },
	VOPNAME_SETSECATTR,	{ .vop_setsecattr = cachefs_setsecattr },
	VOPNAME_GETSECATTR,	{ .vop_getsecattr = cachefs_getsecattr },
	VOPNAME_SHRLOCK,	{ .vop_shrlock = cachefs_shrlock },
	NULL,			NULL
};
302
303/* forward declarations of statics */
304static void cachefs_modified(cnode_t *cp);
305static int cachefs_modified_alloc(cnode_t *cp);
306
307int
308cachefs_init_vnops(char *name)
309{
310	return (vn_make_ops(name,
311	    cachefs_vnodeops_template, &cachefs_vnodeops));
312}
313
/*
 * Return the cachefs vnode operations vector built by
 * cachefs_init_vnops().
 */
struct vnodeops *
cachefs_getvnodeops(void)
{
	return (cachefs_vnodeops);
}
319
/*
 * cachefs_open - VOP_OPEN entry point for cachefs.
 *
 * Caches the opening credentials on the cnode, tracks read/write open
 * counts, and when connected passes the open through to the back file
 * system vnode.  The main loop re-acquires file system access and
 * retries after connected-mode timeouts, and escalates from
 * disconnected to connected operation when a write cannot be serviced
 * locally.  Returns 0 on success or an errno value.
 */
static int
cachefs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(*vpp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* non-zero while cd access is held */
	int type;
	int connected = 0;	/* require connected operation on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: ENTER vpp %p flag %x\n",
		    (void *)vpp, flag);
#endif
	/* cachefs is only usable from the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}
	if ((flag & FWRITE) &&
	    ((*vpp)->v_type == VDIR || (*vpp)->v_type == VLNK)) {
		error = EISDIR;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the open operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;

		mutex_enter(&cp->c_statelock);

		/* grab creds if we do not have any yet */
		if (cp->c_cred == NULL) {
			crhold(cr);
			cp->c_cred = cr;
		}
		cp->c_flags |= CN_NEEDOPEN;

		/* if we are disconnected */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* if we cannot write to the file system */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp)) {
				mutex_exit(&cp->c_statelock);
				connected = 1;
				continue;
			}
			/*
			 * Allow read only requests to continue
			 */
			if ((flag & (FWRITE|FREAD)) == FREAD) {
				/* track the flag for opening the backvp */
				cp->c_rdcnt++;
				mutex_exit(&cp->c_statelock);
				error = 0;
				break;
			}

			/*
			 * check credentials  - if this procs
			 * credentials don't match the creds in the
			 * cnode disallow writing while disconnected.
			 */
			if (crcmp(cp->c_cred, CRED()) != 0 &&
			    secpolicy_vnode_access2(CRED(), *vpp,
			    cp->c_attr.va_uid, 0, VWRITE) != 0) {
				mutex_exit(&cp->c_statelock);
				connected = 1;
				continue;
			}
			/* to get here, we know that the WRITE flag is on */
			cp->c_wrcnt++;
			if (flag & FREAD)
				cp->c_rdcnt++;
		}

		/* else if we are connected */
		else {
			/* if cannot use the cached copy of the file */
			if ((flag & FWRITE) && CFS_ISFS_WRITE_AROUND(fscp) &&
			    ((cp->c_flags & CN_NOCACHE) == 0))
				cachefs_nocache(cp);

			/* pass open to the back file */
			if (cp->c_backvp) {
				cp->c_flags &= ~CN_NEEDOPEN;
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_open (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_OPEN(&cp->c_backvp, flag, cr, ct);
				if (CFS_TIMEOUT(fscp, error)) {
					/* timed out; drop access and retry */
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error) {
					mutex_exit(&cp->c_statelock);
					break;
				}
			} else {
				/* backvp will be VOP_OPEN'd later */
				if (flag & FREAD)
					cp->c_rdcnt++;
				if (flag & FWRITE)
					cp->c_wrcnt++;
			}

			/*
			 * Now perform a consistency check on the file.
			 * If strict consistency then force a check to
			 * the backfs even if the timeout has not expired
			 * for close-to-open consistency.
			 */
			type = 0;
			if (fscp->fs_consttype == CFS_FS_CONST_STRICT)
				type = C_BACK_CHECK;
			error = CFSOP_CHECK_COBJECT(fscp, cp, type, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}
	if (held)
		cachefs_cd_release(fscp);
out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_open: EXIT vpp %p error %d\n",
		    (void *)vpp, error);
#endif
	return (error);
}
482
/*
 * cachefs_close - VOP_CLOSE entry point for cachefs.
 *
 * Releases local locks, performs special bookkeeping when the cache
 * daemon closes the root vnode (resetting the daemon id and the
 * connected/disconnected state), flushes dirty pages synchronously on
 * the last close of a writable regular file, and passes the close
 * through to the back file system vnode when connected.  The loop
 * retries after connected-mode timeouts.  Returns 0 on success or an
 * errno value (including any error latched in cp->c_error).
 */
/* ARGSUSED */
static int
cachefs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
	caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* non-zero while cd access is held */
	int connected = 0;	/* require connected operation on retry */
	int close_cnt = 1;	/* count passed to the back fs VOP_CLOSE */
	cachefscache_t *cachep;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: ENTER vp %p\n", (void *)vp);
#endif
	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the close operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/*
	 * File could have been passed in or inherited from the global zone, so
	 * we don't want to flat out reject the request; we'll just leave things
	 * the way they are and let the backfs (NFS) deal with it.
	 */
	/* get rid of any local locks */
	if (CFS_ISFS_LLOCK(fscp)) {
		(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	}

	/* clean up if this is the daemon closing down */
	if ((fscp->fs_cddaemonid == ttoproc(curthread)->p_pid) &&
	    ((ttoproc(curthread)->p_pid) != 0) &&
	    (vp == fscp->fs_rootvp) &&
	    (count == 1)) {
		mutex_enter(&fscp->fs_cdlock);
		fscp->fs_cddaemonid = 0;
		/* a pending dlog file means we must go disconnected */
		if (fscp->fs_dlogfile)
			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
		else
			fscp->fs_cdconnected = CFS_CD_CONNECTED;
		cv_broadcast(&fscp->fs_cdwaitcv);
		mutex_exit(&fscp->fs_cdlock);
		if (fscp->fs_flags & CFS_FS_ROOTFS) {
			cachep = fscp->fs_cache;
			mutex_enter(&cachep->c_contentslock);
			ASSERT(cachep->c_rootdaemonid != 0);
			cachep->c_rootdaemonid = 0;
			mutex_exit(&cachep->c_contentslock);
		}
		return (0);
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			goto out;
		held = 1;
		connected = 0;

		/* if not the last close */
		if (count > 1) {
			if (fscp->fs_cdconnected != CFS_CD_CONNECTED)
				goto out;
			mutex_enter(&cp->c_statelock);
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_close (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_CLOSE(cp->c_backvp, flag, count,
				    offset, cr, ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				}
			}
			mutex_exit(&cp->c_statelock);
			goto out;
		}

		/*
		 * If the file is an unlinked file, then flush the lookup
		 * cache so that inactive will be called if this is
		 * the last reference.  It will invalidate all of the
		 * cached pages, without writing them out.  Writing them
		 * out is not required because they will be written to a
		 * file which will be immediately removed.
		 */
		if (cp->c_unldvp != NULL) {
			dnlc_purge_vp(vp);
			mutex_enter(&cp->c_statelock);
			error = cp->c_error;
			cp->c_error = 0;
			mutex_exit(&cp->c_statelock);
			/* always call VOP_CLOSE() for back fs vnode */
		}

		/* force dirty data to stable storage */
		else if ((vp->v_type == VREG) && (flag & FWRITE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			/* clean the cachefs pages synchronously */
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			/* if any other important error */
			if (cp->c_error) {
				/* get rid of the pages */
				(void) cachefs_putpage_common(vp,
				    (offset_t)0, 0, B_INVAL | B_FORCE, cr);
				dnlc_purge_vp(vp);
			}
		}

		mutex_enter(&cp->c_statelock);
		if (cp->c_backvp &&
		    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {
			error = VOP_CLOSE(cp->c_backvp, flag, close_cnt,
			    offset, cr, ct);
			if (CFS_TIMEOUT(fscp, error)) {
				mutex_exit(&cp->c_statelock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				/* don't decrement the vnode counts again */
				close_cnt = 0;
				continue;
			}
		}
		mutex_exit(&cp->c_statelock);
		break;
	}

	/* report any error latched on the cnode, then clear it */
	mutex_enter(&cp->c_statelock);
	if (!error)
		error = cp->c_error;
	cp->c_error = 0;
	mutex_exit(&cp->c_statelock);

out:
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_close: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}
678
/*
 * cachefs_read - VOP_READ entry point for cachefs.
 *
 * Transfers data to the caller in MAXBSIZE chunks through segmap,
 * performing a consistency check against the back file system before
 * each chunk and retrying on connected-mode timeouts.  NFSv4
 * pass-through reads are forwarded directly to the back file system.
 * Returns 0 on success or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
	caller_context_t *ct)
{
	struct cnode *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	register u_offset_t off;
	register int mapoff;
	register caddr_t base;
	int n;
	offset_t diff;
	uint_t flags = 0;
	int error = 0;

#if 0
	if (vp->v_flag & VNOCACHE)
		flags = SM_INVAL;
#endif
	/* cachefs is only usable from the global zone */
	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);
	if (vp->v_type != VREG)
		return (EISDIR);

	ASSERT(RW_READ_HELD(&cp->c_rwlock));

	if (uiop->uio_resid == 0)
		return (0);


	if (uiop->uio_loffset < (offset_t)0)
		return (EINVAL);

	/*
	 * Call backfilesystem to read if NFSv4, the cachefs code
	 * does the read from the back filesystem asynchronously
	 * which is not supported by pass-through functionality.
	 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_read_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
		goto out;
	}

	/* honor mandatory record locking if enabled on the file */
	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
		error = chklock(vp, FREAD, (offset_t)uiop->uio_loffset,
		    uiop->uio_resid, uiop->uio_fmode, ct);
		if (error)
			return (error);
	}

	/*
	 * Sit in a loop and transfer (uiomove) the data in up to
	 * MAXBSIZE chunks. Each chunk is mapped into the kernel's
	 * address space as needed and then released.
	 */
	do {
		/*
		 *	off	Offset of current MAXBSIZE chunk
		 *	mapoff	Offset within the current chunk
		 *	n	Number of bytes to move from this chunk
		 *	base	kernel address of mapped in chunk
		 */
		off = uiop->uio_loffset & (offset_t)MAXBMASK;
		mapoff = uiop->uio_loffset & MAXBOFFSET;
		n = MAXBSIZE - mapoff;
		if (n > uiop->uio_resid)
			n = (uint_t)uiop->uio_resid;

		/* perform consistency check */
		error = cachefs_cd_access(fscp, 0, 0);
		if (error)
			break;
		mutex_enter(&cp->c_statelock);
		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
		diff = cp->c_size - uiop->uio_loffset;
		mutex_exit(&cp->c_statelock);
		if (CFS_TIMEOUT(fscp, error)) {
			/* timed out; retry this chunk from the top */
			cachefs_cd_release(fscp);
			cachefs_cd_timedout(fscp);
			error = 0;
			continue;
		}
		cachefs_cd_release(fscp);

		if (error)
			break;

		/* at or past end of file; nothing more to read */
		if (diff <= (offset_t)0)
			break;
		if (diff < (offset_t)n)
			n = diff;

		base = segmap_getmapflt(segkmap, vp, off, (uint_t)n, 1, S_READ);

		/* lock the pages down so uiomove cannot fault them out */
		error = segmap_fault(kas.a_hat, segkmap, base, n,
		    F_SOFTLOCK, S_READ);
		if (error) {
			(void) segmap_release(segkmap, base, 0);
			if (FC_CODE(error) == FC_OBJERR)
				error =  FC_ERRNO(error);
			else
				error = EIO;
			break;
		}
		error = uiomove(base+mapoff, n, UIO_READ, uiop);
		(void) segmap_fault(kas.a_hat, segkmap, base, n,
		    F_SOFTUNLOCK, S_READ);
		if (error == 0) {
			/*
			 * if we read a whole page(s), or to eof,
			 *  we won't need this page(s) again soon.
			 */
			if (n + mapoff == MAXBSIZE ||
			    uiop->uio_loffset == cp->c_size)
				flags |= SM_DONTNEED;
		}
		(void) segmap_release(segkmap, base, flags);
	} while (error == 0 && uiop->uio_resid > 0);

out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_read: EXIT error %d resid %ld\n", error,
		    uiop->uio_resid);
#endif
	return (error);
}
806
807/*
808 * cachefs_read_backfs_nfsv4
809 *
810 * Call NFSv4 back filesystem to handle the read (cachefs
811 * pass-through support for NFSv4).
812 */
813static int
814cachefs_read_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
815			caller_context_t *ct)
816{
817	cnode_t *cp = VTOC(vp);
818	fscache_t *fscp = C_TO_FSCACHE(cp);
819	vnode_t *backvp;
820	int error;
821
822	/*
823	 * For NFSv4 pass-through to work, only connected operation
824	 * is supported, the cnode backvp must exist, and cachefs
825	 * optional (eg., disconnectable) flags are turned off. Assert
826	 * these conditions for the read operation.
827	 */
828	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
829	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
830
831	/* Call backfs vnode op after extracting backvp */
832	mutex_enter(&cp->c_statelock);
833	backvp = cp->c_backvp;
834	mutex_exit(&cp->c_statelock);
835
836	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_read_backfs_nfsv4: cnode %p, "
837	    "backvp %p\n", cp, backvp));
838
839	(void) VOP_RWLOCK(backvp, V_WRITELOCK_FALSE, ct);
840	error = VOP_READ(backvp, uiop, ioflag, cr, ct);
841	VOP_RWUNLOCK(backvp, V_WRITELOCK_FALSE, ct);
842
843	/* Increment cache miss counter */
844	fscp->fs_stats.st_misses++;
845
846	return (error);
847}
848
849/*ARGSUSED*/
850static int
851cachefs_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
852	caller_context_t *ct)
853{
854	struct cnode *cp = VTOC(vp);
855	fscache_t *fscp = C_TO_FSCACHE(cp);
856	int error = 0;
857	u_offset_t off;
858	caddr_t base;
859	uint_t bsize;
860	uint_t flags;
861	int n, on;
862	rlim64_t limit = uiop->uio_llimit;
863	ssize_t resid;
864	offset_t offset;
865	offset_t remainder;
866
867#ifdef CFSDEBUG
868	CFS_DEBUG(CFSDEBUG_VOPS)
869		printf(
870		"cachefs_write: ENTER vp %p offset %llu count %ld cflags %x\n",
871		    (void *)vp, uiop->uio_loffset, uiop->uio_resid,
872		    cp->c_flags);
873#endif
874	if (getzoneid() != GLOBAL_ZONEID) {
875		error = EPERM;
876		goto out;
877	}
878	if (vp->v_type != VREG) {
879		error = EISDIR;
880		goto out;
881	}
882
883	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
884
885	if (uiop->uio_resid == 0) {
886		goto out;
887	}
888
889	/* Call backfilesystem to write if NFSv4 */
890	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
891		error = cachefs_write_backfs_nfsv4(vp, uiop, ioflag, cr, ct);
892		goto out2;
893	}
894
895	if (MANDLOCK(vp, cp->c_attr.va_mode)) {
896		error = chklock(vp, FWRITE, (offset_t)uiop->uio_loffset,
897		    uiop->uio_resid, uiop->uio_fmode, ct);
898		if (error)
899			goto out;
900	}
901
902	if (ioflag & FAPPEND) {
903		for (;;) {
904			/* do consistency check to get correct file size */
905			error = cachefs_cd_access(fscp, 0, 1);
906			if (error)
907				goto out;
908			mutex_enter(&cp->c_statelock);
909			error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
910			uiop->uio_loffset = cp->c_size;
911			mutex_exit(&cp->c_statelock);
912			if (CFS_TIMEOUT(fscp, error)) {
913				cachefs_cd_release(fscp);
914				cachefs_cd_timedout(fscp);
915				continue;
916			}
917			cachefs_cd_release(fscp);
918			if (error)
919				goto out;
920			break;
921		}
922	}
923
924	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
925		limit = MAXOFFSET_T;
926
927	if (uiop->uio_loffset >= limit) {
928		proc_t *p = ttoproc(curthread);
929
930		mutex_enter(&p->p_lock);
931		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
932		    p, RCA_UNSAFE_SIGINFO);
933		mutex_exit(&p->p_lock);
934		error = EFBIG;
935		goto out;
936	}
937	if (uiop->uio_loffset > fscp->fs_offmax) {
938		error = EFBIG;
939		goto out;
940	}
941
942	if (limit > fscp->fs_offmax)
943		limit = fscp->fs_offmax;
944
945	if (uiop->uio_loffset < (offset_t)0) {
946		error = EINVAL;
947		goto out;
948	}
949
950	offset = uiop->uio_loffset + uiop->uio_resid;
951	/*
952	 * Check to make sure that the process will not exceed
953	 * its limit on file size.  It is okay to write up to
954	 * the limit, but not beyond.  Thus, the write which
955	 * reaches the limit will be short and the next write
956	 * will return an error.
957	 */
958	remainder = 0;
959	if (offset > limit) {
960		remainder = (int)(offset - (u_offset_t)limit);
961		uiop->uio_resid = limit - uiop->uio_loffset;
962		if (uiop->uio_resid <= 0) {
963			proc_t *p = ttoproc(curthread);
964
965			uiop->uio_resid += remainder;
966			mutex_enter(&p->p_lock);
967			(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE],
968			    p->p_rctls, p, RCA_UNSAFE_SIGINFO);
969			mutex_exit(&p->p_lock);
970			error = EFBIG;
971			goto out;
972		}
973	}
974
975	resid = uiop->uio_resid;
976	offset = uiop->uio_loffset;
977	bsize = vp->v_vfsp->vfs_bsize;
978
979	/* loop around and do the write in MAXBSIZE chunks */
980	do {
981		/* mapping offset */
982		off = uiop->uio_loffset & (offset_t)MAXBMASK;
983		on = uiop->uio_loffset & MAXBOFFSET; /* Rel. offset */
984		n = MAXBSIZE - on;
985		if (n > uiop->uio_resid)
986			n = (int)uiop->uio_resid;
987
988		/*
989		 * Touch the page and fault it in if it is not in
990		 * core before segmap_getmapflt can lock it. This
991		 * is to avoid the deadlock if the buffer is mapped
992		 * to the same file through mmap which we want to
993		 * write to.
994		 */
995		uio_prefaultpages((long)n, uiop);
996
997		base = segmap_getmap(segkmap, vp, off);
998		error = cachefs_writepage(vp, (base + on), n, uiop);
999		if (error == 0) {
1000			flags = 0;
1001			/*
1002			 * Have written a whole block.Start an
1003			 * asynchronous write and mark the buffer to
1004			 * indicate that it won't be needed again
1005			 * soon.
1006			 */
1007			if (n + on == bsize) {
1008				flags = SM_WRITE |SM_ASYNC |SM_DONTNEED;
1009			}
1010#if 0
1011			/* XXX need to understand this */
1012			if ((ioflag & (FSYNC|FDSYNC)) ||
1013			    (cp->c_backvp && vn_has_flocks(cp->c_backvp))) {
1014				flags &= ~SM_ASYNC;
1015				flags |= SM_WRITE;
1016			}
1017#else
1018			if (ioflag & (FSYNC|FDSYNC)) {
1019				flags &= ~SM_ASYNC;
1020				flags |= SM_WRITE;
1021			}
1022#endif
1023			error = segmap_release(segkmap, base, flags);
1024		} else {
1025			(void) segmap_release(segkmap, base, 0);
1026		}
1027	} while (error == 0 && uiop->uio_resid > 0);
1028
1029out:
1030	if (error == EINTR && (ioflag & (FSYNC|FDSYNC))) {
1031		uiop->uio_resid = resid;
1032		uiop->uio_loffset = offset;
1033	} else
1034		uiop->uio_resid += remainder;
1035
1036out2:
1037#ifdef CFSDEBUG
1038	CFS_DEBUG(CFSDEBUG_VOPS)
1039		printf("cachefs_write: EXIT error %d\n", error);
1040#endif
1041	return (error);
1042}
1043
1044/*
1045 * cachefs_write_backfs_nfsv4
1046 *
1047 * Call NFSv4 back filesystem to handle the write (cachefs
1048 * pass-through support for NFSv4).
1049 */
1050static int
1051cachefs_write_backfs_nfsv4(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
1052			caller_context_t *ct)
1053{
1054	cnode_t *cp = VTOC(vp);
1055	fscache_t *fscp = C_TO_FSCACHE(cp);
1056	vnode_t *backvp;
1057	int error;
1058
1059	/*
1060	 * For NFSv4 pass-through to work, only connected operation
1061	 * is supported, the cnode backvp must exist, and cachefs
1062	 * optional (eg., disconnectable) flags are turned off. Assert
1063	 * these conditions for the read operation.
1064	 */
1065	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1066	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1067
1068	/* Call backfs vnode op after extracting the backvp */
1069	mutex_enter(&cp->c_statelock);
1070	backvp = cp->c_backvp;
1071	mutex_exit(&cp->c_statelock);
1072
1073	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_write_backfs_nfsv4: cnode %p, "
1074	    "backvp %p\n", cp, backvp));
1075	(void) VOP_RWLOCK(backvp, V_WRITELOCK_TRUE, ct);
1076	error = VOP_WRITE(backvp, uiop, ioflag, cr, ct);
1077	VOP_RWUNLOCK(backvp, V_WRITELOCK_TRUE, ct);
1078
1079	return (error);
1080}
1081
1082/*
1083 * see if we've charged ourselves for frontfile data at
1084 * the given offset.  If not, allocate a block for it now.
1085 */
1086static int
1087cachefs_charge_page(struct cnode *cp, u_offset_t offset)
1088{
1089	u_offset_t blockoff;
1090	int error;
1091	int inc;
1092
1093	ASSERT(MUTEX_HELD(&cp->c_statelock));
1094	/*LINTED*/
1095	ASSERT(PAGESIZE <= MAXBSIZE);
1096
1097	error = 0;
1098	blockoff = offset & (offset_t)MAXBMASK;
1099
1100	/* get the front file if necessary so allocblocks works */
1101	if ((cp->c_frontvp == NULL) &&
1102	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1103		(void) cachefs_getfrontfile(cp);
1104	}
1105	if (cp->c_flags & CN_NOCACHE)
1106		return (1);
1107
1108	if (cachefs_check_allocmap(cp, blockoff))
1109		return (0);
1110
1111	for (inc = PAGESIZE; inc < MAXBSIZE; inc += PAGESIZE)
1112		if (cachefs_check_allocmap(cp, blockoff+inc))
1113			return (0);
1114
1115	error = cachefs_allocblocks(C_TO_FSCACHE(cp)->fs_cache, 1,
1116	    cp->c_metadata.md_rltype);
1117	if (error == 0) {
1118		cp->c_metadata.md_frontblks++;
1119		cp->c_flags |= CN_UPDATED;
1120	}
1121	return (error);
1122}
1123
1124/*
1125 * Called only by cachefs_write to write 1 page or less of data.
1126 *	base   - base address kernel addr space
1127 *	tcount - Total bytes to move - < MAXBSIZE
1128 */
1129static int
1130cachefs_writepage(vnode_t *vp, caddr_t base, int tcount, uio_t *uiop)
1131{
1132	struct cnode *cp =  VTOC(vp);
1133	fscache_t *fscp = C_TO_FSCACHE(cp);
1134	register int n;
1135	register u_offset_t offset;
1136	int error = 0, terror;
1137	extern struct as kas;
1138	u_offset_t lastpage_off;
1139	int pagecreate = 0;
1140	int newpage;
1141
1142#ifdef CFSDEBUG
1143	CFS_DEBUG(CFSDEBUG_VOPS)
1144		printf(
1145		    "cachefs_writepage: ENTER vp %p offset %llu len %ld\\\n",
1146		    (void *)vp, uiop->uio_loffset, uiop->uio_resid);
1147#endif
1148
1149	/*
1150	 * Move bytes in PAGESIZE chunks. We must avoid spanning pages in
1151	 * uiomove() because page faults may cause the cache to be invalidated
1152	 * out from under us.
1153	 */
1154	do {
1155		offset = uiop->uio_loffset;
1156		lastpage_off = (cp->c_size - 1) & (offset_t)PAGEMASK;
1157
1158		/*
1159		 * If not connected then need to make sure we have space
1160		 * to perform the write.  We could make this check
1161		 * a little tighter by only doing it if we are growing the file.
1162		 */
1163		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1164			error = cachefs_allocblocks(fscp->fs_cache, 1,
1165			    cp->c_metadata.md_rltype);
1166			if (error)
1167				break;
1168			cachefs_freeblocks(fscp->fs_cache, 1,
1169			    cp->c_metadata.md_rltype);
1170		}
1171
1172		/*
1173		 * n is the number of bytes required to satisfy the request
1174		 * or the number of bytes to fill out the page.
1175		 */
1176		n = (int)(PAGESIZE - ((uintptr_t)base & PAGEOFFSET));
1177		if (n > tcount)
1178			n = tcount;
1179
1180		/*
1181		 * The number of bytes of data in the last page can not
1182		 * be accurately be determined while page is being
1183		 * uiomove'd to and the size of the file being updated.
1184		 * Thus, inform threads which need to know accurately
1185		 * how much data is in the last page of the file.  They
1186		 * will not do the i/o immediately, but will arrange for
1187		 * the i/o to happen later when this modify operation
1188		 * will have finished.
1189		 *
1190		 * in similar NFS code, this is done right before the
1191		 * uiomove(), which is best.  but here in cachefs, we
1192		 * have two uiomove()s, so we must do it here.
1193		 */
1194		ASSERT(!(cp->c_flags & CN_CMODINPROG));
1195		mutex_enter(&cp->c_statelock);
1196		cp->c_flags |= CN_CMODINPROG;
1197		cp->c_modaddr = (offset & (offset_t)MAXBMASK);
1198		mutex_exit(&cp->c_statelock);
1199
1200		/*
1201		 * Check to see if we can skip reading in the page
1202		 * and just allocate the memory.  We can do this
1203		 * if we are going to rewrite the entire mapping
1204		 * or if we are going to write to or beyond the current
1205		 * end of file from the beginning of the mapping.
1206		 */
1207		if ((offset > (lastpage_off + PAGEOFFSET)) ||
1208		    ((cp->c_size == 0) && (offset < PAGESIZE)) ||
1209		    ((uintptr_t)base & PAGEOFFSET) == 0 && (n == PAGESIZE ||
1210		    ((offset + n) >= cp->c_size))) {
1211			pagecreate = 1;
1212
1213			/*
1214			 * segmap_pagecreate() returns 1 if it calls
1215			 * page_create_va() to allocate any pages.
1216			 */
1217			newpage = segmap_pagecreate(segkmap,
1218			    (caddr_t)((uintptr_t)base & (uintptr_t)PAGEMASK),
1219			    PAGESIZE, 0);
1220			/* do not zero page if we are overwriting all of it */
1221			if (!((((uintptr_t)base & PAGEOFFSET) == 0) &&
1222			    (n == PAGESIZE))) {
1223				(void) kzero((void *)
1224				    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1225				    PAGESIZE);
1226			}
1227			error = uiomove(base, n, UIO_WRITE, uiop);
1228
1229			/*
1230			 * Unlock the page allocated by page_create_va()
1231			 * in segmap_pagecreate()
1232			 */
1233			if (newpage)
1234				segmap_pageunlock(segkmap,
1235				    (caddr_t)((uintptr_t)base &
1236				    (uintptr_t)PAGEMASK),
1237				    PAGESIZE, S_WRITE);
1238		} else {
1239			/*
1240			 * KLUDGE ! Use segmap_fault instead of faulting and
1241			 * using as_fault() to avoid a recursive readers lock
1242			 * on kas.
1243			 */
1244			error = segmap_fault(kas.a_hat, segkmap, (caddr_t)
1245			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1246			    PAGESIZE, F_SOFTLOCK, S_WRITE);
1247			if (error) {
1248				if (FC_CODE(error) == FC_OBJERR)
1249					error =  FC_ERRNO(error);
1250				else
1251					error = EIO;
1252				break;
1253			}
1254			error = uiomove(base, n, UIO_WRITE, uiop);
1255			(void) segmap_fault(kas.a_hat, segkmap, (caddr_t)
1256			    ((uintptr_t)base & (uintptr_t)PAGEMASK),
1257			    PAGESIZE, F_SOFTUNLOCK, S_WRITE);
1258		}
1259		n = (int)(uiop->uio_loffset - offset); /* n = # bytes written */
1260		base += n;
1261		tcount -= n;
1262
1263		/* get access to the file system */
1264		if ((terror = cachefs_cd_access(fscp, 0, 1)) != 0) {
1265			error = terror;
1266			break;
1267		}
1268
1269		/*
1270		 * cp->c_attr.va_size is the maximum number of
1271		 * bytes known to be in the file.
1272		 * Make sure it is at least as high as the
1273		 * last byte we just wrote into the buffer.
1274		 */
1275		mutex_enter(&cp->c_statelock);
1276		if (cp->c_size < uiop->uio_loffset) {
1277			cp->c_size = uiop->uio_loffset;
1278		}
1279		if (cp->c_size != cp->c_attr.va_size) {
1280			cp->c_attr.va_size = cp->c_size;
1281			cp->c_flags |= CN_UPDATED;
1282		}
1283		/* c_size is now correct, so we can clear modinprog */
1284		cp->c_flags &= ~CN_CMODINPROG;
1285		if (error == 0) {
1286			cp->c_flags |= CDIRTY;
1287			if (pagecreate && (cp->c_flags & CN_NOCACHE) == 0) {
1288				/*
1289				 * if we're not in NOCACHE mode
1290				 * (i.e., single-writer), we update the
1291				 * allocmap here rather than waiting until
1292				 * cachefspush is called.  This prevents
1293				 * getpage from clustering up pages from
1294				 * the backfile and stomping over the changes
1295				 * we make here.
1296				 */
1297				if (cachefs_charge_page(cp, offset) == 0) {
1298					cachefs_update_allocmap(cp,
1299					    offset & (offset_t)PAGEMASK,
1300					    (size_t)PAGESIZE);
1301				}
1302
1303				/* else we ran out of space */
1304				else {
1305					/* nocache file if connected */
1306					if (fscp->fs_cdconnected ==
1307					    CFS_CD_CONNECTED)
1308						cachefs_nocache(cp);
1309					/*
1310					 * If disconnected then cannot
1311					 * nocache the file.  Let it have
1312					 * the space.
1313					 */
1314					else {
1315						cp->c_metadata.md_frontblks++;
1316						cp->c_flags |= CN_UPDATED;
1317						cachefs_update_allocmap(cp,
1318						    offset & (offset_t)PAGEMASK,
1319						    (size_t)PAGESIZE);
1320					}
1321				}
1322			}
1323		}
1324		mutex_exit(&cp->c_statelock);
1325		cachefs_cd_release(fscp);
1326	} while (tcount > 0 && error == 0);
1327
1328	if (cp->c_flags & CN_CMODINPROG) {
1329		/* XXX assert error != 0?  FC_ERRNO() makes this more risky. */
1330		mutex_enter(&cp->c_statelock);
1331		cp->c_flags &= ~CN_CMODINPROG;
1332		mutex_exit(&cp->c_statelock);
1333	}
1334
1335#ifdef CFS_CD_DEBUG
1336	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1337#endif
1338
1339#ifdef CFSDEBUG
1340	CFS_DEBUG(CFSDEBUG_VOPS)
1341		printf("cachefs_writepage: EXIT error %d\n", error);
1342#endif
1343
1344	return (error);
1345}
1346
1347/*
1348 * Pushes out pages to the back and/or front file system.
1349 */
1350static int
1351cachefs_push(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1352    int flags, cred_t *cr)
1353{
1354	struct cnode *cp = VTOC(vp);
1355	struct buf *bp;
1356	int error;
1357	fscache_t *fscp = C_TO_FSCACHE(cp);
1358	u_offset_t iooff;
1359	size_t iolen;
1360	u_offset_t lbn;
1361	u_offset_t lbn_off;
1362	uint_t bsize;
1363
1364	ASSERT((flags & B_ASYNC) == 0);
1365	ASSERT(!vn_is_readonly(vp));
1366	ASSERT(pp != NULL);
1367	ASSERT(cr != NULL);
1368
1369	bsize = MAX(vp->v_vfsp->vfs_bsize, PAGESIZE);
1370	lbn = pp->p_offset / bsize;
1371	lbn_off = lbn * bsize;
1372
1373	/*
1374	 * Find a kluster that fits in one block, or in
1375	 * one page if pages are bigger than blocks.  If
1376	 * there is less file space allocated than a whole
1377	 * page, we'll shorten the i/o request below.
1378	 */
1379
1380	pp = pvn_write_kluster(vp, pp, &iooff, &iolen, lbn_off,
1381	    roundup(bsize, PAGESIZE), flags);
1382
1383	/*
1384	 * The CN_CMODINPROG flag makes sure that we use a correct
1385	 * value of c_size, below.  CN_CMODINPROG is set in
1386	 * cachefs_writepage().  When CN_CMODINPROG is set it
1387	 * indicates that a uiomove() is in progress and the c_size
1388	 * has not been made consistent with the new size of the
1389	 * file. When the uiomove() completes the c_size is updated
1390	 * and the CN_CMODINPROG flag is cleared.
1391	 *
1392	 * The CN_CMODINPROG flag makes sure that cachefs_push_front
1393	 * and cachefs_push_connected see a consistent value of
1394	 * c_size.  Without this handshaking, it is possible that
1395	 * these routines will pick up the old value of c_size before
1396	 * the uiomove() in cachefs_writepage() completes.  This will
1397	 * result in the vn_rdwr() being too small, and data loss.
1398	 *
1399	 * More precisely, there is a window between the time the
1400	 * uiomove() completes and the time the c_size is updated. If
1401	 * a VOP_PUTPAGE() operation intervenes in this window, the
1402	 * page will be picked up, because it is dirty; it will be
1403	 * unlocked, unless it was pagecreate'd. When the page is
1404	 * picked up as dirty, the dirty bit is reset
1405	 * (pvn_getdirty()). In cachefs_push_connected(), c_size is
1406	 * checked.  This will still be the old size.  Therefore, the
1407	 * page will not be written out to the correct length, and the
1408	 * page will be clean, so the data may disappear.
1409	 */
1410	if (cp->c_flags & CN_CMODINPROG) {
1411		mutex_enter(&cp->c_statelock);
1412		if ((cp->c_flags & CN_CMODINPROG) &&
1413		    cp->c_modaddr + MAXBSIZE > iooff &&
1414		    cp->c_modaddr < iooff + iolen) {
1415			page_t *plist;
1416
1417			/*
1418			 * A write is in progress for this region of
1419			 * the file.  If we did not detect
1420			 * CN_CMODINPROG here then this path through
1421			 * cachefs_push_connected() would eventually
1422			 * do the vn_rdwr() and may not write out all
1423			 * of the data in the pages.  We end up losing
1424			 * data. So we decide to set the modified bit
1425			 * on each page in the page list and mark the
1426			 * cnode with CDIRTY.  This push will be
1427			 * restarted at some later time.
1428			 */
1429
1430			plist = pp;
1431			while (plist != NULL) {
1432				pp = plist;
1433				page_sub(&plist, pp);
1434				hat_setmod(pp);
1435				page_io_unlock(pp);
1436				page_unlock(pp);
1437			}
1438			cp->c_flags |= CDIRTY;
1439			mutex_exit(&cp->c_statelock);
1440			if (offp)
1441				*offp = iooff;
1442			if (lenp)
1443				*lenp = iolen;
1444			return (0);
1445		}
1446		mutex_exit(&cp->c_statelock);
1447	}
1448
1449	/*
1450	 * Set the pages up for pageout.
1451	 */
1452	bp = pageio_setup(pp, iolen, CTOV(cp), B_WRITE | flags);
1453	if (bp == NULL) {
1454
1455		/*
1456		 * currently, there is no way for pageio_setup() to
1457		 * return NULL, since it uses its own scheme for
1458		 * kmem_alloc()ing that shouldn't return NULL, and
1459		 * since pageio_setup() itself dereferences the thing
1460		 * it's about to return.  still, we need to be ready
1461		 * in case this ever does start happening.
1462		 */
1463
1464		error = ENOMEM;
1465		goto writedone;
1466	}
1467	/*
1468	 * pageio_setup should have set b_addr to 0.  This
1469	 * is correct since we want to do I/O on a page
1470	 * boundary.  bp_mapin will use this addr to calculate
1471	 * an offset, and then set b_addr to the kernel virtual
1472	 * address it allocated for us.
1473	 */
1474	bp->b_edev = 0;
1475	bp->b_dev = 0;
1476	bp->b_lblkno = (diskaddr_t)lbtodb(iooff);
1477	bp_mapin(bp);
1478
1479	iolen  = cp->c_size - ldbtob(bp->b_blkno);
1480	if (iolen > bp->b_bcount)
1481		iolen  = bp->b_bcount;
1482
1483	/* if connected */
1484	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1485		/* write to the back file first */
1486		error = cachefs_push_connected(vp, bp, iolen, iooff, cr);
1487
1488		/* write to the front file if allowed */
1489		if ((error == 0) && CFS_ISFS_NONSHARED(fscp) &&
1490		    ((cp->c_flags & CN_NOCACHE) == 0)) {
1491			/* try to write to the front file */
1492			(void) cachefs_push_front(vp, bp, iolen, iooff, cr);
1493		}
1494	}
1495
1496	/* else if disconnected */
1497	else {
1498		/* try to write to the front file */
1499		error = cachefs_push_front(vp, bp, iolen, iooff, cr);
1500	}
1501
1502	bp_mapout(bp);
1503	pageio_done(bp);
1504
1505writedone:
1506
1507	pvn_write_done(pp, ((error) ? B_ERROR : 0) | B_WRITE | flags);
1508	if (offp)
1509		*offp = iooff;
1510	if (lenp)
1511		*lenp = iolen;
1512
1513	/* XXX ask bob mastors how to fix this someday */
1514	mutex_enter(&cp->c_statelock);
1515	if (error) {
1516		if (error == ENOSPC) {
1517			if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
1518			    CFS_ISFS_SOFT(fscp)) {
1519				CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1520				cp->c_error = error;
1521			}
1522		} else if ((CFS_TIMEOUT(fscp, error) == 0) &&
1523		    (error != EINTR)) {
1524			CFSOP_INVALIDATE_COBJECT(fscp, cp, cr);
1525			cp->c_error = error;
1526		}
1527	} else if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1528		CFSOP_MODIFY_COBJECT(fscp, cp, cr);
1529	}
1530	mutex_exit(&cp->c_statelock);
1531
1532	return (error);
1533}
1534
1535/*
1536 * Pushes out pages to the back file system.
1537 */
1538static int
1539cachefs_push_connected(vnode_t *vp, struct buf *bp, size_t iolen,
1540    u_offset_t iooff, cred_t *cr)
1541{
1542	struct cnode *cp = VTOC(vp);
1543	int error = 0;
1544	int mode = 0;
1545	fscache_t *fscp = C_TO_FSCACHE(cp);
1546	ssize_t resid;
1547	vnode_t *backvp;
1548
1549	/* get the back file if necessary */
1550	mutex_enter(&cp->c_statelock);
1551	if (cp->c_backvp == NULL) {
1552		error = cachefs_getbackvp(fscp, cp);
1553		if (error) {
1554			mutex_exit(&cp->c_statelock);
1555			goto out;
1556		}
1557	}
1558	backvp = cp->c_backvp;
1559	VN_HOLD(backvp);
1560	mutex_exit(&cp->c_statelock);
1561
1562	if (CFS_ISFS_NONSHARED(fscp) && CFS_ISFS_SNR(fscp))
1563		mode = FSYNC;
1564
1565	/* write to the back file */
1566	error = bp->b_error = vn_rdwr(UIO_WRITE, backvp, bp->b_un.b_addr,
1567	    iolen, iooff, UIO_SYSSPACE, mode,
1568	    RLIM64_INFINITY, cr, &resid);
1569	if (error) {
1570#ifdef CFSDEBUG
1571		CFS_DEBUG(CFSDEBUG_VOPS | CFSDEBUG_BACK)
1572			printf("cachefspush: error %d cr %p\n",
1573			    error, (void *)cr);
1574#endif
1575		bp->b_flags |= B_ERROR;
1576	}
1577	VN_RELE(backvp);
1578out:
1579	return (error);
1580}
1581
1582/*
1583 * Pushes out pages to the front file system.
1584 * Called for both connected and disconnected states.
1585 */
1586static int
1587cachefs_push_front(vnode_t *vp, struct buf *bp, size_t iolen,
1588    u_offset_t iooff, cred_t *cr)
1589{
1590	struct cnode *cp = VTOC(vp);
1591	fscache_t *fscp = C_TO_FSCACHE(cp);
1592	int error = 0;
1593	ssize_t resid;
1594	u_offset_t popoff;
1595	off_t commit = 0;
1596	uint_t seq;
1597	enum cachefs_rl_type type;
1598	vnode_t *frontvp = NULL;
1599
1600	mutex_enter(&cp->c_statelock);
1601
1602	if (!CFS_ISFS_NONSHARED(fscp)) {
1603		error = ETIMEDOUT;
1604		goto out;
1605	}
1606
1607	/* get the front file if necessary */
1608	if ((cp->c_frontvp == NULL) &&
1609	    ((cp->c_flags & CN_NOCACHE) == 0)) {
1610		(void) cachefs_getfrontfile(cp);
1611	}
1612	if (cp->c_flags & CN_NOCACHE) {
1613		error = ETIMEDOUT;
1614		goto out;
1615	}
1616
1617	/* if disconnected, needs to be populated and have good attributes */
1618	if ((fscp->fs_cdconnected != CFS_CD_CONNECTED) &&
1619	    (((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1620	    (cp->c_metadata.md_flags & MD_NEEDATTRS))) {
1621		error = ETIMEDOUT;
1622		goto out;
1623	}
1624
1625	for (popoff = iooff; popoff < (iooff + iolen); popoff += MAXBSIZE) {
1626		if (cachefs_charge_page(cp, popoff)) {
1627			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1628				cachefs_nocache(cp);
1629				goto out;
1630			} else {
1631				error = ENOSPC;
1632				goto out;
1633			}
1634		}
1635	}
1636
1637	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1638		/* log the first putpage to a file */
1639		if ((cp->c_metadata.md_flags & MD_PUTPAGE) == 0) {
1640			/* uses open's creds if we have them */
1641			if (cp->c_cred)
1642				cr = cp->c_cred;
1643
1644			if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
1645				error = cachefs_dlog_cidmap(fscp);
1646				if (error) {
1647					error = ENOSPC;
1648					goto out;
1649				}
1650				cp->c_metadata.md_flags |= MD_MAPPING;
1651			}
1652
1653			commit = cachefs_dlog_modify(fscp, cp, cr, &seq);
1654			if (commit == 0) {
1655				/* out of space */
1656				error = ENOSPC;
1657				goto out;
1658			}
1659
1660			cp->c_metadata.md_seq = seq;
1661			type = cp->c_metadata.md_rltype;
1662			cachefs_modified(cp);
1663			cp->c_metadata.md_flags |= MD_PUTPAGE;
1664			cp->c_metadata.md_flags &= ~MD_PUSHDONE;
1665			cp->c_flags |= CN_UPDATED;
1666		}
1667
1668		/* subsequent putpages just get a new sequence number */
1669		else {
1670			/* but only if it matters */
1671			if (cp->c_metadata.md_seq != fscp->fs_dlogseq) {
1672				seq = cachefs_dlog_seqnext(fscp);
1673				if (seq == 0) {
1674					error = ENOSPC;
1675					goto out;
1676				}
1677				cp->c_metadata.md_seq = seq;
1678				cp->c_flags |= CN_UPDATED;
1679				/* XXX maybe should do write_metadata here */
1680			}
1681		}
1682	}
1683
1684	frontvp = cp->c_frontvp;
1685	VN_HOLD(frontvp);
1686	mutex_exit(&cp->c_statelock);
1687	error = bp->b_error = vn_rdwr(UIO_WRITE, frontvp,
1688	    bp->b_un.b_addr, iolen, iooff, UIO_SYSSPACE, 0,
1689	    RLIM64_INFINITY, kcred, &resid);
1690	mutex_enter(&cp->c_statelock);
1691	VN_RELE(frontvp);
1692	frontvp = NULL;
1693	if (error) {
1694		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1695			cachefs_nocache(cp);
1696			error = 0;
1697			goto out;
1698		} else {
1699			goto out;
1700		}
1701	}
1702
1703	(void) cachefs_update_allocmap(cp, iooff, iolen);
1704	cp->c_flags |= (CN_UPDATED | CN_NEED_FRONT_SYNC |
1705	    CN_POPULATION_PENDING);
1706	if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
1707		gethrestime(&cp->c_metadata.md_localmtime);
1708		cp->c_metadata.md_flags |= MD_LOCALMTIME;
1709	}
1710
1711out:
1712	if (commit) {
1713		/* commit the log record */
1714		ASSERT(fscp->fs_cdconnected == CFS_CD_DISCONNECTED);
1715		if (cachefs_dlog_commit(fscp, commit, error)) {
1716			/*EMPTY*/
1717			/* XXX fix on panic */
1718		}
1719	}
1720
1721	if (error && commit) {
1722		cp->c_metadata.md_flags &= ~MD_PUTPAGE;
1723		cachefs_rlent_moveto(fscp->fs_cache, type,
1724		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
1725		cp->c_metadata.md_rltype = type;
1726		cp->c_flags |= CN_UPDATED;
1727	}
1728	mutex_exit(&cp->c_statelock);
1729	return (error);
1730}
1731
/*
 * VOP_DUMP stub -- cachefs can never be a dump device.
 */
/*ARGSUSED*/
static int
cachefs_dump(struct vnode *vp, caddr_t foo1, offset_t foo2, offset_t foo3,
    caller_context_t *ct)
{
	return (ENOSYS); /* should we panic if we get here? */
}
1739
1740/*ARGSUSED*/
1741static int
1742cachefs_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, cred_t *cred,
1743	int *rvalp, caller_context_t *ct)
1744{
1745	int error;
1746	struct cnode *cp = VTOC(vp);
1747	struct fscache *fscp = C_TO_FSCACHE(cp);
1748	struct cachefscache *cachep;
1749	extern kmutex_t cachefs_cachelock;
1750	extern cachefscache_t *cachefs_cachelist;
1751	cachefsio_pack_t *packp;
1752	STRUCT_DECL(cachefsio_dcmd, dcmd);
1753	int	inlen, outlen;	/* LP64: generic int for struct in/out len */
1754	void *dinp, *doutp;
1755	int (*dcmd_routine)(vnode_t *, void *, void *);
1756
1757	if (getzoneid() != GLOBAL_ZONEID)
1758		return (EPERM);
1759
1760	/*
1761	 * Cachefs only provides pass-through support for NFSv4,
1762	 * and all vnode operations are passed through to the
1763	 * back file system. For NFSv4 pass-through to work, only
1764	 * connected operation is supported, the cnode backvp must
1765	 * exist, and cachefs optional (eg., disconnectable) flags
1766	 * are turned off. Assert these conditions which ensure
1767	 * that only a subset of the ioctls are "truly supported"
1768	 * for NFSv4 (these are CFSDCMD_DAEMONID and CFSDCMD_GETSTATS.
1769	 * The packing operations are meaningless since there is
1770	 * no caching for NFSv4, and the called functions silently
1771	 * return if the backfilesystem is NFSv4. The daemon
1772	 * commands except for those above are essentially used
1773	 * for disconnectable operation support (including log
1774	 * rolling), so in each called function, we assert that
1775	 * NFSv4 is not in use. The _FIO* calls (except _FIOCOD)
1776	 * are from "cfsfstype" which is not a documented
1777	 * command. However, the command is visible in
1778	 * /usr/lib/fs/cachefs so the commands are simply let
1779	 * through (don't seem to impact pass-through functionality).
1780	 */
1781	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
1782	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
1783
1784	switch (cmd) {
1785	case CACHEFSIO_PACK:
1786		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1787		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1788		if (!error)
1789			error = cachefs_pack(vp, packp->p_name, cred);
1790		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1791		break;
1792
1793	case CACHEFSIO_UNPACK:
1794		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1795		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1796		if (!error)
1797			error = cachefs_unpack(vp, packp->p_name, cred);
1798		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1799		break;
1800
1801	case CACHEFSIO_PACKINFO:
1802		packp = cachefs_kmem_alloc(sizeof (cachefsio_pack_t), KM_SLEEP);
1803		error = xcopyin((void *)arg, packp, sizeof (cachefsio_pack_t));
1804		if (!error)
1805			error = cachefs_packinfo(vp, packp->p_name,
1806			    &packp->p_status, cred);
1807		if (!error)
1808			error = xcopyout(packp, (void *)arg,
1809			    sizeof (cachefsio_pack_t));
1810		cachefs_kmem_free(packp, sizeof (cachefsio_pack_t));
1811		break;
1812
1813	case CACHEFSIO_UNPACKALL:
1814		error = cachefs_unpackall(vp);
1815		break;
1816
1817	case CACHEFSIO_DCMD:
1818		/*
1819		 * This is a private interface between the cachefsd and
1820		 * this file system.
1821		 */
1822
1823		/* must be root to use these commands */
1824		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0)
1825			return (EPERM);
1826
1827		/* get the command packet */
1828		STRUCT_INIT(dcmd, flag & DATAMODEL_MASK);
1829		error = xcopyin((void *)arg, STRUCT_BUF(dcmd),
1830		    SIZEOF_STRUCT(cachefsio_dcmd, DATAMODEL_NATIVE));
1831		if (error)
1832			return (error);
1833
1834		/* copy in the data for the operation */
1835		dinp = NULL;
1836		if ((inlen = STRUCT_FGET(dcmd, d_slen)) > 0) {
1837			dinp = cachefs_kmem_alloc(inlen, KM_SLEEP);
1838			error = xcopyin(STRUCT_FGETP(dcmd, d_sdata), dinp,
1839			    inlen);
1840			if (error)
1841				return (error);
1842		}
1843
1844		/* allocate space for the result */
1845		doutp = NULL;
1846		if ((outlen = STRUCT_FGET(dcmd, d_rlen)) > 0)
1847			doutp = cachefs_kmem_alloc(outlen, KM_SLEEP);
1848
1849		/*
1850		 * Assert NFSv4 only allows the daemonid and getstats
1851		 * daemon requests
1852		 */
1853		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0 ||
1854		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_DAEMONID ||
1855		    STRUCT_FGET(dcmd, d_cmd) == CFSDCMD_GETSTATS);
1856
1857		/* get the routine to execute */
1858		dcmd_routine = NULL;
1859		switch (STRUCT_FGET(dcmd, d_cmd)) {
1860		case CFSDCMD_DAEMONID:
1861			dcmd_routine = cachefs_io_daemonid;
1862			break;
1863		case CFSDCMD_STATEGET:
1864			dcmd_routine = cachefs_io_stateget;
1865			break;
1866		case CFSDCMD_STATESET:
1867			dcmd_routine = cachefs_io_stateset;
1868			break;
1869		case CFSDCMD_XWAIT:
1870			dcmd_routine = cachefs_io_xwait;
1871			break;
1872		case CFSDCMD_EXISTS:
1873			dcmd_routine = cachefs_io_exists;
1874			break;
1875		case CFSDCMD_LOSTFOUND:
1876			dcmd_routine = cachefs_io_lostfound;
1877			break;
1878		case CFSDCMD_GETINFO:
1879			dcmd_routine = cachefs_io_getinfo;
1880			break;
1881		case CFSDCMD_CIDTOFID:
1882			dcmd_routine = cachefs_io_cidtofid;
1883			break;
1884		case CFSDCMD_GETATTRFID:
1885			dcmd_routine = cachefs_io_getattrfid;
1886			break;
1887		case CFSDCMD_GETATTRNAME:
1888			dcmd_routine = cachefs_io_getattrname;
1889			break;
1890		case CFSDCMD_GETSTATS:
1891			dcmd_routine = cachefs_io_getstats;
1892			break;
1893		case CFSDCMD_ROOTFID:
1894			dcmd_routine = cachefs_io_rootfid;
1895			break;
1896		case CFSDCMD_CREATE:
1897			dcmd_routine = cachefs_io_create;
1898			break;
1899		case CFSDCMD_REMOVE:
1900			dcmd_routine = cachefs_io_remove;
1901			break;
1902		case CFSDCMD_LINK:
1903			dcmd_routine = cachefs_io_link;
1904			break;
1905		case CFSDCMD_RENAME:
1906			dcmd_routine = cachefs_io_rename;
1907			break;
1908		case CFSDCMD_MKDIR:
1909			dcmd_routine = cachefs_io_mkdir;
1910			break;
1911		case CFSDCMD_RMDIR:
1912			dcmd_routine = cachefs_io_rmdir;
1913			break;
1914		case CFSDCMD_SYMLINK:
1915			dcmd_routine = cachefs_io_symlink;
1916			break;
1917		case CFSDCMD_SETATTR:
1918			dcmd_routine = cachefs_io_setattr;
1919			break;
1920		case CFSDCMD_SETSECATTR:
1921			dcmd_routine = cachefs_io_setsecattr;
1922			break;
1923		case CFSDCMD_PUSHBACK:
1924			dcmd_routine = cachefs_io_pushback;
1925			break;
1926		default:
1927			error = ENOTTY;
1928			break;
1929		}
1930
1931		/* execute the routine */
1932		if (dcmd_routine)
1933			error = (*dcmd_routine)(vp, dinp, doutp);
1934
1935		/* copy out the result */
1936		if ((error == 0) && doutp)
1937			error = xcopyout(doutp, STRUCT_FGETP(dcmd, d_rdata),
1938			    outlen);
1939
1940		/* free allocated memory */
1941		if (dinp)
1942			cachefs_kmem_free(dinp, inlen);
1943		if (doutp)
1944			cachefs_kmem_free(doutp, outlen);
1945
1946		break;
1947
1948	case _FIOCOD:
1949		if (secpolicy_fs_config(cred, vp->v_vfsp) != 0) {
1950			error = EPERM;
1951			break;
1952		}
1953
1954		error = EBUSY;
1955		if (arg) {
1956			/* non-zero arg means do all filesystems */
1957			mutex_enter(&cachefs_cachelock);
1958			for (cachep = cachefs_cachelist; cachep != NULL;
1959			    cachep = cachep->c_next) {
1960				mutex_enter(&cachep->c_fslistlock);
1961				for (fscp = cachep->c_fslist;
1962				    fscp != NULL;
1963				    fscp = fscp->fs_next) {
1964					if (CFS_ISFS_CODCONST(fscp)) {
1965						gethrestime(&fscp->fs_cod_time);
1966						error = 0;
1967					}
1968				}
1969				mutex_exit(&cachep->c_fslistlock);
1970			}
1971			mutex_exit(&cachefs_cachelock);
1972		} else {
1973			if (CFS_ISFS_CODCONST(fscp)) {
1974				gethrestime(&fscp->fs_cod_time);
1975				error = 0;
1976			}
1977		}
1978		break;
1979
1980	case _FIOSTOPCACHE:
1981		error = cachefs_stop_cache(cp);
1982		break;
1983
1984	default:
1985		error = ENOTTY;
1986		break;
1987	}
1988
1989	/* return the result */
1990	return (error);
1991}
1992
1993ino64_t
1994cachefs_fileno_conflict(fscache_t *fscp, ino64_t old)
1995{
1996	ino64_t new;
1997
1998	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1999
2000	for (;;) {
2001		fscp->fs_info.fi_localfileno++;
2002		if (fscp->fs_info.fi_localfileno == 0)
2003			fscp->fs_info.fi_localfileno = 3;
2004		fscp->fs_flags |= CFS_FS_DIRTYINFO;
2005
2006		new = fscp->fs_info.fi_localfileno;
2007		if (! cachefs_fileno_inuse(fscp, new))
2008			break;
2009	}
2010
2011	cachefs_inum_register(fscp, old, new);
2012	cachefs_inum_register(fscp, new, 0);
2013	return (new);
2014}
2015
2016/*ARGSUSED*/
2017static int
2018cachefs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
2019	caller_context_t *ct)
2020{
2021	struct cnode *cp = VTOC(vp);
2022	fscache_t *fscp = C_TO_FSCACHE(cp);
2023	int error = 0;
2024	int held = 0;
2025	int connected = 0;
2026
2027#ifdef CFSDEBUG
2028	CFS_DEBUG(CFSDEBUG_VOPS)
2029		printf("cachefs_getattr: ENTER vp %p\n", (void *)vp);
2030#endif
2031
2032	if (getzoneid() != GLOBAL_ZONEID)
2033		return (EPERM);
2034
2035	/* Call backfilesystem getattr if NFSv4 */
2036	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
2037		error = cachefs_getattr_backfs_nfsv4(vp, vap, flags, cr, ct);
2038		goto out;
2039	}
2040
2041	/*
2042	 * If it has been specified that the return value will
2043	 * just be used as a hint, and we are only being asked
2044	 * for size, fsid or rdevid, then return the client's
2045	 * notion of these values without checking to make sure
2046	 * that the attribute cache is up to date.
2047	 * The whole point is to avoid an over the wire GETATTR
2048	 * call.
2049	 */
2050	if (flags & ATTR_HINT) {
2051		if (vap->va_mask ==
2052		    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2053			if (vap->va_mask | AT_SIZE)
2054				vap->va_size = cp->c_size;
2055			/*
2056			 * Return the FSID of the cachefs filesystem,
2057			 * not the back filesystem
2058			 */
2059			if (vap->va_mask | AT_FSID)
2060				vap->va_fsid = vp->v_vfsp->vfs_dev;
2061			if (vap->va_mask | AT_RDEV)
2062				vap->va_rdev = cp->c_attr.va_rdev;
2063			return (0);
2064		}
2065	}
2066
2067	/*
2068	 * Only need to flush pages if asking for the mtime
2069	 * and if there any dirty pages.
2070	 */
2071	if (vap->va_mask & AT_MTIME) {
2072		/*EMPTY*/
2073#if 0
2074		/*
2075		 * XXX bob: stolen from nfs code, need to do something similar
2076		 */
2077		rp = VTOR(vp);
2078		if ((rp->r_flags & RDIRTY) || rp->r_iocnt > 0)
2079			(void) nfs3_putpage(vp, (offset_t)0, 0, 0, cr);
2080#endif
2081	}
2082
2083	for (;;) {
2084		/* get (or renew) access to the file system */
2085		if (held) {
2086			cachefs_cd_release(fscp);
2087			held = 0;
2088		}
2089		error = cachefs_cd_access(fscp, connected, 0);
2090		if (error)
2091			goto out;
2092		held = 1;
2093
2094		/*
2095		 * If it has been specified that the return value will
2096		 * just be used as a hint, and we are only being asked
2097		 * for size, fsid or rdevid, then return the client's
2098		 * notion of these values without checking to make sure
2099		 * that the attribute cache is up to date.
2100		 * The whole point is to avoid an over the wire GETATTR
2101		 * call.
2102		 */
2103		if (flags & ATTR_HINT) {
2104			if (vap->va_mask ==
2105			    (vap->va_mask & (AT_SIZE | AT_FSID | AT_RDEV))) {
2106				if (vap->va_mask | AT_SIZE)
2107					vap->va_size = cp->c_size;
2108				/*
2109				 * Return the FSID of the cachefs filesystem,
2110				 * not the back filesystem
2111				 */
2112				if (vap->va_mask | AT_FSID)
2113					vap->va_fsid = vp->v_vfsp->vfs_dev;
2114				if (vap->va_mask | AT_RDEV)
2115					vap->va_rdev = cp->c_attr.va_rdev;
2116				goto out;
2117			}
2118		}
2119
2120		mutex_enter(&cp->c_statelock);
2121		if ((cp->c_metadata.md_flags & MD_NEEDATTRS) &&
2122		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2123			mutex_exit(&cp->c_statelock);
2124			connected = 1;
2125			continue;
2126		}
2127
2128		error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2129		if (CFS_TIMEOUT(fscp, error)) {
2130			mutex_exit(&cp->c_statelock);
2131			cachefs_cd_release(fscp);
2132			held = 0;
2133			cachefs_cd_timedout(fscp);
2134			continue;
2135		}
2136		if (error) {
2137			mutex_exit(&cp->c_statelock);
2138			break;
2139		}
2140
2141		/* check for fileno conflict */
2142		if ((fscp->fs_inum_size > 0) &&
2143		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) {
2144			ino64_t fakenum;
2145
2146			mutex_exit(&cp->c_statelock);
2147			mutex_enter(&fscp->fs_fslock);
2148			fakenum = cachefs_inum_real2fake(fscp,
2149			    cp->c_attr.va_nodeid);
2150			if (fakenum == 0) {
2151				fakenum = cachefs_fileno_conflict(fscp,
2152				    cp->c_attr.va_nodeid);
2153			}
2154			mutex_exit(&fscp->fs_fslock);
2155
2156			mutex_enter(&cp->c_statelock);
2157			cp->c_metadata.md_flags |= MD_LOCALFILENO;
2158			cp->c_metadata.md_localfileno = fakenum;
2159			cp->c_flags |= CN_UPDATED;
2160		}
2161
2162		/* copy out the attributes */
2163		*vap = cp->c_attr;
2164
2165		/*
2166		 * return the FSID of the cachefs filesystem,
2167		 * not the back filesystem
2168		 */
2169		vap->va_fsid = vp->v_vfsp->vfs_dev;
2170
2171		/* return our idea of the size */
2172		if (cp->c_size > vap->va_size)
2173			vap->va_size = cp->c_size;
2174
2175		/* overwrite with our version of fileno and timestamps */
2176		vap->va_nodeid = cp->c_metadata.md_localfileno;
2177		vap->va_mtime = cp->c_metadata.md_localmtime;
2178		vap->va_ctime = cp->c_metadata.md_localctime;
2179
2180		mutex_exit(&cp->c_statelock);
2181		break;
2182	}
2183out:
2184	if (held)
2185		cachefs_cd_release(fscp);
2186#ifdef CFS_CD_DEBUG
2187	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2188#endif
2189
2190#ifdef CFSDEBUG
2191	CFS_DEBUG(CFSDEBUG_VOPS)
2192		printf("cachefs_getattr: EXIT error = %d\n", error);
2193#endif
2194	return (error);
2195}
2196
2197/*
2198 * cachefs_getattr_backfs_nfsv4
2199 *
2200 * Call NFSv4 back filesystem to handle the getattr (cachefs
2201 * pass-through support for NFSv4).
2202 */
2203static int
2204cachefs_getattr_backfs_nfsv4(vnode_t *vp, vattr_t *vap,
2205    int flags, cred_t *cr, caller_context_t *ct)
2206{
2207	cnode_t *cp = VTOC(vp);
2208	fscache_t *fscp = C_TO_FSCACHE(cp);
2209	vnode_t *backvp;
2210	int error;
2211
2212	/*
2213	 * For NFSv4 pass-through to work, only connected operation
2214	 * is supported, the cnode backvp must exist, and cachefs
2215	 * optional (eg., disconnectable) flags are turned off. Assert
2216	 * these conditions for the getattr operation.
2217	 */
2218	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2219	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2220
2221	/* Call backfs vnode op after extracting backvp */
2222	mutex_enter(&cp->c_statelock);
2223	backvp = cp->c_backvp;
2224	mutex_exit(&cp->c_statelock);
2225
2226	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_getattr_backfs_nfsv4: cnode %p,"
2227	    " backvp %p\n", cp, backvp));
2228	error = VOP_GETATTR(backvp, vap, flags, cr, ct);
2229
2230	/* Update attributes */
2231	cp->c_attr = *vap;
2232
2233	/*
2234	 * return the FSID of the cachefs filesystem,
2235	 * not the back filesystem
2236	 */
2237	vap->va_fsid = vp->v_vfsp->vfs_dev;
2238
2239	return (error);
2240}
2241
2242/*ARGSUSED4*/
2243static int
2244cachefs_setattr(
2245	vnode_t *vp,
2246	vattr_t *vap,
2247	int flags,
2248	cred_t *cr,
2249	caller_context_t *ct)
2250{
2251	cnode_t *cp = VTOC(vp);
2252	fscache_t *fscp = C_TO_FSCACHE(cp);
2253	int error;
2254	int connected;
2255	int held = 0;
2256
2257	if (getzoneid() != GLOBAL_ZONEID)
2258		return (EPERM);
2259
2260	/*
2261	 * Cachefs only provides pass-through support for NFSv4,
2262	 * and all vnode operations are passed through to the
2263	 * back file system. For NFSv4 pass-through to work, only
2264	 * connected operation is supported, the cnode backvp must
2265	 * exist, and cachefs optional (eg., disconnectable) flags
2266	 * are turned off. Assert these conditions to ensure that
2267	 * the backfilesystem is called for the setattr operation.
2268	 */
2269	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
2270	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
2271
2272	connected = 0;
2273	for (;;) {
2274		/* drop hold on file system */
2275		if (held) {
2276			/* Won't loop with NFSv4 connected behavior */
2277			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2278			cachefs_cd_release(fscp);
2279			held = 0;
2280		}
2281
2282		/* acquire access to the file system */
2283		error = cachefs_cd_access(fscp, connected, 1);
2284		if (error)
2285			break;
2286		held = 1;
2287
2288		/* perform the setattr */
2289		error = cachefs_setattr_common(vp, vap, flags, cr, ct);
2290		if (error) {
2291			/* if connected */
2292			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
2293				if (CFS_TIMEOUT(fscp, error)) {
2294					cachefs_cd_release(fscp);
2295					held = 0;
2296					cachefs_cd_timedout(fscp);
2297					connected = 0;
2298					continue;
2299				}
2300			}
2301
2302			/* else must be disconnected */
2303			else {
2304				if (CFS_TIMEOUT(fscp, error)) {
2305					connected = 1;
2306					continue;
2307				}
2308			}
2309		}
2310		break;
2311	}
2312
2313	if (held) {
2314		cachefs_cd_release(fscp);
2315	}
2316#ifdef CFS_CD_DEBUG
2317	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
2318#endif
2319	return (error);
2320}
2321
/*
 * cachefs_setattr_common
 *
 * Guts of setattr: flushes dirty pages, dispatches to the connected
 * or disconnected implementation, and on a size change trims pages
 * beyond the new EOF.  Runs under c_rwlock (writer); caller holds
 * cachefs_cd_access().
 */
static int
cachefs_setattr_common(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	uint_t mask = vap->va_mask;
	int error = 0;
	uint_t bcnt;

	/* Cannot set these attributes. */
	if (mask & AT_NOSET)
		return (EINVAL);

	/*
	 * Truncate file.  Must have write permission and not be a directory.
	 */
	if (mask & AT_SIZE) {
		if (vp->v_type == VDIR) {
			/* log the rejected directory truncate, then fail */
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE))
				cachefs_log_truncate(cachep, EISDIR,
				    fscp->fs_cfsvfsp,
				    &cp->c_metadata.md_cookie,
				    cp->c_id.cid_fileno,
				    crgetuid(cr), vap->va_size);
			return (EISDIR);
		}
	}

	/*
	 * Gotta deal with one special case here, where we're setting the
	 * size of the file. First, we zero out part of the page after the
	 * new size of the file. Then we toss (not write) all pages after
	 * page in which the new offset occurs. Note that the NULL passed
	 * in instead of a putapage() fn parameter is correct, since
	 * no dirty pages will be found (B_TRUNC | B_INVAL).
	 */

	rw_enter(&cp->c_rwlock, RW_WRITER);

	/* sync dirty pages */
	if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_putpage_common(vp, (offset_t)0, 0, 0, cr);
		if (error == EINTR)
			goto out;
	}
	/* non-EINTR putpage errors are deliberately ignored */
	error = 0;

	/* if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_setattr_connected(vp, vap, flags, cr, ct);
	}
	/* else must be disconnected */
	else {
		error = cachefs_setattr_disconnected(vp, vap, flags, cr, ct);
	}
	if (error)
		goto out;

	/*
	 * If the file size has been changed then
	 * toss whole pages beyond the end of the file and zero
	 * the portion of the last page that is beyond the end of the file.
	 */
	if (mask & AT_SIZE && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		bcnt = (uint_t)(cp->c_size & PAGEOFFSET);
		if (bcnt)
			pvn_vpzero(vp, cp->c_size, PAGESIZE - bcnt);
		(void) pvn_vplist_dirty(vp, cp->c_size, cachefs_push,
		    B_TRUNC | B_INVAL, cr);
	}

out:
	rw_exit(&cp->c_rwlock);

	/* log the outcome of any truncate attempt, success or failure */
	if ((mask & AT_SIZE) &&
	    (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_TRUNCATE)))
		cachefs_log_truncate(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), vap->va_size);

	return (error);
}
2410
/*
 * cachefs_setattr_connected
 *
 * Apply a setattr to the back file system while connected, keep the
 * front (cache) file's size in step where possible, and refresh the
 * cnode's cached attributes from the back vnode.  Called from
 * cachefs_setattr_common() with c_rwlock held as writer.
 */
static int
cachefs_setattr_connected(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	uint_t mask = vap->va_mask;
	int error = 0;
	int setsize;

	mutex_enter(&cp->c_statelock);

	/* need a back vnode to pass the setattr through */
	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error)
			goto out;
	}

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	CFS_DPRINT_BACKFS_NFSV4(fscp, ("cachefs_setattr (nfsv4): cnode %p, "
	    "backvp %p\n", cp, cp->c_backvp));
	error = VOP_SETATTR(cp->c_backvp, vap, flags, cr, ct);
	if (error) {
		goto out;
	}

	/* if the size of the file is being changed */
	if (mask & AT_SIZE) {
		cp->c_size = vap->va_size;
		error = 0;
		setsize = 0;

		/* see if okay to try to set the file size */
		if (((cp->c_flags & CN_NOCACHE) == 0) &&
		    CFS_ISFS_NONSHARED(fscp)) {
			/* okay to set size if file is populated */
			if (cp->c_metadata.md_flags & MD_POPULATED)
				setsize = 1;

			/*
			 * Okay to set size if front file exists and setting
			 * file size to zero.
			 */
			if ((cp->c_metadata.md_flags & MD_FILE) &&
			    (vap->va_size == 0))
				setsize = 1;
		}

		/* if okay to try to set the file size */
		if (setsize) {
			error = 0;
			if (cp->c_frontvp == NULL)
				error = cachefs_getfrontfile(cp);
			if (error == 0)
				error = cachefs_frontfile_size(cp, cp->c_size);
		} else if (cp->c_metadata.md_flags & MD_FILE) {
			/* make sure file gets nocached */
			error = EEXIST;
		}

		/* if we have to nocache the file */
		if (error) {
			if ((cp->c_flags & CN_NOCACHE) == 0 &&
			    !CFS_ISFS_BACKFS_NFSV4(fscp))
				cachefs_nocache(cp);
			/* nocaching absorbs the error; not fatal to setattr */
			error = 0;
		}
	}

	cp->c_flags |= CN_UPDATED;

	/* XXX bob: given what modify_cobject does this seems unnecessary */
	cp->c_attr.va_mask = AT_ALL;
	error = VOP_GETATTR(cp->c_backvp, &cp->c_attr, 0, cr, ct);
	if (error)
		goto out;

	/* our size may be ahead of the back file's; keep the larger */
	cp->c_attr.va_size = MAX(cp->c_attr.va_size, cp->c_size);
	cp->c_size = cp->c_attr.va_size;

	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
out:
	mutex_exit(&cp->c_statelock);
	return (error);
}
2503
2504/*
2505 * perform the setattr on the local file system
2506 */
2507/*ARGSUSED4*/
2508static int
2509cachefs_setattr_disconnected(
2510	vnode_t *vp,
2511	vattr_t *vap,
2512	int flags,
2513	cred_t *cr,
2514	caller_context_t *ct)
2515{
2516	cnode_t *cp = VTOC(vp);
2517	fscache_t *fscp = C_TO_FSCACHE(cp);
2518	int mask;
2519	int error;
2520	int newfile;
2521	off_t commit = 0;
2522
2523	if (CFS_ISFS_WRITE_AROUND(fscp))
2524		return (ETIMEDOUT);
2525
2526	/* if we do not have good attributes */
2527	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
2528		return (ETIMEDOUT);
2529
2530	/* primary concern is to keep this routine as much like ufs_setattr */
2531
2532	mutex_enter(&cp->c_statelock);
2533
2534	error = secpolicy_vnode_setattr(cr, vp, vap, &cp->c_attr, flags,
2535	    cachefs_access_local, cp);
2536
2537	if (error)
2538		goto out;
2539
2540	mask = vap->va_mask;
2541
2542	/* if changing the size of the file */
2543	if (mask & AT_SIZE) {
2544		if (vp->v_type == VDIR) {
2545			error = EISDIR;
2546			goto out;
2547		}
2548
2549		if (vp->v_type == VFIFO) {
2550			error = 0;
2551			goto out;
2552		}
2553
2554		if ((vp->v_type != VREG) &&
2555		    !((vp->v_type == VLNK) && (vap->va_size == 0))) {
2556			error = EINVAL;
2557			goto out;
2558		}
2559
2560		if (vap->va_size > fscp->fs_offmax) {
2561			error = EFBIG;
2562			goto out;
2563		}
2564
2565		/* if the file is not populated and we are not truncating it */
2566		if (((cp->c_metadata.md_flags & MD_POPULATED) == 0) &&
2567		    (vap->va_size != 0)) {
2568			error = ETIMEDOUT;
2569			goto out;
2570		}
2571
2572		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2573			error = cachefs_dlog_cidmap(fscp);
2574			if (error) {
2575				error = ENOSPC;
2576				goto out;
2577			}
2578			cp->c_metadata.md_flags |= MD_MAPPING;
2579		}
2580
2581		/* log the operation */
2582		commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2583		if (commit == 0) {
2584			error = ENOSPC;
2585			goto out;
2586		}
2587		cp->c_flags &= ~CN_NOCACHE;
2588
2589		/* special case truncating fast sym links */
2590		if ((vp->v_type == VLNK) &&
2591		    (cp->c_metadata.md_flags & MD_FASTSYMLNK)) {
2592			/* XXX how can we get here */
2593			/* XXX should update mtime */
2594			cp->c_size = 0;
2595			error = 0;
2596			goto out;
2597		}
2598
2599		/* get the front file, this may create one */
2600		newfile = (cp->c_metadata.md_flags & MD_FILE) ? 0 : 1;
2601		if (cp->c_frontvp == NULL) {
2602			error = cachefs_getfrontfile(cp);
2603			if (error)
2604				goto out;
2605		}
2606		ASSERT(cp->c_frontvp);
2607		if (newfile && (cp->c_flags & CN_UPDATED)) {
2608			/* allocate space for the metadata */
2609			ASSERT((cp->c_flags & CN_ALLOC_PENDING) == 0);
2610			ASSERT((cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR)
2611			    == 0);
2612			error = filegrp_write_metadata(cp->c_filegrp,
2613			    &cp->c_id, &cp->c_metadata);
2614			if (error)
2615				goto out;
2616		}
2617
2618		/* change the size of the front file */
2619		error = cachefs_frontfile_size(cp, vap->va_size);
2620		if (error)
2621			goto out;
2622		cp->c_attr.va_size = cp->c_size = vap->va_size;
2623		gethrestime(&cp->c_metadata.md_localmtime);
2624		cp->c_metadata.md_flags |= MD_POPULATED | MD_LOCALMTIME;
2625		cachefs_modified(cp);
2626		cp->c_flags |= CN_UPDATED;
2627	}
2628
2629	if (mask & AT_MODE) {
2630		/* mark as modified */
2631		if (cachefs_modified_alloc(cp)) {
2632			error = ENOSPC;
2633			goto out;
2634		}
2635
2636		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2637			error = cachefs_dlog_cidmap(fscp);
2638			if (error) {
2639				error = ENOSPC;
2640				goto out;
2641			}
2642			cp->c_metadata.md_flags |= MD_MAPPING;
2643		}
2644
2645		/* log the operation if not already logged */
2646		if (commit == 0) {
2647			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2648			if (commit == 0) {
2649				error = ENOSPC;
2650				goto out;
2651			}
2652		}
2653
2654		cp->c_attr.va_mode &= S_IFMT;
2655		cp->c_attr.va_mode |= vap->va_mode & ~S_IFMT;
2656		gethrestime(&cp->c_metadata.md_localctime);
2657		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2658		cp->c_flags |= CN_UPDATED;
2659	}
2660
2661	if (mask & (AT_UID|AT_GID)) {
2662
2663		/* mark as modified */
2664		if (cachefs_modified_alloc(cp)) {
2665			error = ENOSPC;
2666			goto out;
2667		}
2668
2669		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2670			error = cachefs_dlog_cidmap(fscp);
2671			if (error) {
2672				error = ENOSPC;
2673				goto out;
2674			}
2675			cp->c_metadata.md_flags |= MD_MAPPING;
2676		}
2677
2678		/* log the operation if not already logged */
2679		if (commit == 0) {
2680			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2681			if (commit == 0) {
2682				error = ENOSPC;
2683				goto out;
2684			}
2685		}
2686
2687		if (mask & AT_UID)
2688			cp->c_attr.va_uid = vap->va_uid;
2689
2690		if (mask & AT_GID)
2691			cp->c_attr.va_gid = vap->va_gid;
2692		gethrestime(&cp->c_metadata.md_localctime);
2693		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2694		cp->c_flags |= CN_UPDATED;
2695	}
2696
2697
2698	if (mask & (AT_MTIME|AT_ATIME)) {
2699		/* mark as modified */
2700		if (cachefs_modified_alloc(cp)) {
2701			error = ENOSPC;
2702			goto out;
2703		}
2704
2705		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
2706			error = cachefs_dlog_cidmap(fscp);
2707			if (error) {
2708				error = ENOSPC;
2709				goto out;
2710			}
2711			cp->c_metadata.md_flags |= MD_MAPPING;
2712		}
2713
2714		/* log the operation if not already logged */
2715		if (commit == 0) {
2716			commit = cachefs_dlog_setattr(fscp, vap, flags, cp, cr);
2717			if (commit == 0) {
2718				error = ENOSPC;
2719				goto out;
2720			}
2721		}
2722
2723		if (mask & AT_MTIME) {
2724			cp->c_metadata.md_localmtime = vap->va_mtime;
2725			cp->c_metadata.md_flags |= MD_LOCALMTIME;
2726		}
2727		if (mask & AT_ATIME)
2728			cp->c_attr.va_atime = vap->va_atime;
2729		gethrestime(&cp->c_metadata.md_localctime);
2730		cp->c_metadata.md_flags |= MD_LOCALCTIME;
2731		cp->c_flags |= CN_UPDATED;
2732	}
2733
2734out:
2735	mutex_exit(&cp->c_statelock);
2736
2737	/* commit the log entry */
2738	if (commit) {
2739		if (cachefs_dlog_commit(fscp, commit, error)) {
2740			/*EMPTY*/
2741			/* XXX bob: fix on panic */
2742		}
2743	}
2744	return (error);
2745}
2746
/*
 * cachefs_access
 *
 * VOP_ACCESS entry point.  Checks access rights either against the
 * back file system (connected) or against the cached attributes
 * (disconnected), retrying across connection-state changes on
 * timeout.
 */
/* ARGSUSED */
static int
cachefs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
	caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;
	int held = 0;
	int connected = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_access: ENTER vp %p\n", (void *)vp);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the access operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_access_connected(vp, mode, flags,
			    cr);
			/* timed out while connected: wait it out, retry */
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			/* disconnected: check against cached attributes */
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * Local check could not decide; try a
				 * one-shot connected check before forcing
				 * a full connected retry.
				 */
				if (cachefs_cd_access_miss(fscp)) {
					mutex_enter(&cp->c_statelock);
					if (cp->c_backvp == NULL) {
						(void) cachefs_getbackvp(fscp,
						    cp);
					}
					mutex_exit(&cp->c_statelock);
					error = cachefs_access_connected(vp,
					    mode, flags, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					/* back off before retrying */
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_access: EXIT error = %d\n", error);
#endif
	return (error);
}
2840
2841static int
2842cachefs_access_connected(struct vnode *vp, int mode, int flags, cred_t *cr)
2843{
2844	cnode_t *cp = VTOC(vp);
2845	fscache_t *fscp = C_TO_FSCACHE(cp);
2846	int error = 0;
2847
2848	mutex_enter(&cp->c_statelock);
2849
2850	/* Make sure the cnode attrs are valid first. */
2851	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
2852	if (error)
2853		goto out;
2854
2855	/* see if can do a local file system check */
2856	if ((fscp->fs_info.fi_mntflags & CFS_ACCESS_BACKFS) == 0 &&
2857	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
2858		error = cachefs_access_local(cp, mode, cr);
2859		goto out;
2860	}
2861
2862	/* else do a remote file system check */
2863	else {
2864		if (cp->c_backvp == NULL) {
2865			error = cachefs_getbackvp(fscp, cp);
2866			if (error)
2867				goto out;
2868		}
2869
2870		CFS_DPRINT_BACKFS_NFSV4(fscp,
2871		    ("cachefs_access (nfsv4): cnode %p, backvp %p\n",
2872		    cp, cp->c_backvp));
2873		error = VOP_ACCESS(cp->c_backvp, mode, flags, cr, NULL);
2874
2875		/*
2876		 * even though we don't `need' the ACL to do access
2877		 * via the backvp, we should cache it here to make our
2878		 * behavior more reasonable if we go disconnected.
2879		 */
2880
2881		if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
2882		    (cachefs_vtype_aclok(vp)) &&
2883		    ((cp->c_flags & CN_NOCACHE) == 0) &&
2884		    (!CFS_ISFS_BACKFS_NFSV4(fscp)) &&
2885		    ((cp->c_metadata.md_flags & MD_ACL) == 0))
2886			(void) cachefs_cacheacl(cp, NULL);
2887	}
2888out:
2889	/*
2890	 * If NFS returned ESTALE, mark this cnode as stale, so that
2891	 * the vn_open retry will read the file anew from backfs
2892	 */
2893	if (error == ESTALE)
2894		cachefs_cnode_stale(cp);
2895
2896	mutex_exit(&cp->c_statelock);
2897	return (error);
2898}
2899
2900/*
2901 * CFS has a fastsymlink scheme. If the size of the link is < C_FSL_SIZE, then
2902 * the link is placed in the metadata itself (no front file is allocated).
2903 */
/*
 * cachefs_readlink
 *
 * VOP_READLINK entry point.  Dispatches to the connected or
 * disconnected readlink implementation, retrying across
 * connection-state changes on timeout.  The connected path may stuff
 * the link text into the cnode, so it must run under c_rwlock; the
 * lock is taken with rw_tryenter first to avoid holding file system
 * access while blocking on the lock.
 */
/*ARGSUSED*/
static int
cachefs_readlink(vnode_t *vp, uio_t *uiop, cred_t *cr, caller_context_t *ct)
{
	int error = 0;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	int held = 0;
	int connected = 0;

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if (vp->v_type != VLNK)
		return (EINVAL);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the readlink operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			/*
			 * since readlink_connected will call stuffsymlink
			 * on success, have to serialize access
			 */
			if (!rw_tryenter(&cp->c_rwlock, RW_WRITER)) {
				/*
				 * drop fs access while blocking on the
				 * rwlock, then reacquire it
				 */
				cachefs_cd_release(fscp);
				rw_enter(&cp->c_rwlock, RW_WRITER);
				error = cachefs_cd_access(fscp, connected, 0);
				if (error) {
					held = 0;
					rw_exit(&cp->c_rwlock);
					break;
				}
			}
			error = cachefs_readlink_connected(vp, uiop, cr);
			rw_exit(&cp->c_rwlock);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_readlink_disconnected(vp, uiop);
			if (CFS_TIMEOUT(fscp, error)) {
				/* not in cache; try a one-shot connected read */
				if (cachefs_cd_access_miss(fscp)) {
					/* as above */
					if (!rw_tryenter(&cp->c_rwlock,
					    RW_WRITER)) {
						cachefs_cd_release(fscp);
						rw_enter(&cp->c_rwlock,
						    RW_WRITER);
						error = cachefs_cd_access(fscp,
						    connected, 0);
						if (error) {
							held = 0;
							rw_exit(&cp->c_rwlock);
							break;
						}
					}
					error = cachefs_readlink_connected(vp,
					    uiop, cr);
					rw_exit(&cp->c_rwlock);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					/* back off before retrying */
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READLINK))
		cachefs_log_readlink(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr), cp->c_size);

	if (held)
		cachefs_cd_release(fscp);
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

	/*
	 * The over the wire error for attempting to readlink something
	 * other than a symbolic link is ENXIO.  However, we need to
	 * return EINVAL instead of ENXIO, so we map it here.
	 */
	return (error == ENXIO ? EINVAL : error);
}
3021
/*
 * Connected-mode readlink: satisfy the read from the fast-symlink
 * metadata or the front file when possible, otherwise fetch the link
 * text from the back file system and try to cache it.  Caller holds
 * c_rwlock as writer (needed because cachefs_stuffsymlink modifies
 * the cnode).
 */
static int
cachefs_readlink_connected(vnode_t *vp, uio_t *uiop, cred_t *cr)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buf;
	int buflen;
	int readcache = 0;	/* 1 if satisfied from the cache (a hit) */

	mutex_enter(&cp->c_statelock);

	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
#ifdef CFSDEBUG
		readcache = 1;
		goto out;
#else /* CFSDEBUG */
		/* XXX KLUDGE! correct for insidious 0-len symlink */
		if (cp->c_size != 0) {
			readcache = 1;
			goto out;
		}
#endif /* CFSDEBUG */
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			(void) cachefs_getfrontfile(cp);
		}
		/* re-check: getfrontfile may have invalidated the cache */
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);

			/* XXX KLUDGE! correct for insidious 0-len symlink */
			if (cp->c_size != 0) {
				readcache = 1;
				goto out;
			}
		}
	}

	/* get the sym link contents from the back fs */
	error = cachefs_readlink_back(cp, cr, &buf, &buflen);
	if (error)
		goto out;

	/* copy the contents out to the user */
	error = uiomove(buf, MIN(buflen, uiop->uio_resid), UIO_READ, uiop);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE is set
	 * or if NFSv4 pass-through is enabled.
	 */
	if (cachefs_stuffsymlink(cp, buf, buflen)) {
		cachefs_nocache(cp);
	}

	/*
	 * NOTE(review): freed with MAXPATHLEN, so cachefs_readlink_back
	 * presumably allocates a MAXPATHLEN buffer -- confirm.
	 */
	cachefs_kmem_free(buf, MAXPATHLEN);

out:
	mutex_exit(&cp->c_statelock);
	/* account a cache hit or miss for successful reads */
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}
3106
/*
 * Read the contents of a symbolic link while the file system is
 * disconnected from the back fs.  The link must be satisfied entirely
 * from the local cache: either from the "fast symlink" data embedded
 * directly in the cnode metadata, or from a populated front file.
 * Returns 0 on success, ETIMEDOUT if the link is not cached (the
 * caller retries the operation once reconnected), or an error from
 * uiomove()/VOP_READ().
 */
static int
cachefs_readlink_disconnected(vnode_t *vp, uio_t *uiop)
{
	int error;
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int readcache = 0;	/* set when answered from the cache (hit) */

	mutex_enter(&cp->c_statelock);

	/* if the sym link is cached as a fast sym link */
	if (cp->c_metadata.md_flags & MD_FASTSYMLNK) {
		/* link text lives in md_allocinfo; c_size is its length */
		error = uiomove(cp->c_metadata.md_allocinfo,
		    MIN(cp->c_size, uiop->uio_resid), UIO_READ, uiop);
		readcache = 1;
		goto out;
	}

	/* if the sym link is cached in a front file */
	if (cp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(cp->c_metadata.md_flags & MD_FILE);
		if (cp->c_frontvp == NULL) {
			/*
			 * Best-effort: on failure the re-check of
			 * MD_POPULATED below is relied upon to keep us
			 * from dereferencing a NULL c_frontvp.
			 * NOTE(review): assumes a failed
			 * cachefs_getfrontfile() clears MD_POPULATED —
			 * confirm against its implementation.
			 */
			(void) cachefs_getfrontfile(cp);
		}
		if (cp->c_metadata.md_flags & MD_POPULATED) {
			/* read symlink data from frontfile */
			uiop->uio_offset = 0;
			(void) VOP_RWLOCK(cp->c_frontvp,
			    V_WRITELOCK_FALSE, NULL);
			error = VOP_READ(cp->c_frontvp, uiop, 0, kcred, NULL);
			VOP_RWUNLOCK(cp->c_frontvp, V_WRITELOCK_FALSE, NULL);
			readcache = 1;
			goto out;
		}
	}
	/* not cached; cannot reach the back fs while disconnected */
	error = ETIMEDOUT;

out:
	mutex_exit(&cp->c_statelock);
	/* account the request in the fscache hit/miss statistics */
	if (error == 0) {
		if (readcache)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	return (error);
}
3154
/*
 * cachefs_fsync() - VOP_FSYNC entry point.
 *
 * Flushes a cnode's dirty state: cached pages (for regular files), the
 * back file system vnode (when connected), and the front-fs metadata.
 * The body is the standard cachefs connected/disconnected retry loop:
 * on a timeout while connected we drop the cd-access hold, note the
 * timeout, and retry; while disconnected a timeout forces a retry that
 * waits for connected operation.
 */
/*ARGSUSED*/
static int
cachefs_fsync(vnode_t *vp, int syncflag, cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* nonzero while we hold cd access */
	int connected = 0;	/* require connected operation on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_fsync: ENTER vp %p\n", (void *)vp);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/* nothing can be dirty if the back fs is read-only */
	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
		goto out;

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the fsync operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;
		connected = 0;

		/* if a regular file, write out the pages */
		if ((vp->v_type == VREG) && vn_has_cached_data(vp) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_putpage_common(vp, (offset_t)0,
			    0, 0, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else {
					connected = 1;
					continue;
				}
			}

			/* if no space left in cache, wait until connected */
			if ((error == ENOSPC) &&
			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
				connected = 1;
				continue;
			}

			/* clear the cnode error if putpage worked */
			if ((error == 0) && cp->c_error) {
				mutex_enter(&cp->c_statelock);
				cp->c_error = 0;
				mutex_exit(&cp->c_statelock);
			}

			if (error)
				break;
		}

		/* if connected, sync the backvp */
		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
		    cp->c_backvp) {
			mutex_enter(&cp->c_statelock);
			/* re-check under c_statelock; backvp may go away */
			if (cp->c_backvp) {
				CFS_DPRINT_BACKFS_NFSV4(fscp,
				    ("cachefs_fsync (nfsv4): cnode %p, "
				    "backvp %p\n", cp, cp->c_backvp));
				error = VOP_FSYNC(cp->c_backvp, syncflag, cr,
				    ct);
				if (CFS_TIMEOUT(fscp, error)) {
					mutex_exit(&cp->c_statelock);
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					continue;
				} else if (error && (error != EINTR))
					cp->c_error = error;
			}
			mutex_exit(&cp->c_statelock);
		}

		/* sync the metadata and the front file to the front fs */
		if (!CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_sync_metadata(cp);
			/*
			 * A metadata sync failure is not fatal while
			 * connected; the back fs already has the data.
			 */
			if (error &&
			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
				error = 0;
		}
		break;
	}

	/* report a previously latched cnode error if nothing else failed */
	if (error == 0)
		error = cp->c_error;

	if (held)
		cachefs_cd_release(fscp);

out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_fsync: EXIT vp %p\n", (void *)vp);
#endif
	return (error);
}
3288
3289/*
3290 * Called from cachefs_inactive(), to make sure all the data goes out to disk.
3291 */
3292int
3293cachefs_sync_metadata(cnode_t *cp)
3294{
3295	int error = 0;
3296	struct filegrp *fgp;
3297	struct vattr va;
3298	fscache_t *fscp = C_TO_FSCACHE(cp);
3299
3300#ifdef CFSDEBUG
3301	CFS_DEBUG(CFSDEBUG_VOPS)
3302		printf("c_sync_metadata: ENTER cp %p cflag %x\n",
3303		    (void *)cp, cp->c_flags);
3304#endif
3305
3306	mutex_enter(&cp->c_statelock);
3307	if ((cp->c_flags & CN_UPDATED) == 0)
3308		goto out;
3309	if (cp->c_flags & (CN_STALE | CN_DESTROY))
3310		goto out;
3311	fgp = cp->c_filegrp;
3312	if ((fgp->fg_flags & CFS_FG_WRITE) == 0)
3313		goto out;
3314	if (CFS_ISFS_BACKFS_NFSV4(fscp))
3315		goto out;
3316
3317	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
3318		mutex_exit(&cp->c_statelock);
3319		error = filegrp_allocattr(fgp);
3320		mutex_enter(&cp->c_statelock);
3321		if (error) {
3322			error = 0;
3323			goto out;
3324		}
3325	}
3326
3327	if (cp->c_flags & CN_ALLOC_PENDING) {
3328		error = filegrp_create_metadata(fgp, &cp->c_metadata,
3329		    &cp->c_id);
3330		if (error)
3331			goto out;
3332		cp->c_flags &= ~CN_ALLOC_PENDING;
3333	}
3334
3335	if (cp->c_flags & CN_NEED_FRONT_SYNC) {
3336		if (cp->c_frontvp != NULL) {
3337			error = VOP_FSYNC(cp->c_frontvp, FSYNC, kcred, NULL);
3338			if (error) {
3339				cp->c_metadata.md_timestamp.tv_sec = 0;
3340			} else {
3341				va.va_mask = AT_MTIME;
3342				error = VOP_GETATTR(cp->c_frontvp, &va, 0,
3343				    kcred, NULL);
3344				if (error)
3345					goto out;
3346				cp->c_metadata.md_timestamp = va.va_mtime;
3347				cp->c_flags &=
3348				    ~(CN_NEED_FRONT_SYNC |
3349				    CN_POPULATION_PENDING);
3350			}
3351		} else {
3352			cp->c_flags &=
3353			    ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING);
3354		}
3355	}
3356
3357	/*
3358	 * XXX tony: How can CN_ALLOC_PENDING still be set??
3359	 * XXX tony: How can CN_UPDATED not be set?????
3360	 */
3361	if ((cp->c_flags & CN_ALLOC_PENDING) == 0 &&
3362	    (cp->c_flags & CN_UPDATED)) {
3363		error = filegrp_write_metadata(fgp, &cp->c_id,
3364		    &cp->c_metadata);
3365		if (error)
3366			goto out;
3367	}
3368out:
3369	if (error) {
3370		/* XXX modified files? */
3371		if (cp->c_metadata.md_rlno) {
3372			cachefs_removefrontfile(&cp->c_metadata,
3373			    &cp->c_id, fgp);
3374			cachefs_rlent_moveto(C_TO_FSCACHE(cp)->fs_cache,
3375			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
3376			cp->c_metadata.md_rlno = 0;
3377			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
3378			if (cp->c_frontvp) {
3379				VN_RELE(cp->c_frontvp);
3380				cp->c_frontvp = NULL;
3381			}
3382		}
3383		if ((cp->c_flags & CN_ALLOC_PENDING) == 0)
3384			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
3385		cp->c_flags |= CN_ALLOC_PENDING;
3386		cachefs_nocache(cp);
3387	}
3388	/*
3389	 * we clear the updated bit even on errors because a retry
3390	 * will probably fail also.
3391	 */
3392	cp->c_flags &= ~CN_UPDATED;
3393	mutex_exit(&cp->c_statelock);
3394
3395#ifdef CFSDEBUG
3396	CFS_DEBUG(CFSDEBUG_VOPS)
3397		printf("c_sync_metadata: EXIT cp %p cflag %x\n",
3398		    (void *)cp, cp->c_flags);
3399#endif
3400
3401	return (error);
3402}
3403
3404/*
3405 * This is the vop entry point for inactivating a vnode.
3406 * It just queues the request for the async thread which
3407 * calls cachefs_inactive.
3408 * Because of the dnlc, it is not safe to grab most locks here.
3409 */
3410/*ARGSUSED*/
3411static void
3412cachefs_inactive(struct vnode *vp, cred_t *cr, caller_context_t *ct)
3413{
3414	cnode_t *cp;
3415	struct cachefs_req *rp;
3416	fscache_t *fscp;
3417
3418#ifdef CFSDEBUG
3419	CFS_DEBUG(CFSDEBUG_VOPS)
3420		printf("cachefs_inactive: ENTER vp %p\n", (void *)vp);
3421#endif
3422
3423	cp = VTOC(vp);
3424	fscp = C_TO_FSCACHE(cp);
3425
3426	ASSERT((cp->c_flags & CN_IDLE) == 0);
3427
3428	/*
3429	 * Cachefs only provides pass-through support for NFSv4,
3430	 * and all vnode operations are passed through to the
3431	 * back file system. For NFSv4 pass-through to work, only
3432	 * connected operation is supported, the cnode backvp must
3433	 * exist, and cachefs optional (eg., disconnectable) flags
3434	 * are turned off. Assert these conditions to ensure that
3435	 * the backfilesystem is called for the inactive operation.
3436	 */
3437	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
3438	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
3439
3440	/* vn_rele() set the v_count == 1 */
3441
3442	cp->c_ipending = 1;
3443
3444	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
3445	rp->cfs_cmd = CFS_IDLE;
3446	rp->cfs_cr = cr;
3447	crhold(rp->cfs_cr);
3448	rp->cfs_req_u.cu_idle.ci_vp = vp;
3449	cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
3450
3451#ifdef CFSDEBUG
3452	CFS_DEBUG(CFSDEBUG_VOPS)
3453		printf("cachefs_inactive: EXIT vp %p\n", (void *)vp);
3454#endif
3455}
3456
/*
 * cachefs_lookup() - VOP_LOOKUP entry point.
 *
 * Wraps cachefs_lookup_common() in the standard connected/disconnected
 * retry loop.  On a disconnected timeout it may go straight to the
 * back fs via cachefs_lookup_back() when cd-access-miss is permitted.
 * If the result is a device vnode it is replaced with the equivalent
 * specfs vnode before being returned.
 */
/* ARGSUSED */
static int
cachefs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr,
    caller_context_t *ct, int *direntflags, pathname_t *realpnp)

{
	int error = 0;
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int held = 0;		/* nonzero while we hold cd access */
	int connected = 0;	/* require connected operation on retry */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_lookup: ENTER dvp %p nm %s\n", (void *)dvp, nm);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the lookup operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		error = cachefs_lookup_common(dvp, nm, vpp, pnp,
			flags, rdir, cr);
		if (CFS_TIMEOUT(fscp, error)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			} else {
				/*
				 * Disconnected timeout: optionally go
				 * directly to the back fs, else retry
				 * requiring connected operation.
				 */
				if (cachefs_cd_access_miss(fscp)) {
					rw_enter(&dcp->c_rwlock, RW_READER);
					error = cachefs_lookup_back(dvp, nm,
					    vpp, cr);
					rw_exit(&dcp->c_rwlock);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}
	if (held)
		cachefs_cd_release(fscp);

	/* hand back a specfs vnode for device files */
	if (error == 0 && IS_DEVVP(*vpp)) {
		struct vnode *newvp;
		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (newvp == NULL) {
			error = ENOSYS;
		} else {
			*vpp = newvp;
		}
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_lookup: EXIT error = %d\n", error);
#endif

	return (error);
}
3557
/*
 * Common lookup code shared by the connected and disconnected paths.
 *
 * Order of attack: handle "" and "." specially, check VEXEC on the
 * directory, consult the dnlc, then search the cached (front file)
 * directory.  If the front file is unusable or unpopulated the lookup
 * falls through to cachefs_lookup_back() when connected, or fails with
 * ETIMEDOUT when disconnected.  On success *vpp holds a held vnode and
 * the entry is recorded in the dnlc and the child's md_parent.
 */
/* ARGSUSED */
int
cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
    struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr)
{
	int error = 0;
	cnode_t *cp, *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	u_offset_t d_offset;
	struct cachefs_req *rp;
	cfs_cid_t cid, dircid;
	uint_t flag;
	uint_t uncached = 0;	/* set when the dir had to be uncached */

	*vpp = NULL;

	/*
	 * If lookup is for "", just return dvp.  Don't need
	 * to send it over the wire, look it up in the dnlc,
	 * or perform any access checks.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* can't do lookups in non-directories */
	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* perform access check, also does consistency check if connected */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
		error = cachefs_access_connected(dvp, VEXEC, 0, cr);
	} else {
		mutex_enter(&dcp->c_statelock);
		error = cachefs_access_local(dcp, VEXEC, cr);
		mutex_exit(&dcp->c_statelock);
	}
	if (error)
		return (error);

	/*
	 * If lookup is for ".", just return dvp.  Don't need
	 * to send it over the wire or look it up in the dnlc,
	 * just need to check access.
	 */
	if (strcmp(nm, ".") == 0) {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}

	/* check the dnlc */
	*vpp = (vnode_t *)dnlc_lookup(dvp, nm);
	if (*vpp)
		return (0);

	/* read lock the dir before starting the search */
	rw_enter(&dcp->c_rwlock, RW_READER);

	mutex_enter(&dcp->c_statelock);
	/* remember the dir's cid so the child can record its parent */
	dircid = dcp->c_id;

	dcp->c_usage++;

	/* if front file is not usable, lookup on the back fs */
	if ((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    ((dcp->c_filegrp->fg_flags & CFS_FG_READ) == 0)) {
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* if the front file is not populated, try to populate it */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		/* populating requires the back fs */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			error = ETIMEDOUT;
			mutex_exit(&dcp->c_statelock);
			goto out;
		}

		if (cachefs_async_okay()) {
			/* cannot populate if cache is not writable */
			ASSERT((dcp->c_flags &
			    (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0);
			dcp->c_flags |= CN_ASYNC_POPULATE;

			/* queue a CFS_POPULATE request for the async worker */
			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = dvp;
			rp->cfs_cr = cr;

			/* holds released when the request completes */
			crhold(cr);
			VN_HOLD(dvp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
			/* too many asyncs; populate synchronously instead */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0) {
				mutex_exit(&dcp->c_statelock);
				goto out;
			}
		}
		/* no populate if too many asyncs and we have to cache ACLs */

		mutex_exit(&dcp->c_statelock);

		/* answer this lookup from the back fs meanwhile */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
		else
			error = ETIMEDOUT;
		goto out;
	}

	/* by now we have a valid cached front file that we can search */

	ASSERT((dcp->c_flags & CN_ASYNC_POPULATE) == 0);
	error = cachefs_dir_look(dcp, nm, &cookie, &flag,
	    &d_offset, &cid);
	mutex_exit(&dcp->c_statelock);

	if (error) {
		/* if the entry does not have the fid, go get it */
		if (error == EINVAL) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}

		/* errors other than does not exist */
		else if (error != ENOENT) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
				error = cachefs_lookup_back(dvp, nm, vpp, cr);
			else
				error = ETIMEDOUT;
		}
		goto out;
	}

	/*
	 * Else we found the entry in the cached directory.
	 * Make a cnode for it.
	 */
	error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
	    cr, 0, &cp);
	if (error == ESTALE) {
		/* cached dir entry was stale; stop caching the dir */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		mutex_enter(&dcp->c_statelock);
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_lookup_back(dvp, nm, vpp, cr);
			uncached = 1;
		} else
			error = ETIMEDOUT;
	} else if (error == 0) {
		*vpp = CTOV(cp);
	}

out:
	if (error == 0) {
		/* put the entry in the dnlc */
		if (cachefs_dnlc)
			dnlc_enter(dvp, nm, *vpp);

		/* save the cid of the parent so can find the name */
		cp = VTOC(*vpp);
		if (bcmp(&cp->c_metadata.md_parent, &dircid,
		    sizeof (cfs_cid_t)) != 0) {
			mutex_enter(&cp->c_statelock);
			cp->c_metadata.md_parent = dircid;
			cp->c_flags |= CN_UPDATED;
			mutex_exit(&cp->c_statelock);
		}
	}

	rw_exit(&dcp->c_rwlock);
	/* re-pack the dir if we had to uncache a packed directory */
	if (uncached && dcp->c_metadata.md_flags & MD_PACKED)
		(void) cachefs_pack_common(dvp, cr);
	return (error);
}
3746
3747/*
3748 * Called from cachefs_lookup_common when the back file system needs to be
3749 * examined to perform the lookup.
3750 */
3751static int
3752cachefs_lookup_back(vnode_t *dvp, char *nm, vnode_t **vpp,
3753    cred_t *cr)
3754{
3755	int error = 0;
3756	cnode_t *cp, *dcp = VTOC(dvp);
3757	fscache_t *fscp = C_TO_FSCACHE(dcp);
3758	vnode_t *backvp = NULL;
3759	struct vattr va;
3760	struct fid cookie;
3761	cfs_cid_t cid;
3762	uint32_t valid_fid;
3763
3764	mutex_enter(&dcp->c_statelock);
3765
3766	/* do a lookup on the back FS to get the back vnode */
3767	if (dcp->c_backvp == NULL) {
3768		error = cachefs_getbackvp(fscp, dcp);
3769		if (error)
3770			goto out;
3771	}
3772
3773	CFS_DPRINT_BACKFS_NFSV4(fscp,
3774	    ("cachefs_lookup (nfsv4): dcp %p, dbackvp %p, name %s\n",
3775	    dcp, dcp->c_backvp, nm));
3776	error = VOP_LOOKUP(dcp->c_backvp, nm, &backvp, (struct pathname *)NULL,
3777	    0, (vnode_t *)NULL, cr, NULL, NULL, NULL);
3778	if (error)
3779		goto out;
3780	if (IS_DEVVP(backvp)) {
3781		struct vnode *devvp = backvp;
3782
3783		if (VOP_REALVP(devvp, &backvp, NULL) == 0) {
3784			VN_HOLD(backvp);
3785			VN_RELE(devvp);
3786		}
3787	}
3788
3789	/* get the fid and attrs from the back fs */
3790	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
3791	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
3792	if (error)
3793		goto out;
3794
3795	cid.cid_fileno = va.va_nodeid;
3796	cid.cid_flags = 0;
3797
3798#if 0
3799	/* XXX bob: this is probably no longer necessary */
3800	/* if the directory entry was incomplete, we can complete it now */
3801	if ((dcp->c_metadata.md_flags & MD_POPULATED) &&
3802	    ((dcp->c_flags & CN_ASYNC_POPULATE) == 0) &&
3803	    (dcp->c_filegrp->fg_flags & CFS_FG_WRITE)) {
3804		cachefs_dir_modentry(dcp, d_offset, &cookie, &cid);
3805	}
3806#endif
3807
3808out:
3809	mutex_exit(&dcp->c_statelock);
3810
3811	/* create the cnode */
3812	if (error == 0) {
3813		error = cachefs_cnode_make(&cid, fscp,
3814		    (valid_fid ? &cookie : NULL),
3815		    &va, backvp, cr, 0, &cp);
3816		if (error == 0) {
3817			*vpp = CTOV(cp);
3818		}
3819	}
3820
3821	if (backvp)
3822		VN_RELE(backvp);
3823
3824	return (error);
3825}
3826
/*
 * cachefs_create() - VOP_CREATE entry point.
 *
 * Dispatches to cachefs_create_connected() or
 * cachefs_create_disconnected() inside the standard retry loop, logs
 * the operation when cachefs logging is enabled, optionally packs the
 * new file on non-shared filesystems, and converts device results to
 * specfs vnodes.
 */
/*ARGSUSED7*/
static int
cachefs_create(vnode_t *dvp, char *nm, vattr_t *vap,
    vcexcl_t exclusive, int mode, vnode_t **vpp, cred_t *cr, int flag,
    caller_context_t *ct, vsecattr_t *vsecp)

{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error;
	int connected = 0;	/* require connected operation on retry */
	int held = 0;		/* nonzero while we hold cd access */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_create: ENTER dvp %p, nm %s\n",
		    (void *)dvp, nm);
#endif
	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the create operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/*
		 * if we are connected, perform the remote portion of the
		 * create.
		 */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_create_connected(dvp, nm, vap,
			    exclusive, mode, vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			} else if (error) {
				break;
			}
		}

		/* else we must be disconnected */
		else {
			error = cachefs_create_disconnected(dvp, nm, vap,
			    exclusive, mode, vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			} else if (error) {
				break;
			}
		}
		break;
	}

	/* record the create in the cachefs log, success or failure */
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CREATE)) {
		fid_t *fidp = NULL;
		ino64_t fileno = 0;
		cnode_t *cp = NULL;
		if (error == 0)
			cp = VTOC(*vpp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}
		cachefs_log_create(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held)
		cachefs_cd_release(fscp);

	/* pack the new file so it stays cached on non-shared filesystems */
	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
		(void) cachefs_pack(dvp, nm, cr);
	/* hand back a specfs vnode for device files */
	if (error == 0 && IS_DEVVP(*vpp)) {
		struct vnode *spcvp;

		spcvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cr);
		VN_RELE(*vpp);
		if (spcvp == NULL) {
			error = ENOSYS;
		} else {
			*vpp = spcvp;
		}
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_create: EXIT error %d\n", error);
#endif
	return (error);
}
3950
3951
/*
 * Connected-mode create.
 *
 * First looks the name up; if it already exists the create degenerates
 * into the usual exclusivity/access/truncate checks.  Otherwise the
 * file is created on the back fs with VOP_CREATE, a cnode is built
 * from the resulting fid/attrs, and (on non-shared filesystems) the
 * entry is added to the cached directory.  Returns 0 with *vpp held
 * on success.
 */
static int
cachefs_create_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	vnode_t *tvp = NULL;
	vnode_t *devvp;
	fid_t cookie;
	vattr_t va;
	cnode_t *ncp;
	cfs_cid_t cid;
	vnode_t *vp;
	uint32_t valid_fid;

	/* special case if file already exists */
	error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
	if (CFS_TIMEOUT(fscp, error))
		return (error);
	if (error == 0) {
		/* exists: apply EXCL / directory-write / truncate rules */
		if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else if ((error =
		    cachefs_access_connected(vp, mode, 0, cr)) == 0) {
			/* O_TRUNC semantics: shrink existing regular file */
			if ((vap->va_mask & AT_SIZE) && (vp->v_type == VREG)) {
				vap->va_mask = AT_SIZE;
				error = cachefs_setattr_common(vp, vap, 0,
				    cr, NULL);
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* consistency check the directory */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* get the backvp if necessary */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* create the file on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_create (nfsv4): dcp %p, dbackvp %p,"
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_CREATE(dcp->c_backvp, nm, vap, exclusive, mode,
	    &devvp, cr, 0, NULL, NULL);
	mutex_exit(&dcp->c_statelock);
	if (error)
		goto out;
	/* for device nodes, operate on the underlying real vnode */
	if (VOP_REALVP(devvp, &tvp, NULL) == 0) {
		VN_HOLD(tvp);
		VN_RELE(devvp);
	} else {
		tvp = devvp;
	}

	/* get the fid and attrs from the back fs */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(tvp, &cookie, &va, cr, valid_fid);
	if (error)
		goto out;

	/* make the cnode */
	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
	    &va, tvp, cr, 0, &ncp);
	if (error)
		goto out;

	*vpp = CTOV(ncp);

	/* enter it in the parent directory */
	mutex_enter(&dcp->c_statelock);
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		/* see if entry already exists */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
		if (error == ENOENT) {
			/* entry, does not exist, add the new file */
			error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
			    &ncp->c_id, SM_ASYNC);
			if (error) {
				/* dir caching failed; fall back to nocache */
				cachefs_nocache(dcp);
				error = 0;
			}
			/* XXX should this be done elsewhere, too? */
			dnlc_enter(dvp, nm, *vpp);
		} else {
			/* entry exists or some other problem */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	/* note the directory modification for consistency purposes */
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);
	if (tvp)
		VN_RELE(tvp);

	return (error);
}
4076
/*
 * Disconnected-mode create.
 *
 * The create is performed entirely against the local cache and logged
 * (via cachefs_dlog_*) for later replay against the back fs once
 * reconnected.  Requires a populated cached directory and a cache that
 * can be modified; otherwise returns ETIMEDOUT so the caller retries
 * in connected mode.  Returns 0 with *vpp held on success.
 */
static int
cachefs_create_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
	enum vcexcl exclusive, int mode, vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp;
	cnode_t *ncp = NULL;
	vnode_t *vp;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog cookie; nonzero once logged */
	fid_t cookie;
	cfs_cid_t cid;

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);

	/* give up if the directory is not populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}

	/* special case if file already exists */
	error = cachefs_dir_look(dcp, nm, &cookie, NULL, NULL, &cid);
	if (error == EINVAL) {
		/* entry exists but has no fid; need the back fs */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		return (ETIMEDOUT);
	}
	if (error == 0) {
		/* exists: apply EXCL / directory-write / truncate rules */
		mutex_exit(&dcp->c_statelock);
		rw_exit(&dcp->c_rwlock);
		error = cachefs_cnode_make(&cid, fscp, &cookie, NULL, NULL,
		    cr, 0, &cp);
		if (error) {
			return (error);
		}
		vp = CTOV(cp);

		/* stale attrs cannot be trusted while disconnected */
		if (cp->c_metadata.md_flags & MD_NEEDATTRS)
			error = ETIMEDOUT;
		else if (exclusive == EXCL)
			error = EEXIST;
		else if (vp->v_type == VDIR && (mode & VWRITE))
			error = EISDIR;
		else {
			mutex_enter(&cp->c_statelock);
			error = cachefs_access_local(cp, mode, cr);
			mutex_exit(&cp->c_statelock);
			if (!error) {
				/* O_TRUNC semantics on a regular file */
				if ((vap->va_mask & AT_SIZE) &&
				    (vp->v_type == VREG)) {
					vap->va_mask = AT_SIZE;
					error = cachefs_setattr_common(vp,
					    vap, 0, cr, NULL);
				}
			}
		}
		if (error) {
			VN_RELE(vp);
		} else
			*vpp = vp;
		return (error);
	}

	/* give up if cannot modify the cache */
	if (CFS_ISFS_WRITE_AROUND(fscp)) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}

	/* check access */
	if (error = cachefs_access_local(dcp, VWRITE, cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* mark dir as modified */
	cachefs_modified(dcp);
	mutex_exit(&dcp->c_statelock);

	/* must be privileged to set sticky bit */
	if ((vap->va_mode & VSVTX) && secpolicy_vnode_stky_modify(cr) != 0)
		vap->va_mode &= ~VSVTX;

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &ncp);
	if (error)
		goto out;

	mutex_enter(&ncp->c_statelock);

	/* get the front file now instead of later */
	if (vap->va_type == VREG) {
		error = cachefs_getfrontfile(ncp);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ASSERT(ncp->c_frontvp != NULL);
		ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
		ncp->c_metadata.md_flags |= MD_POPULATED;
	} else {
		/* non-regular files only need metadata, not a front file */
		ASSERT(ncp->c_flags & CN_ALLOC_PENDING);
		if (ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(ncp->c_filegrp);
		}
		error = filegrp_create_metadata(ncp->c_filegrp,
		    &ncp->c_metadata, &ncp->c_id);
		if (error) {
			mutex_exit(&ncp->c_statelock);
			goto out;
		}
		ncp->c_flags &= ~CN_ALLOC_PENDING;
	}
	mutex_enter(&dcp->c_statelock);
	/* inherit gid and ACL from the parent directory */
	cachefs_creategid(dcp, ncp, vap, cr);
	cachefs_createacl(dcp, ncp);
	mutex_exit(&dcp->c_statelock);

	/* set times on the file */
	gethrestime(&current_time);
	ncp->c_metadata.md_vattr.va_atime = current_time;
	ncp->c_metadata.md_localctime = current_time;
	ncp->c_metadata.md_localmtime = current_time;
	ncp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;

	/* reserve space for the daemon cid mapping */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}
	ncp->c_metadata.md_flags |= MD_MAPPING;

	/* mark the new file as modified */
	if (cachefs_modified_alloc(ncp)) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}
	ncp->c_flags |= CN_UPDATED;

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	ASSERT((ncp->c_flags & CN_ALLOC_PENDING) == 0);
	ASSERT((ncp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	error = filegrp_write_metadata(ncp->c_filegrp,
	    &ncp->c_id, &ncp->c_metadata);
	if (error) {
		mutex_exit(&ncp->c_statelock);
		goto out;
	}

	/* log the operation */
	commit = cachefs_dlog_create(fscp, dcp, nm, vap, exclusive,
	    mode, ncp, 0, cr);
	if (commit == 0) {
		mutex_exit(&ncp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	mutex_exit(&ncp->c_statelock);

	mutex_enter(&dcp->c_statelock);

	/* update parent dir times */
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;

	/* enter new file name in the parent directory */
	if (dcp->c_metadata.md_flags & MD_POPULATED) {
		error = cachefs_dir_enter(dcp, nm, &ncp->c_cookie,
		    &ncp->c_id, 0);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			error = ETIMEDOUT;
			goto out;
		}
		dnlc_enter(dvp, nm, CTOV(ncp));
	} else {
		/* dir lost its population while unlocked; must reconnect */
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&dcp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);

	/* seal the log record with the final success/failure status */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* destroy the cnode we created */
		if (ncp) {
			mutex_enter(&ncp->c_statelock);
			ncp->c_flags |= CN_DESTROY;
			mutex_exit(&ncp->c_statelock);
			VN_RELE(CTOV(ncp));
		}
	} else {
		*vpp = CTOV(ncp);
	}
	return (error);
}
4300
/*
 * cachefs_remove
 *
 * VOP_REMOVE entry point: remove the entry `nm' from directory `dvp'.
 * Looks up the victim vnode, then dispatches to
 * cachefs_remove_connected() or cachefs_remove_disconnected()
 * according to the current cache/back-fs connection state, restarting
 * the whole sequence whenever a timeout forces a mode change.
 *
 * Returns 0 on success or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_remove(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
    int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* fs access (cachefs_cd_access) held */
	int connected = 0;	/* require connected operation if set */
	size_t namlen;
	vnode_t *vp = NULL;	/* victim vnode from the lookup below */
	int vfslock = 0;	/* vn_vfswlock held on vp (VDIR case) */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: ENTER dvp %p name %s\n",
		    (void *)dvp, nm);
#endif
	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the remove operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	/*
	 * Retry loop: each pass first drops any state carried over
	 * from the previous pass (vfs lock, vnode hold, fs access).
	 */
	for (;;) {
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					/* ".." is not removable */
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the file to delete */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			/* log the failed lookup with an empty fid */
			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE)) {
				struct fid foo;

				bzero(&foo, sizeof (foo));
				cachefs_log_remove(cachep, error,
				    fscp->fs_cfsvfsp, &foo, 0, crgetuid(cr));
			}
			break;
		}

		if (vp->v_type == VDIR) {
			/* must be privileged to remove dirs with unlink() */
			if ((error = secpolicy_fs_linkdir(cr, vp->v_vfsp)) != 0)
				break;

			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(vp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(vp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		/*
		 * Dispatch on connection state; a timeout in either
		 * path restarts the loop in the other mode.
		 */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_remove_connected(dvp, nm, cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_remove_disconnected(dvp, nm, cr,
			    vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	/* disabled: `cp' is not in scope in this function */
#if 0
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_REMOVE))
		cachefs_log_remove(cachep, error, fscp->fs_cfsvfsp,
		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
		    crgetuid(cr));
#endif

	if (held)
		cachefs_cd_release(fscp);

	if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_remove: EXIT dvp %p\n", (void *)dvp);
#endif

	return (error);
}
4486
/*
 * cachefs_remove_connected
 *
 * Perform a remove while connected to the back file system.  The
 * remove is done on the back fs first (VOP_REMOVE); on success the
 * cached directory entry and the victim's cached attributes are
 * updated to match.  Takes dcp->c_rwlock as WRITER for the duration.
 *
 * dvp/dcp - directory being modified; vp/cp - file being removed.
 * Returns 0 on success or an errno value.
 */
int
cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;

	/*
	 * Acquire the rwlock (WRITER) on the directory to prevent other
	 * activity on the directory.
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);

	/* purge dnlc of this entry so can get accurate vnode count */
	dnlc_purge_vp(vp);

	/*
	 * If the cnode is active, make a link to the file
	 * so operations on the file will continue.
	 * (v_count of 1, or 2 with an inactive pending, counts as
	 * "not active" here.)
	 */
	if ((vp->v_type != VDIR) &&
	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
		error = cachefs_remove_dolink(dvp, vp, nm, cr);
		if (error)
			goto out;
	}

	/* else call backfs NFSv4 handler if NFSv4 */
	else if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_remove_backfs_nfsv4(dvp, nm, cr, vp);
		goto out;
	}

	/* else drop the backvp so nfs does not do rename */
	else if (cp->c_backvp) {
		mutex_enter(&cp->c_statelock);
		/* re-check under the lock before releasing */
		if (cp->c_backvp) {
			VN_RELE(cp->c_backvp);
			cp->c_backvp = NULL;
		}
		mutex_exit(&cp->c_statelock);
	}

	mutex_enter(&dcp->c_statelock);

	/* get the backvp */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* check directory consistency */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* perform the remove on the back fs */
	error = VOP_REMOVE(dcp->c_backvp, nm, cr, NULL, 0);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the dir has been modified */
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

	/*
	 * Remove the entry from the populated directory; on failure
	 * fall back to nocache mode rather than failing the remove.
	 */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_rmentry(dcp, nm);
		if (error) {
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	mutex_exit(&dcp->c_statelock);

	/* fix up the file we deleted: drop a link, destroy on last one */
	mutex_enter(&cp->c_statelock);
	if (cp->c_attr.va_nlink == 1)
		cp->c_flags |= CN_DESTROY;
	else
		cp->c_flags |= CN_UPDATED;

	cp->c_attr.va_nlink--;
	CFSOP_MODIFY_COBJECT(fscp, cp, cr);
	mutex_exit(&cp->c_statelock);

out:
	rw_exit(&dcp->c_rwlock);
	return (error);
}
4585
4586/*
4587 * cachefs_remove_backfs_nfsv4
4588 *
4589 * Call NFSv4 back filesystem to handle the remove (cachefs
4590 * pass-through support for NFSv4).
4591 */
4592int
4593cachefs_remove_backfs_nfsv4(vnode_t *dvp, char *nm, cred_t *cr, vnode_t *vp)
4594{
4595	cnode_t *dcp = VTOC(dvp);
4596	cnode_t *cp = VTOC(vp);
4597	vnode_t *dbackvp;
4598	fscache_t *fscp = C_TO_FSCACHE(dcp);
4599	int error = 0;
4600
4601	/*
4602	 * For NFSv4 pass-through to work, only connected operation
4603	 * is supported, the cnode backvp must exist, and cachefs
4604	 * optional (eg., disconnectable) flags are turned off. Assert
4605	 * these conditions for the getattr operation.
4606	 */
4607	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
4608	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
4609
4610	/* Should hold the directory readwrite lock to update directory */
4611	ASSERT(RW_WRITE_HELD(&dcp->c_rwlock));
4612
4613	/*
4614	 * Update attributes for directory. Note that
4615	 * CFSOP_CHECK_COBJECT asserts for c_statelock being
4616	 * held, so grab it before calling the routine.
4617	 */
4618	mutex_enter(&dcp->c_statelock);
4619	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
4620	mutex_exit(&dcp->c_statelock);
4621	if (error)
4622		goto out;
4623
4624	/*
4625	 * Update attributes for cp. Note that CFSOP_CHECK_COBJECT
4626	 * asserts for c_statelock being held, so grab it before
4627	 * calling the routine.
4628	 */
4629	mutex_enter(&cp->c_statelock);
4630	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
4631	if (error) {
4632		mutex_exit(&cp->c_statelock);
4633		goto out;
4634	}
4635
4636	/*
4637	 * Drop the backvp so nfs if the link count is 1 so that
4638	 * nfs does not do rename. Ensure that we will destroy the cnode
4639	 * since this cnode no longer contains the backvp. Note that we
4640	 * maintain lock on this cnode to prevent change till the remove
4641	 * completes, otherwise other operations will encounter an ESTALE
4642	 * if they try to use the cnode with CN_DESTROY set (see
4643	 * cachefs_get_backvp()), or change the state of the cnode
4644	 * while we're removing it.
4645	 */
4646	if (cp->c_attr.va_nlink == 1) {
4647		/*
4648		 * The unldvp information is created for the case
4649		 * when there is more than one reference on the
4650		 * vnode when a remove operation is called. If the
4651		 * remove itself was holding a reference to the
4652		 * vnode, then a subsequent remove will remove the
4653		 * backvp, so we need to get rid of the unldvp
4654		 * before removing the backvp. An alternate would
4655		 * be to simply ignore the remove and let the
4656		 * inactivation routine do the deletion of the
4657		 * unldvp.
4658		 */
4659		if (cp->c_unldvp) {
4660			VN_RELE(cp->c_unldvp);
4661			cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
4662			crfree(cp->c_unlcred);
4663			cp->c_unldvp = NULL;
4664			cp->c_unlcred = NULL;
4665		}
4666		cp->c_flags |= CN_DESTROY;
4667		cp->c_attr.va_nlink = 0;
4668		VN_RELE(cp->c_backvp);
4669		cp->c_backvp = NULL;
4670	}
4671
4672	/* perform the remove on back fs after extracting directory backvp */
4673	mutex_enter(&dcp->c_statelock);
4674	dbackvp = dcp->c_backvp;
4675	mutex_exit(&dcp->c_statelock);
4676
4677	CFS_DPRINT_BACKFS_NFSV4(fscp,
4678	    ("cachefs_remove (nfsv4): dcp %p, dbackvp %p, name %s\n",
4679	    dcp, dbackvp, nm));
4680	error = VOP_REMOVE(dbackvp, nm, cr, NULL, 0);
4681	if (error) {
4682		mutex_exit(&cp->c_statelock);
4683		goto out;
4684	}
4685
4686	/* fix up the file we deleted, if not destroying the cnode */
4687	if ((cp->c_flags & CN_DESTROY) == 0) {
4688		cp->c_attr.va_nlink--;
4689		cp->c_flags |= CN_UPDATED;
4690	}
4691
4692	mutex_exit(&cp->c_statelock);
4693
4694out:
4695	return (error);
4696}
4697
/*
 * cachefs_remove_disconnected
 *
 * Perform a remove while disconnected from the back file system.
 * The operation is applied to the cached directory and the victim's
 * cached attributes, and is recorded in the dlog so it can be rolled
 * forward when a connection is re-established.  Returns ETIMEDOUT
 * when the cached state is insufficient to do the remove locally
 * (the caller then retries in connected mode), ENOSPC when logging
 * or allocation fails, or 0 on success.
 */
int
cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
    vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	off_t commit = 0;	/* dlog offset; non-zero once logged */
	timestruc_t current_time;

	/* write-around file systems cannot operate disconnected */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	/* cannot proceed without valid cached attributes */
	if (cp->c_metadata.md_flags & MD_NEEDATTRS)
		return (ETIMEDOUT);

	/*
	 * Acquire the rwlock (WRITER) on the directory to prevent other
	 * activity on the directory.
	 */
	rw_enter(&dcp->c_rwlock, RW_WRITER);

	/* dir must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		goto out;
	}

	/* sticky-bit permission check needs both cnodes locked */
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	error = cachefs_stickyrmchk(dcp, cp, cr);

	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	if (error)
		goto out;

	/* purge dnlc of this entry so can get accurate vnode count */
	dnlc_purge_vp(vp);

	/*
	 * If the cnode is active, make a link to the file
	 * so operations on the file will continue.
	 */
	if ((vp->v_type != VDIR) &&
	    !((vp->v_count == 1) || ((vp->v_count == 2) && cp->c_ipending))) {
		error = cachefs_remove_dolink(dvp, vp, nm, cr);
		if (error)
			goto out;
	}

	/*
	 * If other links remain, the file itself survives; mark it
	 * modified and make sure it has a daemon cid mapping so the
	 * logged remove can be replayed against it.
	 */
	if (cp->c_attr.va_nlink > 1) {
		mutex_enter(&cp->c_statelock);
		if (cachefs_modified_alloc(cp)) {
			mutex_exit(&cp->c_statelock);
			error = ENOSPC;
			goto out;
		}
		if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				mutex_exit(&cp->c_statelock);
				error = ENOSPC;
				goto out;
			}
			cp->c_metadata.md_flags |= MD_MAPPING;
			cp->c_flags |= CN_UPDATED;
		}
		mutex_exit(&cp->c_statelock);
	}

	/* log the remove */
	commit = cachefs_dlog_remove(fscp, dcp, nm, cp, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	/* remove the file from the dir */
	mutex_enter(&dcp->c_statelock);
	/* re-check now that we hold c_statelock */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;

	}
	cachefs_modified(dcp);
	error = cachefs_dir_rmentry(dcp, nm);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		/* ENOTDIR means the cached dir is unusable; retry */
		if (error == ENOTDIR)
			error = ETIMEDOUT;
		goto out;
	}

	/* update parent dir times */
	gethrestime(&current_time);
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

	/* adjust file we are deleting; destroy on last link */
	mutex_enter(&cp->c_statelock);
	cp->c_attr.va_nlink--;
	cp->c_metadata.md_localctime = current_time;
	cp->c_metadata.md_flags |= MD_LOCALCTIME;
	if (cp->c_attr.va_nlink == 0) {
		cp->c_flags |= CN_DESTROY;
	} else {
		cp->c_flags |= CN_UPDATED;
	}
	mutex_exit(&cp->c_statelock);

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	rw_exit(&dcp->c_rwlock);
	return (error);
}
4827
/*
 * cachefs_link
 *
 * VOP_LINK entry point: create link `tnm' in directory `tdvp' to the
 * file `fvp'.  Dispatches to cachefs_link_connected() or
 * cachefs_link_disconnected() based on the connection state, retrying
 * on timeouts.  tdcp->c_rwlock is held as WRITER across each attempt.
 *
 * Returns 0 on success or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_link(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr,
    caller_context_t *ct, int flags)
{
	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
	cnode_t *tdcp = VTOC(tdvp);
	struct vnode *realvp;
	int error = 0;
	int held = 0;		/* fs access (cachefs_cd_access) held */
	int connected = 0;	/* require connected operation if set */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_link: ENTER fvp %p tdvp %p tnm %s\n",
		    (void *)fvp, (void *)tdvp, tnm);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(tdcp->c_flags & CN_NOCACHE);

	/* link to the underlying vnode if fvp is a stacked-on vnode */
	if (VOP_REALVP(fvp, &realvp, ct) == 0) {
		fvp = realvp;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the link operation.
	 */

	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(tdcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			/* drop the rwlock before releasing fs access */
			rw_exit(&tdcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&tdcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_link_connected(tdvp, fvp, tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				rw_exit(&tdcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_link_disconnected(tdvp, fvp, tnm,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* retry, demanding connected operation */
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (held) {
		rw_exit(&tdcp->c_rwlock);
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_link: EXIT fvp %p tdvp %p tnm %s\n",
		    (void *)fvp, (void *)tdvp, tnm);
#endif
	return (error);
}
4923
/*
 * cachefs_link_connected
 *
 * Create the link on the back file system (VOP_LINK) and update the
 * cached target directory and source file to match.  Caller holds
 * tdcp->c_rwlock (see cachefs_link()).
 *
 * tdvp/tdcp - target directory; fvp/fcp - existing file; tnm - new name.
 * Returns 0 on success or an errno value.
 */
static int
cachefs_link_connected(vnode_t *tdvp, vnode_t *fvp, char *tnm, cred_t *cr)
{
	cnode_t *tdcp = VTOC(tdvp);
	cnode_t *fcp = VTOC(fvp);
	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
	int error = 0;
	vnode_t *backvp = NULL;	/* held back vnode of the source file */

	/*
	 * Get and hold the source file's backvp, unless the "file"
	 * is the target directory itself.
	 */
	if (tdcp != fcp) {
		mutex_enter(&fcp->c_statelock);

		if (fcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, fcp);
			if (error) {
				mutex_exit(&fcp->c_statelock);
				goto out;
			}
		}

		error = CFSOP_CHECK_COBJECT(fscp, fcp, 0, cr);
		if (error) {
			mutex_exit(&fcp->c_statelock);
			goto out;
		}
		backvp = fcp->c_backvp;
		VN_HOLD(backvp);
		mutex_exit(&fcp->c_statelock);
	}

	mutex_enter(&tdcp->c_statelock);

	/* get backvp of target directory */
	if (tdcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, tdcp);
		if (error) {
			mutex_exit(&tdcp->c_statelock);
			goto out;
		}
	}

	/* consistency check target directory */
	error = CFSOP_CHECK_COBJECT(fscp, tdcp, 0, cr);
	if (error) {
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}
	/* tdcp == fcp case: link the directory's own backvp */
	if (backvp == NULL) {
		backvp = tdcp->c_backvp;
		VN_HOLD(backvp);
	}

	/* perform the link on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_link (nfsv4): tdcp %p, tdbackvp %p, "
	    "name %s\n", tdcp, tdcp->c_backvp, tnm));
	error = VOP_LINK(tdcp->c_backvp, backvp, tnm, cr, NULL, 0);
	if (error) {
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, tdcp, cr);

	/*
	 * If the dir is populated, add the new link; on failure fall
	 * back to nocache mode rather than failing the link.
	 */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (tdcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
		    &fcp->c_id, SM_ASYNC);
		if (error) {
			cachefs_nocache(tdcp);
			error = 0;
		}
	}
	mutex_exit(&tdcp->c_statelock);

	/* get the new link count on the file */
	mutex_enter(&fcp->c_statelock);
	fcp->c_flags |= CN_UPDATED;
	CFSOP_MODIFY_COBJECT(fscp, fcp, cr);
	if (fcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, fcp);
		if (error) {
			mutex_exit(&fcp->c_statelock);
			goto out;
		}
	}

	/* XXX bob: given what modify_cobject does this seems unnecessary */
	fcp->c_attr.va_mask = AT_ALL;
	error = VOP_GETATTR(fcp->c_backvp, &fcp->c_attr, 0, cr, NULL);
	mutex_exit(&fcp->c_statelock);
out:
	if (backvp)
		VN_RELE(backvp);

	return (error);
}
5022
/*
 * cachefs_link_disconnected
 *
 * Create the link in the cached directory only, recording the
 * operation in the dlog for later roll-forward.  Caller holds
 * tdcp->c_rwlock (see cachefs_link()).  Returns ETIMEDOUT when the
 * cached state is insufficient (caller retries connected), ENOSPC on
 * log/allocation failure, EEXIST if tnm already exists, EPERM on
 * privilege failure, or 0 on success.
 */
static int
cachefs_link_disconnected(vnode_t *tdvp, vnode_t *fvp, char *tnm,
    cred_t *cr)
{
	cnode_t *tdcp = VTOC(tdvp);
	cnode_t *fcp = VTOC(fvp);
	fscache_t *fscp = VFS_TO_FSCACHE(tdvp->v_vfsp);
	int error = 0;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog offset; non-zero once logged */

	/*
	 * Privilege checks: linking a directory needs fs_linkdir;
	 * linking someone else's file needs basic_link.
	 * (&& binds tighter than || in both arms.)
	 */
	if (fvp->v_type == VDIR && secpolicy_fs_linkdir(cr, fvp->v_vfsp) != 0 ||
	    fcp->c_attr.va_uid != crgetuid(cr) && secpolicy_basic_link(cr) != 0)
		return (EPERM);

	/* write-around file systems cannot operate disconnected */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	/* cannot proceed without valid cached attributes */
	if (fcp->c_metadata.md_flags & MD_NEEDATTRS)
		return (ETIMEDOUT);

	mutex_enter(&tdcp->c_statelock);

	/* check permissions */
	if (error = cachefs_access_local(tdcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((tdcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}

	/* make sure tnm does not already exist in the directory */
	error = cachefs_dir_look(tdcp, tnm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		/* cached dir is unusable; force connected retry */
		error = ETIMEDOUT;
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		error = EEXIST;
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}

	mutex_enter(&fcp->c_statelock);

	/* create a mapping for the file if necessary */
	if ((fcp->c_metadata.md_flags & MD_MAPPING) == 0) {
		error = cachefs_dlog_cidmap(fscp);
		if (error) {
			mutex_exit(&fcp->c_statelock);
			mutex_exit(&tdcp->c_statelock);
			error = ENOSPC;
			goto out;
		}
		fcp->c_metadata.md_flags |= MD_MAPPING;
		fcp->c_flags |= CN_UPDATED;
	}

	/* mark file as modified */
	if (cachefs_modified_alloc(fcp)) {
		mutex_exit(&fcp->c_statelock);
		mutex_exit(&tdcp->c_statelock);
		error = ENOSPC;
		goto out;
	}
	mutex_exit(&fcp->c_statelock);

	/* log the operation */
	commit = cachefs_dlog_link(fscp, tdcp, tnm, fcp, cr);
	if (commit == 0) {
		mutex_exit(&tdcp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	gethrestime(&current_time);

	/* make the new link */
	cachefs_modified(tdcp);
	error = cachefs_dir_enter(tdcp, tnm, &fcp->c_cookie,
	    &fcp->c_id, SM_ASYNC);
	if (error) {
		/*
		 * NOTE(review): the dir_enter failure is dropped
		 * (error = 0) and the already-logged link is still
		 * committed below without updating local times or
		 * the file's link count -- confirm this best-effort
		 * behavior is intended.
		 */
		error = 0;
		mutex_exit(&tdcp->c_statelock);
		goto out;
	}

	/* Update mtime/ctime of parent dir */
	tdcp->c_metadata.md_localmtime = current_time;
	tdcp->c_metadata.md_localctime = current_time;
	tdcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	tdcp->c_flags |= CN_UPDATED;
	mutex_exit(&tdcp->c_statelock);

	/* update the file we linked to */
	mutex_enter(&fcp->c_statelock);
	fcp->c_attr.va_nlink++;
	fcp->c_metadata.md_localctime = current_time;
	fcp->c_metadata.md_flags |= MD_LOCALCTIME;
	fcp->c_flags |= CN_UPDATED;
	mutex_exit(&fcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	return (error);
}
5142
5143/*
5144 * Serialize all renames in CFS, to avoid deadlocks - We have to hold two
5145 * cnodes atomically.
5146 */
5147kmutex_t cachefs_rename_lock;
5148
/*
 * cachefs_rename
 *
 * VOP_RENAME entry point: rename odvp/onm to ndvp/nnm.  Looks up any
 * existing target (which the rename will delete), guards against the
 * mounted-on-directory race the same way ufs_dirremove does, and
 * dispatches to cachefs_rename_connected() or
 * cachefs_rename_disconnected(), restarting on timeouts.
 *
 * Returns 0 on success or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_rename(vnode_t *odvp, char *onm, vnode_t *ndvp,
    char *nnm, cred_t *cr, caller_context_t *ct, int flags)
{
	fscache_t *fscp = C_TO_FSCACHE(VTOC(odvp));
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* fs access (cachefs_cd_access) held */
	int connected = 0;	/* require connected operation if set */
	vnode_t *delvp = NULL;	/* existing target to be deleted, if any */
	vnode_t *tvp = NULL;	/* source object, looked up when dirs differ */
	int vfslock = 0;	/* vn_vfswlock held on delvp (VDIR case) */
	struct vnode *realvp;

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/* operate on the underlying vnode if ndvp is stacked on */
	if (VOP_REALVP(ndvp, &realvp, ct) == 0)
		ndvp = realvp;

	/*
	 * if the fs NOFILL or NOCACHE flags are on, then the old and new
	 * directory cnodes better indicate NOCACHE mode as well.
	 */
	ASSERT(
	    (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0 ||
	    ((VTOC(odvp)->c_flags & CN_NOCACHE) &&
	    (VTOC(ndvp)->c_flags & CN_NOCACHE)));

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the rename operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(odvp));
	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(ndvp));

	/*
	 * Retry loop: each pass first drops any state carried over
	 * from the previous pass (vfs lock, target hold, fs access).
	 */
	for (;;) {
		if (vfslock) {
			vn_vfsunlock(delvp);
			vfslock = 0;
		}
		if (delvp) {
			VN_RELE(delvp);
			delvp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop for NFSv4 connected support */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* sanity check */
		if ((odvp->v_type != VDIR) || (ndvp->v_type != VDIR)) {
			error = EINVAL;
			break;
		}

		/* cannot rename from or to . or .. */
		if (strcmp(onm, ".") == 0 || strcmp(onm, "..") == 0 ||
		    strcmp(nnm, ".") == 0 || strcmp(nnm, "..") == 0) {
			error = EINVAL;
			break;
		}

		if (odvp != ndvp) {
			/*
			 * if moving a directory, its notion
			 * of ".." will change
			 */
			error = cachefs_lookup_common(odvp, onm, &tvp,
			    NULL, 0, NULL, cr);
			if (error == 0) {
				ASSERT(tvp != NULL);
				if (tvp->v_type == VDIR) {
					cnode_t *cp = VTOC(tvp);

					dnlc_remove(tvp, "..");

					mutex_enter(&cp->c_statelock);
					CFSOP_MODIFY_COBJECT(fscp, cp, cr);
					mutex_exit(&cp->c_statelock);
				}
			} else {
				/* source lookup failed; maybe retry */
				tvp = NULL;
				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
					if (CFS_TIMEOUT(fscp, error)) {
						cachefs_cd_release(fscp);
						held = 0;
						cachefs_cd_timedout(fscp);
						connected = 0;
						continue;
					}
				} else {
					if (CFS_TIMEOUT(fscp, error)) {
						connected = 1;
						continue;
					}
				}
				break;
			}
		}

		/* get the cnode if file being deleted */
		error = cachefs_lookup_common(ndvp, nnm, &delvp, NULL, 0,
		    NULL, cr);
		if (error) {
			delvp = NULL;
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					connected = 1;
					continue;
				}
			}
			/* only a missing target is acceptable */
			if (error != ENOENT)
				break;
		}

		if (delvp && delvp->v_type == VDIR) {
			/* see ufs_dirremove for why this is done, mount race */
			if (vn_vfswlock(delvp)) {
				error = EBUSY;
				break;
			}
			vfslock = 1;
			if (vn_mountedvfs(delvp) != NULL) {
				error = EBUSY;
				break;
			}
		}

		/*
		 * Dispatch on connection state; a timeout in either
		 * path restarts the loop in the other mode.
		 */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_rename_connected(odvp, onm,
			    ndvp, nnm, cr, delvp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_rename_disconnected(odvp, onm,
			    ndvp, nnm, cr, delvp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	/* log the rename, recording whether a target was deleted */
	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RENAME)) {
		struct fid gone;

		bzero(&gone, sizeof (gone));
		gone.fid_len = MAXFIDSZ;
		if (delvp != NULL)
			(void) VOP_FID(delvp, &gone, ct);

		cachefs_log_rename(cachep, error, fscp->fs_cfsvfsp,
		    &gone, 0, (delvp != NULL), crgetuid(cr));
	}

	if (held)
		cachefs_cd_release(fscp);

	if (vfslock)
		vn_vfsunlock(delvp);

	if (delvp)
		VN_RELE(delvp);
	if (tvp)
		VN_RELE(tvp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}
5350
5351static int
cachefs_rename_connected(vnode_t *odvp, char *onm, vnode_t *ndvp,
    char *nnm, cred_t *cr, vnode_t *delvp)
{
	/*
	 * Rename entry "onm" in directory odvp to "nnm" in directory ndvp
	 * while connected to the back file system.  The rename is first
	 * performed on the back fs with VOP_RENAME(); the cached directory
	 * contents and the affected cnodes are then brought up to date.
	 * If delvp is non-NULL it is the existing target file that this
	 * rename removes.  Returns 0 or an errno value.
	 *
	 * Both directory c_rwlocks are held for the duration; the global
	 * cachefs_rename_lock serializes lock acquisition (see below).
	 */
	cnode_t *odcp = VTOC(odvp);
	cnode_t *ndcp = VTOC(ndvp);
	vnode_t *revp = NULL;		/* vnode of the file being renamed */
	cnode_t *recp;			/* cnode of the file being renamed */
	cnode_t *delcp;			/* cnode of delvp, set only if delvp */
	fscache_t *fscp = C_TO_FSCACHE(odcp);
	int error = 0;
	struct fid cookie;
	struct fid *cookiep;
	cfs_cid_t cid;
	int gotdirent;

	/* find the file we are renaming */
	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
	if (error)
		return (error);
	recp = VTOC(revp);

	/*
	 * To avoid deadlock, we acquire this global rename lock before
	 * we try to get the locks for the source and target directories.
	 */
	mutex_enter(&cachefs_rename_lock);
	rw_enter(&odcp->c_rwlock, RW_WRITER);
	if (odcp != ndcp) {
		rw_enter(&ndcp->c_rwlock, RW_WRITER);
	}
	mutex_exit(&cachefs_rename_lock);

	ASSERT((odcp->c_flags & CN_ASYNC_POP_WORKING) == 0);
	ASSERT((ndcp->c_flags & CN_ASYNC_POP_WORKING) == 0);

	/* make sure the source directory has a backvp and is consistent */
	mutex_enter(&odcp->c_statelock);
	if (odcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, odcp);
		if (error) {
			mutex_exit(&odcp->c_statelock);
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, odcp, 0, cr);
	if (error) {
		mutex_exit(&odcp->c_statelock);
		goto out;
	}
	mutex_exit(&odcp->c_statelock);

	/* same checks for the target directory, if it is a different one */
	if (odcp != ndcp) {
		mutex_enter(&ndcp->c_statelock);
		if (ndcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, ndcp);
			if (error) {
				mutex_exit(&ndcp->c_statelock);
				goto out;
			}
		}

		error = CFSOP_CHECK_COBJECT(fscp, ndcp, 0, cr);
		if (error) {
			mutex_exit(&ndcp->c_statelock);
			goto out;
		}
		mutex_exit(&ndcp->c_statelock);
	}

	/* if a file is being deleted because of this rename */
	if (delvp) {
		/* if src and dest file are same */
		if (delvp == revp) {
			error = 0;
			goto out;
		}

		/*
		 * If the cnode is active, make a link to the file
		 * so operations on the file will continue.
		 */
		dnlc_purge_vp(delvp);
		delcp = VTOC(delvp);
		if ((delvp->v_type != VDIR) &&
		    !((delvp->v_count == 1) ||
		    ((delvp->v_count == 2) && delcp->c_ipending))) {
			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
			if (error)
				goto out;
		}
	}

	/* do the rename on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_rename (nfsv4): odcp %p, odbackvp %p, "
	    " ndcp %p, ndbackvp %p, onm %s, nnm %s\n",
	    odcp, odcp->c_backvp, ndcp, ndcp->c_backvp, onm, nnm));
	error = VOP_RENAME(odcp->c_backvp, onm, ndcp->c_backvp, nnm, cr, NULL,
	    0);
	if (error)
		goto out;

	/* purge mappings to file in the old directory */
	dnlc_purge_vp(odvp);

	/* purge mappings in the new dir if we deleted a file */
	if (delvp && (odvp != ndvp))
		dnlc_purge_vp(ndvp);

	/*
	 * Update the file we just deleted.  delcp was set in the delvp
	 * block above; delvp != NULL guarantees that block executed.
	 */
	if (delvp) {
		mutex_enter(&delcp->c_statelock);
		if (delcp->c_attr.va_nlink == 1) {
			/* last link gone; destroy the cached object later */
			delcp->c_flags |= CN_DESTROY;
		} else {
			delcp->c_flags |= CN_UPDATED;
		}
		delcp->c_attr.va_nlink--;
		CFSOP_MODIFY_COBJECT(fscp, delcp, cr);
		mutex_exit(&delcp->c_statelock);
	}

	/* find the entry in the old directory */
	mutex_enter(&odcp->c_statelock);
	gotdirent = 0;
	cookiep = NULL;
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (odcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_look(odcp, onm, &cookie,
		    NULL, NULL, &cid);
		/* EINVAL means found but no usable cookie; cid is valid */
		if (error == 0 || error == EINVAL) {
			gotdirent = 1;
			if (error == 0)
				cookiep = &cookie;
		} else {
			cachefs_inval_object(odcp);
		}
	}
	error = 0;

	/* remove the directory entry from the old directory */
	if (gotdirent) {
		error = cachefs_dir_rmentry(odcp, onm);
		if (error) {
			/* cached dir is suspect; stop caching it */
			cachefs_nocache(odcp);
			error = 0;
		}
	}
	CFSOP_MODIFY_COBJECT(fscp, odcp, cr);
	mutex_exit(&odcp->c_statelock);

	/* install the directory entry in the new directory */
	mutex_enter(&ndcp->c_statelock);
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (ndcp->c_metadata.md_flags & MD_POPULATED)) {
		/* assume failure unless the entry can be moved below */
		error = 1;
		if (gotdirent) {
			ASSERT(cid.cid_fileno != 0);
			error = 0;
			if (delvp) {
				error = cachefs_dir_rmentry(ndcp, nnm);
			}
			if (error == 0) {
				error = cachefs_dir_enter(ndcp, nnm, cookiep,
				    &cid, SM_ASYNC);
			}
		}
		if (error) {
			cachefs_nocache(ndcp);
			error = 0;
		}
	}
	if (odcp != ndcp)
		CFSOP_MODIFY_COBJECT(fscp, ndcp, cr);
	mutex_exit(&ndcp->c_statelock);

	/* ctime of renamed file has changed */
	mutex_enter(&recp->c_statelock);
	CFSOP_MODIFY_COBJECT(fscp, recp, cr);
	mutex_exit(&recp->c_statelock);

out:
	if (odcp != ndcp)
		rw_exit(&ndcp->c_rwlock);
	rw_exit(&odcp->c_rwlock);

	/* drop the hold acquired by cachefs_lookup_common() */
	VN_RELE(revp);

	return (error);
}
5542
/*
 * cachefs_rename_disconnected
 *
 * Rename entry "onm" in directory odvp to "nnm" in directory ndvp while
 * disconnected from the back file system.  The operation is performed
 * entirely against the cached directory contents and recorded in the
 * dlog so it can be replayed against the back fs at reconnect time.
 * delvp, if non-NULL, is the existing target this rename removes.
 * Returns 0 or an errno value; ETIMEDOUT means the operation cannot be
 * done disconnected and the caller should retry in connected mode.
 */
static int
cachefs_rename_disconnected(vnode_t *odvp, char *onm, vnode_t *ndvp,
    char *nnm, cred_t *cr, vnode_t *delvp)
{
	cnode_t *odcp = VTOC(odvp);
	cnode_t *ndcp = VTOC(ndvp);
	cnode_t *delcp = NULL;		/* cnode of delvp, if any */
	vnode_t *revp = NULL;		/* vnode of the file being renamed */
	cnode_t *recp;			/* cnode of the file being renamed */
	fscache_t *fscp = C_TO_FSCACHE(odcp);
	int error = 0;
	struct fid cookie;
	struct fid *cookiep;
	cfs_cid_t cid;
	off_t commit = 0;		/* dlog offset; nonzero once logged */
	timestruc_t current_time;

	/* write-around file systems cannot buffer changes locally */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	/* find the file we are renaming */
	error = cachefs_lookup_common(odvp, onm, &revp, NULL, 0, NULL, cr);
	if (error)
		return (error);
	recp = VTOC(revp);

	/*
	 * To avoid deadlock, we acquire this global rename lock before
	 * we try to get the locks for the source and target directories.
	 */
	mutex_enter(&cachefs_rename_lock);
	rw_enter(&odcp->c_rwlock, RW_WRITER);
	if (odcp != ndcp) {
		rw_enter(&ndcp->c_rwlock, RW_WRITER);
	}
	mutex_exit(&cachefs_rename_lock);

	/* stale attributes cannot be trusted while disconnected */
	if (recp->c_metadata.md_flags & MD_NEEDATTRS) {
		error = ETIMEDOUT;
		goto out;
	}

	/*
	 * Make sure the renamed file has a cid-to-fid mapping logged so
	 * the replay code can identify it (checked again under the lock).
	 */
	if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
		mutex_enter(&recp->c_statelock);
		if ((recp->c_metadata.md_flags & MD_MAPPING) == 0) {
			error = cachefs_dlog_cidmap(fscp);
			if (error) {
				mutex_exit(&recp->c_statelock);
				error = ENOSPC;
				goto out;
			}
			recp->c_metadata.md_flags |= MD_MAPPING;
			recp->c_flags |= CN_UPDATED;
		}
		mutex_exit(&recp->c_statelock);
	}

	/* check permissions */
	/* XXX clean up this mutex junk sometime */
	mutex_enter(&odcp->c_statelock);
	error = cachefs_access_local(odcp, (VEXEC|VWRITE), cr);
	mutex_exit(&odcp->c_statelock);
	if (error != 0)
		goto out;
	mutex_enter(&ndcp->c_statelock);
	error = cachefs_access_local(ndcp, (VEXEC|VWRITE), cr);
	mutex_exit(&ndcp->c_statelock);
	if (error != 0)
		goto out;
	mutex_enter(&odcp->c_statelock);
	error = cachefs_stickyrmchk(odcp, recp, cr);
	mutex_exit(&odcp->c_statelock);
	if (error != 0)
		goto out;

	/* dirs must be populated */
	if (((odcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    ((ndcp->c_metadata.md_flags & MD_POPULATED) == 0)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* for now do not allow moving dirs because could cause cycles */
	if ((((revp->v_type == VDIR) && (odvp != ndvp))) ||
	    (revp == odvp)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if a file is being deleted because of this rename */
	if (delvp) {
		delcp = VTOC(delvp);

		/* if src and dest file are the same */
		if (delvp == revp) {
			error = 0;
			goto out;
		}

		if (delcp->c_metadata.md_flags & MD_NEEDATTRS) {
			error = ETIMEDOUT;
			goto out;
		}

		/* if there are hard links to this file */
		if (delcp->c_attr.va_nlink > 1) {
			mutex_enter(&delcp->c_statelock);
			if (cachefs_modified_alloc(delcp)) {
				mutex_exit(&delcp->c_statelock);
				error = ENOSPC;
				goto out;
			}

			/* ensure the doomed file has a cid mapping too */
			if ((delcp->c_metadata.md_flags & MD_MAPPING) == 0) {
				error = cachefs_dlog_cidmap(fscp);
				if (error) {
					mutex_exit(&delcp->c_statelock);
					error = ENOSPC;
					goto out;
				}
				delcp->c_metadata.md_flags |= MD_MAPPING;
				delcp->c_flags |= CN_UPDATED;
			}
			mutex_exit(&delcp->c_statelock);
		}

		/* make sure we can delete file */
		mutex_enter(&ndcp->c_statelock);
		error = cachefs_stickyrmchk(ndcp, delcp, cr);
		mutex_exit(&ndcp->c_statelock);
		if (error != 0)
			goto out;

		/*
		 * If the cnode is active, make a link to the file
		 * so operations on the file will continue.
		 */
		dnlc_purge_vp(delvp);
		if ((delvp->v_type != VDIR) &&
		    !((delvp->v_count == 1) ||
		    ((delvp->v_count == 2) && delcp->c_ipending))) {
			error = cachefs_remove_dolink(ndvp, delvp, nnm, cr);
			if (error)
				goto out;
		}
	}

	/* purge mappings to file in the old directory */
	dnlc_purge_vp(odvp);

	/* purge mappings in the new dir if we deleted a file */
	if (delvp && (odvp != ndvp))
		dnlc_purge_vp(ndvp);

	/* find the entry in the old directory */
	mutex_enter(&odcp->c_statelock);
	if ((odcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&odcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	cookiep = NULL;
	error = cachefs_dir_look(odcp, onm, &cookie, NULL, NULL, &cid);
	/* EINVAL means found but no usable cookie; cid is still valid */
	if (error == 0 || error == EINVAL) {
		if (error == 0)
			cookiep = &cookie;
	} else {
		mutex_exit(&odcp->c_statelock);
		if (error == ENOTDIR)
			error = ETIMEDOUT;
		goto out;
	}
	error = 0;

	/* write the log entry (odcp->c_statelock is still held) */
	commit = cachefs_dlog_rename(fscp, odcp, onm, ndcp, nnm, cr,
	    recp, delcp);
	if (commit == 0) {
		mutex_exit(&odcp->c_statelock);
		error = ENOSPC;
		goto out;
	}

	/* remove the directory entry from the old directory */
	cachefs_modified(odcp);
	error = cachefs_dir_rmentry(odcp, onm);
	if (error) {
		mutex_exit(&odcp->c_statelock);
		if (error == ENOTDIR)
			error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&odcp->c_statelock);

	/* install the directory entry in the new directory */
	mutex_enter(&ndcp->c_statelock);
	error = ENOTDIR;
	if (ndcp->c_metadata.md_flags & MD_POPULATED) {
		ASSERT(cid.cid_fileno != 0);
		cachefs_modified(ndcp);
		error = 0;
		if (delvp) {
			error = cachefs_dir_rmentry(ndcp, nnm);
		}
		if (error == 0) {
			error = cachefs_dir_enter(ndcp, nnm, cookiep,
			    &cid, SM_ASYNC);
		}
	}
	if (error) {
		/*
		 * The source entry is already gone but the target entry
		 * could not be installed; stop caching both directories.
		 */
		cachefs_nocache(ndcp);
		mutex_exit(&ndcp->c_statelock);
		mutex_enter(&odcp->c_statelock);
		cachefs_nocache(odcp);
		mutex_exit(&odcp->c_statelock);
		if (error == ENOTDIR)
			error = ETIMEDOUT;
		goto out;
	}
	mutex_exit(&ndcp->c_statelock);

	gethrestime(&current_time);

	/* update the file we just deleted */
	if (delvp) {
		mutex_enter(&delcp->c_statelock);
		delcp->c_attr.va_nlink--;
		delcp->c_metadata.md_localctime = current_time;
		delcp->c_metadata.md_flags |= MD_LOCALCTIME;
		if (delcp->c_attr.va_nlink == 0) {
			delcp->c_flags |= CN_DESTROY;
		} else {
			delcp->c_flags |= CN_UPDATED;
		}
		mutex_exit(&delcp->c_statelock);
	}

	/* update the file we renamed */
	mutex_enter(&recp->c_statelock);
	recp->c_metadata.md_localctime = current_time;
	recp->c_metadata.md_flags |= MD_LOCALCTIME;
	recp->c_flags |= CN_UPDATED;
	mutex_exit(&recp->c_statelock);

	/* update the source directory */
	mutex_enter(&odcp->c_statelock);
	odcp->c_metadata.md_localctime = current_time;
	odcp->c_metadata.md_localmtime = current_time;
	odcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	odcp->c_flags |= CN_UPDATED;
	mutex_exit(&odcp->c_statelock);

	/* update the destination directory */
	if (odcp != ndcp) {
		mutex_enter(&ndcp->c_statelock);
		ndcp->c_metadata.md_localctime = current_time;
		ndcp->c_metadata.md_localmtime = current_time;
		ndcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
		ndcp->c_flags |= CN_UPDATED;
		mutex_exit(&ndcp->c_statelock);
	}

out:
	if (commit) {
		/* commit the log entry; error marks it cancelled on failure */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	if (odcp != ndcp)
		rw_exit(&ndcp->c_rwlock);
	rw_exit(&odcp->c_rwlock);

	/* drop the hold acquired by cachefs_lookup_common() */
	VN_RELE(revp);

	return (error);
}
5822
/*
 * cachefs_mkdir - VOP_MKDIR entry point.
 *
 * Creates directory "nm" in dvp, returning the new vnode in *vpp.
 * Loops acquiring (or renewing) access to the fscache and dispatches to
 * the connected or disconnected implementation; a CFS_TIMEOUT from the
 * connected path retries disconnected and vice versa.  NFSv4 back file
 * systems never loop (connected only).  Returns 0 or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_mkdir(vnode_t *dvp, char *nm, vattr_t *vap, vnode_t **vpp,
    cred_t *cr, caller_context_t *ct, int flags, vsecattr_t *vsecp)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* nonzero while cd access + rwlock are held */
	int connected = 0;	/* mode requested from cachefs_cd_access() */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_mkdir: ENTER dvp %p\n", (void *)dvp);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the mkdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_mkdir_connected(dvp, nm, vap,
			    vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* server timed out; retry disconnected */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_mkdir_disconnected(dvp, nm, vap,
			    vpp, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* needs the back fs; retry connected */
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_MKDIR)) {
		fid_t *fidp = NULL;
		ino64_t fileno = 0;
		cnode_t *cp = NULL;
		if (error == 0)
			cp = VTOC(*vpp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}

		cachefs_log_mkdir(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}
	if (error == 0 && CFS_ISFS_NONSHARED(fscp))
		(void) cachefs_pack(dvp, nm, cr);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_mkdir: EXIT error = %d\n", error);
#endif
	return (error);
}
5930
/*
 * cachefs_mkdir_connected
 *
 * Make directory "nm" in dvp while connected to the back file system.
 * The directory is created on the back fs with VOP_MKDIR(), a cnode is
 * built for it, and the cached parent directory (if populated) gets the
 * new entry.  On success *vpp holds the new directory's vnode.
 * Returns 0 or an errno value.
 */
static int
cachefs_mkdir_connected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr)
{
	cnode_t *newcp = NULL, *dcp = VTOC(dvp);
	struct vnode *vp = NULL;	/* back-fs vnode of the new dir */
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct fid cookie;
	struct vattr attr;
	cfs_cid_t cid, dircid;
	uint32_t valid_fid;

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	mutex_enter(&dcp->c_statelock);

	/* get backvp of dir */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* consistency check the directory */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	dircid = dcp->c_id;

	/* make the dir on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_mkdir (nfsv4): dcp %p, dbackvp %p, "
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_MKDIR(dcp->c_backvp, nm, vap, &vp, cr, NULL, 0, NULL);
	mutex_exit(&dcp->c_statelock);
	if (error) {
		goto out;
	}

	/* get the cookie and make the cnode */
	attr.va_mask = AT_ALL;
	/* NFSv4 pass-through does not use fids */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(vp, &cookie, &attr, cr, valid_fid);
	if (error) {
		goto out;
	}
	cid.cid_flags = 0;
	cid.cid_fileno = attr.va_nodeid;
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
	    &attr, vp, cr, 0, &newcp);
	if (error) {
		goto out;
	}
	ASSERT(CTOV(newcp)->v_type == VDIR);
	*vpp = CTOV(newcp);

	/* if the dir is populated, add the new entry */
	mutex_enter(&dcp->c_statelock);
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_enter(dcp, nm, &cookie, &newcp->c_id,
		    SM_ASYNC);
		if (error) {
			/* cached dir is suspect; stop caching it */
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	dcp->c_attr.va_nlink++;
	dcp->c_flags |= CN_UPDATED;
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);
	mutex_exit(&dcp->c_statelock);

	/* XXX bob: should we do a filldir here? or just add . and .. */
	/* maybe should kick off an async filldir so caller does not wait */

	/* put the entry in the dnlc */
	if (cachefs_dnlc)
		dnlc_enter(dvp, nm, *vpp);

	/* save the fileno of the parent so can find the name */
	if (bcmp(&newcp->c_metadata.md_parent, &dircid,
	    sizeof (cfs_cid_t)) != 0) {
		mutex_enter(&newcp->c_statelock);
		newcp->c_metadata.md_parent = dircid;
		newcp->c_flags |= CN_UPDATED;
		mutex_exit(&newcp->c_statelock);
	}
out:
	/* drop the back-fs hold from VOP_MKDIR (cnode_make took its own) */
	if (vp)
		VN_RELE(vp);

	return (error);
}
6030
/*
 * cachefs_mkdir_disconnected
 *
 * Make directory "nm" in dvp while disconnected from the back file
 * system.  A local cnode and front file (with "." and "..") are built
 * from made-up attributes, the metadata is written eagerly so ENOSPC is
 * reported now rather than at inactive time, and the operation is
 * recorded in the dlog for replay at reconnect.  On success *vpp holds
 * the new directory's vnode.  Returns 0 or an errno value; ETIMEDOUT
 * means the operation needs connected mode.
 */
static int
cachefs_mkdir_disconnected(vnode_t *dvp, char *nm, vattr_t *vap,
    vnode_t **vpp, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog offset; nonzero once logged */
	char *s;
	int namlen;

	/* don't allow '/' characters in pathname component */
	for (s = nm, namlen = 0; *s; s++, namlen++)
		if (*s == '/')
			return (EACCES);
	if (namlen == 0)
		return (EINVAL);

	/* write-around file systems cannot buffer changes locally */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions */
	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure nm does not already exist in the directory */
	error = cachefs_dir_look(dcp, nm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		/* found it (or cannot tell); refuse to create */
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(vap, &va, dcp, cr);
	va.va_type = VDIR;
	va.va_mode |= S_IFDIR;
	va.va_nlink = 2;	/* "." and the parent's entry */

	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
	if (error)
		goto out;

	mutex_enter(&newcp->c_statelock);

	/* log a cid-to-fid mapping so replay can identify the new dir */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}

	cachefs_creategid(dcp, newcp, vap, cr);
	mutex_enter(&dcp->c_statelock);
	cachefs_createacl(dcp, newcp);
	mutex_exit(&dcp->c_statelock);
	gethrestime(&current_time);
	newcp->c_metadata.md_vattr.va_atime = current_time;
	newcp->c_metadata.md_localctime = current_time;
	newcp->c_metadata.md_localmtime = current_time;
	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
	    MD_LOCALCTIME;
	newcp->c_flags |= CN_UPDATED;

	/* make a front file for the new directory, add . and .. */
	error = cachefs_dir_new(dcp, newcp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}
	cachefs_modified(newcp);

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	ASSERT(newcp->c_frontvp);
	ASSERT((newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) == 0);
	ASSERT((newcp->c_flags & CN_ALLOC_PENDING) == 0);
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);

	/* log the operation */
	commit = cachefs_dlog_mkdir(fscp, dcp, newcp, nm, &va, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	mutex_enter(&dcp->c_statelock);

	/* make sure directory is still populated (lock was dropped above) */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		mutex_exit(&dcp->c_statelock);
		error = ETIMEDOUT;
		goto out;
	}
	cachefs_modified(dcp);

	/* enter the new file in the directory */
	error = cachefs_dir_enter(dcp, nm, &newcp->c_metadata.md_cookie,
	    &newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink++;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry; error marks it cancelled on failure */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}
	if (error) {
		/* undo: mark the half-built cnode for destruction */
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
			VN_RELE(CTOV(newcp));
		}
	} else {
		*vpp = CTOV(newcp);
	}
	return (error);
}
6193
/*
 * cachefs_rmdir - VOP_RMDIR entry point.
 *
 * Removes directory "nm" from dvp; cdir is the caller's current
 * directory (removal of it is refused).  Loops acquiring access to the
 * fscache, looks up the victim, and dispatches to the connected or
 * disconnected implementation, switching modes on CFS_TIMEOUT.  Takes
 * vn_vfswlock() on the victim to close the race with mount (see
 * ufs_dirremove).  Returns 0 or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_rmdir(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    caller_context_t *ct, int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* nonzero while cd access is held */
	int connected = 0;	/* mode requested from cachefs_cd_access() */
	size_t namlen;
	vnode_t *vp = NULL;	/* vnode of the directory being removed */
	int vfslock = 0;	/* nonzero while vn_vfswlock(vp) is held */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_rmdir: ENTER vp %p\n", (void *)dvp);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & (CACHE_NOFILL | CACHE_NOCACHE))
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the rmdir operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* drop state from the previous iteration, if any */
		if (vfslock) {
			vn_vfsunlock(vp);
			vfslock = 0;
		}
		if (vp) {
			VN_RELE(vp);
			vp = NULL;
		}

		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* if disconnected, do some extra error checking */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			/* check permissions */
			mutex_enter(&dcp->c_statelock);
			error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr);
			mutex_exit(&dcp->c_statelock);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
			if (error)
				break;

			namlen = strlen(nm);
			if (namlen == 0) {
				error = EINVAL;
				break;
			}

			/* cannot remove . and .. */
			if (nm[0] == '.') {
				if (namlen == 1) {
					error = EINVAL;
					break;
				} else if (namlen == 2 && nm[1] == '.') {
					error = EEXIST;
					break;
				}
			}

		}

		/* get the cnode of the dir to remove */
		error = cachefs_lookup_common(dvp, nm, &vp, NULL, 0, NULL, cr);
		if (error) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					/* server timed out; go disconnected */
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			} else {
				if (CFS_TIMEOUT(fscp, error)) {
					/* needs the back fs; go connected */
					connected = 1;
					continue;
				}
			}
			break;
		}

		/* must be a dir */
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			break;
		}

		/* must not be current dir */
		if (VOP_CMP(vp, cdir, ct)) {
			error = EINVAL;
			break;
		}

		/* see ufs_dirremove for why this is done, mount race */
		if (vn_vfswlock(vp)) {
			error = EBUSY;
			break;
		}
		vfslock = 1;
		if (vn_mountedvfs(vp) != NULL) {
			error = EBUSY;
			break;
		}

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_rmdir_connected(dvp, nm, cdir,
			    cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_rmdir_disconnected(dvp, nm, cdir,
			    cr, vp);
			if (CFS_TIMEOUT(fscp, error)) {
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_RMDIR)) {
		ino64_t fileno = 0;
		fid_t *fidp = NULL;
		cnode_t *cp = NULL;
		if (vp)
			cp = VTOC(vp);

		if (cp != NULL) {
			fidp = &cp->c_metadata.md_cookie;
			fileno = cp->c_id.cid_fileno;
		}

		cachefs_log_rmdir(cachep, error, fscp->fs_cfsvfsp,
		    fidp, fileno, crgetuid(cr));
	}

	if (held) {
		cachefs_cd_release(fscp);
	}

	if (vfslock)
		vn_vfsunlock(vp);

	if (vp)
		VN_RELE(vp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_rmdir: EXIT error = %d\n", error);
#endif

	return (error);
}
6389
/*
 * cachefs_rmdir_connected
 *
 * Remove directory "nm" (vnode vp) from dvp while connected to the back
 * file system.  The removal is done on the back fs with VOP_RMDIR();
 * the cached parent directory and both cnodes' link counts are then
 * updated.  Called with vp's vfs lock held by cachefs_rmdir().
 * Returns 0 or an errno value.
 */
static int
cachefs_rmdir_connected(vnode_t *dvp, char *nm, vnode_t *cdir, cred_t *cr,
    vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(dcp);

	/* parent rwlock, then both statelocks, held across the whole op */
	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			goto out;
		}
	}

	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error)
		goto out;

	/* rmdir on the back fs */
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_rmdir (nfsv4): dcp %p, dbackvp %p, "
	    "name %s\n", dcp, dcp->c_backvp, nm));
	error = VOP_RMDIR(dcp->c_backvp, nm, cdir, cr, NULL, 0);
	if (error)
		goto out;

	/* if the dir is populated, remove the entry from it */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_rmentry(dcp, nm);
		if (error) {
			/* cached dir is suspect; stop caching it */
			cachefs_nocache(dcp);
			error = 0;
		}
	}

	/*
	 * *if* the (hard) link count goes to 0, then we set the CDESTROY
	 * flag on the cnode. The cached object will then be destroyed
	 * at inactive time where the chickens come home to roost :-)
	 * The link cnt for directories is bumped down by 2 'cause the "."
	 * entry has to be elided too ! The link cnt for the parent goes down
	 * by 1 (because of "..").
	 */
	cp->c_attr.va_nlink -= 2;
	dcp->c_attr.va_nlink--;
	if (cp->c_attr.va_nlink == 0) {
		cp->c_flags |= CN_DESTROY;
	} else {
		cp->c_flags |= CN_UPDATED;
	}
	dcp->c_flags |= CN_UPDATED;

	dnlc_purge_vp(vp);
	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

out:
	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	rw_exit(&dcp->c_rwlock);

	return (error);
}
6459
/*
 * cachefs_rmdir_disconnected
 *
 * Remove directory "nm" (vnode vp) from dvp while disconnected from the
 * back file system.  Operates on the cached directory contents only and
 * records the removal in the dlog for replay at reconnect.  Returns 0
 * or an errno value; ETIMEDOUT means connected mode is required.
 */
static int
/*ARGSUSED*/
cachefs_rmdir_disconnected(vnode_t *dvp, char *nm, vnode_t *cdir,
    cred_t *cr, vnode_t *vp)
{
	cnode_t *dcp = VTOC(dvp);
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	off_t commit = 0;	/* dlog offset; nonzero once logged */
	timestruc_t current_time;

	/* write-around file systems cannot buffer changes locally */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	rw_enter(&dcp->c_rwlock, RW_WRITER);
	mutex_enter(&dcp->c_statelock);
	mutex_enter(&cp->c_statelock);

	/* both directories must be populated */
	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) ||
	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
		error = ETIMEDOUT;
		goto out;
	}

	/* if sticky bit set on the dir, more access checks to perform */
	if (error = cachefs_stickyrmchk(dcp, cp, cr)) {
		goto out;
	}

	/*
	 * Make sure dir is empty.  A link count of 2 (just "." and the
	 * parent's entry) means it cannot have subdirectories, but files
	 * may still be present, hence the explicit scan.
	 */
	if (cp->c_attr.va_nlink > 2) {
		error = cachefs_dir_empty(cp);
		if (error) {
			if (error == ENOTDIR)
				error = ETIMEDOUT;
			goto out;
		}
		cachefs_modified(cp);
	}
	cachefs_modified(dcp);

	/* log the operation */
	commit = cachefs_dlog_rmdir(fscp, dcp, nm, cp, cr);
	if (commit == 0) {
		error = ENOSPC;
		goto out;
	}

	/* remove name from parent dir */
	error = cachefs_dir_rmentry(dcp, nm);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		goto out;
	}
	if (error)
		goto out;

	gethrestime(&current_time);

	/* update deleted dir values ("." and the parent's entry go away) */
	cp->c_attr.va_nlink -= 2;
	if (cp->c_attr.va_nlink == 0)
		cp->c_flags |= CN_DESTROY;
	else {
		cp->c_metadata.md_localctime = current_time;
		cp->c_metadata.md_flags |= MD_LOCALCTIME;
		cp->c_flags |= CN_UPDATED;
	}

	/* update parent values (loses the child's "..") */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALCTIME | MD_LOCALMTIME;
	dcp->c_attr.va_nlink--;
	dcp->c_flags |= CN_UPDATED;

out:
	mutex_exit(&cp->c_statelock);
	mutex_exit(&dcp->c_statelock);
	rw_exit(&dcp->c_rwlock);
	if (commit) {
		/* commit the log entry; error marks it cancelled on failure */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
		dnlc_purge_vp(vp);
	}
	return (error);
}
6552
/*
 * cachefs_symlink - VOP_SYMLINK entry point.
 *
 * Creates symbolic link "lnm" in dvp with target path "tnm" and
 * attributes tva.  Loops acquiring access to the fscache and dispatches
 * to the connected or disconnected implementation, switching modes on
 * CFS_TIMEOUT.  NFSv4 back file systems never loop (connected only).
 * Returns 0 or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_symlink(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr, caller_context_t *ct, int flags)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	cachefscache_t *cachep = fscp->fs_cache;
	int error = 0;
	int held = 0;		/* nonzero while cd access + rwlock are held */
	int connected = 0;	/* mode requested from cachefs_cd_access() */

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: ENTER dvp %p lnm %s tnm %s\n",
		    (void *)dvp, lnm, tnm);
#endif

	/* cachefs is only supported in the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	if (fscp->fs_cache->c_flags & CACHE_NOCACHE)
		ASSERT(dcp->c_flags & CN_NOCACHE);

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the symlink operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			rw_exit(&dcp->c_rwlock);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		rw_enter(&dcp->c_rwlock, RW_WRITER);
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_symlink_connected(dvp, lnm, tva,
			    tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* server timed out; retry disconnected */
				rw_exit(&dcp->c_rwlock);
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_symlink_disconnected(dvp, lnm, tva,
			    tnm, cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* needs the back fs; retry connected */
				connected = 1;
				continue;
			}
		}
		break;
	}

	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_SYMLINK))
		cachefs_log_symlink(cachep, error, fscp->fs_cfsvfsp,
		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
		    crgetuid(cr), (uint_t)strlen(tnm));

	if (held) {
		rw_exit(&dcp->c_rwlock);
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
out:
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_symlink: EXIT error = %d\n", error);
#endif
	return (error);
}
6648
/*
 * cachefs_symlink_connected
 *
 * Performs the symlink operation against the back file system while in
 * connected mode, then looks the new link up on the back fs, makes a
 * cnode for it, and tries to cache its contents.  Most local caching
 * failures are absorbed by putting the directory into nocache mode and
 * returning success, since the back fs operation already completed.
 */
static int
cachefs_symlink_connected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error = 0;
	vnode_t *backvp = NULL;
	cnode_t *newcp = NULL;
	struct vattr va;
	struct fid cookie;
	cfs_cid_t cid;
	uint32_t valid_fid;

	mutex_enter(&dcp->c_statelock);

	/* get the directory's back vnode if we don't have it yet */
	if (dcp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, dcp);
		if (error) {
			cachefs_nocache(dcp);
			mutex_exit(&dcp->c_statelock);
			goto out;
		}
	}

	/* verify the cached directory object is still consistent */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_symlink (nfsv4): dcp %p, dbackvp %p, "
	    "lnm %s, tnm %s\n", dcp, dcp->c_backvp, lnm, tnm));
	error = VOP_SYMLINK(dcp->c_backvp, lnm, tva, tnm, cr, NULL, 0);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	/* if the cached dir is not writable, stop caching it (non-NFSv4) */
	if ((dcp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, dcp, cr);

	/* lookup the symlink we just created and get its fid and attrs */
	(void) VOP_LOOKUP(dcp->c_backvp, lnm, &backvp, NULL, 0, NULL, cr,
	    NULL, NULL, NULL);
	/* a failed lookup leaves backvp NULL; give up on caching the dir */
	if (backvp == NULL) {
		if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
			cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* NFSv4 pass-through does not use fids */
	valid_fid = (CFS_ISFS_BACKFS_NFSV4(fscp) ? FALSE : TRUE);
	error = cachefs_getcookie(backvp, &cookie, &va, cr, valid_fid);
	if (error) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = 0;
		cachefs_nocache(dcp);
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cid.cid_fileno = va.va_nodeid;
	cid.cid_flags = 0;

	/* if the dir is cached, add the symlink to it */
	if (CFS_ISFS_NONSHARED(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		error = cachefs_dir_enter(dcp, lnm, &cookie, &cid, SM_ASYNC);
		if (error) {
			cachefs_nocache(dcp);
			error = 0;
		}
	}
	mutex_exit(&dcp->c_statelock);

	/* make the cnode for the sym link */
	error = cachefs_cnode_make(&cid, fscp, (valid_fid ? &cookie : NULL),
	    &va, backvp, cr, 0, &newcp);
	if (error) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		cachefs_nocache(dcp);
		error = 0;
		goto out;
	}

	/* try to cache the symlink contents */
	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	/*
	 * try to cache the sym link, note that its a noop if NOCACHE
	 * or NFSv4 is set
	 */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		cachefs_nocache(newcp);
		error = 0;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

out:
	if (backvp)
		VN_RELE(backvp);
	if (newcp)
		VN_RELE(CTOV(newcp));
	return (error);
}
6762
/*
 * cachefs_symlink_disconnected
 *
 * Creates the symlink locally while disconnected from the back file
 * system.  The new cnode and its metadata are written to the cache
 * immediately (so ENOSPC can be reported to the caller now), and the
 * operation is recorded in the dlog for replay against the back fs at
 * reconnect time.  Returns ETIMEDOUT to force a retry in connected
 * mode whenever local state is insufficient to do the operation.
 */
static int
cachefs_symlink_disconnected(vnode_t *dvp, char *lnm, vattr_t *tva,
    char *tnm, cred_t *cr)
{
	cnode_t *dcp = VTOC(dvp);
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	int error;
	cnode_t *newcp = NULL;
	struct vattr va;
	timestruc_t current_time;
	off_t commit = 0;	/* dlog offset; nonzero once entry is logged */

	/* write-around filesystems require connected operation */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&dcp->c_statelock);

	/* check permissions */
	if (error = cachefs_access_local(dcp, (VEXEC|VWRITE), cr)) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* the directory front file must be populated */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make sure lnm does not already exist in the directory */
	error = cachefs_dir_look(dcp, lnm, NULL, NULL, NULL, NULL);
	if (error == ENOTDIR) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	if (error != ENOENT) {
		error = EEXIST;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* make up a reasonable set of attributes */
	cachefs_attr_setup(tva, &va, dcp, cr);
	va.va_type = VLNK;
	va.va_mode |= S_IFLNK;
	va.va_size = strlen(tnm);

	mutex_exit(&dcp->c_statelock);

	/* create the cnode */
	error = cachefs_cnode_create(fscp, &va, 0, &newcp);
	if (error)
		goto out;

	rw_enter(&newcp->c_rwlock, RW_WRITER);
	mutex_enter(&newcp->c_statelock);

	/* the fileno mapping must exist before anything can be logged */
	error = cachefs_dlog_cidmap(fscp);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/* set ownership/ACL and stamp local modification times */
	cachefs_creategid(dcp, newcp, tva, cr);
	mutex_enter(&dcp->c_statelock);
	cachefs_createacl(dcp, newcp);
	mutex_exit(&dcp->c_statelock);
	gethrestime(&current_time);
	newcp->c_metadata.md_vattr.va_atime = current_time;
	newcp->c_metadata.md_localctime = current_time;
	newcp->c_metadata.md_localmtime = current_time;
	newcp->c_metadata.md_flags |= MD_MAPPING | MD_LOCALMTIME |
	    MD_LOCALCTIME;
	newcp->c_flags |= CN_UPDATED;

	/* log the operation */
	commit = cachefs_dlog_symlink(fscp, dcp, newcp, lnm, tva, tnm, cr);
	if (commit == 0) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/* store the symlink contents */
	error = cachefs_stuffsymlink(newcp, tnm, (int)newcp->c_size);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	if (cachefs_modified_alloc(newcp)) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		error = ENOSPC;
		goto out;
	}

	/*
	 * write the metadata now rather than waiting until
	 * inactive so that if there's no space we can let
	 * the caller know.
	 */
	if (newcp->c_flags & CN_ALLOC_PENDING) {
		if (newcp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(newcp->c_filegrp);
		}
		error = filegrp_create_metadata(newcp->c_filegrp,
		    &newcp->c_metadata, &newcp->c_id);
		if (error) {
			mutex_exit(&newcp->c_statelock);
			rw_exit(&newcp->c_rwlock);
			goto out;
		}
		newcp->c_flags &= ~CN_ALLOC_PENDING;
	}
	error = filegrp_write_metadata(newcp->c_filegrp,
	    &newcp->c_id, &newcp->c_metadata);
	if (error) {
		mutex_exit(&newcp->c_statelock);
		rw_exit(&newcp->c_rwlock);
		goto out;
	}
	mutex_exit(&newcp->c_statelock);
	rw_exit(&newcp->c_rwlock);

	mutex_enter(&dcp->c_statelock);

	/* enter the new file in the directory */
	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
		error = ETIMEDOUT;
		mutex_exit(&dcp->c_statelock);
		goto out;
	}
	cachefs_modified(dcp);
	error = cachefs_dir_enter(dcp, lnm, &newcp->c_metadata.md_cookie,
	    &newcp->c_id, SM_ASYNC);
	if (error) {
		mutex_exit(&dcp->c_statelock);
		goto out;
	}

	/* update parent dir times */
	dcp->c_metadata.md_localctime = current_time;
	dcp->c_metadata.md_localmtime = current_time;
	dcp->c_metadata.md_flags |= MD_LOCALMTIME | MD_LOCALCTIME;
	dcp->c_flags |= CN_UPDATED;
	mutex_exit(&dcp->c_statelock);

out:
	if (commit) {
		/* commit the log entry */
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX bob: fix on panic */
		}
	}

	/* on failure, mark the partially created cnode for destruction */
	if (error) {
		if (newcp) {
			mutex_enter(&newcp->c_statelock);
			newcp->c_flags |= CN_DESTROY;
			mutex_exit(&newcp->c_statelock);
		}
	}
	if (newcp) {
		VN_RELE(CTOV(newcp));
	}

	return (error);
}
6938
6939/*ARGSUSED*/
6940static int
6941cachefs_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
6942    caller_context_t *ct, int flags)
6943{
6944	cnode_t *dcp = VTOC(vp);
6945	fscache_t *fscp = C_TO_FSCACHE(dcp);
6946	cachefscache_t *cachep = fscp->fs_cache;
6947	int error = 0;
6948	int held = 0;
6949	int connected = 0;
6950
6951#ifdef CFSDEBUG
6952	CFS_DEBUG(CFSDEBUG_VOPS)
6953		printf("cachefs_readdir: ENTER vp %p\n", (void *)vp);
6954#endif
6955	if (getzoneid() != GLOBAL_ZONEID) {
6956		error = EPERM;
6957		goto out;
6958	}
6959
6960	/*
6961	 * Cachefs only provides pass-through support for NFSv4,
6962	 * and all vnode operations are passed through to the
6963	 * back file system. For NFSv4 pass-through to work, only
6964	 * connected operation is supported, the cnode backvp must
6965	 * exist, and cachefs optional (eg., disconnectable) flags
6966	 * are turned off. Assert these conditions to ensure that
6967	 * the backfilesystem is called for the readdir operation.
6968	 */
6969	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
6970	CFS_BACKFS_NFSV4_ASSERT_CNODE(dcp);
6971
6972	for (;;) {
6973		/* get (or renew) access to the file system */
6974		if (held) {
6975			/* Won't loop with NFSv4 connected behavior */
6976			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
6977			rw_exit(&dcp->c_rwlock);
6978			cachefs_cd_release(fscp);
6979			held = 0;
6980		}
6981		error = cachefs_cd_access(fscp, connected, 0);
6982		if (error)
6983			break;
6984		rw_enter(&dcp->c_rwlock, RW_READER);
6985		held = 1;
6986
6987		/* quit if link count of zero (posix) */
6988		if (dcp->c_attr.va_nlink == 0) {
6989			if (eofp)
6990				*eofp = 1;
6991			error = 0;
6992			break;
6993		}
6994
6995		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
6996			error = cachefs_readdir_connected(vp, uiop, cr,
6997			    eofp);
6998			if (CFS_TIMEOUT(fscp, error)) {
6999				rw_exit(&dcp->c_rwlock);
7000				cachefs_cd_release(fscp);
7001				held = 0;
7002				cachefs_cd_timedout(fscp);
7003				connected = 0;
7004				continue;
7005			}
7006		} else {
7007			error = cachefs_readdir_disconnected(vp, uiop, cr,
7008			    eofp);
7009			if (CFS_TIMEOUT(fscp, error)) {
7010				if (cachefs_cd_access_miss(fscp)) {
7011					error = cachefs_readdir_connected(vp,
7012					    uiop, cr, eofp);
7013					if (!CFS_TIMEOUT(fscp, error))
7014						break;
7015					delay(5*hz);
7016					connected = 0;
7017					continue;
7018				}
7019				connected = 1;
7020				continue;
7021			}
7022		}
7023		break;
7024	}
7025
7026	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_READDIR))
7027		cachefs_log_readdir(cachep, error, fscp->fs_cfsvfsp,
7028		    &dcp->c_metadata.md_cookie, dcp->c_id.cid_fileno,
7029		    crgetuid(cr), uiop->uio_loffset, *eofp);
7030
7031	if (held) {
7032		rw_exit(&dcp->c_rwlock);
7033		cachefs_cd_release(fscp);
7034	}
7035
7036#ifdef CFS_CD_DEBUG
7037	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7038#endif
7039out:
7040#ifdef CFSDEBUG
7041	CFS_DEBUG(CFSDEBUG_VOPS)
7042		printf("cachefs_readdir: EXIT error = %d\n", error);
7043#endif
7044
7045	return (error);
7046}
7047
/*
 * cachefs_readdir_connected
 *
 * Reads directory entries while connected.  Prefers the cached front
 * file: if the front file is not populated, population is kicked off
 * (asynchronously when possible, otherwise inline).  When the front
 * file cannot be used, the read is satisfied directly from the back
 * file system, with fileno translation when a translation table is in
 * use (fs_inum_size > 0).
 *
 * Called with the cnode rwlock held; takes and releases c_statelock.
 */
static int
cachefs_readdir_connected(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp)
{
	cnode_t *dcp = VTOC(vp);
	int error;
	fscache_t *fscp = C_TO_FSCACHE(dcp);
	struct cachefs_req *rp;

	mutex_enter(&dcp->c_statelock);

	/* check directory consistency */
	error = CFSOP_CHECK_COBJECT(fscp, dcp, 0, cr);
	if (error)
		goto out;
	dcp->c_usage++;

	/* if dir was modified, toss old contents */
	if (dcp->c_metadata.md_flags & MD_INVALREADDIR) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		cachefs_inval_object(dcp);
	}

	error = 0;
	/* front file not populated and nothing in the way: populate it */
	if (((dcp->c_metadata.md_flags & MD_POPULATED) == 0) &&
	    ((dcp->c_flags & (CN_ASYNC_POPULATE | CN_NOCACHE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (fscp->fs_cdconnected == CFS_CD_CONNECTED)) {

		if (cachefs_async_okay()) {

			/*
			 * Set up asynchronous request to fill this
			 * directory.
			 */

			dcp->c_flags |= CN_ASYNC_POPULATE;

			rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
			rp->cfs_cmd = CFS_POPULATE;
			rp->cfs_req_u.cu_populate.cpop_vp = vp;
			rp->cfs_cr = cr;

			/* the async worker drops these holds when done */
			crhold(cr);
			VN_HOLD(vp);

			cachefs_addqueue(rp, &fscp->fs_workq);
		} else {
			/* fall back to synchronous population */
			error = cachefs_dir_fill(dcp, cr);
			if (error != 0)
				cachefs_nocache(dcp);
		}
	}

	/* if front file is populated */
	if (((dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp) &&
	    (dcp->c_metadata.md_flags & MD_POPULATED)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		error = cachefs_dir_read(dcp, uiop, eofp);
		if (error == 0)
			fscp->fs_stats.st_hits++;
	}

	/* if front file could not be used */
	if ((error != 0) ||
	    CFS_ISFS_BACKFS_NFSV4(fscp) ||
	    (dcp->c_flags & (CN_NOCACHE | CN_ASYNC_POPULATE)) ||
	    ((dcp->c_metadata.md_flags & MD_POPULATED) == 0)) {

		if (error && !(dcp->c_flags & CN_NOCACHE) &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp))
			cachefs_nocache(dcp);

		/* get the back vp */
		if (dcp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, dcp);
			if (error)
				goto out;
		}

		/* fileno translation table in use: rewrite inode numbers */
		if (fscp->fs_inum_size > 0) {
			error = cachefs_readback_translate(dcp, uiop, cr, eofp);
		} else {
			/* do the dir read from the back fs */
			(void) VOP_RWLOCK(dcp->c_backvp,
			    V_WRITELOCK_FALSE, NULL);
			CFS_DPRINT_BACKFS_NFSV4(fscp,
			    ("cachefs_readdir (nfsv4): "
			    "dcp %p, dbackvp %p\n", dcp, dcp->c_backvp));
			error = VOP_READDIR(dcp->c_backvp, uiop, cr, eofp,
			    NULL, 0);
			VOP_RWUNLOCK(dcp->c_backvp, V_WRITELOCK_FALSE, NULL);
		}

		if (error == 0)
			fscp->fs_stats.st_misses++;
	}

out:
	mutex_exit(&dcp->c_statelock);

	return (error);
}
7151
/*
 * cachefs_readback_translate
 *
 * Reads directory entries from the back file system into a staging
 * buffer, rewrites each entry's inode number from the back fs fileno
 * space into the cachefs (fake) fileno space, then copies the result
 * out to the caller's uio.  Only called when fs_inum_size > 0, i.e. a
 * fileno translation table exists.
 *
 * Entered with cp->c_statelock held (by cachefs_readdir_connected);
 * the lock is dropped while fs_fslock is held for the translation
 * pass and then reacquired.  NOTE(review): presumably fs_fslock may
 * not be taken while holding c_statelock -- confirm lock ordering.
 */
static int
cachefs_readback_translate(cnode_t *cp, uio_t *uiop, cred_t *cr, int *eofp)
{
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	caddr_t buffy = NULL;
	int buffysize = MAXBSIZE;
	caddr_t chrp, end;
	ino64_t newinum;
	struct dirent64 *de;
	uio_t uioin;
	iovec_t iov;

	ASSERT(cp->c_backvp != NULL);
	ASSERT(fscp->fs_inum_size > 0);

	/* never stage more than the caller asked for */
	if (uiop->uio_resid < buffysize)
		buffysize = (int)uiop->uio_resid;
	buffy = cachefs_kmem_alloc(buffysize, KM_SLEEP);

	/*
	 * Build a kernel-space uio over the staging buffer.
	 * NOTE(review): uioin is only partially initialized; fields not
	 * set here are assumed unused by VOP_READDIR -- confirm.
	 */
	iov.iov_base = buffy;
	iov.iov_len = buffysize;
	uioin.uio_iov = &iov;
	uioin.uio_iovcnt = 1;
	uioin.uio_segflg = UIO_SYSSPACE;
	uioin.uio_fmode = 0;
	uioin.uio_extflg = UIO_COPY_CACHED;
	uioin.uio_loffset = uiop->uio_loffset;
	uioin.uio_resid = buffysize;

	(void) VOP_RWLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(cp->c_backvp, &uioin, cr, eofp, NULL, 0);
	VOP_RWUNLOCK(cp->c_backvp, V_WRITELOCK_FALSE, NULL);

	if (error != 0)
		goto out;

	/* end of the data actually read */
	end = buffy + buffysize - uioin.uio_resid;

	mutex_exit(&cp->c_statelock);
	mutex_enter(&fscp->fs_fslock);


	/* walk the entries, mapping each real fileno to a fake one */
	for (chrp = buffy; chrp < end; chrp += de->d_reclen) {
		de = (dirent64_t *)chrp;
		newinum = cachefs_inum_real2fake(fscp, de->d_ino);
		if (newinum == 0)
			newinum = cachefs_fileno_conflict(fscp, de->d_ino);
		de->d_ino = newinum;
	}
	mutex_exit(&fscp->fs_fslock);
	mutex_enter(&cp->c_statelock);

	/* copy the translated entries out and advance the caller's offset */
	error = uiomove(buffy, end - buffy, UIO_READ, uiop);
	uiop->uio_loffset = uioin.uio_loffset;

out:

	if (buffy != NULL)
		cachefs_kmem_free(buffy, buffysize);

	return (error);
}
7215
7216static int
7217/*ARGSUSED*/
7218cachefs_readdir_disconnected(vnode_t *vp, uio_t *uiop, cred_t *cr,
7219    int *eofp)
7220{
7221	cnode_t *dcp = VTOC(vp);
7222	int error;
7223
7224	mutex_enter(&dcp->c_statelock);
7225	if ((dcp->c_metadata.md_flags & MD_POPULATED) == 0) {
7226		error = ETIMEDOUT;
7227	} else {
7228		error = cachefs_dir_read(dcp, uiop, eofp);
7229		if (error == ENOTDIR)
7230			error = ETIMEDOUT;
7231	}
7232	mutex_exit(&dcp->c_statelock);
7233
7234	return (error);
7235}
7236
7237/*ARGSUSED*/
7238static int
7239cachefs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
7240{
7241	int error = 0;
7242	struct cnode *cp = VTOC(vp);
7243	fscache_t *fscp = C_TO_FSCACHE(cp);
7244
7245	/*
7246	 * Cachefs only provides pass-through support for NFSv4,
7247	 * and all vnode operations are passed through to the
7248	 * back file system. For NFSv4 pass-through to work, only
7249	 * connected operation is supported, the cnode backvp must
7250	 * exist, and cachefs optional (eg., disconnectable) flags
7251	 * are turned off. Assert these conditions, then bail
7252	 * as  NFSv4 doesn't support VOP_FID.
7253	 */
7254	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7255	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7256	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7257		return (ENOTSUP);
7258	}
7259
7260	mutex_enter(&cp->c_statelock);
7261	if (fidp->fid_len < cp->c_metadata.md_cookie.fid_len) {
7262		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7263		error = ENOSPC;
7264	} else {
7265		bcopy(cp->c_metadata.md_cookie.fid_data, fidp->fid_data,
7266		    cp->c_metadata.md_cookie.fid_len);
7267		fidp->fid_len = cp->c_metadata.md_cookie.fid_len;
7268	}
7269	mutex_exit(&cp->c_statelock);
7270	return (error);
7271}
7272
7273/* ARGSUSED2 */
7274static int
7275cachefs_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7276{
7277	cnode_t *cp = VTOC(vp);
7278
7279	/*
7280	 * XXX - This is ifdef'ed out for now. The problem -
7281	 * getdents() acquires the read version of rwlock, then we come
7282	 * into cachefs_readdir() and that wants to acquire the write version
7283	 * of this lock (if its going to populate the directory). This is
7284	 * a problem, this can be solved by introducing another lock in the
7285	 * cnode.
7286	 */
7287/* XXX */
7288	if (vp->v_type != VREG)
7289		return (-1);
7290	if (write_lock)
7291		rw_enter(&cp->c_rwlock, RW_WRITER);
7292	else
7293		rw_enter(&cp->c_rwlock, RW_READER);
7294	return (write_lock);
7295}
7296
7297/* ARGSUSED */
7298static void
7299cachefs_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
7300{
7301	cnode_t *cp = VTOC(vp);
7302	if (vp->v_type != VREG)
7303		return;
7304	rw_exit(&cp->c_rwlock);
7305}
7306
/* ARGSUSED */
/*
 * VOP_SEEK: every offset is accepted; no bounds checking is done.
 */
static int
cachefs_seek(struct vnode *vp, offset_t ooff, offset_t *noffp,
    caller_context_t *ct)
{
	return (0);
}
7314
/* counts pages that vanished between page_exists() and page_lookup() */
static int cachefs_lostpage = 0;
7316/*
7317 * Return all the pages from [off..off+len] in file
7318 */
7319/*ARGSUSED*/
7320static int
7321cachefs_getpage(struct vnode *vp, offset_t off, size_t len,
7322	uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
7323	caddr_t addr, enum seg_rw rw, cred_t *cr, caller_context_t *ct)
7324{
7325	cnode_t *cp = VTOC(vp);
7326	int error;
7327	fscache_t *fscp = C_TO_FSCACHE(cp);
7328	cachefscache_t *cachep = fscp->fs_cache;
7329	int held = 0;
7330	int connected = 0;
7331
7332#ifdef CFSDEBUG
7333	u_offset_t offx = (u_offset_t)off;
7334
7335	CFS_DEBUG(CFSDEBUG_VOPS)
7336		printf("cachefs_getpage: ENTER vp %p off %lld len %lu rw %d\n",
7337		    (void *)vp, offx, len, rw);
7338#endif
7339	if (getzoneid() != GLOBAL_ZONEID) {
7340		error = EPERM;
7341		goto out;
7342	}
7343
7344	if (vp->v_flag & VNOMAP) {
7345		error = ENOSYS;
7346		goto out;
7347	}
7348
7349	/* Call backfilesystem if NFSv4 */
7350	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
7351		error = cachefs_getpage_backfs_nfsv4(vp, off, len, protp, pl,
7352		    plsz, seg, addr, rw, cr);
7353		goto out;
7354	}
7355
7356	/* XXX sam: make this do an async populate? */
7357	if (pl == NULL) {
7358		error = 0;
7359		goto out;
7360	}
7361	if (protp != NULL)
7362		*protp = PROT_ALL;
7363
7364	for (;;) {
7365		/* get (or renew) access to the file system */
7366		if (held) {
7367			cachefs_cd_release(fscp);
7368			held = 0;
7369		}
7370		error = cachefs_cd_access(fscp, connected, 0);
7371		if (error)
7372			break;
7373		held = 1;
7374
7375		/*
7376		 * If we are getting called as a side effect of a
7377		 * cachefs_write()
7378		 * operation the local file size might not be extended yet.
7379		 * In this case we want to be able to return pages of zeroes.
7380		 */
7381		if ((u_offset_t)off + len >
7382		    ((cp->c_size + PAGEOFFSET) & (offset_t)PAGEMASK)) {
7383			if (seg != segkmap) {
7384				error = EFAULT;
7385				break;
7386			}
7387		}
7388		if (len <= PAGESIZE)
7389			error = cachefs_getapage(vp, (u_offset_t)off, len,
7390			    protp, pl, plsz, seg, addr, rw, cr);
7391		else
7392			error = pvn_getpages(cachefs_getapage, vp,
7393			    (u_offset_t)off, len, protp, pl, plsz, seg, addr,
7394			    rw, cr);
7395		if (error == 0)
7396			break;
7397
7398		if (((cp->c_flags & CN_NOCACHE) && (error == ENOSPC)) ||
7399		    error == EAGAIN) {
7400			connected = 0;
7401			continue;
7402		}
7403		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
7404			if (CFS_TIMEOUT(fscp, error)) {
7405				cachefs_cd_release(fscp);
7406				held = 0;
7407				cachefs_cd_timedout(fscp);
7408				connected = 0;
7409				continue;
7410			}
7411		} else {
7412			if (CFS_TIMEOUT(fscp, error)) {
7413				if (cachefs_cd_access_miss(fscp)) {
7414					if (len <= PAGESIZE)
7415						error = cachefs_getapage_back(
7416						    vp, (u_offset_t)off,
7417						    len, protp, pl,
7418						    plsz, seg, addr, rw, cr);
7419					else
7420						error = pvn_getpages(
7421						    cachefs_getapage_back, vp,
7422						    (u_offset_t)off, len,
7423						    protp, pl,
7424						    plsz, seg, addr, rw, cr);
7425					if (!CFS_TIMEOUT(fscp, error) &&
7426					    (error != EAGAIN))
7427						break;
7428					delay(5*hz);
7429					connected = 0;
7430					continue;
7431				}
7432				connected = 1;
7433				continue;
7434			}
7435		}
7436		break;
7437	}
7438
7439	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GETPAGE))
7440		cachefs_log_getpage(cachep, error, vp->v_vfsp,
7441		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
7442		    crgetuid(cr), off, len);
7443
7444	if (held) {
7445		cachefs_cd_release(fscp);
7446	}
7447
7448out:
7449#ifdef CFS_CD_DEBUG
7450	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
7451#endif
7452#ifdef CFSDEBUG
7453	CFS_DEBUG(CFSDEBUG_VOPS)
7454		printf("cachefs_getpage: EXIT vp %p error %d\n",
7455		    (void *)vp, error);
7456#endif
7457	return (error);
7458}
7459
7460/*
7461 * cachefs_getpage_backfs_nfsv4
7462 *
7463 * Call NFSv4 back filesystem to handle the getpage (cachefs
7464 * pass-through support for NFSv4).
7465 */
7466static int
7467cachefs_getpage_backfs_nfsv4(struct vnode *vp, offset_t off, size_t len,
7468			uint_t *protp, struct page *pl[], size_t plsz,
7469			struct seg *seg, caddr_t addr, enum seg_rw rw,
7470			cred_t *cr)
7471{
7472	cnode_t *cp = VTOC(vp);
7473	fscache_t *fscp = C_TO_FSCACHE(cp);
7474	vnode_t *backvp;
7475	int error;
7476
7477	/*
7478	 * For NFSv4 pass-through to work, only connected operation is
7479	 * supported, the cnode backvp must exist, and cachefs optional
7480	 * (eg., disconnectable) flags are turned off. Assert these
7481	 * conditions for the getpage operation.
7482	 */
7483	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7484	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7485
7486	/* Call backfs vnode op after extracting backvp */
7487	mutex_enter(&cp->c_statelock);
7488	backvp = cp->c_backvp;
7489	mutex_exit(&cp->c_statelock);
7490
7491	CFS_DPRINT_BACKFS_NFSV4(fscp,
7492	    ("cachefs_getpage_backfs_nfsv4: cnode %p, backvp %p\n",
7493	    cp, backvp));
7494	error = VOP_GETPAGE(backvp, off, len, protp, pl, plsz, seg,
7495	    addr, rw, cr, NULL);
7496
7497	return (error);
7498}
7499
7500/*
7501 * Called from pvn_getpages or cachefs_getpage to get a particular page.
7502 */
7503/*ARGSUSED*/
7504static int
7505cachefs_getapage(struct vnode *vp, u_offset_t off, size_t len, uint_t *protp,
7506	struct page *pl[], size_t plsz, struct seg *seg, caddr_t addr,
7507	enum seg_rw rw, cred_t *cr)
7508{
7509	cnode_t *cp = VTOC(vp);
7510	page_t **ppp, *pp = NULL;
7511	fscache_t *fscp = C_TO_FSCACHE(cp);
7512	cachefscache_t *cachep = fscp->fs_cache;
7513	int error = 0;
7514	struct page **ourpl;
7515	struct page *ourstackpl[17]; /* see ASSERT() below for 17 */
7516	int index = 0;
7517	int downgrade;
7518	int have_statelock = 0;
7519	u_offset_t popoff;
7520	size_t popsize = 0;
7521
7522	/*LINTED*/
7523	ASSERT(((DEF_POP_SIZE / PAGESIZE) + 1) <= 17);
7524
7525	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7526		ourpl = cachefs_kmem_alloc(sizeof (struct page *) *
7527		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1), KM_SLEEP);
7528	else
7529		ourpl = ourstackpl;
7530
7531	ourpl[0] = NULL;
7532	off = off & (offset_t)PAGEMASK;
7533again:
7534	/*
7535	 * Look for the page
7536	 */
7537	if (page_exists(vp, off) == 0) {
7538		/*
7539		 * Need to do work to get the page.
7540		 * Grab our lock because we are going to
7541		 * modify the state of the cnode.
7542		 */
7543		if (! have_statelock) {
7544			mutex_enter(&cp->c_statelock);
7545			have_statelock = 1;
7546		}
7547		/*
7548		 * If we're in NOCACHE mode, we will need a backvp
7549		 */
7550		if (cp->c_flags & CN_NOCACHE) {
7551			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7552				error = ETIMEDOUT;
7553				goto out;
7554			}
7555			if (cp->c_backvp == NULL) {
7556				error = cachefs_getbackvp(fscp, cp);
7557				if (error)
7558					goto out;
7559			}
7560			error = VOP_GETPAGE(cp->c_backvp, off,
7561			    PAGESIZE, protp, ourpl, PAGESIZE, seg,
7562			    addr, S_READ, cr, NULL);
7563			/*
7564			 * backfs returns EFAULT when we are trying for a
7565			 * page beyond EOF but cachefs has the knowledge that
7566			 * it is not beyond EOF be cause cp->c_size is
7567			 * greater then the offset requested.
7568			 */
7569			if (error == EFAULT) {
7570				error = 0;
7571				pp = page_create_va(vp, off, PAGESIZE,
7572				    PG_EXCL | PG_WAIT, seg, addr);
7573				if (pp == NULL)
7574					goto again;
7575				pagezero(pp, 0, PAGESIZE);
7576				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
7577				goto out;
7578			}
7579			if (error)
7580				goto out;
7581			goto getpages;
7582		}
7583		/*
7584		 * We need a front file. If we can't get it,
7585		 * put the cnode in NOCACHE mode and try again.
7586		 */
7587		if (cp->c_frontvp == NULL) {
7588			error = cachefs_getfrontfile(cp);
7589			if (error) {
7590				cachefs_nocache(cp);
7591				error = EAGAIN;
7592				goto out;
7593			}
7594		}
7595		/*
7596		 * Check if the front file needs population.
7597		 * If population is necessary, make sure we have a
7598		 * backvp as well. We will get the page from the backvp.
7599		 * bug 4152459-
7600		 * But if the file system is in disconnected mode
7601		 * and the file is a local file then do not check the
7602		 * allocmap.
7603		 */
7604		if (((fscp->fs_cdconnected == CFS_CD_CONNECTED) ||
7605		    ((cp->c_metadata.md_flags & MD_LOCALFILENO) == 0)) &&
7606		    (cachefs_check_allocmap(cp, off) == 0)) {
7607			if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
7608				error = ETIMEDOUT;
7609				goto out;
7610			}
7611			if (cp->c_backvp == NULL) {
7612				error = cachefs_getbackvp(fscp, cp);
7613				if (error)
7614					goto out;
7615			}
7616			if (cp->c_filegrp->fg_flags & CFS_FG_WRITE) {
7617				cachefs_cluster_allocmap(off, &popoff,
7618				    &popsize,
7619				    fscp->fs_info.fi_popsize, cp);
7620				if (popsize != 0) {
7621					error = cachefs_populate(cp,
7622					    popoff, popsize,
7623					    cp->c_frontvp, cp->c_backvp,
7624					    cp->c_size, cr);
7625					if (error) {
7626						cachefs_nocache(cp);
7627						error = EAGAIN;
7628						goto out;
7629					} else {
7630						cp->c_flags |=
7631						    CN_UPDATED |
7632						    CN_NEED_FRONT_SYNC |
7633						    CN_POPULATION_PENDING;
7634					}
7635					popsize = popsize - (off - popoff);
7636				} else {
7637					popsize = PAGESIZE;
7638				}
7639			}
7640			/* else XXX assert CN_NOCACHE? */
7641			error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
7642			    PAGESIZE, protp, ourpl, popsize,
7643			    seg, addr, S_READ, cr, NULL);
7644			if (error)
7645				goto out;
7646			fscp->fs_stats.st_misses++;
7647		} else {
7648			if (cp->c_flags & CN_POPULATION_PENDING) {
7649				error = VOP_FSYNC(cp->c_frontvp, FSYNC, cr,
7650				    NULL);
7651				cp->c_flags &= ~CN_POPULATION_PENDING;
7652				if (error) {
7653					cachefs_nocache(cp);
7654					error = EAGAIN;
7655					goto out;
7656				}
7657			}
7658			/*
7659			 * File was populated so we get the page from the
7660			 * frontvp
7661			 */
7662			error = VOP_GETPAGE(cp->c_frontvp, (offset_t)off,
7663			    PAGESIZE, protp, ourpl, PAGESIZE, seg, addr,
7664			    rw, cr, NULL);
7665			if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_GPFRONT))
7666				cachefs_log_gpfront(cachep, error,
7667				    fscp->fs_cfsvfsp,
7668				    &cp->c_metadata.md_cookie, cp->c_fileno,
7669				    crgetuid(cr), off, PAGESIZE);
7670			if (error) {
7671				cachefs_nocache(cp);
7672				error = EAGAIN;
7673				goto out;
7674			}
7675			fscp->fs_stats.st_hits++;
7676		}
7677getpages:
7678		ASSERT(have_statelock);
7679		if (have_statelock) {
7680			mutex_exit(&cp->c_statelock);
7681			have_statelock = 0;
7682		}
7683		downgrade = 0;
7684		for (ppp = ourpl; *ppp; ppp++) {
7685			if ((*ppp)->p_offset < off) {
7686				index++;
7687				page_unlock(*ppp);
7688				continue;
7689			}
7690			if (PAGE_SHARED(*ppp)) {
7691				if (page_tryupgrade(*ppp) == 0) {
7692					for (ppp = &ourpl[index]; *ppp; ppp++)
7693						page_unlock(*ppp);
7694					error = EAGAIN;
7695					goto out;
7696				}
7697				downgrade = 1;
7698			}
7699			ASSERT(PAGE_EXCL(*ppp));
7700			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
7701			page_rename(*ppp, vp, (*ppp)->p_offset);
7702		}
7703		pl[0] = ourpl[index];
7704		pl[1] = NULL;
7705		if (downgrade) {
7706			page_downgrade(ourpl[index]);
7707		}
7708		/* Unlock the rest of the pages from the cluster */
7709		for (ppp = &ourpl[index+1]; *ppp; ppp++)
7710			page_unlock(*ppp);
7711	} else {
7712		ASSERT(! have_statelock);
7713		if (have_statelock) {
7714			mutex_exit(&cp->c_statelock);
7715			have_statelock = 0;
7716		}
7717		/* XXX SE_SHARED probably isn't what we *always* want */
7718		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
7719			cachefs_lostpage++;
7720			goto again;
7721		}
7722		pl[0] = pp;
7723		pl[1] = NULL;
7724		/* XXX increment st_hits?  i don't think so, but... */
7725	}
7726
7727out:
7728	if (have_statelock) {
7729		mutex_exit(&cp->c_statelock);
7730		have_statelock = 0;
7731	}
7732	if (fscp->fs_info.fi_popsize > DEF_POP_SIZE)
7733		cachefs_kmem_free(ourpl, sizeof (struct page *) *
7734		    ((fscp->fs_info.fi_popsize / PAGESIZE) + 1));
7735	return (error);
7736}
7737
/*
 * Gets a page for the cachefs vnode, but only from the back file system
 * (the front cache is bypassed entirely).  On success pl[0] holds the
 * requested page (locked) and pl[1] terminates the list.
 */
/*ARGSUSED*/
static int
cachefs_getapage_back(struct vnode *vp, u_offset_t off, size_t len,
    uint_t *protp, struct page *pl[], size_t plsz, struct seg *seg,
    caddr_t addr, enum seg_rw rw, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	page_t **ppp, *pp = NULL;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;
	/*
	 * Scratch list the back fs fills in; NULL-terminated.  Sized 17,
	 * presumably for a 16-page cluster plus the terminator — TODO
	 * confirm against the back fs klustering policy.
	 */
	struct page *ourpl[17];
	int index = 0;		/* position of the requested page in ourpl */
	int have_statelock = 0;
	int downgrade;

	/*
	 * Grab the cnode statelock so the cnode state won't change
	 * while we're in here.
	 */
	ourpl[0] = NULL;
	off = off & (offset_t)PAGEMASK;	/* round down to a page boundary */
again:
	if (page_exists(vp, off) == 0) {
		/* page not cached on this vnode; fetch it from the back fs */
		if (! have_statelock) {
			mutex_enter(&cp->c_statelock);
			have_statelock = 1;
		}

		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error)
				goto out;
		}
		error = VOP_GETPAGE(cp->c_backvp, (offset_t)off,
		    PAGESIZE, protp, ourpl, PAGESIZE, seg,
		    addr, S_READ, cr, NULL);
		if (error)
			goto out;

		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		/*
		 * Move every page the back fs returned onto the cachefs
		 * vnode.  page_rename requires the exclusive lock, so
		 * shared-locked pages are upgraded first (and downgraded
		 * again below so the caller sees the original lock mode).
		 */
		downgrade = 0;
		for (ppp = ourpl; *ppp; ppp++) {
			if ((*ppp)->p_offset < off) {
				/* read-behind page; not wanted, drop it */
				index++;
				page_unlock(*ppp);
				continue;
			}
			if (PAGE_SHARED(*ppp)) {
				if (page_tryupgrade(*ppp) == 0) {
					/* upgrade lost; unlock rest, retry */
					for (ppp = &ourpl[index]; *ppp; ppp++)
						page_unlock(*ppp);
					error = EAGAIN;
					goto out;
				}
				downgrade = 1;
			}
			ASSERT(PAGE_EXCL(*ppp));
			/* unload any old translations before the rename */
			(void) hat_pageunload((*ppp), HAT_FORCE_PGUNLOAD);
			page_rename(*ppp, vp, (*ppp)->p_offset);
		}
		pl[0] = ourpl[index];
		pl[1] = NULL;
		if (downgrade) {
			page_downgrade(ourpl[index]);
		}
		/* Unlock the rest of the pages from the cluster */
		for (ppp = &ourpl[index+1]; *ppp; ppp++)
			page_unlock(*ppp);
	} else {
		ASSERT(! have_statelock);
		if (have_statelock) {
			mutex_exit(&cp->c_statelock);
			have_statelock = 0;
		}
		/* page already cached; just look it up */
		if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
			/* raced with pageout/invalidate; try again */
			cachefs_lostpage++;
			goto again;
		}
		pl[0] = pp;
		pl[1] = NULL;
	}

out:
	if (have_statelock) {
		mutex_exit(&cp->c_statelock);
		have_statelock = 0;
	}
	return (error);
}
7831
/*
 * VOP_PUTPAGE entry point.  Acquires access to the fscache and retries
 * cachefs_putpage_common() across connected/disconnected transitions.
 * Returns 0 on success or an errno value.
 */
/*ARGSUSED*/
static int
cachefs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	int error = 0;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0;		/* nonzero while we hold cd access */
	int connected = 0;	/* require connected operation on retry */

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	/* Call backfilesystem if NFSv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_putpage_backfs_nfsv4(vp, off, len, flags, cr);
		goto out;
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		error = cachefs_putpage_common(vp, off, len, flags, cr);
		if (error == 0)
			break;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			if (CFS_TIMEOUT(fscp, error)) {
				/* back fs timed out; drop to disconnected */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			/*
			 * In memory-reclaim context we must not block;
			 * pretend success rather than deadlock pageout.
			 */
			if (NOMEMWAIT()) {
				error = 0;
				goto out;
			}
			if (CFS_TIMEOUT(fscp, error)) {
				/* retry, insisting on connected operation */
				connected = 1;
				continue;
			}
		}
		break;
	}

out:

	if (held) {
		cachefs_cd_release(fscp);
	}

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}
7899
7900/*
7901 * cachefs_putpage_backfs_nfsv4
7902 *
7903 * Call NFSv4 back filesystem to handle the putpage (cachefs
7904 * pass-through support for NFSv4).
7905 */
7906static int
7907cachefs_putpage_backfs_nfsv4(vnode_t *vp, offset_t off, size_t len, int flags,
7908			cred_t *cr)
7909{
7910	cnode_t *cp = VTOC(vp);
7911	fscache_t *fscp = C_TO_FSCACHE(cp);
7912	vnode_t *backvp;
7913	int error;
7914
7915	/*
7916	 * For NFSv4 pass-through to work, only connected operation is
7917	 * supported, the cnode backvp must exist, and cachefs optional
7918	 * (eg., disconnectable) flags are turned off. Assert these
7919	 * conditions for the putpage operation.
7920	 */
7921	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
7922	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
7923
7924	/* Call backfs vnode op after extracting backvp */
7925	mutex_enter(&cp->c_statelock);
7926	backvp = cp->c_backvp;
7927	mutex_exit(&cp->c_statelock);
7928
7929	CFS_DPRINT_BACKFS_NFSV4(fscp,
7930	    ("cachefs_putpage_backfs_nfsv4: cnode %p, backvp %p\n",
7931	    cp, backvp));
7932	error = VOP_PUTPAGE(backvp, off, len, flags, cr, NULL);
7933
7934	return (error);
7935}
7936
7937/*
7938 * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
7939 * If len == 0, do from off to EOF.
7940 *
7941 * The normal cases should be len == 0 & off == 0 (entire vp list),
7942 * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
7943 * (from pageout).
7944 */
7945
7946/*ARGSUSED*/
7947int
7948cachefs_putpage_common(struct vnode *vp, offset_t off, size_t len,
7949    int flags, cred_t *cr)
7950{
7951	struct cnode *cp  = VTOC(vp);
7952	struct page *pp;
7953	size_t io_len;
7954	u_offset_t eoff, io_off;
7955	int error = 0;
7956	fscache_t *fscp = C_TO_FSCACHE(cp);
7957	cachefscache_t *cachep = fscp->fs_cache;
7958
7959	if (len == 0 && (flags & B_INVAL) == 0 && vn_is_readonly(vp)) {
7960		return (0);
7961	}
7962	if (!vn_has_cached_data(vp) || (off >= cp->c_size &&
7963	    (flags & B_INVAL) == 0))
7964		return (0);
7965
7966	/*
7967	 * Should never have cached data for the cachefs vnode
7968	 * if NFSv4 is in use.
7969	 */
7970	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
7971
7972	/*
7973	 * If this is an async putpage let a thread handle it.
7974	 */
7975	if (flags & B_ASYNC) {
7976		struct cachefs_req *rp;
7977		int tflags = (flags & ~(B_ASYNC|B_DONTNEED));
7978
7979		if (ttoproc(curthread) == proc_pageout) {
7980			/*
7981			 * If this is the page daemon we
7982			 * do the push synchronously (Dangerous!) and hope
7983			 * we can free enough to keep running...
7984			 */
7985			flags &= ~B_ASYNC;
7986			goto again;
7987		}
7988
7989		if (! cachefs_async_okay()) {
7990
7991			/*
7992			 * this is somewhat like NFS's behavior.  keep
7993			 * the system from thrashing.  we've seen
7994			 * cases where async queues get out of
7995			 * control, especially if
7996			 * madvise(MADV_SEQUENTIAL) is done on a large
7997			 * mmap()ed file that is read sequentially.
7998			 */
7999
8000			flags &= ~B_ASYNC;
8001			goto again;
8002		}
8003
8004		/*
8005		 * if no flags other than B_ASYNC were set,
8006		 * we coalesce putpage requests into a single one for the
8007		 * whole file (len = off = 0).  If such a request is
8008		 * already queued, we're done.
8009		 *
8010		 * If there are other flags set (e.g., B_INVAL), we don't
8011		 * attempt to coalesce and we use the specified length and
8012		 * offset.
8013		 */
8014		rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
8015		mutex_enter(&cp->c_iomutex);
8016		if ((cp->c_ioflags & CIO_PUTPAGES) == 0 || tflags != 0) {
8017			rp->cfs_cmd = CFS_PUTPAGE;
8018			rp->cfs_req_u.cu_putpage.cp_vp = vp;
8019			if (tflags == 0) {
8020				off = len = 0;
8021				cp->c_ioflags |= CIO_PUTPAGES;
8022			}
8023			rp->cfs_req_u.cu_putpage.cp_off = off;
8024			rp->cfs_req_u.cu_putpage.cp_len = (uint_t)len;
8025			rp->cfs_req_u.cu_putpage.cp_flags = flags & ~B_ASYNC;
8026			rp->cfs_cr = cr;
8027			crhold(rp->cfs_cr);
8028			VN_HOLD(vp);
8029			cp->c_nio++;
8030			cachefs_addqueue(rp, &(C_TO_FSCACHE(cp)->fs_workq));
8031		} else {
8032			kmem_cache_free(cachefs_req_cache, rp);
8033		}
8034
8035		mutex_exit(&cp->c_iomutex);
8036		return (0);
8037	}
8038
8039
8040again:
8041	if (len == 0) {
8042		/*
8043		 * Search the entire vp list for pages >= off
8044		 */
8045		error = pvn_vplist_dirty(vp, off, cachefs_push, flags, cr);
8046	} else {
8047		/*
8048		 * Do a range from [off...off + len] looking for pages
8049		 * to deal with.
8050		 */
8051		eoff = (u_offset_t)off + len;
8052		for (io_off = off; io_off < eoff && io_off < cp->c_size;
8053		    io_off += io_len) {
8054			/*
8055			 * If we are not invalidating, synchronously
8056			 * freeing or writing pages use the routine
8057			 * page_lookup_nowait() to prevent reclaiming
8058			 * them from the free list.
8059			 */
8060			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
8061				pp = page_lookup(vp, io_off,
8062				    (flags & (B_INVAL | B_FREE)) ?
8063				    SE_EXCL : SE_SHARED);
8064			} else {
8065				/* XXX this looks like dead code */
8066				pp = page_lookup_nowait(vp, io_off,
8067				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
8068			}
8069
8070			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
8071				io_len = PAGESIZE;
8072			else {
8073				error = cachefs_push(vp, pp, &io_off,
8074				    &io_len, flags, cr);
8075				if (error != 0)
8076					break;
8077				/*
8078				 * "io_off" and "io_len" are returned as
8079				 * the range of pages we actually wrote.
8080				 * This allows us to skip ahead more quickly
8081				 * since several pages may've been dealt
8082				 * with by this iteration of the loop.
8083				 */
8084			}
8085		}
8086	}
8087
8088	if (error == 0 && off == 0 && (len == 0 || len >= cp->c_size)) {
8089		cp->c_flags &= ~CDIRTY;
8090	}
8091
8092	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_PUTPAGE))
8093		cachefs_log_putpage(cachep, error, fscp->fs_cfsvfsp,
8094		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
8095		    crgetuid(cr), off, len);
8096
8097	return (error);
8098
8099}
8100
8101/*ARGSUSED*/
8102static int
8103cachefs_map(struct vnode *vp, offset_t off, struct as *as, caddr_t *addrp,
8104    size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
8105    caller_context_t *ct)
8106{
8107	cnode_t *cp = VTOC(vp);
8108	fscache_t *fscp = C_TO_FSCACHE(cp);
8109	struct segvn_crargs vn_a;
8110	int error;
8111	int held = 0;
8112	int writing;
8113	int connected = 0;
8114
8115#ifdef CFSDEBUG
8116	u_offset_t offx = (u_offset_t)off;
8117
8118	CFS_DEBUG(CFSDEBUG_VOPS)
8119		printf("cachefs_map: ENTER vp %p off %lld len %lu flags %d\n",
8120		    (void *)vp, offx, len, flags);
8121#endif
8122	if (getzoneid() != GLOBAL_ZONEID) {
8123		error = EPERM;
8124		goto out;
8125	}
8126
8127	if (vp->v_flag & VNOMAP) {
8128		error = ENOSYS;
8129		goto out;
8130	}
8131	if (off < 0 || (offset_t)(off + len) < 0) {
8132		error = ENXIO;
8133		goto out;
8134	}
8135	if (vp->v_type != VREG) {
8136		error = ENODEV;
8137		goto out;
8138	}
8139
8140	/*
8141	 * Check to see if the vnode is currently marked as not cachable.
8142	 * If so, we have to refuse the map request as this violates the
8143	 * don't cache attribute.
8144	 */
8145	if (vp->v_flag & VNOCACHE)
8146		return (EAGAIN);
8147
8148#ifdef OBSOLETE
8149	/*
8150	 * If file is being locked, disallow mapping.
8151	 */
8152	if (vn_has_flocks(vp)) {
8153		error = EAGAIN;
8154		goto out;
8155	}
8156#endif
8157
8158	/* call backfilesystem if NFSv4 */
8159	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8160		error = cachefs_map_backfs_nfsv4(vp, off, as, addrp, len, prot,
8161		    maxprot, flags, cr);
8162		goto out;
8163	}
8164
8165	writing = (prot & PROT_WRITE && ((flags & MAP_PRIVATE) == 0));
8166
8167	for (;;) {
8168		/* get (or renew) access to the file system */
8169		if (held) {
8170			cachefs_cd_release(fscp);
8171			held = 0;
8172		}
8173		error = cachefs_cd_access(fscp, connected, writing);
8174		if (error)
8175			break;
8176		held = 1;
8177
8178		if (writing) {
8179			mutex_enter(&cp->c_statelock);
8180			if (CFS_ISFS_WRITE_AROUND(fscp)) {
8181				if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
8182					connected = 1;
8183					continue;
8184				} else {
8185					cachefs_nocache(cp);
8186				}
8187			}
8188
8189			/*
8190			 * CN_MAPWRITE is for an optimization in cachefs_delmap.
8191			 * If CN_MAPWRITE is not set then cachefs_delmap does
8192			 * not need to try to push out any pages.
8193			 * This bit gets cleared when the cnode goes inactive.
8194			 */
8195			cp->c_flags |= CN_MAPWRITE;
8196
8197			mutex_exit(&cp->c_statelock);
8198		}
8199		break;
8200	}
8201
8202	if (held) {
8203		cachefs_cd_release(fscp);
8204	}
8205
8206	as_rangelock(as);
8207	error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
8208	if (error != 0) {
8209		as_rangeunlock(as);
8210		goto out;
8211	}
8212
8213	/*
8214	 * package up all the data passed in into a segvn_args struct and
8215	 * call as_map with segvn_create function to create a new segment
8216	 * in the address space.
8217	 */
8218	vn_a.vp = vp;
8219	vn_a.offset = off;
8220	vn_a.type = flags & MAP_TYPE;
8221	vn_a.prot = (uchar_t)prot;
8222	vn_a.maxprot = (uchar_t)maxprot;
8223	vn_a.cred = cr;
8224	vn_a.amp = NULL;
8225	vn_a.flags = flags & ~MAP_TYPE;
8226	vn_a.szc = 0;
8227	vn_a.lgrp_mem_policy_flags = 0;
8228	error = as_map(as, *addrp, len, segvn_create, &vn_a);
8229	as_rangeunlock(as);
8230out:
8231
8232#ifdef CFS_CD_DEBUG
8233	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
8234#endif
8235#ifdef CFSDEBUG
8236	CFS_DEBUG(CFSDEBUG_VOPS)
8237		printf("cachefs_map: EXIT vp %p error %d\n", (void *)vp, error);
8238#endif
8239	return (error);
8240}
8241
8242/*
8243 * cachefs_map_backfs_nfsv4
8244 *
8245 * Call NFSv4 back filesystem to handle the map (cachefs
8246 * pass-through support for NFSv4).
8247 */
8248static int
8249cachefs_map_backfs_nfsv4(struct vnode *vp, offset_t off, struct as *as,
8250			caddr_t *addrp, size_t len, uchar_t prot,
8251			uchar_t maxprot, uint_t flags, cred_t *cr)
8252{
8253	cnode_t *cp = VTOC(vp);
8254	fscache_t *fscp = C_TO_FSCACHE(cp);
8255	vnode_t *backvp;
8256	int error;
8257
8258	/*
8259	 * For NFSv4 pass-through to work, only connected operation is
8260	 * supported, the cnode backvp must exist, and cachefs optional
8261	 * (eg., disconnectable) flags are turned off. Assert these
8262	 * conditions for the map operation.
8263	 */
8264	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8265	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8266
8267	/* Call backfs vnode op after extracting backvp */
8268	mutex_enter(&cp->c_statelock);
8269	backvp = cp->c_backvp;
8270	mutex_exit(&cp->c_statelock);
8271
8272	CFS_DPRINT_BACKFS_NFSV4(fscp,
8273	    ("cachefs_map_backfs_nfsv4: cnode %p, backvp %p\n",
8274	    cp, backvp));
8275	error = VOP_MAP(backvp, off, as, addrp, len, prot, maxprot, flags, cr,
8276	    NULL);
8277
8278	return (error);
8279}
8280
8281/*ARGSUSED*/
8282static int
8283cachefs_addmap(struct vnode *vp, offset_t off, struct as *as,
8284    caddr_t addr, size_t len, uchar_t prot, uchar_t maxprot, uint_t flags,
8285    cred_t *cr, caller_context_t *ct)
8286{
8287	cnode_t *cp = VTOC(vp);
8288	fscache_t *fscp = C_TO_FSCACHE(cp);
8289
8290	if (getzoneid() != GLOBAL_ZONEID)
8291		return (EPERM);
8292
8293	if (vp->v_flag & VNOMAP)
8294		return (ENOSYS);
8295
8296	/*
8297	 * Check this is not an NFSv4 filesystem, as the mapping
8298	 * is not done on the cachefs filesystem if NFSv4 is in
8299	 * use.
8300	 */
8301	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
8302
8303	mutex_enter(&cp->c_statelock);
8304	cp->c_mapcnt += btopr(len);
8305	mutex_exit(&cp->c_statelock);
8306	return (0);
8307}
8308
/*
 * VOP_DELMAP entry point.  Decrements the mapped-page count and, when
 * the last write-capable mapping goes away, pushes any remaining dirty
 * pages to the (front or back) store.
 */
/*ARGSUSED*/
static int
cachefs_delmap(struct vnode *vp, offset_t off, struct as *as,
	caddr_t addr, size_t len, uint_t prot, uint_t maxprot, uint_t flags,
	cred_t *cr, caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;
	int connected = 0;
	int held = 0;

	/*
	 * The file may be passed in to (or inherited into) the zone, so we
	 * need to let this operation go through since it happens as part of
	 * exiting.
	 */
	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/*
	 * Check this is not an NFSv4 filesystem, as the mapping
	 * is not done on the cachefs filesystem if NFSv4 is in
	 * use.
	 */
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	mutex_enter(&cp->c_statelock);
	cp->c_mapcnt -= btopr(len);
	ASSERT(cp->c_mapcnt >= 0);
	mutex_exit(&cp->c_statelock);

	/*
	 * Only flush when the final mapping is gone, there are cached
	 * pages, and at least one mapping was writable (CN_MAPWRITE).
	 * NOTE(review): c_mapcnt is re-read here after the statelock was
	 * dropped — presumably benign racing with concurrent map/delmap;
	 * confirm before relying on it.
	 */
	if (cp->c_mapcnt || !vn_has_cached_data(vp) ||
	    ((cp->c_flags & CN_MAPWRITE) == 0))
		return (0);

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;
		connected = 0;

		/* push all dirty pages for the whole file */
		error = cachefs_putpage_common(vp, (offset_t)0,
		    (uint_t)0, 0, cr);
		if (CFS_TIMEOUT(fscp, error)) {
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				/* drop to disconnected and retry */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				continue;
			} else {
				/* retry, requiring connected operation */
				connected = 1;
				continue;
			}
		}

		/* if no space left in cache, wait until connected */
		if ((error == ENOSPC) &&
		    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
			connected = 1;
			continue;
		}

		/* report any asynchronous write error recorded on the cnode */
		mutex_enter(&cp->c_statelock);
		if (!error)
			error = cp->c_error;
		cp->c_error = 0;
		mutex_exit(&cp->c_statelock);
		break;
	}

	if (held)
		cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}
8394
/*
 * VOP_FRLOCK entry point.  File/record locks are always taken on the
 * back file system (unless local locking is configured), the file is
 * un-cached, and the vnode is marked VNOCACHE for the rest of its life
 * so clients sharing the file see consistent data.
 */
/* ARGSUSED */
static int
cachefs_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
	offset_t offset, struct flk_callback *flk_cbp, cred_t *cr,
	caller_context_t *ct)
{
	struct cnode *cp = VTOC(vp);
	int error;
	struct fscache *fscp = C_TO_FSCACHE(cp);
	vnode_t *backvp;
	int held = 0;
	int connected = 0;

	if (getzoneid() != GLOBAL_ZONEID)
		return (EPERM);

	if ((cmd != F_GETLK) && (cmd != F_SETLK) && (cmd != F_SETLKW))
		return (EINVAL);

	/* Disallow locking of files that are currently mapped */
	if (((cmd == F_SETLK) || (cmd == F_SETLKW)) && (cp->c_mapcnt > 0)) {
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		return (EAGAIN);
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the frlock operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* XXX bob: nfs does a bunch more checks than we do */
	if (CFS_ISFS_LLOCK(fscp)) {
		/* local-locking mount: handle it entirely on this node */
		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
		return (fs_frlock(vp, cmd, bfp, flag, offset, flk_cbp, cr, ct));
	}

	for (;;) {
		/* get (or renew) access to the file system */
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		/* if not connected, quit or wait */
		if (fscp->fs_cdconnected != CFS_CD_CONNECTED) {
			connected = 1;
			continue;
		}

		/* nocache the file */
		if ((cp->c_flags & CN_NOCACHE) == 0 &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			mutex_enter(&cp->c_statelock);
			cachefs_nocache(cp);
			mutex_exit(&cp->c_statelock);
		}

		/*
		 * XXX bob: probably should do a consistency check
		 * Pass arguments unchanged if NFSv4 is the backfs.
		 * l_whence == 2 is SEEK_END; rebase on the cached size so
		 * the back fs sees an absolute offset.
		 */
		if (bfp->l_whence == 2 && CFS_ISFS_BACKFS_NFSV4(fscp) == 0) {
			bfp->l_start += cp->c_size;
			bfp->l_whence = 0;
		}

		/* get the back vp */
		mutex_enter(&cp->c_statelock);
		if (cp->c_backvp == NULL) {
			error = cachefs_getbackvp(fscp, cp);
			if (error) {
				mutex_exit(&cp->c_statelock);
				break;
			}
		}
		backvp = cp->c_backvp;
		VN_HOLD(backvp);	/* keep backvp live across the call */
		mutex_exit(&cp->c_statelock);

		/*
		 * make sure we can flush currently dirty pages before
		 * allowing the lock
		 */
		if (bfp->l_type != F_UNLCK && cmd != F_GETLK &&
		    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
			error = cachefs_putpage(
			    vp, (offset_t)0, 0, B_INVAL, cr, ct);
			if (error) {
				error = ENOLCK;
				VN_RELE(backvp);
				break;
			}
		}

		/* do lock on the back file */
		CFS_DPRINT_BACKFS_NFSV4(fscp,
		    ("cachefs_frlock (nfsv4): cp %p, backvp %p\n",
		    cp, backvp));
		error = VOP_FRLOCK(backvp, cmd, bfp, flag, offset, NULL, cr,
		    ct);
		VN_RELE(backvp);
		if (CFS_TIMEOUT(fscp, error)) {
			connected = 1;
			continue;
		}
		break;
	}

	if (held) {
		cachefs_cd_release(fscp);
	}

	/*
	 * If we are setting a lock mark the vnode VNOCACHE so the page
	 * cache does not give inconsistent results on locked files shared
	 * between clients.  The VNOCACHE flag is never turned off as long
	 * as the vnode is active because it is hard to figure out when the
	 * last lock is gone.
	 * XXX - what if some already has the vnode mapped in?
	 * XXX bob: see nfs3_frlock, do not allow locking if vnode mapped in.
	 */
	if ((error == 0) && (bfp->l_type != F_UNLCK) && (cmd != F_GETLK) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp))
		vp->v_flag |= VNOCACHE;

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
	return (error);
}
8538
8539/*
8540 * Free storage space associated with the specified vnode.  The portion
8541 * to be freed is specified by bfp->l_start and bfp->l_len (already
8542 * normalized to a "whence" of 0).
8543 *
8544 * This is an experimental facility whose continued existence is not
8545 * guaranteed.  Currently, we only support the special case
8546 * of l_len == 0, meaning free to end of file.
8547 */
8548/* ARGSUSED */
8549static int
8550cachefs_space(struct vnode *vp, int cmd, struct flock64 *bfp, int flag,
8551	offset_t offset, cred_t *cr, caller_context_t *ct)
8552{
8553	cnode_t *cp = VTOC(vp);
8554	fscache_t *fscp = C_TO_FSCACHE(cp);
8555	int error;
8556
8557	ASSERT(vp->v_type == VREG);
8558	if (getzoneid() != GLOBAL_ZONEID)
8559		return (EPERM);
8560	if (cmd != F_FREESP)
8561		return (EINVAL);
8562
8563	/* call backfilesystem if NFSv4 */
8564	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
8565		error = cachefs_space_backfs_nfsv4(vp, cmd, bfp, flag,
8566		    offset, cr, ct);
8567		goto out;
8568	}
8569
8570	if ((error = convoff(vp, bfp, 0, offset)) == 0) {
8571		ASSERT(bfp->l_start >= 0);
8572		if (bfp->l_len == 0) {
8573			struct vattr va;
8574
8575			va.va_size = bfp->l_start;
8576			va.va_mask = AT_SIZE;
8577			error = cachefs_setattr(vp, &va, 0, cr, ct);
8578		} else
8579			error = EINVAL;
8580	}
8581
8582out:
8583	return (error);
8584}
8585
8586/*
8587 * cachefs_space_backfs_nfsv4
8588 *
8589 * Call NFSv4 back filesystem to handle the space (cachefs
8590 * pass-through support for NFSv4).
8591 */
8592static int
8593cachefs_space_backfs_nfsv4(struct vnode *vp, int cmd, struct flock64 *bfp,
8594		int flag, offset_t offset, cred_t *cr, caller_context_t *ct)
8595{
8596	cnode_t *cp = VTOC(vp);
8597	fscache_t *fscp = C_TO_FSCACHE(cp);
8598	vnode_t *backvp;
8599	int error;
8600
8601	/*
8602	 * For NFSv4 pass-through to work, only connected operation is
8603	 * supported, the cnode backvp must exist, and cachefs optional
8604	 * (eg., disconnectable) flags are turned off. Assert these
8605	 * conditions for the space operation.
8606	 */
8607	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
8608	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
8609
8610	/* Call backfs vnode op after extracting backvp */
8611	mutex_enter(&cp->c_statelock);
8612	backvp = cp->c_backvp;
8613	mutex_exit(&cp->c_statelock);
8614
8615	CFS_DPRINT_BACKFS_NFSV4(fscp,
8616	    ("cachefs_space_backfs_nfsv4: cnode %p, backvp %p\n",
8617	    cp, backvp));
8618	error = VOP_SPACE(backvp, cmd, bfp, flag, offset, cr, ct);
8619
8620	return (error);
8621}
8622
/*
 * VOP_REALVP entry point.  Cachefs does not expose an underlying
 * "real" vnode to callers, so the translation always fails.
 */
/*ARGSUSED*/
static int
cachefs_realvp(struct vnode *vp, struct vnode **vpp, caller_context_t *ct)
{
	return (EINVAL);
}
8629
/*
 * VOP_PAGEIO entry point.  Direct page I/O is not supported by
 * cachefs; callers fall back on getpage/putpage.
 */
/*ARGSUSED*/
static int
cachefs_pageio(struct vnode *vp, page_t *pp, u_offset_t io_off, size_t io_len,
	int flags, cred_t *cr, caller_context_t *ct)
{
	return (ENOSYS);
}
8637
/*
 * Set the acl on a cnode while connected to the back file system.
 * Pushes the acl to the back fs, then updates the cached copy of the
 * acl and the cached mode bits; falls back to nocache mode if the
 * front-cache update fails.  Caller must hold c_rwlock as writer.
 */
static int
cachefs_setsecattr_connected(cnode_t *cp,
    vsecattr_t *vsec, int flag, cred_t *cr)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error = 0;

	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	mutex_enter(&cp->c_statelock);

	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if (error) {
			cachefs_nocache(cp);
			goto out;
		}
	}

	/* verify the cached object is still consistent with the back fs */
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/*
	 * only owner can set acl
	 * NOTE(review): returns EINVAL rather than EPERM here —
	 * historical behavior, preserved.
	 */
	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
		error = EINVAL;
		goto out;
	}


	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_setsecattr (nfsv4): cp %p, backvp %p",
	    cp, cp->c_backvp));
	/* the back file system is the authority for the acl */
	error = VOP_SETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
	if (error) {
		goto out;
	}

	/* if the front file can't be written, stop caching this file */
	if ((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		cachefs_nocache(cp);
		goto out;
	}

	CFSOP_MODIFY_COBJECT(fscp, cp, cr);

	/* acl may have changed permissions -- handle this. */
	if (!CFS_ISFS_BACKFS_NFSV4(fscp))
		cachefs_acl2perm(cp, vsec);

	/* cache the acl in the front fs; on failure just stop caching */
	if ((cp->c_flags & CN_NOCACHE) == 0 &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_cacheacl(cp, vsec);
		if (error != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_VOPS)
				printf("cachefs_setacl: cacheacl: error %d\n",
				    error);
#endif /* CFSDEBUG */
			error = 0;
			cachefs_nocache(cp);
		}
	}

out:
	mutex_exit(&cp->c_statelock);

	return (error);
}
8708
/*
 * Set the acl on a cnode while disconnected from the back file system.
 * The change is recorded in the dlog for later roll-forward, applied to
 * the cached metadata, and the original mode bits are restored if any
 * step fails.
 */
static int
cachefs_setsecattr_disconnected(cnode_t *cp,
    vsecattr_t *vsec, int flag, cred_t *cr)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	/* saved so the mode can be rolled back on failure */
	mode_t failmode = cp->c_metadata.md_vattr.va_mode;
	off_t commit = 0;	/* dlog record offset; 0 = nothing logged */
	int error = 0;

	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);

	/* write-around files cannot be modified while disconnected */
	if (CFS_ISFS_WRITE_AROUND(fscp))
		return (ETIMEDOUT);

	mutex_enter(&cp->c_statelock);

	/* only owner can set acl */
	if (cp->c_metadata.md_vattr.va_uid != crgetuid(cr)) {
		error = EINVAL;
		goto out;
	}

	/* stale attributes require a trip to the back fs first */
	if (cp->c_metadata.md_flags & MD_NEEDATTRS) {
		error = ETIMEDOUT;
		goto out;
	}

	/* XXX do i need this?  is this right? */
	if (cp->c_flags & CN_ALLOC_PENDING) {
		if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
			(void) filegrp_allocattr(cp->c_filegrp);
		}
		error = filegrp_create_metadata(cp->c_filegrp,
		    &cp->c_metadata, &cp->c_id);
		if (error) {
			goto out;
		}
		cp->c_flags &= ~CN_ALLOC_PENDING;
	}

	/* XXX is this right? */
	if ((cp->c_metadata.md_flags & MD_MAPPING) == 0) {
		error = cachefs_dlog_cidmap(fscp);
		if (error) {
			error = ENOSPC;
			goto out;
		}
		cp->c_metadata.md_flags |= MD_MAPPING;
		cp->c_flags |= CN_UPDATED;
	}

	/* log the operation for replay at reconnect time */
	commit = cachefs_dlog_setsecattr(fscp, vsec, flag, cp, cr);
	if (commit == 0)
		goto out;

	/* fix modes in metadata */
	cachefs_acl2perm(cp, vsec);

	if ((cp->c_flags & CN_NOCACHE) == 0) {
		error = cachefs_cacheacl(cp, vsec);
		if (error != 0) {
			goto out;
		}
	}

	/* XXX is this right? */
	if (cachefs_modified_alloc(cp)) {
		error = ENOSPC;
		goto out;
	}

out:
	/* on any failure, restore the pre-call mode bits */
	if (error != 0)
		cp->c_metadata.md_vattr.va_mode = failmode;

	mutex_exit(&cp->c_statelock);

	/* seal the dlog record with the final status of the operation */
	if (commit) {
		if (cachefs_dlog_commit(fscp, commit, error)) {
			/*EMPTY*/
			/* XXX fix on panic? */
		}
	}

	return (error);
}
8795
/*
 * VOP_SETSECATTR entry point.  Validates the request, then dispatches
 * to the connected or disconnected helper, retrying across
 * connected/disconnected transitions.
 */
/*ARGSUSED*/
static int
cachefs_setsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int connected = 0;
	int held = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_setsecattr: ENTER vp %p\n", (void *)vp);
#endif
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/* acls disabled by mount option */
	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = ENOSYS;
		goto out;
	}

	/* this vnode type cannot carry an acl */
	if (! cachefs_vtype_aclok(vp)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the setsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	for (;;) {
		/* drop hold on file system */
		if (held) {
			/* Won't loop with NFSv4 connected operation */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}

		/* acquire access to the file system */
		error = cachefs_cd_access(fscp, connected, 1);
		if (error)
			break;
		held = 1;

		/* perform the setattr */
		if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
			error = cachefs_setsecattr_connected(cp,
			    vsec, flag, cr);
		else
			error = cachefs_setsecattr_disconnected(cp,
			    vsec, flag, cr);
		if (error) {
			/* if connected */
			if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
				if (CFS_TIMEOUT(fscp, error)) {
					/* timed out; go disconnected, retry */
					cachefs_cd_release(fscp);
					held = 0;
					cachefs_cd_timedout(fscp);
					connected = 0;
					continue;
				}
			}

			/* else must be disconnected */
			else {
				if (CFS_TIMEOUT(fscp, error)) {
					/* retry insisting on connected mode */
					connected = 1;
					continue;
				}
			}
		}
		break;
	}

	if (held) {
		cachefs_cd_release(fscp);
	}
	return (error);

	/*
	 * Only the early validation failures reach this label; they
	 * occur before cd access is taken, so no release is needed.
	 */
out:
#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_setsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}
8899
8900/*
8901 * call this BEFORE calling cachefs_cacheacl(), as the latter will
8902 * sanitize the acl.
8903 */
8904
8905static void
8906cachefs_acl2perm(cnode_t *cp, vsecattr_t *vsec)
8907{
8908	aclent_t *aclp;
8909	int i;
8910
8911	for (i = 0; i < vsec->vsa_aclcnt; i++) {
8912		aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
8913		switch (aclp->a_type) {
8914		case USER_OBJ:
8915			cp->c_metadata.md_vattr.va_mode &= (~0700);
8916			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 6);
8917			break;
8918
8919		case GROUP_OBJ:
8920			cp->c_metadata.md_vattr.va_mode &= (~070);
8921			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm << 3);
8922			break;
8923
8924		case OTHER_OBJ:
8925			cp->c_metadata.md_vattr.va_mode &= (~07);
8926			cp->c_metadata.md_vattr.va_mode |= (aclp->a_perm);
8927			break;
8928
8929		case CLASS_OBJ:
8930			cp->c_metadata.md_aclclass = aclp->a_perm;
8931			break;
8932		}
8933	}
8934
8935	cp->c_flags |= CN_UPDATED;
8936}
8937
/*
 * cachefs_getsecattr -- VOP_GETSECATTR for cachefs.  Returns the ACL
 * for the file; when the file system was mounted with CFS_NOACL, a
 * minimal ACL is fabricated from the mode bits instead.  Loops to
 * handle connected/disconnected transitions and server timeouts.
 */
static int
cachefs_getsecattr(vnode_t *vp, vsecattr_t *vsec, int flag, cred_t *cr,
    caller_context_t *ct)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int held = 0, connected = 0;
	int error = 0;

#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: ENTER vp %p\n", (void *)vp);
#endif

	/* cachefs is only usable from the global zone */
	if (getzoneid() != GLOBAL_ZONEID) {
		error = EPERM;
		goto out;
	}

	/*
	 * Cachefs only provides pass-through support for NFSv4,
	 * and all vnode operations are passed through to the
	 * back file system. For NFSv4 pass-through to work, only
	 * connected operation is supported, the cnode backvp must
	 * exist, and cachefs optional (eg., disconnectable) flags
	 * are turned off. Assert these conditions to ensure that
	 * the backfilesystem is called for the getsecattr operation.
	 */
	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);

	/* ACLs disabled: synthesize one from the mode bits and return */
	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = fs_fab_acl(vp, vsec, flag, cr, ct);
		goto out;
	}

	/*
	 * Retry loop: each pass acquires cd access in the current mode
	 * (connected or disconnected), attempts the operation, and on
	 * timeout switches modes and tries again.
	 */
	for (;;) {
		if (held) {
			/* Won't loop with NFSv4 connected behavior */
			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
			cachefs_cd_release(fscp);
			held = 0;
		}
		error = cachefs_cd_access(fscp, connected, 0);
		if (error)
			break;
		held = 1;

		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
			error = cachefs_getsecattr_connected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/* server timed out; drop to disconnected */
				cachefs_cd_release(fscp);
				held = 0;
				cachefs_cd_timedout(fscp);
				connected = 0;
				continue;
			}
		} else {
			error = cachefs_getsecattr_disconnected(vp, vsec, flag,
			    cr);
			if (CFS_TIMEOUT(fscp, error)) {
				/*
				 * Not in the cache.  If allowed to go to
				 * the back fs for a miss, try directly;
				 * otherwise retry in connected mode.
				 */
				if (cachefs_cd_access_miss(fscp)) {
					error = cachefs_getsecattr_connected(vp,
					    vsec, flag, cr);
					if (!CFS_TIMEOUT(fscp, error))
						break;
					delay(5*hz);
					connected = 0;
					continue;
				}
				connected = 1;
				continue;
			}
		}
		break;
	}

out:
	if (held)
		cachefs_cd_release(fscp);

#ifdef CFS_CD_DEBUG
	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
#endif
#ifdef CFSDEBUG
	CFS_DEBUG(CFSDEBUG_VOPS)
		printf("cachefs_getsecattr: EXIT error = %d\n", error);
#endif
	return (error);
}
9029
9030static int
9031cachefs_shrlock(vnode_t *vp, int cmd, struct shrlock *shr, int flag, cred_t *cr,
9032    caller_context_t *ct)
9033{
9034	cnode_t *cp = VTOC(vp);
9035	fscache_t *fscp = C_TO_FSCACHE(cp);
9036	int error = 0;
9037	vnode_t *backvp;
9038
9039#ifdef CFSDEBUG
9040	CFS_DEBUG(CFSDEBUG_VOPS)
9041		printf("cachefs_shrlock: ENTER vp %p\n", (void *)vp);
9042#endif
9043
9044	if (getzoneid() != GLOBAL_ZONEID) {
9045		error = EPERM;
9046		goto out;
9047	}
9048
9049	/*
9050	 * Cachefs only provides pass-through support for NFSv4,
9051	 * and all vnode operations are passed through to the
9052	 * back file system. For NFSv4 pass-through to work, only
9053	 * connected operation is supported, the cnode backvp must
9054	 * exist, and cachefs optional (eg., disconnectable) flags
9055	 * are turned off. Assert these conditions to ensure that
9056	 * the backfilesystem is called for the shrlock operation.
9057	 */
9058	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
9059	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp);
9060
9061	mutex_enter(&cp->c_statelock);
9062	if (cp->c_backvp == NULL)
9063		error = cachefs_getbackvp(fscp, cp);
9064	backvp = cp->c_backvp;
9065	mutex_exit(&cp->c_statelock);
9066	ASSERT((error != 0) || (backvp != NULL));
9067
9068	if (error == 0) {
9069		CFS_DPRINT_BACKFS_NFSV4(fscp,
9070		    ("cachefs_shrlock (nfsv4): cp %p, backvp %p",
9071		    cp, backvp));
9072		error = VOP_SHRLOCK(backvp, cmd, shr, flag, cr, ct);
9073	}
9074
9075out:
9076#ifdef CFSDEBUG
9077	CFS_DEBUG(CFSDEBUG_VOPS)
9078		printf("cachefs_shrlock: EXIT error = %d\n", error);
9079#endif
9080	return (error);
9081}
9082
/*
 * Connected-mode worker for cachefs_getsecattr().  Prefers the cached
 * copy of the ACL; on a cache miss the ACL is fetched from the back
 * file system and, when permitted, cached for next time.
 * Takes cp->c_statelock for the duration.
 */
static int
cachefs_getsecattr_connected(vnode_t *vp, vsecattr_t *vsec, int flag,
    cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int hit = 0;
	int error = 0;


	mutex_enter(&cp->c_statelock);
	/* verify the cached object is still consistent with the back fs */
	error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
	if (error)
		goto out;

	/* read from the cache if we can */
	if ((cp->c_metadata.md_flags & MD_ACL) &&
	    ((cp->c_flags & CN_NOCACHE) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		ASSERT((cp->c_flags & CN_NOCACHE) == 0);
		error = cachefs_getaclfromcache(cp, vsec);
		if (error) {
			/* cached copy unusable; fall through to back fs */
			cachefs_nocache(cp);
			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
			error = 0;
		} else {
			hit = 1;
			goto out;
		}
	}

	/* cache miss -- get the ACL from the back file system */
	ASSERT(error == 0);
	if (cp->c_backvp == NULL)
		error = cachefs_getbackvp(fscp, cp);
	if (error)
		goto out;

	CFS_DPRINT_BACKFS_NFSV4(fscp,
	    ("cachefs_getsecattr (nfsv4): cp %p, backvp %p",
	    cp, cp->c_backvp));
	error = VOP_GETSECATTR(cp->c_backvp, vsec, flag, cr, NULL);
	if (error)
		goto out;

	/* cache the ACL when ACLs are enabled and caching is allowed */
	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
	    (cachefs_vtype_aclok(vp)) &&
	    ((cp->c_flags & CN_NOCACHE) == 0) &&
	    !CFS_ISFS_BACKFS_NFSV4(fscp)) {
		error = cachefs_cacheacl(cp, vsec);
		if (error) {
			/* failing to cache is not fatal to the getsecattr */
			error = 0;
			cachefs_nocache(cp);
		}
	}

out:
	if (error == 0) {
		if (hit)
			fscp->fs_stats.st_hits++;
		else
			fscp->fs_stats.st_misses++;
	}
	mutex_exit(&cp->c_statelock);

	return (error);
}
9149
9150static int
9151/*ARGSUSED*/
9152cachefs_getsecattr_disconnected(vnode_t *vp, vsecattr_t *vsec, int flag,
9153    cred_t *cr)
9154{
9155	cnode_t *cp = VTOC(vp);
9156	fscache_t *fscp = C_TO_FSCACHE(cp);
9157	int hit = 0;
9158	int error = 0;
9159
9160
9161	mutex_enter(&cp->c_statelock);
9162
9163	/* read from the cache if we can */
9164	if (((cp->c_flags & CN_NOCACHE) == 0) &&
9165	    (cp->c_metadata.md_flags & MD_ACL)) {
9166		error = cachefs_getaclfromcache(cp, vsec);
9167		if (error) {
9168			cachefs_nocache(cp);
9169			ASSERT((cp->c_metadata.md_flags & MD_ACL) == 0);
9170			error = 0;
9171		} else {
9172			hit = 1;
9173			goto out;
9174		}
9175	}
9176	error = ETIMEDOUT;
9177
9178out:
9179	if (error == 0) {
9180		if (hit)
9181			fscp->fs_stats.st_hits++;
9182		else
9183			fscp->fs_stats.st_misses++;
9184	}
9185	mutex_exit(&cp->c_statelock);
9186
9187	return (error);
9188}
9189
9190/*
9191 * cachefs_cacheacl() -- cache an ACL, which we do by applying it to
9192 * the frontfile if possible; otherwise, the adjunct directory.
9193 *
9194 * inputs:
9195 * cp - the cnode, with its statelock already held
9196 * vsecp - a pointer to a vsecattr_t you'd like us to cache as-is,
9197 *  or NULL if you want us to do the VOP_GETSECATTR(backvp).
9198 *
9199 * returns:
9200 * 0 - all is well
9201 * nonzero - errno
9202 */
9203
int
cachefs_cacheacl(cnode_t *cp, vsecattr_t *vsecp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	vsecattr_t vsec;
	aclent_t *aclp;
	int gotvsec = 0;
	int error = 0;
	vnode_t *vp = NULL;
	void *aclkeep = NULL;
	int i;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT((cp->c_flags & CN_NOCACHE) == 0);
	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
	ASSERT(cachefs_vtype_aclok(CTOV(cp)));

	if (fscp->fs_info.fi_mntflags & CFS_NOACL) {
		error = ENOSYS;
		goto out;
	}

	/*
	 * With no ACL supplied, fetch one from the back file system
	 * into the local vsec (gotvsec marks that we own that memory).
	 * Otherwise work on a private copy of the caller's entries so
	 * the sanitizing below does not modify the caller's buffer
	 * (aclkeep remembers the original so it can be restored).
	 */
	if (vsecp == NULL) {
		if (cp->c_backvp == NULL)
			error = cachefs_getbackvp(fscp, cp);
		if (error != 0)
			goto out;
		vsecp = &vsec;
		bzero(&vsec, sizeof (vsec));
		vsecp->vsa_mask =
		    VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
		error = VOP_GETSECATTR(cp->c_backvp, vsecp, 0, kcred, NULL);
		if (error != 0) {
			goto out;
		}
		gotvsec = 1;
	} else if (vsecp->vsa_mask & VSA_ACL) {
		aclkeep = vsecp->vsa_aclentp;
		vsecp->vsa_aclentp = cachefs_kmem_alloc(vsecp->vsa_aclcnt *
		    sizeof (aclent_t), KM_SLEEP);
		bcopy(aclkeep, vsecp->vsa_aclentp, vsecp->vsa_aclcnt *
		    sizeof (aclent_t));
	} else if ((vsecp->vsa_mask & (VSA_ACL | VSA_DFACL)) == 0) {
		/* unless there's real data, we can cache nothing. */
		return (0);
	}

	/*
	 * prevent the ACL from chmoding our frontfile, and
	 * snarf the class info
	 */

	if ((vsecp->vsa_mask & (VSA_ACL | VSA_ACLCNT)) ==
	    (VSA_ACL | VSA_ACLCNT)) {
		for (i = 0; i < vsecp->vsa_aclcnt; i++) {
			aclp = ((aclent_t *)vsecp->vsa_aclentp) + i;
			switch (aclp->a_type) {
			case CLASS_OBJ:
				cp->c_metadata.md_aclclass =
				    aclp->a_perm;
				/*FALLTHROUGH*/
			case USER_OBJ:
			case GROUP_OBJ:
			case OTHER_OBJ:
				aclp->a_perm = 06;
			}
		}
	}

	/*
	 * if the frontfile exists, then we always do the work.  but,
	 * if there's no frontfile, and the ACL isn't a `real' ACL,
	 * then we don't want to do the work.  otherwise, an `ls -l'
	 * will create tons of emtpy frontfiles.
	 */

	if (((cp->c_metadata.md_flags & MD_FILE) == 0) &&
	    ((vsecp->vsa_aclcnt + vsecp->vsa_dfaclcnt)
	    <= MIN_ACL_ENTRIES)) {
		cp->c_metadata.md_flags |= MD_ACL;
		cp->c_flags |= CN_UPDATED;
		goto out;
	}

	/*
	 * if we have a default ACL, then we need a
	 * real live directory in the frontfs that we
	 * can apply the ACL to.  if not, then we just
	 * use the frontfile.  we get the frontfile
	 * regardless -- that way, we know the
	 * directory for the frontfile exists.
	 */

	if (vsecp->vsa_dfaclcnt > 0) {
		if (cp->c_acldirvp == NULL)
			error = cachefs_getacldirvp(cp);
		if (error != 0)
			goto out;
		vp = cp->c_acldirvp;
	} else {
		if (cp->c_frontvp == NULL)
			error = cachefs_getfrontfile(cp);
		if (error != 0)
			goto out;
		vp = cp->c_frontvp;
	}
	ASSERT(vp != NULL);

	/* store the (sanitized) ACL on the frontfile or adjunct dir */
	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
	error = VOP_SETSECATTR(vp, vsecp, 0, kcred, NULL);
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	if (error != 0) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_VOPS)
			printf("cachefs_cacheacl: setsecattr: error %d\n",
			    error);
#endif /* CFSDEBUG */
		/*
		 * If there was an error, we don't want to call
		 * cachefs_nocache(); so, set error to 0.
		 * We will call cachefs_purgeacl(), in order to
		 * clean such things as adjunct ACL directories.
		 */
		cachefs_purgeacl(cp);
		error = 0;
		goto out;
	}
	if (vp == cp->c_frontvp)
		cp->c_flags |= CN_NEED_FRONT_SYNC;

	cp->c_metadata.md_flags |= MD_ACL;
	cp->c_flags |= CN_UPDATED;

out:
	if ((error) && (fscp->fs_cdconnected == CFS_CD_CONNECTED))
		cachefs_nocache(cp);

	/* release whichever copy of the ACL entries we own */
	if (gotvsec) {
		if (vsec.vsa_aclcnt)
			kmem_free(vsec.vsa_aclentp,
			    vsec.vsa_aclcnt * sizeof (aclent_t));
		if (vsec.vsa_dfaclcnt)
			kmem_free(vsec.vsa_dfaclentp,
			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
	} else if (aclkeep != NULL) {
		cachefs_kmem_free(vsecp->vsa_aclentp,
		    vsecp->vsa_aclcnt * sizeof (aclent_t));
		vsecp->vsa_aclentp = aclkeep;
	}

	return (error);
}
9357
9358void
9359cachefs_purgeacl(cnode_t *cp)
9360{
9361	ASSERT(MUTEX_HELD(&cp->c_statelock));
9362
9363	ASSERT(!CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)));
9364
9365	if (cp->c_acldirvp != NULL) {
9366		VN_RELE(cp->c_acldirvp);
9367		cp->c_acldirvp = NULL;
9368	}
9369
9370	if (cp->c_metadata.md_flags & MD_ACLDIR) {
9371		char name[CFS_FRONTFILE_NAME_SIZE + 2];
9372
9373		ASSERT(cp->c_filegrp->fg_dirvp != NULL);
9374		make_ascii_name(&cp->c_id, name);
9375		(void) strcat(name, ".d");
9376
9377		(void) VOP_RMDIR(cp->c_filegrp->fg_dirvp, name,
9378		    cp->c_filegrp->fg_dirvp, kcred, NULL, 0);
9379	}
9380
9381	cp->c_metadata.md_flags &= ~(MD_ACL | MD_ACLDIR);
9382	cp->c_flags |= CN_UPDATED;
9383}
9384
/*
 * Look up -- creating it if necessary -- the adjunct ACL directory
 * ("<frontfile-name>.d") used to hold default ACLs, and hang it off
 * cp->c_acldirvp.  Caller must hold cp->c_statelock.
 */
static int
cachefs_getacldirvp(cnode_t *cp)
{
	char name[CFS_FRONTFILE_NAME_SIZE + 2];
	int error = 0;

	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_acldirvp == NULL);

	/* ensure the frontfile (and thus its parent directory) exists */
	if (cp->c_frontvp == NULL)
		error = cachefs_getfrontfile(cp);
	if (error != 0)
		goto out;

	ASSERT(cp->c_filegrp->fg_dirvp != NULL);
	make_ascii_name(&cp->c_id, name);
	(void) strcat(name, ".d");
	error = VOP_LOOKUP(cp->c_filegrp->fg_dirvp,
	    name, &cp->c_acldirvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if ((error != 0) && (error != ENOENT))
		goto out;

	/* not there yet (ENOENT) -- create it */
	if (error != 0) {
		vattr_t va;

		va.va_mode = S_IFDIR | 0777;
		va.va_uid = 0;
		va.va_gid = 0;
		va.va_type = VDIR;
		va.va_mask = AT_TYPE | AT_MODE |
		    AT_UID | AT_GID;
		error =
		    VOP_MKDIR(cp->c_filegrp->fg_dirvp,
		    name, &va, &cp->c_acldirvp, kcred, NULL, 0, NULL);
		if (error != 0)
			goto out;
	}

	ASSERT(cp->c_acldirvp != NULL);
	cp->c_metadata.md_flags |= MD_ACLDIR;
	cp->c_flags |= CN_UPDATED;

out:
	if (error != 0)
		cp->c_acldirvp = NULL;
	return (error);
}
9432
/*
 * Read a previously cached ACL back out of the front file system.
 * The ACL lives either on the adjunct ACL directory (MD_ACLDIR) or
 * on the frontfile itself (MD_FILE); for trivial ACLs cached without
 * a frontfile, a minimal ACL is forged here from cached attributes.
 * Caller must hold cp->c_statelock and MD_ACL must be set.
 */
static int
cachefs_getaclfromcache(cnode_t *cp, vsecattr_t *vsec)
{
	aclent_t *aclp;
	int error = 0;
	vnode_t *vp = NULL;
	int i;

	ASSERT(cp->c_metadata.md_flags & MD_ACL);
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(vsec->vsa_aclentp == NULL);

	if (cp->c_metadata.md_flags & MD_ACLDIR) {
		if (cp->c_acldirvp == NULL)
			error = cachefs_getacldirvp(cp);
		if (error != 0)
			goto out;
		vp = cp->c_acldirvp;
	} else if (cp->c_metadata.md_flags & MD_FILE) {
		if (cp->c_frontvp == NULL)
			error = cachefs_getfrontfile(cp);
		if (error != 0)
			goto out;
		vp = cp->c_frontvp;
	} else {

		/*
		 * if we get here, then we know that MD_ACL is on,
		 * meaning an ACL was successfully cached.  we also
		 * know that neither MD_ACLDIR nor MD_FILE are on, so
		 * this has to be an entry without a `real' ACL.
		 * thus, we forge whatever is necessary.
		 */

		if (vsec->vsa_mask & VSA_ACLCNT)
			vsec->vsa_aclcnt = MIN_ACL_ENTRIES;

		if (vsec->vsa_mask & VSA_ACL) {
			vsec->vsa_aclentp =
			    kmem_zalloc(MIN_ACL_ENTRIES *
			    sizeof (aclent_t), KM_SLEEP);
			aclp = (aclent_t *)vsec->vsa_aclentp;
			aclp->a_type = USER_OBJ;
			++aclp;
			aclp->a_type = GROUP_OBJ;
			++aclp;
			aclp->a_type = OTHER_OBJ;
			++aclp;
			aclp->a_type = CLASS_OBJ;
			ksort((caddr_t)vsec->vsa_aclentp, MIN_ACL_ENTRIES,
			    sizeof (aclent_t), cmp2acls);
		}

		ASSERT(vp == NULL);
	}

	/* read the stored ACL off the frontfile / adjunct directory */
	if (vp != NULL) {
		if ((error = VOP_GETSECATTR(vp, vsec, 0, kcred, NULL)) != 0) {
#ifdef CFSDEBUG
			CFS_DEBUG(CFSDEBUG_VOPS)
				printf("cachefs_getaclfromcache: error %d\n",
				    error);
#endif /* CFSDEBUG */
			goto out;
		}
	}

	/*
	 * The front file's owner/group/mode are not the real ones;
	 * patch the base entries up from the cached attributes.
	 */
	if (vsec->vsa_aclentp != NULL) {
		for (i = 0; i < vsec->vsa_aclcnt; i++) {
			aclp = ((aclent_t *)vsec->vsa_aclentp) + i;
			switch (aclp->a_type) {
			case USER_OBJ:
				aclp->a_id = cp->c_metadata.md_vattr.va_uid;
				aclp->a_perm =
				    cp->c_metadata.md_vattr.va_mode & 0700;
				aclp->a_perm >>= 6;
				break;

			case GROUP_OBJ:
				aclp->a_id = cp->c_metadata.md_vattr.va_gid;
				aclp->a_perm =
				    cp->c_metadata.md_vattr.va_mode & 070;
				aclp->a_perm >>= 3;
				break;

			case OTHER_OBJ:
				aclp->a_perm =
				    cp->c_metadata.md_vattr.va_mode & 07;
				break;

			case CLASS_OBJ:
				aclp->a_perm =
				    cp->c_metadata.md_aclclass;
				break;
			}
		}
	}

out:

	if (error != 0)
		cachefs_nocache(cp);

	return (error);
}
9538
9539/*
9540 * Fills in targp with attribute information from srcp, cp
9541 * and if necessary the system.
9542 */
9543static void
9544cachefs_attr_setup(vattr_t *srcp, vattr_t *targp, cnode_t *cp, cred_t *cr)
9545{
9546	time_t	now;
9547
9548	ASSERT((srcp->va_mask & (AT_TYPE | AT_MODE)) == (AT_TYPE | AT_MODE));
9549
9550	/*
9551	 * Add code to fill in the va struct.  We use the fields from
9552	 * the srcp struct if they are populated, otherwise we guess
9553	 */
9554
9555	targp->va_mask = 0;	/* initialize all fields */
9556	targp->va_mode = srcp->va_mode;
9557	targp->va_type = srcp->va_type;
9558	targp->va_nlink = 1;
9559	targp->va_nodeid = 0;
9560
9561	if (srcp->va_mask & AT_UID)
9562		targp->va_uid = srcp->va_uid;
9563	else
9564		targp->va_uid = crgetuid(cr);
9565
9566	if (srcp->va_mask & AT_GID)
9567		targp->va_gid = srcp->va_gid;
9568	else
9569		targp->va_gid = crgetgid(cr);
9570
9571	if (srcp->va_mask & AT_FSID)
9572		targp->va_fsid = srcp->va_fsid;
9573	else
9574		targp->va_fsid = 0;	/* initialize all fields */
9575
9576	now = gethrestime_sec();
9577	if (srcp->va_mask & AT_ATIME)
9578		targp->va_atime = srcp->va_atime;
9579	else
9580		targp->va_atime.tv_sec = now;
9581
9582	if (srcp->va_mask & AT_MTIME)
9583		targp->va_mtime = srcp->va_mtime;
9584	else
9585		targp->va_mtime.tv_sec = now;
9586
9587	if (srcp->va_mask & AT_CTIME)
9588		targp->va_ctime = srcp->va_ctime;
9589	else
9590		targp->va_ctime.tv_sec = now;
9591
9592
9593	if (srcp->va_mask & AT_SIZE)
9594		targp->va_size = srcp->va_size;
9595	else
9596		targp->va_size = 0;
9597
9598	/*
9599	 * the remaing fields are set by the fs and not changable.
9600	 * we populate these entries useing the parent directory
9601	 * values.  It's a small hack, but should work.
9602	 */
9603	targp->va_blksize = cp->c_metadata.md_vattr.va_blksize;
9604	targp->va_rdev = cp->c_metadata.md_vattr.va_rdev;
9605	targp->va_nblocks = cp->c_metadata.md_vattr.va_nblocks;
9606	targp->va_seq = 0; /* Never keep the sequence number */
9607}
9608
9609/*
9610 * set the gid for a newly created file.  The algorithm is as follows:
9611 *
9612 *	1) If the gid is set in the attribute list, then use it if
9613 *	   the caller is privileged, belongs to the target group, or
9614 *	   the group is the same as the parent directory.
9615 *
9616 *	2) If the parent directory's set-gid bit is clear, then use
9617 *	   the process gid
9618 *
9619 *	3) Otherwise, use the gid of the parent directory.
9620 *
9621 * Note: newcp->c_attr.va_{mode,type} must already be set before calling
9622 * this routine.
9623 */
9624static void
9625cachefs_creategid(cnode_t *dcp, cnode_t *newcp, vattr_t *vap, cred_t *cr)
9626{
9627	if ((vap->va_mask & AT_GID) &&
9628	    ((vap->va_gid == dcp->c_attr.va_gid) ||
9629	    groupmember(vap->va_gid, cr) ||
9630	    secpolicy_vnode_create_gid(cr) != 0)) {
9631		newcp->c_attr.va_gid = vap->va_gid;
9632	} else {
9633		if (dcp->c_attr.va_mode & S_ISGID)
9634			newcp->c_attr.va_gid = dcp->c_attr.va_gid;
9635		else
9636			newcp->c_attr.va_gid = crgetgid(cr);
9637	}
9638
9639	/*
9640	 * if we're creating a directory, and the parent directory has the
9641	 * set-GID bit set, set it on the new directory.
9642	 * Otherwise, if the user is neither privileged nor a member of the
9643	 * file's new group, clear the file's set-GID bit.
9644	 */
9645	if (dcp->c_attr.va_mode & S_ISGID && newcp->c_attr.va_type == VDIR) {
9646		newcp->c_attr.va_mode |= S_ISGID;
9647	} else if ((newcp->c_attr.va_mode & S_ISGID) &&
9648	    secpolicy_vnode_setids_setgids(cr, newcp->c_attr.va_gid) != 0)
9649		newcp->c_attr.va_mode &= ~S_ISGID;
9650}
9651
9652/*
9653 * create an acl for the newly created file.  should be called right
9654 * after cachefs_creategid.
9655 */
9656
9657static void
9658cachefs_createacl(cnode_t *dcp, cnode_t *newcp)
9659{
9660	fscache_t *fscp = C_TO_FSCACHE(dcp);
9661	vsecattr_t vsec;
9662	int gotvsec = 0;
9663	int error = 0; /* placeholder */
9664	aclent_t *aclp;
9665	o_mode_t *classp = NULL;
9666	o_mode_t gunion = 0;
9667	int i;
9668
9669	if ((fscp->fs_info.fi_mntflags & CFS_NOACL) ||
9670	    (! cachefs_vtype_aclok(CTOV(newcp))))
9671		return;
9672
9673	ASSERT(dcp->c_metadata.md_flags & MD_ACL);
9674	ASSERT(MUTEX_HELD(&dcp->c_statelock));
9675	ASSERT(MUTEX_HELD(&newcp->c_statelock));
9676
9677	/*
9678	 * XXX should probably not do VSA_ACL and VSA_ACLCNT, but that
9679	 * would hit code paths that isn't hit anywhere else.
9680	 */
9681
9682	bzero(&vsec, sizeof (vsec));
9683	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9684	error = cachefs_getaclfromcache(dcp, &vsec);
9685	if (error != 0)
9686		goto out;
9687	gotvsec = 1;
9688
9689	if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL)) {
9690		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9691			kmem_free(vsec.vsa_aclentp,
9692			    vsec.vsa_aclcnt * sizeof (aclent_t));
9693
9694		vsec.vsa_aclcnt = vsec.vsa_dfaclcnt;
9695		vsec.vsa_aclentp = vsec.vsa_dfaclentp;
9696		vsec.vsa_dfaclcnt = 0;
9697		vsec.vsa_dfaclentp = NULL;
9698
9699		if (newcp->c_attr.va_type == VDIR) {
9700			vsec.vsa_dfaclentp = kmem_alloc(vsec.vsa_aclcnt *
9701			    sizeof (aclent_t), KM_SLEEP);
9702			vsec.vsa_dfaclcnt = vsec.vsa_aclcnt;
9703			bcopy(vsec.vsa_aclentp, vsec.vsa_dfaclentp,
9704			    vsec.vsa_aclcnt * sizeof (aclent_t));
9705		}
9706
9707		/*
9708		 * this function should be called pretty much after
9709		 * the rest of the file creation stuff is done.  so,
9710		 * uid, gid, etc. should be `right'.  we'll go with
9711		 * that, rather than trying to determine whether to
9712		 * get stuff from cr or va.
9713		 */
9714
9715		for (i = 0; i < vsec.vsa_aclcnt; i++) {
9716			aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9717			switch (aclp->a_type) {
9718			case DEF_USER_OBJ:
9719				aclp->a_type = USER_OBJ;
9720				aclp->a_id = newcp->c_metadata.md_vattr.va_uid;
9721				aclp->a_perm =
9722				    newcp->c_metadata.md_vattr.va_mode;
9723				aclp->a_perm &= 0700;
9724				aclp->a_perm >>= 6;
9725				break;
9726
9727			case DEF_GROUP_OBJ:
9728				aclp->a_type = GROUP_OBJ;
9729				aclp->a_id = newcp->c_metadata.md_vattr.va_gid;
9730				aclp->a_perm =
9731				    newcp->c_metadata.md_vattr.va_mode;
9732				aclp->a_perm &= 070;
9733				aclp->a_perm >>= 3;
9734				gunion |= aclp->a_perm;
9735				break;
9736
9737			case DEF_OTHER_OBJ:
9738				aclp->a_type = OTHER_OBJ;
9739				aclp->a_perm =
9740				    newcp->c_metadata.md_vattr.va_mode & 07;
9741				break;
9742
9743			case DEF_CLASS_OBJ:
9744				aclp->a_type = CLASS_OBJ;
9745				classp = &(aclp->a_perm);
9746				break;
9747
9748			case DEF_USER:
9749				aclp->a_type = USER;
9750				gunion |= aclp->a_perm;
9751				break;
9752
9753			case DEF_GROUP:
9754				aclp->a_type = GROUP;
9755				gunion |= aclp->a_perm;
9756				break;
9757			}
9758		}
9759
9760		/* XXX is this the POSIX thing to do? */
9761		if (classp != NULL)
9762			*classp &= gunion;
9763
9764		/*
9765		 * we don't need to log this; rather, we clear the
9766		 * MD_ACL bit when we reconnect.
9767		 */
9768
9769		error = cachefs_cacheacl(newcp, &vsec);
9770		if (error != 0)
9771			goto out;
9772	}
9773
9774	newcp->c_metadata.md_aclclass = 07; /* XXX check posix */
9775	newcp->c_metadata.md_flags |= MD_ACL;
9776	newcp->c_flags |= CN_UPDATED;
9777
9778out:
9779
9780	if (gotvsec) {
9781		if ((vsec.vsa_aclcnt > 0) && (vsec.vsa_aclentp != NULL))
9782			kmem_free(vsec.vsa_aclentp,
9783			    vsec.vsa_aclcnt * sizeof (aclent_t));
9784		if ((vsec.vsa_dfaclcnt > 0) && (vsec.vsa_dfaclentp != NULL))
9785			kmem_free(vsec.vsa_dfaclentp,
9786			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
9787	}
9788}
9789
9790/*
9791 * this is translated from the UFS code for access checking.
9792 */
9793
9794static int
9795cachefs_access_local(void *vcp, int mode, cred_t *cr)
9796{
9797	cnode_t *cp = vcp;
9798	fscache_t *fscp = C_TO_FSCACHE(cp);
9799	int shift = 0;
9800
9801	ASSERT(MUTEX_HELD(&cp->c_statelock));
9802
9803	if (mode & VWRITE) {
9804		/*
9805		 * Disallow write attempts on read-only
9806		 * file systems, unless the file is special.
9807		 */
9808		struct vnode *vp = CTOV(cp);
9809		if (vn_is_readonly(vp)) {
9810			if (!IS_DEVVP(vp)) {
9811				return (EROFS);
9812			}
9813		}
9814	}
9815
9816	/*
9817	 * if we need to do ACLs, do it.  this works whether anyone
9818	 * has explicitly made an ACL or not.
9819	 */
9820
9821	if (((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0) &&
9822	    (cachefs_vtype_aclok(CTOV(cp))))
9823		return (cachefs_acl_access(cp, mode, cr));
9824
9825	if (crgetuid(cr) != cp->c_attr.va_uid) {
9826		shift += 3;
9827		if (!groupmember(cp->c_attr.va_gid, cr))
9828			shift += 3;
9829	}
9830
9831	return (secpolicy_vnode_access2(cr, CTOV(cp), cp->c_attr.va_uid,
9832	    cp->c_attr.va_mode << shift, mode));
9833}
9834
9835/*
9836 * This is transcribed from ufs_acl_access().  If that changes, then
9837 * this should, too.
9838 *
9839 * Check the cnode's ACL's to see if this mode of access is
9840 * allowed; return 0 if allowed, EACCES if not.
9841 *
9842 * We follow the procedure defined in Sec. 3.3.5, ACL Access
9843 * Check Algorithm, of the POSIX 1003.6 Draft Standard.
9844 */
9845
9846#define	ACL_MODE_CHECK(M, PERM, C, I) \
9847    secpolicy_vnode_access2(C, CTOV(I), owner, (PERM), (M))
9848
9849static int
9850cachefs_acl_access(struct cnode *cp, int mode, cred_t *cr)
9851{
9852	int error = 0;
9853
9854	fscache_t *fscp = C_TO_FSCACHE(cp);
9855
9856	int mask = ~0;
9857	int ismask = 0;
9858
9859	int gperm = 0;
9860	int ngroup = 0;
9861
9862	vsecattr_t vsec;
9863	int gotvsec = 0;
9864	aclent_t *aclp;
9865
9866	uid_t owner = cp->c_attr.va_uid;
9867
9868	int i;
9869
9870	ASSERT(MUTEX_HELD(&cp->c_statelock));
9871	ASSERT((fscp->fs_info.fi_mntflags & CFS_NOACL) == 0);
9872
9873	/*
9874	 * strictly speaking, we shouldn't set VSA_DFACL and DFACLCNT,
9875	 * but then i believe we'd be the only thing exercising those
9876	 * code paths -- probably a bad thing.
9877	 */
9878
9879	bzero(&vsec, sizeof (vsec));
9880	vsec.vsa_mask = VSA_ACL | VSA_ACLCNT | VSA_DFACL | VSA_DFACLCNT;
9881
9882	/* XXX KLUDGE! correct insidious 0-class problem */
9883	if (cp->c_metadata.md_aclclass == 0 &&
9884	    fscp->fs_cdconnected == CFS_CD_CONNECTED)
9885		cachefs_purgeacl(cp);
9886again:
9887	if (cp->c_metadata.md_flags & MD_ACL) {
9888		error = cachefs_getaclfromcache(cp, &vsec);
9889		if (error != 0) {
9890#ifdef CFSDEBUG
9891			if (error != ETIMEDOUT)
9892				CFS_DEBUG(CFSDEBUG_VOPS)
9893					printf("cachefs_acl_access():"
9894					    "error %d from getaclfromcache()\n",
9895					    error);
9896#endif /* CFSDEBUG */
9897			if ((cp->c_metadata.md_flags & MD_ACL) == 0) {
9898				goto again;
9899			} else {
9900				goto out;
9901			}
9902		}
9903	} else {
9904		if (cp->c_backvp == NULL) {
9905			if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
9906				error = cachefs_getbackvp(fscp, cp);
9907			else
9908				error = ETIMEDOUT;
9909		}
9910		if (error == 0)
9911			error = VOP_GETSECATTR(cp->c_backvp, &vsec, 0, cr,
9912			    NULL);
9913		if (error != 0) {
9914#ifdef CFSDEBUG
9915			CFS_DEBUG(CFSDEBUG_VOPS)
9916				printf("cachefs_acl_access():"
9917				    "error %d from getsecattr(backvp)\n",
9918				    error);
9919#endif /* CFSDEBUG */
9920			goto out;
9921		}
9922		if ((cp->c_flags & CN_NOCACHE) == 0 &&
9923		    !CFS_ISFS_BACKFS_NFSV4(fscp))
9924			(void) cachefs_cacheacl(cp, &vsec);
9925	}
9926	gotvsec = 1;
9927
9928	ASSERT(error == 0);
9929	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9930		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9931		switch (aclp->a_type) {
9932		case USER_OBJ:
9933			/*
9934			 * this might look cleaner in the 2nd loop
9935			 * below, but we do it here as an
9936			 * optimization.
9937			 */
9938
9939			owner = aclp->a_id;
9940			if (crgetuid(cr) == owner) {
9941				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9942				    cr, cp);
9943				goto out;
9944			}
9945			break;
9946
9947		case CLASS_OBJ:
9948			mask = aclp->a_perm;
9949			ismask = 1;
9950			break;
9951		}
9952	}
9953
9954	ASSERT(error == 0);
9955	for (i = 0; i < vsec.vsa_aclcnt; i++) {
9956		aclp = ((aclent_t *)vsec.vsa_aclentp) + i;
9957		switch (aclp->a_type) {
9958		case USER:
9959			if (crgetuid(cr) == aclp->a_id) {
9960				error = ACL_MODE_CHECK(mode,
9961				    (aclp->a_perm & mask) << 6, cr, cp);
9962				goto out;
9963			}
9964			break;
9965
9966		case GROUP_OBJ:
9967			if (groupmember(aclp->a_id, cr)) {
9968				++ngroup;
9969				gperm |= aclp->a_perm;
9970				if (! ismask) {
9971					error = ACL_MODE_CHECK(mode,
9972					    aclp->a_perm << 6,
9973					    cr, cp);
9974					goto out;
9975				}
9976			}
9977			break;
9978
9979		case GROUP:
9980			if (groupmember(aclp->a_id, cr)) {
9981				++ngroup;
9982				gperm |= aclp->a_perm;
9983			}
9984			break;
9985
9986		case OTHER_OBJ:
9987			if (ngroup == 0) {
9988				error = ACL_MODE_CHECK(mode, aclp->a_perm << 6,
9989				    cr, cp);
9990				goto out;
9991			}
9992			break;
9993
9994		default:
9995			break;
9996		}
9997	}
9998
9999	ASSERT(ngroup > 0);
10000	error = ACL_MODE_CHECK(mode, (gperm & mask) << 6, cr, cp);
10001
10002out:
10003	if (gotvsec) {
10004		if (vsec.vsa_aclcnt && vsec.vsa_aclentp)
10005			kmem_free(vsec.vsa_aclentp,
10006			    vsec.vsa_aclcnt * sizeof (aclent_t));
10007		if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp)
10008			kmem_free(vsec.vsa_dfaclentp,
10009			    vsec.vsa_dfaclcnt * sizeof (aclent_t));
10010	}
10011
10012	return (error);
10013}
10014
10015/*
10016 * see if permissions allow for removal of the given file from
10017 * the given directory.
10018 */
10019static int
10020cachefs_stickyrmchk(struct cnode *dcp, struct cnode *cp, cred_t *cr)
10021{
10022	uid_t uid;
10023	/*
10024	 * If the containing directory is sticky, the user must:
10025	 *  - own the directory, or
10026	 *  - own the file, or
10027	 *  - be able to write the file (if it's a plain file), or
10028	 *  - be sufficiently privileged.
10029	 */
10030	if ((dcp->c_attr.va_mode & S_ISVTX) &&
10031	    ((uid = crgetuid(cr)) != dcp->c_attr.va_uid) &&
10032	    (uid != cp->c_attr.va_uid) &&
10033	    (cp->c_attr.va_type != VREG ||
10034	    cachefs_access_local(cp, VWRITE, cr) != 0))
10035		return (secpolicy_vnode_remove(cr));
10036
10037	return (0);
10038}
10039
10040/*
10041 * Returns a new name, may even be unique.
10042 * Stolen from nfs code.
10043 * Since now we will use renaming to .cfs* in place of .nfs*
10044 * for CacheFS. Both NFS and CacheFS will rename opened files.
10045 */
10046static char cachefs_prefix[] = ".cfs";
10047kmutex_t cachefs_newnum_lock;
10048
10049static char *
10050cachefs_newname(void)
10051{
10052	static uint_t newnum = 0;
10053	char *news;
10054	char *s, *p;
10055	uint_t id;
10056
10057	mutex_enter(&cachefs_newnum_lock);
10058	if (newnum == 0) {
10059		newnum = gethrestime_sec() & 0xfffff;
10060		newnum |= 0x10000;
10061	}
10062	id = newnum++;
10063	mutex_exit(&cachefs_newnum_lock);
10064
10065	news = cachefs_kmem_alloc(MAXNAMELEN, KM_SLEEP);
10066	s = news;
10067	p = cachefs_prefix;
10068	while (*p != '\0')
10069		*s++ = *p++;
10070	while (id != 0) {
10071		*s++ = "0123456789ABCDEF"[id & 0x0f];
10072		id >>= 4;
10073	}
10074	*s = '\0';
10075	return (news);
10076}
10077
10078/*
10079 * Called to rename the specified file to a temporary file so
10080 * operations to the file after remove work.
10081 * Must call this routine with the dir c_rwlock held as a writer.
10082 */
static int
/*ARGSUSED*/
cachefs_remove_dolink(vnode_t *dvp, vnode_t *vp, char *nm, cred_t *cr)
{
	cnode_t *cp = VTOC(vp);
	char *tmpname;
	fscache_t *fscp = C_TO_FSCACHE(cp);
	int error;

	/* Caller must hold the directory's rwlock as writer. */
	ASSERT(RW_WRITE_HELD(&(VTOC(dvp)->c_rwlock)));

	/* get the new name for the file */
	tmpname = cachefs_newname();

	/*
	 * Link the file under the temporary ".cfs*" name, using the
	 * connected or disconnected path as appropriate.  On failure,
	 * free the name (allocated MAXNAMELEN bytes) and bail.
	 */
	/* do the link */
	if (fscp->fs_cdconnected == CFS_CD_CONNECTED)
		error = cachefs_link_connected(dvp, vp, tmpname, cr);
	else
		error = cachefs_link_disconnected(dvp, vp, tmpname, cr);
	if (error) {
		cachefs_kmem_free(tmpname, MAXNAMELEN);
		return (error);
	}

	/*
	 * Record the pending-unlink state on the cnode under its state
	 * lock.  If a previous pending unlink exists, release its
	 * directory hold, name buffer, and credential first.
	 */
	mutex_enter(&cp->c_statelock);
	if (cp->c_unldvp) {
		VN_RELE(cp->c_unldvp);
		cachefs_kmem_free(cp->c_unlname, MAXNAMELEN);
		crfree(cp->c_unlcred);
	}

	/* Take new holds: directory vnode and credential now owned by cp. */
	VN_HOLD(dvp);
	cp->c_unldvp = dvp;
	crhold(cr);
	cp->c_unlcred = cr;
	cp->c_unlname = tmpname;

	/* drop the backvp so NFS does not also do a rename */
	/*
	 * NOTE(review): the comment above mentions dropping the backvp,
	 * but no release of a back vnode is visible here — confirm
	 * whether that happens elsewhere or the comment is stale.
	 */
	mutex_exit(&cp->c_statelock);

	return (0);
}
10125
10126/*
10127 * Marks the cnode as modified.
10128 */
static void
cachefs_modified(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	struct vattr va;
	int error;

	/* Caller must hold the cnode state lock and have an rl slot. */
	ASSERT(MUTEX_HELD(&cp->c_statelock));
	ASSERT(cp->c_metadata.md_rlno);

	/* if not on the modify list */
	if (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) {
		/* put on modified list, also marks the file as modified */
		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_MODIFIED,
		    cp->c_metadata.md_rlno, cp->c_metadata.md_frontblks);
		cp->c_metadata.md_rltype = CACHEFS_RL_MODIFIED;
		/* Metadata changed; schedule a write-back of the cnode. */
		cp->c_flags |= CN_UPDATED;

		/* if a modified regular file that is not local */
		if (((cp->c_id.cid_flags & CFS_CID_LOCAL) == 0) &&
		    (cp->c_metadata.md_flags & MD_FILE) &&
		    (cp->c_attr.va_type == VREG)) {

			/* Fault in the front (cache) file if not present. */
			if (cp->c_frontvp == NULL)
				(void) cachefs_getfrontfile(cp);
			if (cp->c_frontvp) {
				/* identify file so fsck knows it is modified */
				va.va_mode = 0766;
				va.va_mask = AT_MODE;
				error = VOP_SETATTR(cp->c_frontvp,
				    &va, 0, kcred, NULL);
				if (error) {
					/* Non-fatal: fsck may miss the mark. */
					cmn_err(CE_WARN,
					    "Cannot change ff mode.\n");
				}
			}
		}
	}
}
10168
10169/*
10170 * Marks the cnode as modified.
10171 * Allocates a rl slot for the cnode if necessary.
10172 * Returns 0 for success, !0 if cannot get an rl slot.
10173 */
static int
cachefs_modified_alloc(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	filegrp_t *fgp = cp->c_filegrp;
	int error;
	rl_entry_t rl_ent;

	/* Caller must hold the cnode state lock. */
	ASSERT(MUTEX_HELD(&cp->c_statelock));

	/* get the rl slot if needed */
	if (cp->c_metadata.md_rlno == 0) {
		/* get a metadata slot if we do not have one yet */
		if (cp->c_flags & CN_ALLOC_PENDING) {
			/* Ensure the filegrp's attrcache exists first. */
			if (cp->c_filegrp->fg_flags & CFS_FG_ALLOC_ATTR) {
				(void) filegrp_allocattr(cp->c_filegrp);
			}
			error = filegrp_create_metadata(cp->c_filegrp,
			    &cp->c_metadata, &cp->c_id);
			if (error)
				return (error);
			cp->c_flags &= ~CN_ALLOC_PENDING;
		}

		/* get a free rl entry */
		rl_ent.rl_fileno = cp->c_id.cid_fileno;
		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
		rl_ent.rl_fsid = fscp->fs_cfsid;
		rl_ent.rl_attrc = 0;
		error = cachefs_rl_alloc(fscp->fs_cache, &rl_ent,
		    &cp->c_metadata.md_rlno);
		if (error)
			return (error);
		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;

		/* hold the filegrp so the attrcache file is not gc */
		error = filegrp_ffhold(fgp);
		if (error) {
			/* Roll back: return the rl slot to the free list. */
			cachefs_rlent_moveto(fscp->fs_cache,
			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
			cp->c_metadata.md_rlno = 0;
			return (error);
		}
	}
	/* Slot in hand; move the cnode onto the modified list. */
	cachefs_modified(cp);
	return (0);
}
10221
10222int
10223cachefs_vtype_aclok(vnode_t *vp)
10224{
10225	vtype_t *vtp, oktypes[] = {VREG, VDIR, VFIFO, VNON};
10226
10227	if (vp->v_type == VNON)
10228		return (0);
10229
10230	for (vtp = oktypes; *vtp != VNON; vtp++)
10231		if (vp->v_type == *vtp)
10232			break;
10233
10234	return (*vtp != VNON);
10235}
10236
10237static int
10238cachefs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
10239    caller_context_t *ct)
10240{
10241	int error = 0;
10242	fscache_t *fscp = C_TO_FSCACHE(VTOC(vp));
10243
10244	/* Assert cachefs compatibility if NFSv4 is in use */
10245	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp);
10246	CFS_BACKFS_NFSV4_ASSERT_CNODE(VTOC(vp));
10247
10248	if (cmd == _PC_FILESIZEBITS) {
10249		u_offset_t maxsize = fscp->fs_offmax;
10250		(*valp) = 0;
10251		while (maxsize != 0) {
10252			maxsize >>= 1;
10253			(*valp)++;
10254		}
10255		(*valp)++;
10256	} else
10257		error = fs_pathconf(vp, cmd, valp, cr, ct);
10258
10259	return (error);
10260}
10261