nandfs_vnops.c revision 235537
1/*-
2 * Copyright (c) 2010-2012 Semihalf
3 * Copyright (c) 2008, 2009 Reinoud Zandijk
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_vnops.c 235537 2012-05-17 10:11:18Z gber $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/lockf.h>
38#include <sys/malloc.h>
39#include <sys/mount.h>
40#include <sys/mutex.h>
41#include <sys/namei.h>
42#include <sys/sysctl.h>
43#include <sys/unistd.h>
44#include <sys/vnode.h>
45#include <sys/buf.h>
46#include <sys/bio.h>
47#include <sys/fcntl.h>
48#include <sys/dirent.h>
49#include <sys/stat.h>
50#include <sys/priv.h>
51
52#include <vm/vm.h>
53#include <vm/vm_extern.h>
54#include <vm/vm_object.h>
55#include <vm/vnode_pager.h>
56
57#include <machine/_inttypes.h>
58
59#include <fs/nandfs/nandfs_mount.h>
60#include <fs/nandfs/nandfs.h>
61#include <fs/nandfs/nandfs_subr.h>
62
63extern uma_zone_t nandfs_node_zone;
64static void nandfs_read_filebuf(struct nandfs_node *, struct buf *);
65static void nandfs_itimes_locked(struct vnode *);
66static int nandfs_truncate(struct vnode *, uint64_t);
67
68static vop_pathconf_t	nandfs_pathconf;
69
70#define UPDATE_CLOSE 0
71#define UPDATE_WAIT 0
72
73static int
74nandfs_inactive(struct vop_inactive_args *ap)
75{
76	struct vnode *vp = ap->a_vp;
77	struct nandfs_node *node = VTON(vp);
78	int error = 0;
79
80	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node));
81
82	if (node == NULL) {
83		DPRINTF(NODE, ("%s: inactive NULL node\n", __func__));
84		return (0);
85	}
86
87	if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) {
88		nandfs_truncate(vp, 0);
89		error = nandfs_node_destroy(node);
90		if (error)
91			nandfs_error("%s: destroy node: %p\n", __func__, node);
92		node->nn_flags = 0;
93		vrecycle(vp);
94	}
95
96	return (error);
97}
98
99static int
100nandfs_reclaim(struct vop_reclaim_args *ap)
101{
102	struct vnode *vp = ap->a_vp;
103	struct nandfs_node *nandfs_node = VTON(vp);
104	struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev;
105	uint64_t ino = nandfs_node->nn_ino;
106
107	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node));
108
109	/* Invalidate all entries to a particular vnode. */
110	cache_purge(vp);
111
112	/* Destroy the vm object and flush associated pages. */
113	vnode_destroy_vobject(vp);
114
115	/* Remove from vfs hash if not system vnode */
116	if (!NANDFS_SYS_NODE(nandfs_node->nn_ino))
117		vfs_hash_remove(vp);
118
119	/* Dispose all node knowledge */
120	nandfs_dispose_node(&nandfs_node);
121
122	if (!NANDFS_SYS_NODE(ino))
123		NANDFS_WRITEUNLOCK(fsdev);
124
125	return (0);
126}
127
128static int
129nandfs_read(struct vop_read_args *ap)
130{
131	register struct vnode *vp = ap->a_vp;
132	register struct nandfs_node *node = VTON(vp);
133	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
134	struct uio *uio = ap->a_uio;
135	struct buf *bp;
136	uint64_t size;
137	uint32_t blocksize;
138	off_t bytesinfile;
139	ssize_t toread, off;
140	daddr_t lbn;
141	ssize_t resid;
142	int error = 0;
143
144	if (uio->uio_resid == 0)
145		return (0);
146
147	size = node->nn_inode.i_size;
148	if (uio->uio_offset >= size)
149		return (0);
150
151	blocksize = nandfsdev->nd_blocksize;
152	bytesinfile = size - uio->uio_offset;
153
154	resid = omin(uio->uio_resid, bytesinfile);
155
156	while (resid) {
157		lbn = uio->uio_offset / blocksize;
158		off = uio->uio_offset & (blocksize - 1);
159
160		toread = omin(resid, blocksize - off);
161
162		DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n",
163		    (uintmax_t)lbn, toread, blocksize));
164
165		error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
166		if (error) {
167			brelse(bp);
168			break;
169		}
170
171		error = uiomove(bp->b_data + off, toread, uio);
172		if (error) {
173			brelse(bp);
174			break;
175		}
176
177		brelse(bp);
178		resid -= toread;
179	}
180
181	return (error);
182}
183
184static int
185nandfs_write(struct vop_write_args *ap)
186{
187	struct nandfs_device *fsdev;
188	struct nandfs_node *node;
189	struct vnode *vp;
190	struct uio *uio;
191	struct buf *bp;
192	uint64_t file_size, vblk;
193	uint32_t blocksize;
194	ssize_t towrite, off;
195	daddr_t lbn;
196	ssize_t resid;
197	int error, ioflag, modified;
198
199	vp = ap->a_vp;
200	uio = ap->a_uio;
201	ioflag = ap->a_ioflag;
202	node = VTON(vp);
203	fsdev = node->nn_nandfsdev;
204
205	if (nandfs_fs_full(fsdev))
206		return (ENOSPC);
207
208	DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n",
209	    uio->uio_resid, (uintmax_t)uio->uio_offset));
210
211	if (uio->uio_offset < 0)
212		return (EINVAL);
213	if (uio->uio_resid == 0)
214		return (0);
215
216	blocksize = fsdev->nd_blocksize;
217	file_size = node->nn_inode.i_size;
218
219	switch (vp->v_type) {
220	case VREG:
221		if (ioflag & IO_APPEND)
222			uio->uio_offset = file_size;
223		break;
224	case VDIR:
225		return (EISDIR);
226	case VLNK:
227		break;
228	default:
229		panic("%s: bad file type vp: %p", __func__, vp);
230	}
231
232	/* If explicitly asked to append, uio_offset can be wrong? */
233	if (ioflag & IO_APPEND)
234		uio->uio_offset = file_size;
235
236	resid = uio->uio_resid;
237	modified = error = 0;
238
239	while (uio->uio_resid) {
240		lbn = uio->uio_offset / blocksize;
241		off = uio->uio_offset & (blocksize - 1);
242
243		towrite = omin(uio->uio_resid, blocksize - off);
244
245		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n",
246		    __func__, (uintmax_t)lbn, towrite, blocksize));
247
248		error = nandfs_bmap_lookup(node, lbn, &vblk);
249		if (error)
250			break;
251
252		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) "
253		    "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize,
254		    vblk));
255
256		if (vblk != 0)
257			error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
258		else
259			error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp);
260
261		DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__,
262		    vp, bp, (uintmax_t)lbn));
263		if (error) {
264			if (bp)
265				brelse(bp);
266			break;
267		}
268
269		error = uiomove((char *)bp->b_data + off, (int)towrite, uio);
270		if (error)
271			break;
272
273		error = nandfs_dirty_buf(bp, 0);
274		if (error)
275			break;
276
277		modified++;
278	}
279
280	/* XXX proper handling when only part of file was properly written */
281	if (modified) {
282		if (resid > uio->uio_resid && ap->a_cred &&
283		    ap->a_cred->cr_uid != 0)
284			node->nn_inode.i_mode &= ~(ISUID | ISGID);
285
286		if (file_size < uio->uio_offset + uio->uio_resid) {
287			node->nn_inode.i_size = uio->uio_offset +
288			    uio->uio_resid;
289			node->nn_flags |= IN_CHANGE | IN_UPDATE;
290			vnode_pager_setsize(vp, uio->uio_offset +
291			    uio->uio_resid);
292			nandfs_itimes(vp);
293		}
294	}
295
296	DPRINTF(WRITE, ("%s: return:%d\n", __func__, error));
297
298	return (error);
299}
300
301static int
302nandfs_lookup(struct vop_cachedlookup_args *ap)
303{
304	struct vnode *dvp, **vpp;
305	struct componentname *cnp;
306	struct ucred *cred;
307	struct thread *td;
308	struct nandfs_node *dir_node, *node;
309	struct nandfsmount *nmp;
310	uint64_t ino, off;
311	const char *name;
312	int namelen, nameiop, islastcn, mounted_ro;
313	int error, found;
314
315	DPRINTF(VNCALL, ("%s\n", __func__));
316
317	dvp = ap->a_dvp;
318	vpp = ap->a_vpp;
319	*vpp = NULL;
320
321	cnp = ap->a_cnp;
322	cred = cnp->cn_cred;
323	td = cnp->cn_thread;
324
325	dir_node = VTON(dvp);
326	nmp = dir_node->nn_nmp;
327
328	/* Simplify/clarification flags */
329	nameiop = cnp->cn_nameiop;
330	islastcn = cnp->cn_flags & ISLASTCN;
331	mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY;
332
333	/*
334	 * If requesting a modify on the last path element on a read-only
335	 * filingsystem, reject lookup;
336	 */
337	if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME))
338		return (EROFS);
339
340	if (dir_node->nn_inode.i_links_count == 0)
341		return (ENOENT);
342
343	/*
344	 * Obviously, the file is not (anymore) in the namecache, we have to
345	 * search for it. There are three basic cases: '.', '..' and others.
346	 *
347	 * Following the guidelines of VOP_LOOKUP manpage and tmpfs.
348	 */
349	error = 0;
350	if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
351		DPRINTF(LOOKUP, ("\tlookup '.'\n"));
352		/* Special case 1 '.' */
353		VREF(dvp);
354		*vpp = dvp;
355		/* Done */
356	} else if (cnp->cn_flags & ISDOTDOT) {
357		/* Special case 2 '..' */
358		DPRINTF(LOOKUP, ("\tlookup '..'\n"));
359
360		/* Get our node */
361		name = "..";
362		namelen = 2;
363		error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino,
364		    &found, &off);
365		if (error)
366			goto out;
367		if (!found)
368			error = ENOENT;
369
370		/* First unlock parent */
371		VOP_UNLOCK(dvp, 0);
372
373		if (error == 0) {
374			DPRINTF(LOOKUP, ("\tfound '..'\n"));
375			/* Try to create/reuse the node */
376			error = nandfs_get_node(nmp, ino, &node);
377
378			if (!error) {
379				DPRINTF(LOOKUP,
380				    ("\tnode retrieved/created OK\n"));
381				*vpp = NTOV(node);
382			}
383		}
384
385		/* Try to relock parent */
386		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
387	} else {
388		DPRINTF(LOOKUP, ("\tlookup file\n"));
389		/* All other files */
390		/* Look up filename in the directory returning its inode */
391		name = cnp->cn_nameptr;
392		namelen = cnp->cn_namelen;
393		error = nandfs_lookup_name_in_dir(dvp, name, namelen,
394		    &ino, &found, &off);
395		if (error)
396			goto out;
397		if (!found) {
398			DPRINTF(LOOKUP, ("\tNOT found\n"));
399			/*
400			 * UGH, didn't find name. If we're creating or
401			 * renaming on the last name this is OK and we ought
402			 * to return EJUSTRETURN if its allowed to be created.
403			 */
404			error = ENOENT;
405			if ((nameiop == CREATE || nameiop == RENAME) &&
406			    islastcn) {
407				error = VOP_ACCESS(dvp, VWRITE, cred,
408				    td);
409				if (!error) {
410					/* keep the component name */
411					cnp->cn_flags |= SAVENAME;
412					error = EJUSTRETURN;
413				}
414			}
415			/* Done */
416		} else {
417			if (ino == NANDFS_WHT_INO)
418				cnp->cn_flags |= ISWHITEOUT;
419
420			if ((cnp->cn_flags & ISWHITEOUT) &&
421			    (nameiop == LOOKUP))
422				return (ENOENT);
423
424			if ((nameiop == DELETE) && islastcn) {
425				if ((cnp->cn_flags & ISWHITEOUT) &&
426				    (cnp->cn_flags & DOWHITEOUT)) {
427					cnp->cn_flags |= SAVENAME;
428					dir_node->nn_diroff = off;
429					return (EJUSTRETURN);
430				}
431
432				error = VOP_ACCESS(dvp, VWRITE, cred,
433				    cnp->cn_thread);
434				if (error)
435					return (error);
436
437				/* Try to create/reuse the node */
438				error = nandfs_get_node(nmp, ino, &node);
439				if (!error) {
440					*vpp = NTOV(node);
441					node->nn_diroff = off;
442				}
443
444				if ((dir_node->nn_inode.i_mode & ISVTX) &&
445				    cred->cr_uid != 0 &&
446				    cred->cr_uid != dir_node->nn_inode.i_uid &&
447				    node->nn_inode.i_uid != cred->cr_uid) {
448					vput(*vpp);
449					*vpp = NULL;
450					return (EPERM);
451				}
452			} else if ((nameiop == RENAME) && islastcn) {
453				error = VOP_ACCESS(dvp, VWRITE, cred,
454				    cnp->cn_thread);
455				if (error)
456					return (error);
457
458				/* Try to create/reuse the node */
459				error = nandfs_get_node(nmp, ino, &node);
460				if (!error) {
461					*vpp = NTOV(node);
462					node->nn_diroff = off;
463				}
464			} else {
465				/* Try to create/reuse the node */
466				error = nandfs_get_node(nmp, ino, &node);
467				if (!error) {
468					*vpp = NTOV(node);
469					node->nn_diroff = off;
470				}
471			}
472		}
473	}
474
475out:
476	/*
477	 * Store result in the cache if requested. If we are creating a file,
478	 * the file might not be found and thus putting it into the namecache
479	 * might be seen as negative caching.
480	 */
481	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
482		cache_enter(dvp, *vpp, cnp);
483
484	return (error);
485
486}
487
488static int
489nandfs_getattr(struct vop_getattr_args *ap)
490{
491	struct vnode *vp = ap->a_vp;
492	struct vattr *vap = ap->a_vap;
493	struct nandfs_node *node = VTON(vp);
494	struct nandfs_inode *inode = &node->nn_inode;
495
496	DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp));
497	nandfs_itimes(vp);
498
499	/* Basic info */
500	VATTR_NULL(vap);
501	vap->va_atime.tv_sec = inode->i_mtime;
502	vap->va_atime.tv_nsec = inode->i_mtime_nsec;
503	vap->va_mtime.tv_sec = inode->i_mtime;
504	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
505	vap->va_ctime.tv_sec = inode->i_ctime;
506	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
507	vap->va_type = IFTOVT(inode->i_mode);
508	vap->va_mode = inode->i_mode & ~S_IFMT;
509	vap->va_nlink = inode->i_links_count;
510	vap->va_uid = inode->i_uid;
511	vap->va_gid = inode->i_gid;
512	vap->va_rdev = inode->i_special;
513	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
514	vap->va_fileid = node->nn_ino;
515	vap->va_size = inode->i_size;
516	vap->va_blocksize = node->nn_nandfsdev->nd_blocksize;
517	vap->va_gen = 0;
518	vap->va_flags = inode->i_flags;
519	vap->va_bytes = inode->i_blocks * vap->va_blocksize;
520	vap->va_filerev = 0;
521	vap->va_vaflags = 0;
522
523	return (0);
524}
525
526static int
527nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks)
528{
529	struct nandfs_device *nffsdev;
530	struct bufobj *bo;
531	struct buf *bp, *nbp;
532
533	bo = &vp->v_bufobj;
534	nffsdev = VTON(vp)->nn_nandfsdev;
535
536	ASSERT_VOP_LOCKED(vp, "nandfs_truncate");
537restart:
538	BO_LOCK(bo);
539restart_locked:
540	TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
541		if (bp->b_lblkno < nblks)
542			continue;
543		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
544			goto restart_locked;
545
546		bremfree(bp);
547		bp->b_flags |= (B_INVAL | B_RELBUF);
548		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
549		BO_UNLOCK(bo);
550		brelse(bp);
551		BO_LOCK(bo);
552	}
553
554	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
555		if (bp->b_lblkno < nblks)
556			continue;
557		if (BUF_LOCK(bp,
558		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
559		    BO_MTX(bo)) == ENOLCK)
560			goto restart;
561		bp->b_flags |= (B_INVAL | B_RELBUF);
562		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
563		brelse(bp);
564		nandfs_dirty_bufs_decrement(nffsdev);
565		BO_LOCK(bo);
566	}
567
568	BO_UNLOCK(bo);
569
570	return (0);
571}
572
573static int
574nandfs_truncate(struct vnode *vp, uint64_t newsize)
575{
576	struct nandfs_device *nffsdev;
577	struct nandfs_node *node;
578	struct nandfs_inode *inode;
579	struct buf *bp = NULL;
580	uint64_t oblks, nblks, vblk, size, rest;
581	int error;
582
583	node = VTON(vp);
584	nffsdev = node->nn_nandfsdev;
585	inode = &node->nn_inode;
586
587	/* Calculate end of file */
588	size = inode->i_size;
589
590	if (newsize == size) {
591		node->nn_flags |= IN_CHANGE | IN_UPDATE;
592		nandfs_itimes(vp);
593		return (0);
594	}
595
596	if (newsize > size) {
597		inode->i_size = newsize;
598		vnode_pager_setsize(vp, newsize);
599		node->nn_flags |= IN_CHANGE | IN_UPDATE;
600		nandfs_itimes(vp);
601		return (0);
602	}
603
604	nblks = howmany(newsize, nffsdev->nd_blocksize);
605	oblks = howmany(size, nffsdev->nd_blocksize);
606	rest = newsize % nffsdev->nd_blocksize;
607
608	if (rest) {
609		error = nandfs_bmap_lookup(node, nblks - 1, &vblk);
610		if (error)
611			return (error);
612
613		if (vblk != 0)
614			error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp);
615		else
616			error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp);
617
618		if (error) {
619			if (bp)
620				brelse(bp);
621			return (error);
622		}
623
624		bzero((char *)bp->b_data + rest,
625		    (u_int)(nffsdev->nd_blocksize - rest));
626		error = nandfs_dirty_buf(bp, 0);
627		if (error)
628			return (error);
629	}
630
631	DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks,
632	    nblks));
633
634	error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1);
635	if (error) {
636		if (bp)
637			nandfs_undirty_buf(bp);
638		return (error);
639	}
640
641	error = nandfs_vtruncbuf(vp, nblks);
642	if (error) {
643		if (bp)
644			nandfs_undirty_buf(bp);
645		return (error);
646	}
647
648	inode->i_size = newsize;
649	vnode_pager_setsize(vp, newsize);
650	node->nn_flags |= IN_CHANGE | IN_UPDATE;
651	nandfs_itimes(vp);
652
653	return (error);
654}
655
656static void
657nandfs_itimes_locked(struct vnode *vp)
658{
659	struct nandfs_node *node;
660	struct nandfs_inode *inode;
661	struct timespec ts;
662
663	ASSERT_VI_LOCKED(vp, __func__);
664
665	node = VTON(vp);
666	inode = &node->nn_inode;
667
668	if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
669		return;
670
671	if (((vp->v_mount->mnt_kern_flag &
672	    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
673	    (node->nn_flags & (IN_CHANGE | IN_UPDATE)))
674		node->nn_flags |= IN_MODIFIED;
675
676	vfs_timestamp(&ts);
677	if (node->nn_flags & IN_UPDATE) {
678		inode->i_mtime = ts.tv_sec;
679		inode->i_mtime_nsec = ts.tv_nsec;
680	}
681	if (node->nn_flags & IN_CHANGE) {
682		inode->i_ctime = ts.tv_sec;
683		inode->i_ctime_nsec = ts.tv_nsec;
684	}
685
686	node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
687}
688
/*
 * Apply pending timestamp updates for vp.  Wrapper that takes the vnode
 * interlock around nandfs_itimes_locked().
 */
void
nandfs_itimes(struct vnode *vp)
{

	VI_LOCK(vp);
	{
		nandfs_itimes_locked(vp);
	}
	VI_UNLOCK(vp);
}
697
698static int
699nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
700{
701	struct nandfs_node *node = VTON(vp);
702	struct nandfs_inode *inode = &node->nn_inode;
703	uint16_t nmode;
704	int error = 0;
705
706	DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp,
707	    mode, cred, td));
708	/*
709	 * To modify the permissions on a file, must possess VADMIN
710	 * for that file.
711	 */
712	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
713		return (error);
714
715	/*
716	 * Privileged processes may set the sticky bit on non-directories,
717	 * as well as set the setgid bit on a file with a group that the
718	 * process is not a member of. Both of these are allowed in
719	 * jail(8).
720	 */
721	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
722		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
723			return (EFTYPE);
724	}
725	if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) {
726		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
727		if (error)
728			return (error);
729	}
730
731	/*
732	 * Deny setting setuid if we are not the file owner.
733	 */
734	if ((mode & ISUID) && inode->i_uid != cred->cr_uid) {
735		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
736		if (error)
737			return (error);
738	}
739
740	nmode = inode->i_mode;
741	nmode &= ~ALLPERMS;
742	nmode |= (mode & ALLPERMS);
743	inode->i_mode = nmode;
744	node->nn_flags |= IN_CHANGE;
745
746	DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode));
747
748	return (error);
749}
750
751static int
752nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
753    struct thread *td)
754{
755	struct nandfs_node *node = VTON(vp);
756	struct nandfs_inode *inode = &node->nn_inode;
757	uid_t ouid;
758	gid_t ogid;
759	int error = 0;
760
761	if (uid == (uid_t)VNOVAL)
762		uid = inode->i_uid;
763	if (gid == (gid_t)VNOVAL)
764		gid = inode->i_gid;
765	/*
766	 * To modify the ownership of a file, must possess VADMIN for that
767	 * file.
768	 */
769	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
770		return (error);
771	/*
772	 * To change the owner of a file, or change the group of a file to a
773	 * group of which we are not a member, the caller must have
774	 * privilege.
775	 */
776	if (((uid != inode->i_uid && uid != cred->cr_uid) ||
777	    (gid != inode->i_gid && !groupmember(gid, cred))) &&
778	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
779		return (error);
780	ogid = inode->i_gid;
781	ouid = inode->i_uid;
782
783	inode->i_gid = gid;
784	inode->i_uid = uid;
785
786	node->nn_flags |= IN_CHANGE;
787	if ((inode->i_mode & (ISUID | ISGID)) &&
788	    (ouid != uid || ogid != gid)) {
789		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
790			inode->i_mode &= ~(ISUID | ISGID);
791		}
792	}
793	DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp,
794	    cred, td));
795	return (0);
796}
797
798static int
799nandfs_setattr(struct vop_setattr_args *ap)
800{
801	struct vnode *vp = ap->a_vp;
802	struct nandfs_node *node = VTON(vp);
803	struct nandfs_inode *inode = &node->nn_inode;
804	struct vattr *vap = ap->a_vap;
805	struct ucred *cred = ap->a_cred;
806	struct thread *td = curthread;
807	uint32_t flags;
808	int error = 0;
809
810	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
811	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
812	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
813	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
814		DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__));
815		return (EINVAL);
816	}
817
818	if (vap->va_flags != VNOVAL) {
819		DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp,
820		    td, vap->va_flags));
821
822		if (vp->v_mount->mnt_flag & MNT_RDONLY)
823			return (EROFS);
824		/*
825		 * Callers may only modify the file flags on objects they
826		 * have VADMIN rights for.
827		 */
828		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
829			return (error);
830		/*
831		 * Unprivileged processes are not permitted to unset system
832		 * flags, or modify flags if any system flags are set.
833		 * Privileged non-jail processes may not modify system flags
834		 * if securelevel > 0 and any existing system flags are set.
835		 * Privileged jail processes behave like privileged non-jail
836		 * processes if the security.jail.chflags_allowed sysctl is
837		 * is non-zero; otherwise, they behave like unprivileged
838		 * processes.
839		 */
840
841		flags = inode->i_flags;
842		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
843			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
844				error = securelevel_gt(cred, 0);
845				if (error)
846					return (error);
847			}
848			/* Snapshot flag cannot be set or cleared */
849			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
850			    (flags & SF_SNAPSHOT) == 0) ||
851			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
852			    (flags & SF_SNAPSHOT) != 0))
853				return (EPERM);
854
855			inode->i_flags = vap->va_flags;
856		} else {
857			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
858			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
859				return (EPERM);
860
861			flags &= SF_SETTABLE;
862			flags |= (vap->va_flags & UF_SETTABLE);
863			inode->i_flags = flags;
864		}
865		node->nn_flags |= IN_CHANGE;
866		if (vap->va_flags & (IMMUTABLE | APPEND))
867			return (0);
868	}
869	if (inode->i_flags & (IMMUTABLE | APPEND))
870		return (EPERM);
871
872	if (vap->va_size != (u_quad_t)VNOVAL) {
873		DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td,
874		    (uintmax_t)vap->va_size));
875
876		switch (vp->v_type) {
877		case VDIR:
878			return (EISDIR);
879		case VLNK:
880		case VREG:
881			if (vp->v_mount->mnt_flag & MNT_RDONLY)
882				return (EROFS);
883			if ((inode->i_flags & SF_SNAPSHOT) != 0)
884				return (EPERM);
885			break;
886		default:
887			return (0);
888		}
889
890		if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize)
891			return (EFBIG);
892
893		KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type));
894		nandfs_truncate(vp, vap->va_size);
895		node->nn_flags |= IN_CHANGE;
896
897		return (0);
898	}
899
900	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
901		if (vp->v_mount->mnt_flag & MNT_RDONLY)
902			return (EROFS);
903		DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
904		    vp, td, vap->va_uid, vap->va_gid));
905		error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
906		if (error)
907			return (error);
908	}
909
910	if (vap->va_mode != (mode_t)VNOVAL) {
911		if (vp->v_mount->mnt_flag & MNT_RDONLY)
912			return (EROFS);
913		DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td,
914		    vap->va_mode));
915
916		error = nandfs_chmod(vp, (int)vap->va_mode, cred, td);
917		if (error)
918			return (error);
919	}
920	if (vap->va_atime.tv_sec != VNOVAL ||
921	    vap->va_mtime.tv_sec != VNOVAL ||
922	    vap->va_birthtime.tv_sec != VNOVAL) {
923		DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n",
924		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
925		    (uintmax_t)vap->va_mtime.tv_sec,
926		    (uintmax_t)vap->va_birthtime.tv_sec));
927
928		if (vap->va_atime.tv_sec != VNOVAL)
929			node->nn_flags |= IN_ACCESS;
930		if (vap->va_mtime.tv_sec != VNOVAL)
931			node->nn_flags |= IN_CHANGE | IN_UPDATE;
932		if (vap->va_birthtime.tv_sec != VNOVAL)
933			node->nn_flags |= IN_MODIFIED;
934		nandfs_itimes(vp);
935		return (0);
936	}
937
938	return (0);
939}
940
941static int
942nandfs_open(struct vop_open_args *ap)
943{
944	struct nandfs_node *node = VTON(ap->a_vp);
945	uint64_t filesize;
946
947	DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode));
948
949	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
950		return (EOPNOTSUPP);
951
952	if ((node->nn_inode.i_flags & APPEND) &&
953	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
954		return (EPERM);
955
956	filesize = node->nn_inode.i_size;
957	vnode_create_vobject(ap->a_vp, filesize, ap->a_td);
958
959	return (0);
960}
961
962static int
963nandfs_close(struct vop_close_args *ap)
964{
965	struct vnode *vp = ap->a_vp;
966	struct nandfs_node *node = VTON(vp);
967
968	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
969
970	mtx_lock(&vp->v_interlock);
971	if (vp->v_usecount > 1)
972		nandfs_itimes_locked(vp);
973	mtx_unlock(&vp->v_interlock);
974
975	return (0);
976}
977
978static int
979nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
980{
981
982	/* Check if we are allowed to write */
983	switch (vap->va_type) {
984	case VDIR:
985	case VLNK:
986	case VREG:
987		/*
988		 * Normal nodes: check if we're on a read-only mounted
989		 * filingsystem and bomb out if we're trying to write.
990		 */
991		if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY))
992			return (EROFS);
993		break;
994	case VBLK:
995	case VCHR:
996	case VSOCK:
997	case VFIFO:
998		/*
999		 * Special nodes: even on read-only mounted filingsystems
1000		 * these are allowed to be written to if permissions allow.
1001		 */
1002		break;
1003	default:
1004		/* No idea what this is */
1005		return (EINVAL);
1006	}
1007
1008	/* Noone may write immutable files */
1009	if ((mode & VWRITE) && (VTON(vp)->nn_inode.i_flags & IMMUTABLE))
1010		return (EPERM);
1011
1012	return (0);
1013}
1014
1015static int
1016nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode,
1017    struct ucred *cred)
1018{
1019
1020	return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode,
1021	    cred, NULL));
1022}
1023
1024static int
1025nandfs_advlock(struct vop_advlock_args *ap)
1026{
1027	struct nandfs_node *nvp;
1028	quad_t size;
1029
1030	nvp = VTON(ap->a_vp);
1031	size = nvp->nn_inode.i_size;
1032	return (lf_advlock(ap, &(nvp->nn_lockf), size));
1033}
1034
1035static int
1036nandfs_access(struct vop_access_args *ap)
1037{
1038	struct vnode *vp = ap->a_vp;
1039	accmode_t accmode = ap->a_accmode;
1040	struct ucred *cred = ap->a_cred;
1041	struct vattr vap;
1042	int error;
1043
1044	DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode));
1045
1046	error = VOP_GETATTR(vp, &vap, NULL);
1047	if (error)
1048		return (error);
1049
1050	error = nandfs_check_possible(vp, &vap, accmode);
1051	if (error) {
1052		return (error);
1053	}
1054
1055	error = nandfs_check_permitted(vp, &vap, accmode, cred);
1056
1057	return (error);
1058}
1059
1060static int
1061nandfs_print(struct vop_print_args *ap)
1062{
1063	struct vnode *vp = ap->a_vp;
1064	struct nandfs_node *nvp = VTON(vp);
1065
1066	printf("\tvp=%p, nandfs_node=%p\n", vp, nvp);
1067	printf("nandfs inode %#jx\n", (uintmax_t)nvp->nn_ino);
1068	printf("flags = 0x%b\n", (u_int)nvp->nn_flags, PRINT_NODE_FLAGS);
1069
1070	return (0);
1071}
1072
1073static void
1074nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp)
1075{
1076	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
1077	struct buf *nbp;
1078	nandfs_daddr_t vblk, pblk;
1079	nandfs_lbn_t from;
1080	uint32_t blocksize;
1081	int error = 0;
1082	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
1083
1084	/*
1085	 * Translate all the block sectors into a series of buffers to read
1086	 * asynchronously from the nandfs device. Note that this lookup may
1087	 * induce readin's too.
1088	 */
1089
1090	blocksize = nandfsdev->nd_blocksize;
1091	if (bp->b_bcount / blocksize != 1)
1092		panic("invalid b_count in bp %p\n", bp);
1093
1094	from = bp->b_blkno;
1095
1096	DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx"
1097	    " count %#lx\n", (uintmax_t)node->nn_ino, from,
1098	    bp->b_bcount));
1099
1100	/* Get virtual block numbers for the vnode's buffer span */
1101	error = nandfs_bmap_lookup(node, from, &vblk);
1102	if (error) {
1103		bp->b_error = EINVAL;
1104		bp->b_ioflags |= BIO_ERROR;
1105		bufdone(bp);
1106		return;
1107	}
1108
1109	/* Translate virtual block numbers to physical block numbers */
1110	error = nandfs_vtop(node, vblk, &pblk);
1111	if (error) {
1112		bp->b_error = EINVAL;
1113		bp->b_ioflags |= BIO_ERROR;
1114		bufdone(bp);
1115		return;
1116	}
1117
1118	/* Issue translated blocks */
1119	bp->b_resid = bp->b_bcount;
1120
1121	/* Note virtual block 0 marks not mapped */
1122	if (vblk == 0) {
1123		vfs_bio_clrbuf(bp);
1124		bufdone(bp);
1125		return;
1126	}
1127
1128	nbp = bp;
1129	nbp->b_blkno = pblk * blk2dev;
1130	bp->b_iooffset = dbtob(nbp->b_blkno);
1131	MPASS(bp->b_iooffset >= 0);
1132	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp);
1133	nandfs_vblk_set(bp, vblk);
1134	DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> "
1135	    "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino,
1136	    (uintmax_t)(from), (uintmax_t)vblk,
1137	    (uintmax_t)pblk, nbp));
1138}
1139
1140static void
1141nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp)
1142{
1143	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
1144
1145	bp->b_iooffset = dbtob(bp->b_blkno);
1146	MPASS(bp->b_iooffset >= 0);
1147	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, bp);
1148}
1149
1150static int
1151nandfs_strategy(struct vop_strategy_args *ap)
1152{
1153	struct vnode *vp = ap->a_vp;
1154	struct buf *bp = ap->a_bp;
1155	struct nandfs_node *node = VTON(vp);
1156
1157
1158	/* check if we ought to be here */
1159	KASSERT((vp->v_type != VBLK && vp->v_type != VCHR),
1160	    ("nandfs_strategy on type %d", vp->v_type));
1161
1162	/* Translate if needed and pass on */
1163	if (bp->b_iocmd == BIO_READ) {
1164		nandfs_read_filebuf(node, bp);
1165		return (0);
1166	}
1167
1168	/* Send to segment collector */
1169	nandfs_write_filebuf(node, bp);
1170	return (0);
1171}
1172
1173static int
1174nandfs_readdir(struct vop_readdir_args *ap)
1175{
1176	struct uio *uio = ap->a_uio;
1177	struct vnode *vp = ap->a_vp;
1178	struct nandfs_node *node = VTON(vp);
1179	struct nandfs_dir_entry *ndirent;
1180	struct dirent dirent;
1181	struct buf *bp;
1182	uint64_t file_size, diroffset, transoffset, blkoff;
1183	uint64_t blocknr;
1184	uint32_t blocksize = node->nn_nandfsdev->nd_blocksize;
1185	uint8_t *pos, name_len;
1186	int error;
1187
1188	DPRINTF(READDIR, ("nandfs_readdir called\n"));
1189
1190	if (vp->v_type != VDIR)
1191		return (ENOTDIR);
1192
1193	file_size = node->nn_inode.i_size;
1194	DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n",
1195	    (uintmax_t)file_size, uio->uio_resid ));
1196
1197	/* We are called just as long as we keep on pushing data in */
1198	error = 0;
1199	if ((uio->uio_offset < file_size) &&
1200	    (uio->uio_resid >= sizeof(struct dirent))) {
1201		diroffset = uio->uio_offset;
1202		transoffset = diroffset;
1203
1204		blocknr = diroffset / blocksize;
1205		blkoff = diroffset % blocksize;
1206		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
1207		if (error) {
1208			brelse(bp);
1209			return (EIO);
1210		}
1211		while (diroffset < file_size) {
1212			DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n",
1213			    diroffset));
1214			if (blkoff >= blocksize) {
1215				blkoff = 0; blocknr++;
1216				brelse(bp);
1217				error = nandfs_bread(node, blocknr, NOCRED, 0,
1218				    &bp);
1219				if (error) {
1220					brelse(bp);
1221					return (EIO);
1222				}
1223			}
1224
1225			/* Read in one dirent */
1226			pos = (uint8_t *)bp->b_data + blkoff;
1227			ndirent = (struct nandfs_dir_entry *)pos;
1228
1229			name_len = ndirent->name_len;
1230			memset(&dirent, 0, sizeof(struct dirent));
1231			dirent.d_fileno = ndirent->inode;
1232			if (dirent.d_fileno) {
1233				dirent.d_type = ndirent->file_type;
1234				dirent.d_namlen = name_len;
1235				strncpy(dirent.d_name, ndirent->name, name_len);
1236				dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
1237				DPRINTF(READDIR, ("copying `%*.*s`\n", name_len,
1238				    name_len, dirent.d_name));
1239			}
1240
1241			/*
1242			 * If there isn't enough space in the uio to return a
1243			 * whole dirent, break off read
1244			 */
1245			if (uio->uio_resid < GENERIC_DIRSIZ(&dirent))
1246				break;
1247
1248			/* Transfer */
1249			if (dirent.d_fileno)
1250				uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio);
1251
1252			/* Advance */
1253			diroffset += ndirent->rec_len;
1254			blkoff += ndirent->rec_len;
1255
1256			/* Remember the last entry we transfered */
1257			transoffset = diroffset;
1258		}
1259		brelse(bp);
1260
1261		/* Pass on last transfered offset */
1262		uio->uio_offset = transoffset;
1263	}
1264
1265	if (ap->a_eofflag)
1266		*ap->a_eofflag = (uio->uio_offset >= file_size);
1267
1268	return (error);
1269}
1270
1271static int
1272nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred)
1273{
1274	struct nandfs_node *dnode = VTON(dvp);
1275	struct nandfs_dir_entry *dirent;
1276	uint64_t file_size = dnode->nn_inode.i_size;
1277	uint64_t blockcount = dnode->nn_inode.i_blocks;
1278	uint64_t blocknr;
1279	uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize;
1280	uint32_t limit;
1281	uint32_t off;
1282	uint8_t	*pos;
1283	struct buf *bp;
1284	int error;
1285
1286	DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp,
1287	    (uintmax_t)parentino, cred));
1288
1289	KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp));
1290
1291	blocknr = 0;
1292	while (blocknr < blockcount) {
1293		error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp);
1294		if (error) {
1295			brelse(bp);
1296			return (0);
1297		}
1298
1299		pos = (uint8_t *)bp->b_data;
1300		off = 0;
1301
1302		if (blocknr == (blockcount - 1))
1303			limit = file_size % blocksize;
1304		else
1305			limit = blocksize;
1306
1307		while (off < limit) {
1308			dirent = (struct nandfs_dir_entry *)(pos + off);
1309			off += dirent->rec_len;
1310
1311			if (dirent->inode == 0)
1312				continue;
1313
1314			switch (dirent->name_len) {
1315			case 0:
1316				break;
1317			case 1:
1318				if (dirent->name[0] != '.')
1319					goto notempty;
1320
1321				KASSERT(dirent->inode == dnode->nn_ino,
1322				    (".'s inode does not match dir"));
1323				break;
1324			case 2:
1325				if (dirent->name[0] != '.' &&
1326				    dirent->name[1] != '.')
1327					goto notempty;
1328
1329				KASSERT(dirent->inode == parentino,
1330				    ("..'s inode does not match parent"));
1331				break;
1332			default:
1333				goto notempty;
1334			}
1335		}
1336
1337		brelse(bp);
1338		blocknr++;
1339	}
1340
1341	return (1);
1342notempty:
1343	brelse(bp);
1344	return (0);
1345}
1346
1347static int
1348nandfs_link(struct vop_link_args *ap)
1349{
1350	struct vnode *tdvp = ap->a_tdvp;
1351	struct vnode *vp = ap->a_vp;
1352	struct componentname *cnp = ap->a_cnp;
1353	struct nandfs_node *node = VTON(vp);
1354	struct nandfs_inode *inode = &node->nn_inode;
1355	int error;
1356
1357	if (tdvp->v_mount != vp->v_mount)
1358		return (EXDEV);
1359
1360	if (inode->i_links_count >= LINK_MAX)
1361		return (EMLINK);
1362
1363	if (inode->i_flags & (IMMUTABLE | APPEND))
1364		return (EPERM);
1365
1366	/* Update link count */
1367	inode->i_links_count++;
1368
1369	/* Add dir entry */
1370	error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr,
1371	    cnp->cn_namelen, IFTODT(inode->i_mode));
1372	if (error) {
1373		inode->i_links_count--;
1374	}
1375
1376	node->nn_flags |= IN_CHANGE;
1377	nandfs_itimes(vp);
1378	DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n",
1379	    __func__, tdvp, vp, cnp));
1380
1381	return (0);
1382}
1383
1384static int
1385nandfs_create(struct vop_create_args *ap)
1386{
1387	struct vnode *dvp = ap->a_dvp;
1388	struct vnode **vpp = ap->a_vpp;
1389	struct componentname *cnp = ap->a_cnp;
1390	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1391	struct nandfs_node *dir_node = VTON(dvp);
1392	struct nandfsmount *nmp = dir_node->nn_nmp;
1393	struct nandfs_node *node;
1394	int error;
1395
1396	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
1397
1398	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1399		return (ENOSPC);
1400
1401	/* Create new vnode/inode */
1402	error = nandfs_node_create(nmp, &node, mode);
1403	if (error)
1404		return (error);
1405	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1406	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1407
1408	/* Add new dir entry */
1409	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1410	    cnp->cn_namelen, IFTODT(mode));
1411	if (error) {
1412		if (nandfs_node_destroy(node)) {
1413			nandfs_error("%s: error destroying node %p\n",
1414			    __func__, node);
1415		}
1416		return (error);
1417	}
1418	*vpp = NTOV(node);
1419
1420	DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node,
1421	    (uintmax_t)node->nn_ino));
1422	return (0);
1423}
1424
1425static int
1426nandfs_remove(struct vop_remove_args *ap)
1427{
1428	struct vnode *vp = ap->a_vp;
1429	struct vnode *dvp = ap->a_dvp;
1430	struct nandfs_node *node = VTON(vp);
1431	struct nandfs_node *dnode = VTON(dvp);
1432	struct componentname *cnp = ap->a_cnp;
1433
1434	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n",
1435	    __func__, dvp, vp, node, (uintmax_t)node->nn_ino,
1436	    node->nn_inode.i_links_count));
1437
1438	if (vp->v_type == VDIR)
1439		return (EISDIR);
1440
1441	/* Files marked as immutable or append-only cannot be deleted. */
1442	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
1443	    (dnode->nn_inode.i_flags & APPEND))
1444		return (EPERM);
1445
1446	nandfs_remove_dirent(dvp, node, cnp);
1447	node->nn_inode.i_links_count--;
1448	node->nn_flags |= IN_CHANGE;
1449
1450	return (0);
1451}
1452
1453/*
1454 * Check if source directory is in the path of the target directory.
1455 * Target is supplied locked, source is unlocked.
1456 * The target is always vput before returning.
1457 */
1458static int
1459nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest,
1460    struct ucred *cred)
1461{
1462	struct vnode *vp;
1463	int error, rootino;
1464	struct nandfs_dir_entry dirent;
1465
1466	vp = NTOV(dest);
1467	if (src->nn_ino == dest->nn_ino) {
1468		error = EEXIST;
1469		goto out;
1470	}
1471	rootino = NANDFS_ROOT_INO;
1472	error = 0;
1473	if (dest->nn_ino == rootino)
1474		goto out;
1475
1476	for (;;) {
1477		if (vp->v_type != VDIR) {
1478			error = ENOTDIR;
1479			break;
1480		}
1481
1482		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent,
1483		    NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE,
1484		    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED,
1485		    NULL, NULL);
1486		if (error != 0)
1487			break;
1488		if (dirent.name_len != 2 ||
1489		    dirent.name[0] != '.' ||
1490		    dirent.name[1] != '.') {
1491			error = ENOTDIR;
1492			break;
1493		}
1494		if (dirent.inode == src->nn_ino) {
1495			error = EINVAL;
1496			break;
1497		}
1498		if (dirent.inode == rootino)
1499			break;
1500		vput(vp);
1501		if ((error = VFS_VGET(vp->v_mount, dirent.inode,
1502		    LK_EXCLUSIVE, &vp)) != 0) {
1503			vp = NULL;
1504			break;
1505		}
1506	}
1507
1508out:
1509	if (error == ENOTDIR)
1510		printf("checkpath: .. not a directory\n");
1511	if (vp != NULL)
1512		vput(vp);
1513	return (error);
1514}
1515
/*
 * Rename vnode operation (installed as .vop_rename in nandfs_vnodeops).
 *
 * Moves the entry named by fcnp in directory fdvp (vnode fvp) to the name
 * tcnp in directory tdvp, replacing tvp when the target already exists.
 *
 * Outline, as implemented below:
 *  1. Reject cross-mount renames (EXDEV) and targets or sources protected
 *     by NOUNLINK/IMMUTABLE/APPEND flags (EPERM).
 *  2. Lock fvp; refuse when its link count is at LINK_MAX (EMLINK).
 *  3. For a directory source, refuse ".", ".." and concurrent renames
 *     (EINVAL) and mark the node IN_RENAME.
 *  4. Temporarily bump the source link count.
 *  5. When a directory moves to a different parent, call
 *     nandfs_checkpath() and look the target up again.
 *  6. Either add a new entry in tdvp (no target) or repoint the existing
 *     target entry at the source.
 *  7. Look the source up again and remove its old entry.
 *
 * The early "abortit" exit releases tdvp/tvp with vput() (vrele() for tdvp
 * when tdvp == tvp) and fdvp/fvp with vrele(); presumably the target
 * vnodes arrive locked and the source vnodes only referenced -- confirm
 * against VOP_RENAME(9).
 *
 * Returns 0 or an errno value.
 */
1516static int
1517nandfs_rename(struct vop_rename_args *ap)
1518{
1519	struct vnode *tvp = ap->a_tvp;
1520	struct vnode *tdvp = ap->a_tdvp;
1521	struct vnode *fvp = ap->a_fvp;
1522	struct vnode *fdvp = ap->a_fdvp;
1523	struct componentname *tcnp = ap->a_tcnp;
1524	struct componentname *fcnp = ap->a_fcnp;
1525	int doingdirectory = 0, oldparent = 0, newparent = 0;
1526	int error = 0;
1527
1528	struct nandfs_node *fdnode, *fnode, *fnode1;
1529	struct nandfs_node *tdnode = VTON(tdvp);
1530	struct nandfs_node *tnode;
1531
1532	uint32_t tdflags, fflags, fdflags;
1533	uint16_t mode;
1534
1535	DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp,
1536	    fvp, tdvp, tvp));
1537
1538	/*
1539	 * Check for cross-device rename.
1540	 */
1541	if ((fvp->v_mount != tdvp->v_mount) ||
1542	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1543		error = EXDEV;
		/* Shared early-failure exit: release all four vnodes. */
1544abortit:
1545		if (tdvp == tvp)
1546			vrele(tdvp);
1547		else
1548			vput(tdvp);
1549		if (tvp)
1550			vput(tvp);
1551		vrele(fdvp);
1552		vrele(fvp);
1553		return (error);
1554	}
1555
1556	tdflags = tdnode->nn_inode.i_flags;
1557	if (tvp &&
1558	    ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1559	    (tdflags & APPEND))) {
1560		error = EPERM;
1561		goto abortit;
1562	}
1563
1564	/*
1565	 * Renaming a file to itself has no effect.  The upper layers should
1566	 * not call us in that case.  Temporarily just warn if they do.
1567	 */
1568	if (fvp == tvp) {
1569		printf("nandfs_rename: fvp == tvp (can't happen)\n");
1570		error = 0;
1571		goto abortit;
1572	}
1573
	/* fvp is locked from here; later abortit jumps unlock it first. */
1574	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
1575		goto abortit;
1576
1577	fdnode = VTON(fdvp);
1578	fnode = VTON(fvp);
1579
1580	if (fnode->nn_inode.i_links_count >= LINK_MAX) {
1581		VOP_UNLOCK(fvp, 0);
1582		error = EMLINK;
1583		goto abortit;
1584	}
1585
1586	fflags = fnode->nn_inode.i_flags;
1587	fdflags = fdnode->nn_inode.i_flags;
1588
1589	if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1590	    (fdflags & APPEND)) {
1591		VOP_UNLOCK(fvp, 0);
1592		error = EPERM;
1593		goto abortit;
1594	}
1595
1596	mode = fnode->nn_inode.i_mode;
1597	if ((mode & S_IFMT) == S_IFDIR) {
1598		/*
1599		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1600		 */
1601
1602		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1603		    (fdvp == fvp) ||
1604		    ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) ||
1605		    (fnode->nn_flags & IN_RENAME)) {
1606			VOP_UNLOCK(fvp, 0);
1607			error = EINVAL;
1608			goto abortit;
1609		}
1610		fnode->nn_flags |= IN_RENAME;
1611		doingdirectory = 1;
1612		DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__,
1613		    tdvp));
1614		oldparent = fdnode->nn_ino;
1615	}
1616
	/*
	 * Drop the fdvp reference; a new one is taken with VREF() before
	 * the source name is looked up again further down.
	 */
1617	vrele(fdvp);
1618
1619	tnode = NULL;
1620	if (tvp)
1621		tnode = VTON(tvp);
1622
1623	/*
1624	 * Bump link count on fvp while we are moving stuff around. If we
1625	 * crash before completing the work, the link count may be wrong
1626	 * but correctable.
1627	 */
1628	fnode->nn_inode.i_links_count++;
1629
1630	/* Check for in path moving XXX */
1631	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
1632	VOP_UNLOCK(fvp, 0);
	/*
	 * oldparent is only set for a directory source, so newparent is
	 * only meaningful together with doingdirectory.
	 */
1633	if (oldparent != tdnode->nn_ino)
1634		newparent = tdnode->nn_ino;
1635	if (doingdirectory && newparent) {
1636		if (error)	/* write access check above */
1637			goto bad;
1638		if (tnode != NULL)
1639			vput(tvp);
1640
		/*
		 * nandfs_checkpath() always vputs its target (see its header
		 * comment), so failures from here leave through "out", which
		 * skips the vput of tdvp done under "bad".
		 */
1641		error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred);
1642		if (error)
1643			goto out;
1644
1645		VREF(tdvp);
1646		error = relookup(tdvp, &tvp, tcnp);
1647		if (error)
1648			goto out;
1649		vrele(tdvp);
1650		tdnode = VTON(tdvp);
1651		tnode = NULL;
1652		if (tvp)
1653			tnode = VTON(tvp);
1654	}
1655
1656	/*
1657	 * If the target doesn't exist, link the target to the source and
1658	 * unlink the source. Otherwise, rewrite the target directory to
1659	 * reference the source and remove the original entry.
1660	 */
1661
1662	if (tvp == NULL) {
1663		/*
1664		 * Account for ".." in new directory.
1665		 */
1666		if (doingdirectory && fdvp != tdvp)
1667			tdnode->nn_inode.i_links_count++;
1668
1669		DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp));
1670		/*
1671		 * Add name in new directory.
1672		 */
1673		error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr,
1674		    tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode));
1675		if (error) {
1676			if (doingdirectory && fdvp != tdvp)
1677				tdnode->nn_inode.i_links_count--;
1678			goto bad;
1679		}
1680
1681		vput(tdvp);
1682	} else {
1683		/*
1684		 * If the parent directory is "sticky", then the user must
1685		 * own the parent directory, or the destination of the rename,
1686		 * otherwise the destination may not be changed (except by
1687		 * root). This implements append-only directories.
1688		 */
1689		if ((tdnode->nn_inode.i_mode & S_ISTXT) &&
1690		    tcnp->cn_cred->cr_uid != 0 &&
1691		    tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid &&
1692		    tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) {
1693			error = EPERM;
1694			goto bad;
1695		}
1696		/*
1697		 * Target must be empty if a directory and have no links
1698		 * to it. Also, ensure source and target are compatible
1699		 * (both directories, or both not directories).
1700		 */
1701		mode = tnode->nn_inode.i_mode;
1702		if ((mode & S_IFMT) == S_IFDIR) {
1703			if (!nandfs_dirempty(tvp, tdnode->nn_ino,
1704			    tcnp->cn_cred)) {
1705				error = ENOTEMPTY;
1706				goto bad;
1707			}
1708			if (!doingdirectory) {
1709				error = ENOTDIR;
1710				goto bad;
1711			}
1712			/*
1713			 * Update name cache since directory is going away.
1714			 */
1715			cache_purge(tdvp);
1716		} else if (doingdirectory) {
1717			error = EISDIR;
1718			goto bad;
1719		}
1720
1721		DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp));
1722		/*
1723		 * Change name tcnp in tdvp to point at fvp.
1724		 */
1725		error = nandfs_update_dirent(tdvp, fnode, tnode);
1726		if (error)
1727			goto bad;
1728
1729		if (doingdirectory && !newparent)
1730			tdnode->nn_inode.i_links_count--;
1731
1732		vput(tdvp);
1733
1734		tnode->nn_inode.i_links_count--;
1735		vput(tvp);
1736		tnode = NULL;
1737	}
1738
1739	/*
1740	 * Unlink the source.
1741	 */
1742	fcnp->cn_flags &= ~MODMASK;
1743	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1744	VREF(fdvp);
1745	error = relookup(fdvp, &fvp, fcnp);
1746	if (error == 0)
1747		vrele(fdvp);
1748	if (fvp != NULL) {
1749		fnode1 = VTON(fvp);
1750		fdnode = VTON(fdvp);
1751	} else {
1752		/*
1753		 * From name has disappeared.
1754		 */
1755		if (doingdirectory)
1756			panic("nandfs_rename: lost dir entry");
1757		vrele(ap->a_fvp);
1758		return (0);
1759	}
1760
1761	DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode));
1762
1763	/*
1764	 * Ensure that the directory entry still exists and has not
1765	 * changed while the new name has been entered. If the source is
1766	 * a file then the entry may have been unlinked or renamed. In
1767	 * either case there is no further work to be done. If the source
1768	 * is a directory then it cannot have been rmdir'ed; its link
1769	 * count of three would cause a rmdir to fail with ENOTEMPTY.
1770	 * The IN_RENAME flag ensures that it cannot be moved by another
1771	 * rename.
1772	 */
1773	if (fnode != fnode1) {
1774		if (doingdirectory)
1775			panic("nandfs: lost dir entry");
1776	} else {
1777		/*
1778		 * If the source is a directory with a
1779		 * new parent, the link count of the old
1780		 * parent directory must be decremented
1781		 * and ".." set to point to the new parent.
1782		 */
1783		if (doingdirectory && newparent) {
1784			DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n",
1785			    __func__, (uintmax_t) oldparent,
1786			    (uintmax_t) newparent));
1787			error = nandfs_update_parent_dir(fvp, newparent);
1788			if (!error) {
1789				fdnode->nn_inode.i_links_count--;
1790				fdnode->nn_flags |= IN_CHANGE;
1791			}
1792		}
		/*
		 * NOTE(review): this assignment overwrites any error left by
		 * nandfs_update_parent_dir() above -- confirm that dropping
		 * it is intended.
		 */
1793		error = nandfs_remove_dirent(fdvp, fnode, fcnp);
1794		if (!error) {
			/* Takes back the temporary bump made before the move. */
1795			fnode->nn_inode.i_links_count--;
1796			fnode->nn_flags |= IN_CHANGE;
1797		}
1798		fnode->nn_flags &= ~IN_RENAME;
1799	}
1800	if (fdnode)
1801		vput(fdvp);
1802	if (fnode)
1803		vput(fvp);
1804	vrele(ap->a_fvp);
1805	return (error);
1806
	/*
	 * bad: release the target vnode (if any) and the target directory
	 * with vput(), then fall through to "out".
	 */
1807bad:
1808	DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error));
1809	if (tnode)
1810		vput(NTOV(tnode));
1811	vput(NTOV(tdnode));
	/*
	 * out: clear IN_RENAME and, if fvp can be locked again, undo the
	 * temporary link-count bump on the source before releasing it.
	 */
1812out:
1813	if (doingdirectory)
1814		fnode->nn_flags &= ~IN_RENAME;
1815	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
1816		fnode->nn_inode.i_links_count--;
1817		fnode->nn_flags |= IN_CHANGE;
1818		fnode->nn_flags &= ~IN_RENAME;
1819		vput(fvp);
1820	} else
1821		vrele(fvp);
1822	return (error);
1823}
1824
1825static int
1826nandfs_mkdir(struct vop_mkdir_args *ap)
1827{
1828	struct vnode *dvp = ap->a_dvp;
1829	struct vnode **vpp = ap->a_vpp;
1830	struct componentname *cnp = ap->a_cnp;
1831	struct nandfs_node *dir_node = VTON(dvp);
1832	struct nandfs_inode *dir_inode = &dir_node->nn_inode;
1833	struct nandfs_node *node;
1834	struct nandfsmount *nmp = dir_node->nn_nmp;
1835	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1836	int error;
1837
1838	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
1839
1840	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1841		return (ENOSPC);
1842
1843	if (dir_inode->i_links_count >= LINK_MAX)
1844		return (EMLINK);
1845
1846	error = nandfs_node_create(nmp, &node, mode);
1847	if (error)
1848		return (error);
1849
1850	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1851	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1852
1853	*vpp = NTOV(node);
1854
1855	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1856	    cnp->cn_namelen, IFTODT(mode));
1857	if (error) {
1858		vput(*vpp);
1859		return (error);
1860	}
1861
1862	dir_node->nn_inode.i_links_count++;
1863	dir_node->nn_flags |= IN_CHANGE;
1864
1865	error = nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino);
1866	if (error) {
1867		vput(NTOV(node));
1868		return (error);
1869	}
1870
1871	DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node,
1872	    (uintmax_t)node->nn_ino));
1873	return (0);
1874}
1875
1876static int
1877nandfs_mknod(struct vop_mknod_args *ap)
1878{
1879	struct vnode *dvp = ap->a_dvp;
1880	struct vnode **vpp = ap->a_vpp;
1881	struct vattr *vap = ap->a_vap;
1882	uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
1883	struct componentname *cnp = ap->a_cnp;
1884	struct nandfs_node *dir_node = VTON(dvp);
1885	struct nandfsmount *nmp = dir_node->nn_nmp;
1886	struct nandfs_node *node;
1887	int error;
1888
1889	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1890		return (ENOSPC);
1891
1892	error = nandfs_node_create(nmp, &node, mode);
1893	if (error)
1894		return (error);
1895	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1896	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1897	if (vap->va_rdev != VNOVAL)
1898		node->nn_inode.i_special = vap->va_rdev;
1899
1900	*vpp = NTOV(node);
1901
1902	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1903	    cnp->cn_namelen, IFTODT(mode))) {
1904		vput(*vpp);
1905		return (ENOTDIR);
1906	}
1907
1908	node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1909
1910	return (0);
1911}
1912
1913static int
1914nandfs_symlink(struct vop_symlink_args *ap)
1915{
1916	struct vnode **vpp = ap->a_vpp;
1917	struct vnode *dvp = ap->a_dvp;
1918	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1919	struct componentname *cnp = ap->a_cnp;
1920	struct nandfs_node *dir_node = VTON(dvp);
1921	struct nandfsmount *nmp = dir_node->nn_nmp;
1922	struct nandfs_node *node;
1923	int len, error;
1924
1925	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1926		return (ENOSPC);
1927
1928	error = nandfs_node_create(nmp, &node, S_IFLNK | mode);
1929	if (error)
1930		return (error);
1931	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1932	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1933
1934	*vpp = NTOV(node);
1935
1936	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1937	    cnp->cn_namelen, IFTODT(mode))) {
1938		vput(*vpp);
1939		return (ENOTDIR);
1940	}
1941
1942
1943	len = strlen(ap->a_target);
1944	error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
1945	    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1946	    cnp->cn_cred, NOCRED, NULL, NULL);
1947	if (error)
1948		vput(*vpp);
1949
1950	return (error);
1951}
1952
1953static int
1954nandfs_readlink(struct vop_readlink_args *ap)
1955{
1956	struct vnode *vp = ap->a_vp;
1957
1958	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1959}
1960
1961static int
1962nandfs_rmdir(struct vop_rmdir_args *ap)
1963{
1964	struct vnode *vp = ap->a_vp;
1965	struct vnode *dvp = ap->a_dvp;
1966	struct componentname *cnp = ap->a_cnp;
1967	struct nandfs_node *node, *dnode;
1968	uint32_t dflag, flag;
1969	int error = 0;
1970
1971	node = VTON(vp);
1972	dnode = VTON(dvp);
1973
1974	/* Files marked as immutable or append-only cannot be deleted. */
1975	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
1976	    (dnode->nn_inode.i_flags & APPEND))
1977		return (EPERM);
1978
1979	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__,
1980	    dvp, vp, node, (uintmax_t)node->nn_ino));
1981
1982	if (node->nn_inode.i_links_count < 2)
1983		return (EINVAL);
1984
1985	if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred))
1986		return (ENOTEMPTY);
1987
1988	/* Files marked as immutable or append-only cannot be deleted. */
1989	dflag = dnode->nn_inode.i_flags;
1990	flag = node->nn_inode.i_flags;
1991	if ((dflag & APPEND) ||
1992	    (flag & (NOUNLINK | IMMUTABLE | APPEND))) {
1993		return (EPERM);
1994	}
1995
1996	if (vp->v_mountedhere != 0)
1997		return (EINVAL);
1998
1999	nandfs_remove_dirent(dvp, node, cnp);
2000	dnode->nn_inode.i_links_count -= 1;
2001	dnode->nn_flags |= IN_CHANGE;
2002
2003	cache_purge(dvp);
2004
2005	error = nandfs_truncate(vp, (uint64_t)0);
2006	if (error)
2007		return (error);
2008
2009	node->nn_inode.i_links_count -= 2;
2010	node->nn_flags |= IN_CHANGE;
2011
2012	cache_purge(vp);
2013
2014	return (error);
2015}
2016
2017static int
2018nandfs_fsync(struct vop_fsync_args *ap)
2019{
2020	struct vnode *vp = ap->a_vp;
2021	struct nandfs_node *node = VTON(vp);
2022	int locked;
2023
2024	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
2025	    node, (uintmax_t)node->nn_ino));
2026
2027	/*
2028	 * Start syncing vnode only if inode was modified or
2029	 * there are some dirty buffers
2030	 */
2031	if (VTON(vp)->nn_flags & IN_MODIFIED ||
2032	    vp->v_bufobj.bo_dirty.bv_cnt) {
2033		locked = VOP_ISLOCKED(vp);
2034		VOP_UNLOCK(vp, 0);
2035		nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC);
2036		VOP_LOCK(vp, locked | LK_RETRY);
2037	}
2038
2039	return (0);
2040}
2041
2042static int
2043nandfs_bmap(struct vop_bmap_args *ap)
2044{
2045	struct vnode *vp = ap->a_vp;
2046	struct nandfs_node *nnode = VTON(vp);
2047	struct nandfs_device *nandfsdev = nnode->nn_nandfsdev;
2048	nandfs_daddr_t l2vmap, v2pmap;
2049	int error;
2050	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
2051
2052	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
2053	    nnode, (uintmax_t)nnode->nn_ino));
2054
2055	if (ap->a_bop != NULL)
2056		*ap->a_bop = &nandfsdev->nd_devvp->v_bufobj;
2057	if (ap->a_bnp == NULL)
2058		return (0);
2059	if (ap->a_runp != NULL)
2060		*ap->a_runp = 0;
2061	if (ap->a_runb != NULL)
2062		*ap->a_runb = 0;
2063
2064	/*
2065	 * Translate all the block sectors into a series of buffers to read
2066	 * asynchronously from the nandfs device. Note that this lookup may
2067	 * induce readin's too.
2068	 */
2069
2070	/* Get virtual block numbers for the vnode's buffer span */
2071	error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap);
2072	if (error)
2073		return (-1);
2074
2075	/* Translate virtual block numbers to physical block numbers */
2076	error = nandfs_vtop(nnode, l2vmap, &v2pmap);
2077	if (error)
2078		return (-1);
2079
2080	/* Note virtual block 0 marks not mapped */
2081	if (l2vmap == 0)
2082		*ap->a_bnp = -1;
2083	else
2084		*ap->a_bnp = v2pmap * blk2dev;	/* in DEV_BSIZE */
2085
2086	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n",
2087	    __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn,
2088	    (uintmax_t)*ap->a_bnp ));
2089
2090	return (0);
2091}
2092
2093static void
2094nandfs_force_syncer(struct nandfsmount *nmp)
2095{
2096
2097	nmp->nm_flags |= NANDFS_FORCE_SYNCER;
2098	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE);
2099}
2100
2101static int
2102nandfs_ioctl(struct vop_ioctl_args *ap)
2103{
2104	struct vnode *vp = ap->a_vp;
2105	u_long command = ap->a_command;
2106	caddr_t data = ap->a_data;
2107	struct nandfs_node *node = VTON(vp);
2108	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
2109	struct nandfsmount *nmp = node->nn_nmp;
2110	uint64_t *tab, *cno;
2111	struct nandfs_seg_stat *nss;
2112	struct nandfs_cpmode *ncpm;
2113	struct nandfs_argv *nargv;
2114	struct nandfs_cpstat *ncp;
2115	int error;
2116
2117	DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command));
2118
2119	error = priv_check(ap->a_td, PRIV_VFS_MOUNT);
2120	if (error)
2121		return (error);
2122
2123	if (nmp->nm_ronly) {
2124		switch (command) {
2125		case NANDFS_IOCTL_GET_FSINFO:
2126		case NANDFS_IOCTL_GET_SUSTAT:
2127		case NANDFS_IOCTL_GET_CPINFO:
2128		case NANDFS_IOCTL_GET_CPSTAT:
2129		case NANDFS_IOCTL_GET_SUINFO:
2130		case NANDFS_IOCTL_GET_VINFO:
2131		case NANDFS_IOCTL_GET_BDESCS:
2132			break;
2133		default:
2134			return (EROFS);
2135		}
2136	}
2137
2138	switch (command) {
2139	case NANDFS_IOCTL_GET_FSINFO:
2140		error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data);
2141		break;
2142	case NANDFS_IOCTL_GET_SUSTAT:
2143		nss = (struct nandfs_seg_stat *)data;
2144		error = nandfs_get_seg_stat(nandfsdev, nss);
2145		break;
2146	case NANDFS_IOCTL_CHANGE_CPMODE:
2147		ncpm = (struct nandfs_cpmode *)data;
2148		error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm);
2149		nandfs_force_syncer(nmp);
2150		break;
2151	case NANDFS_IOCTL_GET_CPINFO:
2152		nargv = (struct nandfs_argv *)data;
2153		error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv);
2154		break;
2155	case NANDFS_IOCTL_DELETE_CP:
2156		tab = (uint64_t *)data;
2157		error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0], tab[1]);
2158		nandfs_force_syncer(nmp);
2159		break;
2160	case NANDFS_IOCTL_GET_CPSTAT:
2161		ncp = (struct nandfs_cpstat *)data;
2162		error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp);
2163		break;
2164	case NANDFS_IOCTL_GET_SUINFO:
2165		nargv = (struct nandfs_argv *)data;
2166		error = nandfs_get_segment_info_ioctl(nandfsdev, nargv);
2167		break;
2168	case NANDFS_IOCTL_GET_VINFO:
2169		nargv = (struct nandfs_argv *)data;
2170		error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv);
2171		break;
2172	case NANDFS_IOCTL_GET_BDESCS:
2173		nargv = (struct nandfs_argv *)data;
2174		error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv);
2175		break;
2176	case NANDFS_IOCTL_SYNC:
2177		cno = (uint64_t *)data;
2178		nandfs_force_syncer(nmp);
2179		*cno = nandfsdev->nd_last_cno;
2180		error = 0;
2181		break;
2182	case NANDFS_IOCTL_MAKE_SNAP:
2183		cno = (uint64_t *)data;
2184		error = nandfs_make_snap(nandfsdev, cno);
2185		nandfs_force_syncer(nmp);
2186		break;
2187	case NANDFS_IOCTL_DELETE_SNAP:
2188		cno = (uint64_t *)data;
2189		error = nandfs_delete_snap(nandfsdev, *cno);
2190		nandfs_force_syncer(nmp);
2191		break;
2192	default:
2193		error = ENOTTY;
2194		break;
2195	}
2196
2197	return (error);
2198}
2199
2200/*
2201 * Whiteout vnode call
2202 */
2203static int
2204nandfs_whiteout(struct vop_whiteout_args *ap)
2205{
2206	struct vnode *dvp = ap->a_dvp;
2207	struct componentname *cnp = ap->a_cnp;
2208	int error = 0;
2209
2210	switch (ap->a_flags) {
2211	case LOOKUP:
2212		return (0);
2213	case CREATE:
2214		/* Create a new directory whiteout */
2215#ifdef INVARIANTS
2216		if ((cnp->cn_flags & SAVENAME) == 0)
2217			panic("ufs_whiteout: missing name");
2218#endif
2219		error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr,
2220		    cnp->cn_namelen, DT_WHT);
2221		break;
2222
2223	case DELETE:
2224		/* Remove an existing directory whiteout */
2225		cnp->cn_flags &= ~DOWHITEOUT;
2226		error = nandfs_remove_dirent(dvp, NULL, cnp);
2227		break;
2228	default:
2229		panic("nandf_whiteout: unknown op: %d", ap->a_flags);
2230	}
2231
2232	return (error);
2233}
2234
2235static int
2236nandfs_pathconf(struct vop_pathconf_args *ap)
2237{
2238	int error;
2239
2240	error = 0;
2241	switch (ap->a_name) {
2242	case _PC_LINK_MAX:
2243		*ap->a_retval = LINK_MAX;
2244		break;
2245	case _PC_NAME_MAX:
2246		*ap->a_retval = NAME_MAX;
2247		break;
2248	case _PC_PATH_MAX:
2249		*ap->a_retval = PATH_MAX;
2250		break;
2251	case _PC_PIPE_BUF:
2252		*ap->a_retval = PIPE_BUF;
2253		break;
2254	case _PC_CHOWN_RESTRICTED:
2255		*ap->a_retval = 1;
2256		break;
2257	case _PC_NO_TRUNC:
2258		*ap->a_retval = 1;
2259		break;
2260	case _PC_ACL_EXTENDED:
2261		*ap->a_retval = 0;
2262		break;
2263	case _PC_ALLOC_SIZE_MIN:
2264		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2265		break;
2266	case _PC_FILESIZEBITS:
2267		*ap->a_retval = 64;
2268		break;
2269	case _PC_REC_INCR_XFER_SIZE:
2270		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2271		break;
2272	case _PC_REC_MAX_XFER_SIZE:
2273		*ap->a_retval = -1; /* means ``unlimited'' */
2274		break;
2275	case _PC_REC_MIN_XFER_SIZE:
2276		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2277		break;
2278	default:
2279		error = EINVAL;
2280		break;
2281	}
2282	return (error);
2283}
2284
2285static int
2286nandfs_vnlock1(struct vop_lock1_args *ap)
2287{
2288	struct vnode *vp = ap->a_vp;
2289	struct nandfs_node *node = VTON(vp);
2290	int error, vi_locked;
2291
2292	/*
2293	 * XXX can vnode go away while we are sleeping?
2294	 */
2295	vi_locked = mtx_owned(&vp->v_interlock);
2296	if (vi_locked)
2297		VI_UNLOCK(vp);
2298	error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev,
2299	    ap->a_flags & LK_NOWAIT);
2300	if (vi_locked && !error)
2301		VI_LOCK(vp);
2302	if (error)
2303		return (error);
2304
2305	error = vop_stdlock(ap);
2306	if (error) {
2307		NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
2308		return (error);
2309	}
2310
2311	return (0);
2312}
2313
2314static int
2315nandfs_vnunlock(struct vop_unlock_args *ap)
2316{
2317	struct vnode *vp = ap->a_vp;
2318	struct nandfs_node *node = VTON(vp);
2319	int error;
2320
2321	error = vop_stdunlock(ap);
2322	if (error)
2323		return (error);
2324
2325	NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
2326
2327	return (0);
2328}
2329
2330/*
2331 * Global vfs data structures
2332 */
2333struct vop_vector nandfs_vnodeops = {
2334	.vop_default =		&default_vnodeops,
2335	.vop_access =		nandfs_access,
2336	.vop_advlock =		nandfs_advlock,
2337	.vop_bmap =		nandfs_bmap,
2338	.vop_close =		nandfs_close,
2339	.vop_create =		nandfs_create,
2340	.vop_fsync =		nandfs_fsync,
2341	.vop_getattr =		nandfs_getattr,
2342	.vop_inactive =		nandfs_inactive,
2343	.vop_cachedlookup =	nandfs_lookup,
2344	.vop_ioctl =		nandfs_ioctl,
2345	.vop_link =		nandfs_link,
2346	.vop_lookup =		vfs_cache_lookup,
2347	.vop_mkdir =		nandfs_mkdir,
2348	.vop_mknod =		nandfs_mknod,
2349	.vop_open =		nandfs_open,
2350	.vop_pathconf =		nandfs_pathconf,
2351	.vop_print =		nandfs_print,
2352	.vop_read =		nandfs_read,
2353	.vop_readdir =		nandfs_readdir,
2354	.vop_readlink =		nandfs_readlink,
2355	.vop_reclaim =		nandfs_reclaim,
2356	.vop_remove =		nandfs_remove,
2357	.vop_rename =		nandfs_rename,
2358	.vop_rmdir =		nandfs_rmdir,
2359	.vop_whiteout =		nandfs_whiteout,
2360	.vop_write =		nandfs_write,
2361	.vop_setattr =		nandfs_setattr,
2362	.vop_strategy =		nandfs_strategy,
2363	.vop_symlink =		nandfs_symlink,
2364	.vop_lock1 =		nandfs_vnlock1,
2365	.vop_unlock =		nandfs_vnunlock,
2366};
2367
2368struct vop_vector nandfs_system_vnodeops = {
2369	.vop_default =		&default_vnodeops,
2370	.vop_close =		nandfs_close,
2371	.vop_inactive =		nandfs_inactive,
2372	.vop_reclaim =		nandfs_reclaim,
2373	.vop_strategy =		nandfs_strategy,
2374	.vop_fsync =		nandfs_fsync,
2375	.vop_bmap =		nandfs_bmap,
2376	.vop_access =		VOP_PANIC,
2377	.vop_advlock =		VOP_PANIC,
2378	.vop_create =		VOP_PANIC,
2379	.vop_getattr =		VOP_PANIC,
2380	.vop_cachedlookup =	VOP_PANIC,
2381	.vop_ioctl =		VOP_PANIC,
2382	.vop_link =		VOP_PANIC,
2383	.vop_lookup =		VOP_PANIC,
2384	.vop_mkdir =		VOP_PANIC,
2385	.vop_mknod =		VOP_PANIC,
2386	.vop_open =		VOP_PANIC,
2387	.vop_pathconf =		VOP_PANIC,
2388	.vop_print =		VOP_PANIC,
2389	.vop_read =		VOP_PANIC,
2390	.vop_readdir =		VOP_PANIC,
2391	.vop_readlink =		VOP_PANIC,
2392	.vop_remove =		VOP_PANIC,
2393	.vop_rename =		VOP_PANIC,
2394	.vop_rmdir =		VOP_PANIC,
2395	.vop_whiteout =		VOP_PANIC,
2396	.vop_write =		VOP_PANIC,
2397	.vop_setattr =		VOP_PANIC,
2398	.vop_symlink =		VOP_PANIC,
2399};
2400
2401static int
2402nandfsfifo_close(struct vop_close_args *ap)
2403{
2404	struct vnode *vp = ap->a_vp;
2405	struct nandfs_node *node = VTON(vp);
2406
2407	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
2408
2409	mtx_lock(&vp->v_interlock);
2410	if (vp->v_usecount > 1)
2411		nandfs_itimes_locked(vp);
2412	mtx_unlock(&vp->v_interlock);
2413
2414	return (fifo_specops.vop_close(ap));
2415}
2416
2417struct vop_vector nandfs_fifoops = {
2418	.vop_default =		&fifo_specops,
2419	.vop_fsync =		VOP_PANIC,
2420	.vop_access =		nandfs_access,
2421	.vop_close =		nandfsfifo_close,
2422	.vop_getattr =		nandfs_getattr,
2423	.vop_inactive =		nandfs_inactive,
2424	.vop_print =		nandfs_print,
2425	.vop_read =		VOP_PANIC,
2426	.vop_reclaim =		nandfs_reclaim,
2427	.vop_setattr =		nandfs_setattr,
2428	.vop_write =		VOP_PANIC,
2429	.vop_lock1 =		nandfs_vnlock1,
2430	.vop_unlock =		nandfs_vnunlock,
2431};
2432
2433int
2434nandfs_vinit(struct vnode *vp, uint64_t ino)
2435{
2436	struct nandfs_node *node;
2437
2438	ASSERT_VOP_LOCKED(vp, __func__);
2439
2440	node = VTON(vp);
2441
2442	/* Check if we're fetching the root */
2443	if (ino == NANDFS_ROOT_INO)
2444		vp->v_vflag |= VV_ROOT;
2445
2446	if (ino != NANDFS_GC_INO)
2447		vp->v_type = IFTOVT(node->nn_inode.i_mode);
2448	else
2449		vp->v_type = VREG;
2450
2451	if (vp->v_type == VFIFO)
2452		vp->v_op = &nandfs_fifoops;
2453
2454	return (0);
2455}
2456