nandfs_vnops.c revision 251171
1/*-
2 * Copyright (c) 2010-2012 Semihalf
3 * Copyright (c) 2008, 2009 Reinoud Zandijk
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: head/sys/fs/nandfs/nandfs_vnops.c 251171 2013-05-31 00:43:41Z jeff $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/lockf.h>
38#include <sys/malloc.h>
39#include <sys/mount.h>
40#include <sys/mutex.h>
41#include <sys/namei.h>
42#include <sys/sysctl.h>
43#include <sys/unistd.h>
44#include <sys/vnode.h>
45#include <sys/buf.h>
46#include <sys/bio.h>
47#include <sys/fcntl.h>
48#include <sys/dirent.h>
49#include <sys/rwlock.h>
50#include <sys/stat.h>
51#include <sys/priv.h>
52
53#include <vm/vm.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_object.h>
56#include <vm/vnode_pager.h>
57
58#include <machine/_inttypes.h>
59
60#include <fs/nandfs/nandfs_mount.h>
61#include <fs/nandfs/nandfs.h>
62#include <fs/nandfs/nandfs_subr.h>
63
/* Zone object defined outside this file (presumably the nandfs node allocator). */
extern uma_zone_t nandfs_node_zone;
/* Forward declarations of helpers that are used before their definition. */
static void nandfs_read_filebuf(struct nandfs_node *, struct buf *);
static void nandfs_itimes_locked(struct vnode *);
static int nandfs_truncate(struct vnode *, uint64_t);

static vop_pathconf_t	nandfs_pathconf;

/* NOTE(review): both are 0 and unused in the visible part of this file. */
#define UPDATE_CLOSE 0
#define UPDATE_WAIT 0
73
74static int
75nandfs_inactive(struct vop_inactive_args *ap)
76{
77	struct vnode *vp = ap->a_vp;
78	struct nandfs_node *node = VTON(vp);
79	int error = 0;
80
81	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node));
82
83	if (node == NULL) {
84		DPRINTF(NODE, ("%s: inactive NULL node\n", __func__));
85		return (0);
86	}
87
88	if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) {
89		nandfs_truncate(vp, 0);
90		error = nandfs_node_destroy(node);
91		if (error)
92			nandfs_error("%s: destroy node: %p\n", __func__, node);
93		node->nn_flags = 0;
94		vrecycle(vp);
95	}
96
97	return (error);
98}
99
100static int
101nandfs_reclaim(struct vop_reclaim_args *ap)
102{
103	struct vnode *vp = ap->a_vp;
104	struct nandfs_node *nandfs_node = VTON(vp);
105	struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev;
106	uint64_t ino = nandfs_node->nn_ino;
107
108	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node));
109
110	/* Invalidate all entries to a particular vnode. */
111	cache_purge(vp);
112
113	/* Destroy the vm object and flush associated pages. */
114	vnode_destroy_vobject(vp);
115
116	/* Remove from vfs hash if not system vnode */
117	if (!NANDFS_SYS_NODE(nandfs_node->nn_ino))
118		vfs_hash_remove(vp);
119
120	/* Dispose all node knowledge */
121	nandfs_dispose_node(&nandfs_node);
122
123	if (!NANDFS_SYS_NODE(ino))
124		NANDFS_WRITEUNLOCK(fsdev);
125
126	return (0);
127}
128
129static int
130nandfs_read(struct vop_read_args *ap)
131{
132	register struct vnode *vp = ap->a_vp;
133	register struct nandfs_node *node = VTON(vp);
134	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
135	struct uio *uio = ap->a_uio;
136	struct buf *bp;
137	uint64_t size;
138	uint32_t blocksize;
139	off_t bytesinfile;
140	ssize_t toread, off;
141	daddr_t lbn;
142	ssize_t resid;
143	int error = 0;
144
145	if (uio->uio_resid == 0)
146		return (0);
147
148	size = node->nn_inode.i_size;
149	if (uio->uio_offset >= size)
150		return (0);
151
152	blocksize = nandfsdev->nd_blocksize;
153	bytesinfile = size - uio->uio_offset;
154
155	resid = omin(uio->uio_resid, bytesinfile);
156
157	while (resid) {
158		lbn = uio->uio_offset / blocksize;
159		off = uio->uio_offset & (blocksize - 1);
160
161		toread = omin(resid, blocksize - off);
162
163		DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n",
164		    (uintmax_t)lbn, toread, blocksize));
165
166		error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
167		if (error) {
168			brelse(bp);
169			break;
170		}
171
172		error = uiomove(bp->b_data + off, toread, uio);
173		if (error) {
174			brelse(bp);
175			break;
176		}
177
178		brelse(bp);
179		resid -= toread;
180	}
181
182	return (error);
183}
184
185static int
186nandfs_write(struct vop_write_args *ap)
187{
188	struct nandfs_device *fsdev;
189	struct nandfs_node *node;
190	struct vnode *vp;
191	struct uio *uio;
192	struct buf *bp;
193	uint64_t file_size, vblk;
194	uint32_t blocksize;
195	ssize_t towrite, off;
196	daddr_t lbn;
197	ssize_t resid;
198	int error, ioflag, modified;
199
200	vp = ap->a_vp;
201	uio = ap->a_uio;
202	ioflag = ap->a_ioflag;
203	node = VTON(vp);
204	fsdev = node->nn_nandfsdev;
205
206	if (nandfs_fs_full(fsdev))
207		return (ENOSPC);
208
209	DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n",
210	    uio->uio_resid, (uintmax_t)uio->uio_offset));
211
212	if (uio->uio_offset < 0)
213		return (EINVAL);
214	if (uio->uio_resid == 0)
215		return (0);
216
217	blocksize = fsdev->nd_blocksize;
218	file_size = node->nn_inode.i_size;
219
220	switch (vp->v_type) {
221	case VREG:
222		if (ioflag & IO_APPEND)
223			uio->uio_offset = file_size;
224		break;
225	case VDIR:
226		return (EISDIR);
227	case VLNK:
228		break;
229	default:
230		panic("%s: bad file type vp: %p", __func__, vp);
231	}
232
233	/* If explicitly asked to append, uio_offset can be wrong? */
234	if (ioflag & IO_APPEND)
235		uio->uio_offset = file_size;
236
237	resid = uio->uio_resid;
238	modified = error = 0;
239
240	while (uio->uio_resid) {
241		lbn = uio->uio_offset / blocksize;
242		off = uio->uio_offset & (blocksize - 1);
243
244		towrite = omin(uio->uio_resid, blocksize - off);
245
246		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n",
247		    __func__, (uintmax_t)lbn, towrite, blocksize));
248
249		error = nandfs_bmap_lookup(node, lbn, &vblk);
250		if (error)
251			break;
252
253		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) "
254		    "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize,
255		    vblk));
256
257		if (vblk != 0)
258			error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
259		else
260			error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp);
261
262		DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__,
263		    vp, bp, (uintmax_t)lbn));
264		if (error) {
265			if (bp)
266				brelse(bp);
267			break;
268		}
269
270		error = uiomove((char *)bp->b_data + off, (int)towrite, uio);
271		if (error)
272			break;
273
274		error = nandfs_dirty_buf(bp, 0);
275		if (error)
276			break;
277
278		modified++;
279	}
280
281	/* XXX proper handling when only part of file was properly written */
282	if (modified) {
283		if (resid > uio->uio_resid && ap->a_cred &&
284		    ap->a_cred->cr_uid != 0)
285			node->nn_inode.i_mode &= ~(ISUID | ISGID);
286
287		if (file_size < uio->uio_offset + uio->uio_resid) {
288			node->nn_inode.i_size = uio->uio_offset +
289			    uio->uio_resid;
290			node->nn_flags |= IN_CHANGE | IN_UPDATE;
291			vnode_pager_setsize(vp, uio->uio_offset +
292			    uio->uio_resid);
293			nandfs_itimes(vp);
294		}
295	}
296
297	DPRINTF(WRITE, ("%s: return:%d\n", __func__, error));
298
299	return (error);
300}
301
302static int
303nandfs_lookup(struct vop_cachedlookup_args *ap)
304{
305	struct vnode *dvp, **vpp;
306	struct componentname *cnp;
307	struct ucred *cred;
308	struct thread *td;
309	struct nandfs_node *dir_node, *node;
310	struct nandfsmount *nmp;
311	uint64_t ino, off;
312	const char *name;
313	int namelen, nameiop, islastcn, mounted_ro;
314	int error, found;
315
316	DPRINTF(VNCALL, ("%s\n", __func__));
317
318	dvp = ap->a_dvp;
319	vpp = ap->a_vpp;
320	*vpp = NULL;
321
322	cnp = ap->a_cnp;
323	cred = cnp->cn_cred;
324	td = cnp->cn_thread;
325
326	dir_node = VTON(dvp);
327	nmp = dir_node->nn_nmp;
328
329	/* Simplify/clarification flags */
330	nameiop = cnp->cn_nameiop;
331	islastcn = cnp->cn_flags & ISLASTCN;
332	mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY;
333
334	/*
335	 * If requesting a modify on the last path element on a read-only
336	 * filingsystem, reject lookup;
337	 */
338	if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME))
339		return (EROFS);
340
341	if (dir_node->nn_inode.i_links_count == 0)
342		return (ENOENT);
343
344	/*
345	 * Obviously, the file is not (anymore) in the namecache, we have to
346	 * search for it. There are three basic cases: '.', '..' and others.
347	 *
348	 * Following the guidelines of VOP_LOOKUP manpage and tmpfs.
349	 */
350	error = 0;
351	if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
352		DPRINTF(LOOKUP, ("\tlookup '.'\n"));
353		/* Special case 1 '.' */
354		VREF(dvp);
355		*vpp = dvp;
356		/* Done */
357	} else if (cnp->cn_flags & ISDOTDOT) {
358		/* Special case 2 '..' */
359		DPRINTF(LOOKUP, ("\tlookup '..'\n"));
360
361		/* Get our node */
362		name = "..";
363		namelen = 2;
364		error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino,
365		    &found, &off);
366		if (error)
367			goto out;
368		if (!found)
369			error = ENOENT;
370
371		/* First unlock parent */
372		VOP_UNLOCK(dvp, 0);
373
374		if (error == 0) {
375			DPRINTF(LOOKUP, ("\tfound '..'\n"));
376			/* Try to create/reuse the node */
377			error = nandfs_get_node(nmp, ino, &node);
378
379			if (!error) {
380				DPRINTF(LOOKUP,
381				    ("\tnode retrieved/created OK\n"));
382				*vpp = NTOV(node);
383			}
384		}
385
386		/* Try to relock parent */
387		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
388	} else {
389		DPRINTF(LOOKUP, ("\tlookup file\n"));
390		/* All other files */
391		/* Look up filename in the directory returning its inode */
392		name = cnp->cn_nameptr;
393		namelen = cnp->cn_namelen;
394		error = nandfs_lookup_name_in_dir(dvp, name, namelen,
395		    &ino, &found, &off);
396		if (error)
397			goto out;
398		if (!found) {
399			DPRINTF(LOOKUP, ("\tNOT found\n"));
400			/*
401			 * UGH, didn't find name. If we're creating or
402			 * renaming on the last name this is OK and we ought
403			 * to return EJUSTRETURN if its allowed to be created.
404			 */
405			error = ENOENT;
406			if ((nameiop == CREATE || nameiop == RENAME) &&
407			    islastcn) {
408				error = VOP_ACCESS(dvp, VWRITE, cred,
409				    td);
410				if (!error) {
411					/* keep the component name */
412					cnp->cn_flags |= SAVENAME;
413					error = EJUSTRETURN;
414				}
415			}
416			/* Done */
417		} else {
418			if (ino == NANDFS_WHT_INO)
419				cnp->cn_flags |= ISWHITEOUT;
420
421			if ((cnp->cn_flags & ISWHITEOUT) &&
422			    (nameiop == LOOKUP))
423				return (ENOENT);
424
425			if ((nameiop == DELETE) && islastcn) {
426				if ((cnp->cn_flags & ISWHITEOUT) &&
427				    (cnp->cn_flags & DOWHITEOUT)) {
428					cnp->cn_flags |= SAVENAME;
429					dir_node->nn_diroff = off;
430					return (EJUSTRETURN);
431				}
432
433				error = VOP_ACCESS(dvp, VWRITE, cred,
434				    cnp->cn_thread);
435				if (error)
436					return (error);
437
438				/* Try to create/reuse the node */
439				error = nandfs_get_node(nmp, ino, &node);
440				if (!error) {
441					*vpp = NTOV(node);
442					node->nn_diroff = off;
443				}
444
445				if ((dir_node->nn_inode.i_mode & ISVTX) &&
446				    cred->cr_uid != 0 &&
447				    cred->cr_uid != dir_node->nn_inode.i_uid &&
448				    node->nn_inode.i_uid != cred->cr_uid) {
449					vput(*vpp);
450					*vpp = NULL;
451					return (EPERM);
452				}
453			} else if ((nameiop == RENAME) && islastcn) {
454				error = VOP_ACCESS(dvp, VWRITE, cred,
455				    cnp->cn_thread);
456				if (error)
457					return (error);
458
459				/* Try to create/reuse the node */
460				error = nandfs_get_node(nmp, ino, &node);
461				if (!error) {
462					*vpp = NTOV(node);
463					node->nn_diroff = off;
464				}
465			} else {
466				/* Try to create/reuse the node */
467				error = nandfs_get_node(nmp, ino, &node);
468				if (!error) {
469					*vpp = NTOV(node);
470					node->nn_diroff = off;
471				}
472			}
473		}
474	}
475
476out:
477	/*
478	 * Store result in the cache if requested. If we are creating a file,
479	 * the file might not be found and thus putting it into the namecache
480	 * might be seen as negative caching.
481	 */
482	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
483		cache_enter(dvp, *vpp, cnp);
484
485	return (error);
486
487}
488
489static int
490nandfs_getattr(struct vop_getattr_args *ap)
491{
492	struct vnode *vp = ap->a_vp;
493	struct vattr *vap = ap->a_vap;
494	struct nandfs_node *node = VTON(vp);
495	struct nandfs_inode *inode = &node->nn_inode;
496
497	DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp));
498	nandfs_itimes(vp);
499
500	/* Basic info */
501	VATTR_NULL(vap);
502	vap->va_atime.tv_sec = inode->i_mtime;
503	vap->va_atime.tv_nsec = inode->i_mtime_nsec;
504	vap->va_mtime.tv_sec = inode->i_mtime;
505	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
506	vap->va_ctime.tv_sec = inode->i_ctime;
507	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
508	vap->va_type = IFTOVT(inode->i_mode);
509	vap->va_mode = inode->i_mode & ~S_IFMT;
510	vap->va_nlink = inode->i_links_count;
511	vap->va_uid = inode->i_uid;
512	vap->va_gid = inode->i_gid;
513	vap->va_rdev = inode->i_special;
514	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
515	vap->va_fileid = node->nn_ino;
516	vap->va_size = inode->i_size;
517	vap->va_blocksize = node->nn_nandfsdev->nd_blocksize;
518	vap->va_gen = 0;
519	vap->va_flags = inode->i_flags;
520	vap->va_bytes = inode->i_blocks * vap->va_blocksize;
521	vap->va_filerev = 0;
522	vap->va_vaflags = 0;
523
524	return (0);
525}
526
/*
 * Invalidate and release every buffer attached to the vnode whose logical
 * block number is at or beyond 'nblks'.  Both the clean and the dirty buffer
 * list of the vnode's buffer object are walked.  The vnode must be locked.
 *
 * Always returns 0.
 */
static int
nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks)
{
	struct nandfs_device *nffsdev;
	struct bufobj *bo;
	struct buf *bp, *nbp;

	bo = &vp->v_bufobj;
	nffsdev = VTON(vp)->nn_nandfsdev;

	/* Note: the assertion message names nandfs_truncate, the caller. */
	ASSERT_VOP_LOCKED(vp, "nandfs_truncate");
restart:
	BO_LOCK(bo);
restart_locked:
	/* Pass 1: clean buffers. */
	TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
		/* Buffers below the new end are kept. */
		if (bp->b_lblkno < nblks)
			continue;
		/*
		 * Non-blocking lock attempt; on failure rescan the clean
		 * list from its head with the bufobj lock still held.
		 */
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			goto restart_locked;

		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF);
		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
		/* The bufobj lock is dropped around brelse(). */
		BO_UNLOCK(bo);
		brelse(bp);
		BO_LOCK(bo);
		/*
		 * NOTE(review): the iteration continues with the 'nbp' saved
		 * before the bufobj lock was dropped -- confirm the list
		 * cannot change in between.
		 */
	}

	/* Pass 2: dirty buffers. */
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (bp->b_lblkno < nblks)
			continue;
		/*
		 * The bufobj lock is handed over as interlock.  When the
		 * lock attempt reports ENOLCK, start over from 'restart',
		 * which takes the bufobj lock again.
		 */
		if (BUF_LOCK(bp,
		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
		    BO_LOCKPTR(bo)) == ENOLCK)
			goto restart;
		bp->b_flags |= (B_INVAL | B_RELBUF);
		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
		brelse(bp);
		/* One dirty buffer less to account for on the device. */
		nandfs_dirty_bufs_decrement(nffsdev);
		BO_LOCK(bo);
	}

	BO_UNLOCK(bo);

	return (0);
}
573
574static int
575nandfs_truncate(struct vnode *vp, uint64_t newsize)
576{
577	struct nandfs_device *nffsdev;
578	struct nandfs_node *node;
579	struct nandfs_inode *inode;
580	struct buf *bp = NULL;
581	uint64_t oblks, nblks, vblk, size, rest;
582	int error;
583
584	node = VTON(vp);
585	nffsdev = node->nn_nandfsdev;
586	inode = &node->nn_inode;
587
588	/* Calculate end of file */
589	size = inode->i_size;
590
591	if (newsize == size) {
592		node->nn_flags |= IN_CHANGE | IN_UPDATE;
593		nandfs_itimes(vp);
594		return (0);
595	}
596
597	if (newsize > size) {
598		inode->i_size = newsize;
599		vnode_pager_setsize(vp, newsize);
600		node->nn_flags |= IN_CHANGE | IN_UPDATE;
601		nandfs_itimes(vp);
602		return (0);
603	}
604
605	nblks = howmany(newsize, nffsdev->nd_blocksize);
606	oblks = howmany(size, nffsdev->nd_blocksize);
607	rest = newsize % nffsdev->nd_blocksize;
608
609	if (rest) {
610		error = nandfs_bmap_lookup(node, nblks - 1, &vblk);
611		if (error)
612			return (error);
613
614		if (vblk != 0)
615			error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp);
616		else
617			error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp);
618
619		if (error) {
620			if (bp)
621				brelse(bp);
622			return (error);
623		}
624
625		bzero((char *)bp->b_data + rest,
626		    (u_int)(nffsdev->nd_blocksize - rest));
627		error = nandfs_dirty_buf(bp, 0);
628		if (error)
629			return (error);
630	}
631
632	DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks,
633	    nblks));
634
635	error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1);
636	if (error) {
637		if (bp)
638			nandfs_undirty_buf(bp);
639		return (error);
640	}
641
642	error = nandfs_vtruncbuf(vp, nblks);
643	if (error) {
644		if (bp)
645			nandfs_undirty_buf(bp);
646		return (error);
647	}
648
649	inode->i_size = newsize;
650	vnode_pager_setsize(vp, newsize);
651	node->nn_flags |= IN_CHANGE | IN_UPDATE;
652	nandfs_itimes(vp);
653
654	return (error);
655}
656
657static void
658nandfs_itimes_locked(struct vnode *vp)
659{
660	struct nandfs_node *node;
661	struct nandfs_inode *inode;
662	struct timespec ts;
663
664	ASSERT_VI_LOCKED(vp, __func__);
665
666	node = VTON(vp);
667	inode = &node->nn_inode;
668
669	if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
670		return;
671
672	if (((vp->v_mount->mnt_kern_flag &
673	    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
674	    (node->nn_flags & (IN_CHANGE | IN_UPDATE)))
675		node->nn_flags |= IN_MODIFIED;
676
677	vfs_timestamp(&ts);
678	if (node->nn_flags & IN_UPDATE) {
679		inode->i_mtime = ts.tv_sec;
680		inode->i_mtime_nsec = ts.tv_nsec;
681	}
682	if (node->nn_flags & IN_CHANGE) {
683		inode->i_ctime = ts.tv_sec;
684		inode->i_ctime_nsec = ts.tv_nsec;
685	}
686
687	node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
688}
689
/*
 * Apply pending timestamp updates to the inode of 'vp'.  Takes the vnode
 * interlock around nandfs_itimes_locked(), so the caller must not hold it.
 */
void
nandfs_itimes(struct vnode *vp)
{

	VI_LOCK(vp);
	nandfs_itimes_locked(vp);
	VI_UNLOCK(vp);
}
698
699static int
700nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
701{
702	struct nandfs_node *node = VTON(vp);
703	struct nandfs_inode *inode = &node->nn_inode;
704	uint16_t nmode;
705	int error = 0;
706
707	DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp,
708	    mode, cred, td));
709	/*
710	 * To modify the permissions on a file, must possess VADMIN
711	 * for that file.
712	 */
713	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
714		return (error);
715
716	/*
717	 * Privileged processes may set the sticky bit on non-directories,
718	 * as well as set the setgid bit on a file with a group that the
719	 * process is not a member of. Both of these are allowed in
720	 * jail(8).
721	 */
722	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
723		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
724			return (EFTYPE);
725	}
726	if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) {
727		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
728		if (error)
729			return (error);
730	}
731
732	/*
733	 * Deny setting setuid if we are not the file owner.
734	 */
735	if ((mode & ISUID) && inode->i_uid != cred->cr_uid) {
736		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
737		if (error)
738			return (error);
739	}
740
741	nmode = inode->i_mode;
742	nmode &= ~ALLPERMS;
743	nmode |= (mode & ALLPERMS);
744	inode->i_mode = nmode;
745	node->nn_flags |= IN_CHANGE;
746
747	DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode));
748
749	return (error);
750}
751
752static int
753nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
754    struct thread *td)
755{
756	struct nandfs_node *node = VTON(vp);
757	struct nandfs_inode *inode = &node->nn_inode;
758	uid_t ouid;
759	gid_t ogid;
760	int error = 0;
761
762	if (uid == (uid_t)VNOVAL)
763		uid = inode->i_uid;
764	if (gid == (gid_t)VNOVAL)
765		gid = inode->i_gid;
766	/*
767	 * To modify the ownership of a file, must possess VADMIN for that
768	 * file.
769	 */
770	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
771		return (error);
772	/*
773	 * To change the owner of a file, or change the group of a file to a
774	 * group of which we are not a member, the caller must have
775	 * privilege.
776	 */
777	if (((uid != inode->i_uid && uid != cred->cr_uid) ||
778	    (gid != inode->i_gid && !groupmember(gid, cred))) &&
779	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
780		return (error);
781	ogid = inode->i_gid;
782	ouid = inode->i_uid;
783
784	inode->i_gid = gid;
785	inode->i_uid = uid;
786
787	node->nn_flags |= IN_CHANGE;
788	if ((inode->i_mode & (ISUID | ISGID)) &&
789	    (ouid != uid || ogid != gid)) {
790		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
791			inode->i_mode &= ~(ISUID | ISGID);
792		}
793	}
794	DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp,
795	    cred, td));
796	return (0);
797}
798
799static int
800nandfs_setattr(struct vop_setattr_args *ap)
801{
802	struct vnode *vp = ap->a_vp;
803	struct nandfs_node *node = VTON(vp);
804	struct nandfs_inode *inode = &node->nn_inode;
805	struct vattr *vap = ap->a_vap;
806	struct ucred *cred = ap->a_cred;
807	struct thread *td = curthread;
808	uint32_t flags;
809	int error = 0;
810
811	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
812	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
813	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
814	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
815		DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__));
816		return (EINVAL);
817	}
818
819	if (vap->va_flags != VNOVAL) {
820		DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp,
821		    td, vap->va_flags));
822
823		if (vp->v_mount->mnt_flag & MNT_RDONLY)
824			return (EROFS);
825		/*
826		 * Callers may only modify the file flags on objects they
827		 * have VADMIN rights for.
828		 */
829		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
830			return (error);
831		/*
832		 * Unprivileged processes are not permitted to unset system
833		 * flags, or modify flags if any system flags are set.
834		 * Privileged non-jail processes may not modify system flags
835		 * if securelevel > 0 and any existing system flags are set.
836		 * Privileged jail processes behave like privileged non-jail
837		 * processes if the security.jail.chflags_allowed sysctl is
838		 * is non-zero; otherwise, they behave like unprivileged
839		 * processes.
840		 */
841
842		flags = inode->i_flags;
843		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
844			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
845				error = securelevel_gt(cred, 0);
846				if (error)
847					return (error);
848			}
849			/* Snapshot flag cannot be set or cleared */
850			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
851			    (flags & SF_SNAPSHOT) == 0) ||
852			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
853			    (flags & SF_SNAPSHOT) != 0))
854				return (EPERM);
855
856			inode->i_flags = vap->va_flags;
857		} else {
858			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
859			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
860				return (EPERM);
861
862			flags &= SF_SETTABLE;
863			flags |= (vap->va_flags & UF_SETTABLE);
864			inode->i_flags = flags;
865		}
866		node->nn_flags |= IN_CHANGE;
867		if (vap->va_flags & (IMMUTABLE | APPEND))
868			return (0);
869	}
870	if (inode->i_flags & (IMMUTABLE | APPEND))
871		return (EPERM);
872
873	if (vap->va_size != (u_quad_t)VNOVAL) {
874		DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td,
875		    (uintmax_t)vap->va_size));
876
877		switch (vp->v_type) {
878		case VDIR:
879			return (EISDIR);
880		case VLNK:
881		case VREG:
882			if (vp->v_mount->mnt_flag & MNT_RDONLY)
883				return (EROFS);
884			if ((inode->i_flags & SF_SNAPSHOT) != 0)
885				return (EPERM);
886			break;
887		default:
888			return (0);
889		}
890
891		if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize)
892			return (EFBIG);
893
894		KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type));
895		nandfs_truncate(vp, vap->va_size);
896		node->nn_flags |= IN_CHANGE;
897
898		return (0);
899	}
900
901	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
902		if (vp->v_mount->mnt_flag & MNT_RDONLY)
903			return (EROFS);
904		DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
905		    vp, td, vap->va_uid, vap->va_gid));
906		error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
907		if (error)
908			return (error);
909	}
910
911	if (vap->va_mode != (mode_t)VNOVAL) {
912		if (vp->v_mount->mnt_flag & MNT_RDONLY)
913			return (EROFS);
914		DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td,
915		    vap->va_mode));
916
917		error = nandfs_chmod(vp, (int)vap->va_mode, cred, td);
918		if (error)
919			return (error);
920	}
921	if (vap->va_atime.tv_sec != VNOVAL ||
922	    vap->va_mtime.tv_sec != VNOVAL ||
923	    vap->va_birthtime.tv_sec != VNOVAL) {
924		DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n",
925		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
926		    (uintmax_t)vap->va_mtime.tv_sec,
927		    (uintmax_t)vap->va_birthtime.tv_sec));
928
929		if (vap->va_atime.tv_sec != VNOVAL)
930			node->nn_flags |= IN_ACCESS;
931		if (vap->va_mtime.tv_sec != VNOVAL)
932			node->nn_flags |= IN_CHANGE | IN_UPDATE;
933		if (vap->va_birthtime.tv_sec != VNOVAL)
934			node->nn_flags |= IN_MODIFIED;
935		nandfs_itimes(vp);
936		return (0);
937	}
938
939	return (0);
940}
941
942static int
943nandfs_open(struct vop_open_args *ap)
944{
945	struct nandfs_node *node = VTON(ap->a_vp);
946	uint64_t filesize;
947
948	DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode));
949
950	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
951		return (EOPNOTSUPP);
952
953	if ((node->nn_inode.i_flags & APPEND) &&
954	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
955		return (EPERM);
956
957	filesize = node->nn_inode.i_size;
958	vnode_create_vobject(ap->a_vp, filesize, ap->a_td);
959
960	return (0);
961}
962
963static int
964nandfs_close(struct vop_close_args *ap)
965{
966	struct vnode *vp = ap->a_vp;
967	struct nandfs_node *node = VTON(vp);
968
969	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
970
971	mtx_lock(&vp->v_interlock);
972	if (vp->v_usecount > 1)
973		nandfs_itimes_locked(vp);
974	mtx_unlock(&vp->v_interlock);
975
976	return (0);
977}
978
979static int
980nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
981{
982
983	/* Check if we are allowed to write */
984	switch (vap->va_type) {
985	case VDIR:
986	case VLNK:
987	case VREG:
988		/*
989		 * Normal nodes: check if we're on a read-only mounted
990		 * filingsystem and bomb out if we're trying to write.
991		 */
992		if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY))
993			return (EROFS);
994		break;
995	case VBLK:
996	case VCHR:
997	case VSOCK:
998	case VFIFO:
999		/*
1000		 * Special nodes: even on read-only mounted filingsystems
1001		 * these are allowed to be written to if permissions allow.
1002		 */
1003		break;
1004	default:
1005		/* No idea what this is */
1006		return (EINVAL);
1007	}
1008
1009	/* Noone may write immutable files */
1010	if ((mode & VWRITE) && (VTON(vp)->nn_inode.i_flags & IMMUTABLE))
1011		return (EPERM);
1012
1013	return (0);
1014}
1015
1016static int
1017nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode,
1018    struct ucred *cred)
1019{
1020
1021	return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode,
1022	    cred, NULL));
1023}
1024
1025static int
1026nandfs_advlock(struct vop_advlock_args *ap)
1027{
1028	struct nandfs_node *nvp;
1029	quad_t size;
1030
1031	nvp = VTON(ap->a_vp);
1032	size = nvp->nn_inode.i_size;
1033	return (lf_advlock(ap, &(nvp->nn_lockf), size));
1034}
1035
1036static int
1037nandfs_access(struct vop_access_args *ap)
1038{
1039	struct vnode *vp = ap->a_vp;
1040	accmode_t accmode = ap->a_accmode;
1041	struct ucred *cred = ap->a_cred;
1042	struct vattr vap;
1043	int error;
1044
1045	DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode));
1046
1047	error = VOP_GETATTR(vp, &vap, NULL);
1048	if (error)
1049		return (error);
1050
1051	error = nandfs_check_possible(vp, &vap, accmode);
1052	if (error) {
1053		return (error);
1054	}
1055
1056	error = nandfs_check_permitted(vp, &vap, accmode, cred);
1057
1058	return (error);
1059}
1060
1061static int
1062nandfs_print(struct vop_print_args *ap)
1063{
1064	struct vnode *vp = ap->a_vp;
1065	struct nandfs_node *nvp = VTON(vp);
1066
1067	printf("\tvp=%p, nandfs_node=%p\n", vp, nvp);
1068	printf("nandfs inode %#jx\n", (uintmax_t)nvp->nn_ino);
1069	printf("flags = 0x%b\n", (u_int)nvp->nn_flags, PRINT_NODE_FLAGS);
1070
1071	return (0);
1072}
1073
1074static void
1075nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp)
1076{
1077	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
1078	struct buf *nbp;
1079	nandfs_daddr_t vblk, pblk;
1080	nandfs_lbn_t from;
1081	uint32_t blocksize;
1082	int error = 0;
1083	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
1084
1085	/*
1086	 * Translate all the block sectors into a series of buffers to read
1087	 * asynchronously from the nandfs device. Note that this lookup may
1088	 * induce readin's too.
1089	 */
1090
1091	blocksize = nandfsdev->nd_blocksize;
1092	if (bp->b_bcount / blocksize != 1)
1093		panic("invalid b_count in bp %p\n", bp);
1094
1095	from = bp->b_blkno;
1096
1097	DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx"
1098	    " count %#lx\n", (uintmax_t)node->nn_ino, from,
1099	    bp->b_bcount));
1100
1101	/* Get virtual block numbers for the vnode's buffer span */
1102	error = nandfs_bmap_lookup(node, from, &vblk);
1103	if (error) {
1104		bp->b_error = EINVAL;
1105		bp->b_ioflags |= BIO_ERROR;
1106		bufdone(bp);
1107		return;
1108	}
1109
1110	/* Translate virtual block numbers to physical block numbers */
1111	error = nandfs_vtop(node, vblk, &pblk);
1112	if (error) {
1113		bp->b_error = EINVAL;
1114		bp->b_ioflags |= BIO_ERROR;
1115		bufdone(bp);
1116		return;
1117	}
1118
1119	/* Issue translated blocks */
1120	bp->b_resid = bp->b_bcount;
1121
1122	/* Note virtual block 0 marks not mapped */
1123	if (vblk == 0) {
1124		vfs_bio_clrbuf(bp);
1125		bufdone(bp);
1126		return;
1127	}
1128
1129	nbp = bp;
1130	nbp->b_blkno = pblk * blk2dev;
1131	bp->b_iooffset = dbtob(nbp->b_blkno);
1132	MPASS(bp->b_iooffset >= 0);
1133	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp);
1134	nandfs_vblk_set(bp, vblk);
1135	DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> "
1136	    "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino,
1137	    (uintmax_t)(from), (uintmax_t)vblk,
1138	    (uintmax_t)pblk, nbp));
1139}
1140
1141static void
1142nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp)
1143{
1144	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
1145
1146	bp->b_iooffset = dbtob(bp->b_blkno);
1147	MPASS(bp->b_iooffset >= 0);
1148	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, bp);
1149}
1150
1151static int
1152nandfs_strategy(struct vop_strategy_args *ap)
1153{
1154	struct vnode *vp = ap->a_vp;
1155	struct buf *bp = ap->a_bp;
1156	struct nandfs_node *node = VTON(vp);
1157
1158
1159	/* check if we ought to be here */
1160	KASSERT((vp->v_type != VBLK && vp->v_type != VCHR),
1161	    ("nandfs_strategy on type %d", vp->v_type));
1162
1163	/* Translate if needed and pass on */
1164	if (bp->b_iocmd == BIO_READ) {
1165		nandfs_read_filebuf(node, bp);
1166		return (0);
1167	}
1168
1169	/* Send to segment collector */
1170	nandfs_write_filebuf(node, bp);
1171	return (0);
1172}
1173
1174static int
1175nandfs_readdir(struct vop_readdir_args *ap)
1176{
1177	struct uio *uio = ap->a_uio;
1178	struct vnode *vp = ap->a_vp;
1179	struct nandfs_node *node = VTON(vp);
1180	struct nandfs_dir_entry *ndirent;
1181	struct dirent dirent;
1182	struct buf *bp;
1183	uint64_t file_size, diroffset, transoffset, blkoff;
1184	uint64_t blocknr;
1185	uint32_t blocksize = node->nn_nandfsdev->nd_blocksize;
1186	uint8_t *pos, name_len;
1187	int error;
1188
1189	DPRINTF(READDIR, ("nandfs_readdir called\n"));
1190
1191	if (vp->v_type != VDIR)
1192		return (ENOTDIR);
1193
1194	file_size = node->nn_inode.i_size;
1195	DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n",
1196	    (uintmax_t)file_size, uio->uio_resid ));
1197
1198	/* We are called just as long as we keep on pushing data in */
1199	error = 0;
1200	if ((uio->uio_offset < file_size) &&
1201	    (uio->uio_resid >= sizeof(struct dirent))) {
1202		diroffset = uio->uio_offset;
1203		transoffset = diroffset;
1204
1205		blocknr = diroffset / blocksize;
1206		blkoff = diroffset % blocksize;
1207		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
1208		if (error) {
1209			brelse(bp);
1210			return (EIO);
1211		}
1212		while (diroffset < file_size) {
1213			DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n",
1214			    diroffset));
1215			if (blkoff >= blocksize) {
1216				blkoff = 0; blocknr++;
1217				brelse(bp);
1218				error = nandfs_bread(node, blocknr, NOCRED, 0,
1219				    &bp);
1220				if (error) {
1221					brelse(bp);
1222					return (EIO);
1223				}
1224			}
1225
1226			/* Read in one dirent */
1227			pos = (uint8_t *)bp->b_data + blkoff;
1228			ndirent = (struct nandfs_dir_entry *)pos;
1229
1230			name_len = ndirent->name_len;
1231			memset(&dirent, 0, sizeof(struct dirent));
1232			dirent.d_fileno = ndirent->inode;
1233			if (dirent.d_fileno) {
1234				dirent.d_type = ndirent->file_type;
1235				dirent.d_namlen = name_len;
1236				strncpy(dirent.d_name, ndirent->name, name_len);
1237				dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
1238				DPRINTF(READDIR, ("copying `%*.*s`\n", name_len,
1239				    name_len, dirent.d_name));
1240			}
1241
1242			/*
1243			 * If there isn't enough space in the uio to return a
1244			 * whole dirent, break off read
1245			 */
1246			if (uio->uio_resid < GENERIC_DIRSIZ(&dirent))
1247				break;
1248
1249			/* Transfer */
1250			if (dirent.d_fileno)
1251				uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio);
1252
1253			/* Advance */
1254			diroffset += ndirent->rec_len;
1255			blkoff += ndirent->rec_len;
1256
1257			/* Remember the last entry we transfered */
1258			transoffset = diroffset;
1259		}
1260		brelse(bp);
1261
1262		/* Pass on last transfered offset */
1263		uio->uio_offset = transoffset;
1264	}
1265
1266	if (ap->a_eofflag)
1267		*ap->a_eofflag = (uio->uio_offset >= file_size);
1268
1269	return (error);
1270}
1271
1272static int
1273nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred)
1274{
1275	struct nandfs_node *dnode = VTON(dvp);
1276	struct nandfs_dir_entry *dirent;
1277	uint64_t file_size = dnode->nn_inode.i_size;
1278	uint64_t blockcount = dnode->nn_inode.i_blocks;
1279	uint64_t blocknr;
1280	uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize;
1281	uint32_t limit;
1282	uint32_t off;
1283	uint8_t	*pos;
1284	struct buf *bp;
1285	int error;
1286
1287	DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp,
1288	    (uintmax_t)parentino, cred));
1289
1290	KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp));
1291
1292	blocknr = 0;
1293	while (blocknr < blockcount) {
1294		error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp);
1295		if (error) {
1296			brelse(bp);
1297			return (0);
1298		}
1299
1300		pos = (uint8_t *)bp->b_data;
1301		off = 0;
1302
1303		if (blocknr == (blockcount - 1))
1304			limit = file_size % blocksize;
1305		else
1306			limit = blocksize;
1307
1308		while (off < limit) {
1309			dirent = (struct nandfs_dir_entry *)(pos + off);
1310			off += dirent->rec_len;
1311
1312			if (dirent->inode == 0)
1313				continue;
1314
1315			switch (dirent->name_len) {
1316			case 0:
1317				break;
1318			case 1:
1319				if (dirent->name[0] != '.')
1320					goto notempty;
1321
1322				KASSERT(dirent->inode == dnode->nn_ino,
1323				    (".'s inode does not match dir"));
1324				break;
1325			case 2:
1326				if (dirent->name[0] != '.' &&
1327				    dirent->name[1] != '.')
1328					goto notempty;
1329
1330				KASSERT(dirent->inode == parentino,
1331				    ("..'s inode does not match parent"));
1332				break;
1333			default:
1334				goto notempty;
1335			}
1336		}
1337
1338		brelse(bp);
1339		blocknr++;
1340	}
1341
1342	return (1);
1343notempty:
1344	brelse(bp);
1345	return (0);
1346}
1347
1348static int
1349nandfs_link(struct vop_link_args *ap)
1350{
1351	struct vnode *tdvp = ap->a_tdvp;
1352	struct vnode *vp = ap->a_vp;
1353	struct componentname *cnp = ap->a_cnp;
1354	struct nandfs_node *node = VTON(vp);
1355	struct nandfs_inode *inode = &node->nn_inode;
1356	int error;
1357
1358	if (tdvp->v_mount != vp->v_mount)
1359		return (EXDEV);
1360
1361	if (inode->i_links_count >= LINK_MAX)
1362		return (EMLINK);
1363
1364	if (inode->i_flags & (IMMUTABLE | APPEND))
1365		return (EPERM);
1366
1367	/* Update link count */
1368	inode->i_links_count++;
1369
1370	/* Add dir entry */
1371	error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr,
1372	    cnp->cn_namelen, IFTODT(inode->i_mode));
1373	if (error) {
1374		inode->i_links_count--;
1375	}
1376
1377	node->nn_flags |= IN_CHANGE;
1378	nandfs_itimes(vp);
1379	DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n",
1380	    __func__, tdvp, vp, cnp));
1381
1382	return (0);
1383}
1384
1385static int
1386nandfs_create(struct vop_create_args *ap)
1387{
1388	struct vnode *dvp = ap->a_dvp;
1389	struct vnode **vpp = ap->a_vpp;
1390	struct componentname *cnp = ap->a_cnp;
1391	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1392	struct nandfs_node *dir_node = VTON(dvp);
1393	struct nandfsmount *nmp = dir_node->nn_nmp;
1394	struct nandfs_node *node;
1395	int error;
1396
1397	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
1398
1399	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1400		return (ENOSPC);
1401
1402	/* Create new vnode/inode */
1403	error = nandfs_node_create(nmp, &node, mode);
1404	if (error)
1405		return (error);
1406	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1407	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1408
1409	/* Add new dir entry */
1410	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1411	    cnp->cn_namelen, IFTODT(mode));
1412	if (error) {
1413		if (nandfs_node_destroy(node)) {
1414			nandfs_error("%s: error destroying node %p\n",
1415			    __func__, node);
1416		}
1417		return (error);
1418	}
1419	*vpp = NTOV(node);
1420
1421	DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node,
1422	    (uintmax_t)node->nn_ino));
1423	return (0);
1424}
1425
1426static int
1427nandfs_remove(struct vop_remove_args *ap)
1428{
1429	struct vnode *vp = ap->a_vp;
1430	struct vnode *dvp = ap->a_dvp;
1431	struct nandfs_node *node = VTON(vp);
1432	struct nandfs_node *dnode = VTON(dvp);
1433	struct componentname *cnp = ap->a_cnp;
1434
1435	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n",
1436	    __func__, dvp, vp, node, (uintmax_t)node->nn_ino,
1437	    node->nn_inode.i_links_count));
1438
1439	if (vp->v_type == VDIR)
1440		return (EISDIR);
1441
1442	/* Files marked as immutable or append-only cannot be deleted. */
1443	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
1444	    (dnode->nn_inode.i_flags & APPEND))
1445		return (EPERM);
1446
1447	nandfs_remove_dirent(dvp, node, cnp);
1448	node->nn_inode.i_links_count--;
1449	node->nn_flags |= IN_CHANGE;
1450
1451	return (0);
1452}
1453
1454/*
1455 * Check if source directory is in the path of the target directory.
1456 * Target is supplied locked, source is unlocked.
1457 * The target is always vput before returning.
1458 */
1459static int
1460nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest,
1461    struct ucred *cred)
1462{
1463	struct vnode *vp;
1464	int error, rootino;
1465	struct nandfs_dir_entry dirent;
1466
1467	vp = NTOV(dest);
1468	if (src->nn_ino == dest->nn_ino) {
1469		error = EEXIST;
1470		goto out;
1471	}
1472	rootino = NANDFS_ROOT_INO;
1473	error = 0;
1474	if (dest->nn_ino == rootino)
1475		goto out;
1476
1477	for (;;) {
1478		if (vp->v_type != VDIR) {
1479			error = ENOTDIR;
1480			break;
1481		}
1482
1483		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent,
1484		    NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE,
1485		    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED,
1486		    NULL, NULL);
1487		if (error != 0)
1488			break;
1489		if (dirent.name_len != 2 ||
1490		    dirent.name[0] != '.' ||
1491		    dirent.name[1] != '.') {
1492			error = ENOTDIR;
1493			break;
1494		}
1495		if (dirent.inode == src->nn_ino) {
1496			error = EINVAL;
1497			break;
1498		}
1499		if (dirent.inode == rootino)
1500			break;
1501		vput(vp);
1502		if ((error = VFS_VGET(vp->v_mount, dirent.inode,
1503		    LK_EXCLUSIVE, &vp)) != 0) {
1504			vp = NULL;
1505			break;
1506		}
1507	}
1508
1509out:
1510	if (error == ENOTDIR)
1511		printf("checkpath: .. not a directory\n");
1512	if (vp != NULL)
1513		vput(vp);
1514	return (error);
1515}
1516
/*
 * VOP_RENAME: rename fvp (name fcnp in directory fdvp) to the name tcnp in
 * directory tdvp, replacing tvp when a target already exists.
 *
 * Outline of what the code below does:
 *  1. Reject cross-mount renames (EXDEV) and targets/sources protected by
 *     NOUNLINK, IMMUTABLE or APPEND flags (EPERM).
 *  2. Lock fvp; for directories refuse ".", ".." and concurrent renames
 *     (EINVAL) and mark the source with IN_RENAME.
 *  3. Temporarily bump the source link count.
 *  4. When a directory gets a new parent, run nandfs_checkpath() and look
 *     the target up again with relookup().
 *  5. Either add a new entry in tdvp (no target) or repoint the existing
 *     target entry at the source with nandfs_update_dirent().
 *  6. Look the source up again, fix ".." for moved directories and remove
 *     the old entry, dropping the temporary link.
 *
 * The "abortit" path releases all four vnodes passed in; the "bad" and
 * "out" paths undo the temporary link count bump and clear IN_RENAME.
 * Returns 0 on success or an errno value.
 */
static int
nandfs_rename(struct vop_rename_args *ap)
{
	struct vnode *tvp = ap->a_tvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int doingdirectory = 0, oldparent = 0, newparent = 0;
	int error = 0;

	struct nandfs_node *fdnode, *fnode, *fnode1;
	struct nandfs_node *tdnode = VTON(tdvp);
	struct nandfs_node *tnode;

	uint32_t tdflags, fflags, fdflags;
	uint16_t mode;

	DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp,
	    fvp, tdvp, tvp));

	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
		/*
		 * Common early-exit path: release the target directory and
		 * target (vput, or vrele when they are the same vnode) and
		 * drop the references on the source directory and source.
		 */
abortit:
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fdvp);
		vrele(fvp);
		return (error);
	}

	tdflags = tdnode->nn_inode.i_flags;
	if (tvp &&
	    ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (tdflags & APPEND))) {
		error = EPERM;
		goto abortit;
	}

	/*
	 * Renaming a file to itself has no effect.  The upper layers should
	 * not call us in that case.  Temporarily just warn if they do.
	 */
	if (fvp == tvp) {
		printf("nandfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto abortit;
	}

	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
		goto abortit;

	fdnode = VTON(fdvp);
	fnode = VTON(fvp);

	/* The link count is bumped temporarily below; make sure it fits */
	if (fnode->nn_inode.i_links_count >= LINK_MAX) {
		VOP_UNLOCK(fvp, 0);
		error = EMLINK;
		goto abortit;
	}

	fflags = fnode->nn_inode.i_flags;
	fdflags = fdnode->nn_inode.i_flags;

	if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdflags & APPEND)) {
		VOP_UNLOCK(fvp, 0);
		error = EPERM;
		goto abortit;
	}

	mode = fnode->nn_inode.i_mode;
	if ((mode & S_IFMT) == S_IFDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */

		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    (fdvp == fvp) ||
		    ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) ||
		    (fnode->nn_flags & IN_RENAME)) {
			VOP_UNLOCK(fvp, 0);
			error = EINVAL;
			goto abortit;
		}
		fnode->nn_flags |= IN_RENAME;
		doingdirectory = 1;
		DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__,
		    tdvp));
		oldparent = fdnode->nn_ino;
	}

	/*
	 * NOTE(review): fdvp is released here but is still compared with
	 * tdvp and re-referenced with VREF() further down - confirm the
	 * caller keeps it alive.
	 */
	vrele(fdvp);

	tnode = NULL;
	if (tvp)
		tnode = VTON(tvp);

	/*
	 * Bump link count on fvp while we are moving stuff around. If we
	 * crash before completing the work, the link count may be wrong
	 * but correctable.
	 */
	fnode->nn_inode.i_links_count++;

	/* Check for in path moving XXX */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	/* newparent stays 0 unless the target directory differs from oldparent */
	if (oldparent != tdnode->nn_ino)
		newparent = tdnode->nn_ino;
	if (doingdirectory && newparent) {
		if (error)	/* write access check above */
			goto bad;
		if (tnode != NULL)
			vput(tvp);

		/* nandfs_checkpath() always vput's tdvp before returning */
		error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred);
		if (error)
			goto out;

		VREF(tdvp);
		error = relookup(tdvp, &tvp, tcnp);
		if (error)
			goto out;
		vrele(tdvp);
		tdnode = VTON(tdvp);
		tnode = NULL;
		if (tvp)
			tnode = VTON(tvp);
	}

	/*
	 * If the target doesn't exist, link the target to the source and
	 * unlink the source. Otherwise, rewrite the target directory to
	 * reference the source and remove the original entry.
	 */

	if (tvp == NULL) {
		/*
		 * Account for ".." in new directory.
		 */
		if (doingdirectory && fdvp != tdvp)
			tdnode->nn_inode.i_links_count++;

		DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp));
		/*
		 * Add name in new directory.
		 */
		error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr,
		    tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode));
		if (error) {
			/* Undo the ".." accounting done above */
			if (doingdirectory && fdvp != tdvp)
				tdnode->nn_inode.i_links_count--;
			goto bad;
		}

		vput(tdvp);
	} else {
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((tdnode->nn_inode.i_mode & S_ISTXT) &&
		    tcnp->cn_cred->cr_uid != 0 &&
		    tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid &&
		    tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		mode = tnode->nn_inode.i_mode;
		if ((mode & S_IFMT) == S_IFDIR) {
			if (!nandfs_dirempty(tvp, tdnode->nn_ino,
			    tcnp->cn_cred)) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			/*
			 * Update name cache since directory is going away.
			 */
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}

		DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp));
		/*
		 * Change name tcnp in tdvp to point at fvp.
		 */
		error = nandfs_update_dirent(tdvp, fnode, tnode);
		if (error)
			goto bad;

		if (doingdirectory && !newparent)
			tdnode->nn_inode.i_links_count--;

		vput(tdvp);

		/* The replaced target loses the name that pointed at it */
		tnode->nn_inode.i_links_count--;
		vput(tvp);
		tnode = NULL;
	}

	/*
	 * Unlink the source.
	 */
	fcnp->cn_flags &= ~MODMASK;
	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
	VREF(fdvp);
	error = relookup(fdvp, &fvp, fcnp);
	if (error == 0)
		vrele(fdvp);
	if (fvp != NULL) {
		fnode1 = VTON(fvp);
		fdnode = VTON(fdvp);
	} else {
		/*
		 * From name has disappeared.
		 */
		if (doingdirectory)
			panic("nandfs_rename: lost dir entry");
		vrele(ap->a_fvp);
		return (0);
	}

	DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode));

	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; its link
	 * count of three would cause a rmdir to fail with ENOTEMPTY.
	 * The IN_RENAME flag ensures that it cannot be moved by another
	 * rename.
	 */
	if (fnode != fnode1) {
		if (doingdirectory)
			panic("nandfs: lost dir entry");
	} else {
		/*
		 * If the source is a directory with a
		 * new parent, the link count of the old
		 * parent directory must be decremented
		 * and ".." set to point to the new parent.
		 */
		if (doingdirectory && newparent) {
			DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n",
			    __func__, (uintmax_t) oldparent,
			    (uintmax_t) newparent));
			error = nandfs_update_parent_dir(fvp, newparent);
			if (!error) {
				fdnode->nn_inode.i_links_count--;
				fdnode->nn_flags |= IN_CHANGE;
			}
		}
		/*
		 * NOTE(review): this assignment overwrites any error from
		 * nandfs_update_parent_dir() above.
		 */
		error = nandfs_remove_dirent(fdvp, fnode, fcnp);
		if (!error) {
			/* Drop the temporary link taken before the move */
			fnode->nn_inode.i_links_count--;
			fnode->nn_flags |= IN_CHANGE;
		}
		fnode->nn_flags &= ~IN_RENAME;
	}
	if (fdnode)
		vput(fdvp);
	if (fnode)
		vput(fvp);
	vrele(ap->a_fvp);
	return (error);

	/* Error exit while the target vnodes are still held */
bad:
	DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error));
	if (tnode)
		vput(NTOV(tnode));
	vput(NTOV(tdnode));
	/* Error exit after the target vnodes have been released */
out:
	if (doingdirectory)
		fnode->nn_flags &= ~IN_RENAME;
	/* Undo the temporary link count bump on the source */
	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
		fnode->nn_inode.i_links_count--;
		fnode->nn_flags |= IN_CHANGE;
		fnode->nn_flags &= ~IN_RENAME;
		vput(fvp);
	} else
		vrele(fvp);
	return (error);
}
1825
1826static int
1827nandfs_mkdir(struct vop_mkdir_args *ap)
1828{
1829	struct vnode *dvp = ap->a_dvp;
1830	struct vnode **vpp = ap->a_vpp;
1831	struct componentname *cnp = ap->a_cnp;
1832	struct nandfs_node *dir_node = VTON(dvp);
1833	struct nandfs_inode *dir_inode = &dir_node->nn_inode;
1834	struct nandfs_node *node;
1835	struct nandfsmount *nmp = dir_node->nn_nmp;
1836	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1837	int error;
1838
1839	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
1840
1841	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1842		return (ENOSPC);
1843
1844	if (dir_inode->i_links_count >= LINK_MAX)
1845		return (EMLINK);
1846
1847	error = nandfs_node_create(nmp, &node, mode);
1848	if (error)
1849		return (error);
1850
1851	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1852	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1853
1854	*vpp = NTOV(node);
1855
1856	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1857	    cnp->cn_namelen, IFTODT(mode));
1858	if (error) {
1859		vput(*vpp);
1860		return (error);
1861	}
1862
1863	dir_node->nn_inode.i_links_count++;
1864	dir_node->nn_flags |= IN_CHANGE;
1865
1866	error = nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino);
1867	if (error) {
1868		vput(NTOV(node));
1869		return (error);
1870	}
1871
1872	DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node,
1873	    (uintmax_t)node->nn_ino));
1874	return (0);
1875}
1876
1877static int
1878nandfs_mknod(struct vop_mknod_args *ap)
1879{
1880	struct vnode *dvp = ap->a_dvp;
1881	struct vnode **vpp = ap->a_vpp;
1882	struct vattr *vap = ap->a_vap;
1883	uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
1884	struct componentname *cnp = ap->a_cnp;
1885	struct nandfs_node *dir_node = VTON(dvp);
1886	struct nandfsmount *nmp = dir_node->nn_nmp;
1887	struct nandfs_node *node;
1888	int error;
1889
1890	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1891		return (ENOSPC);
1892
1893	error = nandfs_node_create(nmp, &node, mode);
1894	if (error)
1895		return (error);
1896	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1897	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1898	if (vap->va_rdev != VNOVAL)
1899		node->nn_inode.i_special = vap->va_rdev;
1900
1901	*vpp = NTOV(node);
1902
1903	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1904	    cnp->cn_namelen, IFTODT(mode))) {
1905		vput(*vpp);
1906		return (ENOTDIR);
1907	}
1908
1909	node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1910
1911	return (0);
1912}
1913
1914static int
1915nandfs_symlink(struct vop_symlink_args *ap)
1916{
1917	struct vnode **vpp = ap->a_vpp;
1918	struct vnode *dvp = ap->a_dvp;
1919	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1920	struct componentname *cnp = ap->a_cnp;
1921	struct nandfs_node *dir_node = VTON(dvp);
1922	struct nandfsmount *nmp = dir_node->nn_nmp;
1923	struct nandfs_node *node;
1924	int len, error;
1925
1926	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1927		return (ENOSPC);
1928
1929	error = nandfs_node_create(nmp, &node, S_IFLNK | mode);
1930	if (error)
1931		return (error);
1932	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1933	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1934
1935	*vpp = NTOV(node);
1936
1937	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1938	    cnp->cn_namelen, IFTODT(mode))) {
1939		vput(*vpp);
1940		return (ENOTDIR);
1941	}
1942
1943
1944	len = strlen(ap->a_target);
1945	error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
1946	    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1947	    cnp->cn_cred, NOCRED, NULL, NULL);
1948	if (error)
1949		vput(*vpp);
1950
1951	return (error);
1952}
1953
1954static int
1955nandfs_readlink(struct vop_readlink_args *ap)
1956{
1957	struct vnode *vp = ap->a_vp;
1958
1959	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1960}
1961
1962static int
1963nandfs_rmdir(struct vop_rmdir_args *ap)
1964{
1965	struct vnode *vp = ap->a_vp;
1966	struct vnode *dvp = ap->a_dvp;
1967	struct componentname *cnp = ap->a_cnp;
1968	struct nandfs_node *node, *dnode;
1969	uint32_t dflag, flag;
1970	int error = 0;
1971
1972	node = VTON(vp);
1973	dnode = VTON(dvp);
1974
1975	/* Files marked as immutable or append-only cannot be deleted. */
1976	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
1977	    (dnode->nn_inode.i_flags & APPEND))
1978		return (EPERM);
1979
1980	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__,
1981	    dvp, vp, node, (uintmax_t)node->nn_ino));
1982
1983	if (node->nn_inode.i_links_count < 2)
1984		return (EINVAL);
1985
1986	if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred))
1987		return (ENOTEMPTY);
1988
1989	/* Files marked as immutable or append-only cannot be deleted. */
1990	dflag = dnode->nn_inode.i_flags;
1991	flag = node->nn_inode.i_flags;
1992	if ((dflag & APPEND) ||
1993	    (flag & (NOUNLINK | IMMUTABLE | APPEND))) {
1994		return (EPERM);
1995	}
1996
1997	if (vp->v_mountedhere != 0)
1998		return (EINVAL);
1999
2000	nandfs_remove_dirent(dvp, node, cnp);
2001	dnode->nn_inode.i_links_count -= 1;
2002	dnode->nn_flags |= IN_CHANGE;
2003
2004	cache_purge(dvp);
2005
2006	error = nandfs_truncate(vp, (uint64_t)0);
2007	if (error)
2008		return (error);
2009
2010	node->nn_inode.i_links_count -= 2;
2011	node->nn_flags |= IN_CHANGE;
2012
2013	cache_purge(vp);
2014
2015	return (error);
2016}
2017
2018static int
2019nandfs_fsync(struct vop_fsync_args *ap)
2020{
2021	struct vnode *vp = ap->a_vp;
2022	struct nandfs_node *node = VTON(vp);
2023	int locked;
2024
2025	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
2026	    node, (uintmax_t)node->nn_ino));
2027
2028	/*
2029	 * Start syncing vnode only if inode was modified or
2030	 * there are some dirty buffers
2031	 */
2032	if (VTON(vp)->nn_flags & IN_MODIFIED ||
2033	    vp->v_bufobj.bo_dirty.bv_cnt) {
2034		locked = VOP_ISLOCKED(vp);
2035		VOP_UNLOCK(vp, 0);
2036		nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC);
2037		VOP_LOCK(vp, locked | LK_RETRY);
2038	}
2039
2040	return (0);
2041}
2042
2043static int
2044nandfs_bmap(struct vop_bmap_args *ap)
2045{
2046	struct vnode *vp = ap->a_vp;
2047	struct nandfs_node *nnode = VTON(vp);
2048	struct nandfs_device *nandfsdev = nnode->nn_nandfsdev;
2049	nandfs_daddr_t l2vmap, v2pmap;
2050	int error;
2051	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
2052
2053	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
2054	    nnode, (uintmax_t)nnode->nn_ino));
2055
2056	if (ap->a_bop != NULL)
2057		*ap->a_bop = &nandfsdev->nd_devvp->v_bufobj;
2058	if (ap->a_bnp == NULL)
2059		return (0);
2060	if (ap->a_runp != NULL)
2061		*ap->a_runp = 0;
2062	if (ap->a_runb != NULL)
2063		*ap->a_runb = 0;
2064
2065	/*
2066	 * Translate all the block sectors into a series of buffers to read
2067	 * asynchronously from the nandfs device. Note that this lookup may
2068	 * induce readin's too.
2069	 */
2070
2071	/* Get virtual block numbers for the vnode's buffer span */
2072	error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap);
2073	if (error)
2074		return (-1);
2075
2076	/* Translate virtual block numbers to physical block numbers */
2077	error = nandfs_vtop(nnode, l2vmap, &v2pmap);
2078	if (error)
2079		return (-1);
2080
2081	/* Note virtual block 0 marks not mapped */
2082	if (l2vmap == 0)
2083		*ap->a_bnp = -1;
2084	else
2085		*ap->a_bnp = v2pmap * blk2dev;	/* in DEV_BSIZE */
2086
2087	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n",
2088	    __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn,
2089	    (uintmax_t)*ap->a_bnp ));
2090
2091	return (0);
2092}
2093
2094static void
2095nandfs_force_syncer(struct nandfsmount *nmp)
2096{
2097
2098	nmp->nm_flags |= NANDFS_FORCE_SYNCER;
2099	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE);
2100}
2101
2102static int
2103nandfs_ioctl(struct vop_ioctl_args *ap)
2104{
2105	struct vnode *vp = ap->a_vp;
2106	u_long command = ap->a_command;
2107	caddr_t data = ap->a_data;
2108	struct nandfs_node *node = VTON(vp);
2109	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
2110	struct nandfsmount *nmp = node->nn_nmp;
2111	uint64_t *tab, *cno;
2112	struct nandfs_seg_stat *nss;
2113	struct nandfs_cpmode *ncpm;
2114	struct nandfs_argv *nargv;
2115	struct nandfs_cpstat *ncp;
2116	int error;
2117
2118	DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command));
2119
2120	error = priv_check(ap->a_td, PRIV_VFS_MOUNT);
2121	if (error)
2122		return (error);
2123
2124	if (nmp->nm_ronly) {
2125		switch (command) {
2126		case NANDFS_IOCTL_GET_FSINFO:
2127		case NANDFS_IOCTL_GET_SUSTAT:
2128		case NANDFS_IOCTL_GET_CPINFO:
2129		case NANDFS_IOCTL_GET_CPSTAT:
2130		case NANDFS_IOCTL_GET_SUINFO:
2131		case NANDFS_IOCTL_GET_VINFO:
2132		case NANDFS_IOCTL_GET_BDESCS:
2133			break;
2134		default:
2135			return (EROFS);
2136		}
2137	}
2138
2139	switch (command) {
2140	case NANDFS_IOCTL_GET_FSINFO:
2141		error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data);
2142		break;
2143	case NANDFS_IOCTL_GET_SUSTAT:
2144		nss = (struct nandfs_seg_stat *)data;
2145		error = nandfs_get_seg_stat(nandfsdev, nss);
2146		break;
2147	case NANDFS_IOCTL_CHANGE_CPMODE:
2148		ncpm = (struct nandfs_cpmode *)data;
2149		error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm);
2150		nandfs_force_syncer(nmp);
2151		break;
2152	case NANDFS_IOCTL_GET_CPINFO:
2153		nargv = (struct nandfs_argv *)data;
2154		error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv);
2155		break;
2156	case NANDFS_IOCTL_DELETE_CP:
2157		tab = (uint64_t *)data;
2158		error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0], tab[1]);
2159		nandfs_force_syncer(nmp);
2160		break;
2161	case NANDFS_IOCTL_GET_CPSTAT:
2162		ncp = (struct nandfs_cpstat *)data;
2163		error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp);
2164		break;
2165	case NANDFS_IOCTL_GET_SUINFO:
2166		nargv = (struct nandfs_argv *)data;
2167		error = nandfs_get_segment_info_ioctl(nandfsdev, nargv);
2168		break;
2169	case NANDFS_IOCTL_GET_VINFO:
2170		nargv = (struct nandfs_argv *)data;
2171		error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv);
2172		break;
2173	case NANDFS_IOCTL_GET_BDESCS:
2174		nargv = (struct nandfs_argv *)data;
2175		error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv);
2176		break;
2177	case NANDFS_IOCTL_SYNC:
2178		cno = (uint64_t *)data;
2179		nandfs_force_syncer(nmp);
2180		*cno = nandfsdev->nd_last_cno;
2181		error = 0;
2182		break;
2183	case NANDFS_IOCTL_MAKE_SNAP:
2184		cno = (uint64_t *)data;
2185		error = nandfs_make_snap(nandfsdev, cno);
2186		nandfs_force_syncer(nmp);
2187		break;
2188	case NANDFS_IOCTL_DELETE_SNAP:
2189		cno = (uint64_t *)data;
2190		error = nandfs_delete_snap(nandfsdev, *cno);
2191		nandfs_force_syncer(nmp);
2192		break;
2193	default:
2194		error = ENOTTY;
2195		break;
2196	}
2197
2198	return (error);
2199}
2200
2201/*
2202 * Whiteout vnode call
2203 */
2204static int
2205nandfs_whiteout(struct vop_whiteout_args *ap)
2206{
2207	struct vnode *dvp = ap->a_dvp;
2208	struct componentname *cnp = ap->a_cnp;
2209	int error = 0;
2210
2211	switch (ap->a_flags) {
2212	case LOOKUP:
2213		return (0);
2214	case CREATE:
2215		/* Create a new directory whiteout */
2216#ifdef INVARIANTS
2217		if ((cnp->cn_flags & SAVENAME) == 0)
2218			panic("ufs_whiteout: missing name");
2219#endif
2220		error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr,
2221		    cnp->cn_namelen, DT_WHT);
2222		break;
2223
2224	case DELETE:
2225		/* Remove an existing directory whiteout */
2226		cnp->cn_flags &= ~DOWHITEOUT;
2227		error = nandfs_remove_dirent(dvp, NULL, cnp);
2228		break;
2229	default:
2230		panic("nandf_whiteout: unknown op: %d", ap->a_flags);
2231	}
2232
2233	return (error);
2234}
2235
2236static int
2237nandfs_pathconf(struct vop_pathconf_args *ap)
2238{
2239	int error;
2240
2241	error = 0;
2242	switch (ap->a_name) {
2243	case _PC_LINK_MAX:
2244		*ap->a_retval = LINK_MAX;
2245		break;
2246	case _PC_NAME_MAX:
2247		*ap->a_retval = NAME_MAX;
2248		break;
2249	case _PC_PATH_MAX:
2250		*ap->a_retval = PATH_MAX;
2251		break;
2252	case _PC_PIPE_BUF:
2253		*ap->a_retval = PIPE_BUF;
2254		break;
2255	case _PC_CHOWN_RESTRICTED:
2256		*ap->a_retval = 1;
2257		break;
2258	case _PC_NO_TRUNC:
2259		*ap->a_retval = 1;
2260		break;
2261	case _PC_ACL_EXTENDED:
2262		*ap->a_retval = 0;
2263		break;
2264	case _PC_ALLOC_SIZE_MIN:
2265		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2266		break;
2267	case _PC_FILESIZEBITS:
2268		*ap->a_retval = 64;
2269		break;
2270	case _PC_REC_INCR_XFER_SIZE:
2271		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2272		break;
2273	case _PC_REC_MAX_XFER_SIZE:
2274		*ap->a_retval = -1; /* means ``unlimited'' */
2275		break;
2276	case _PC_REC_MIN_XFER_SIZE:
2277		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2278		break;
2279	default:
2280		error = EINVAL;
2281		break;
2282	}
2283	return (error);
2284}
2285
2286static int
2287nandfs_vnlock1(struct vop_lock1_args *ap)
2288{
2289	struct vnode *vp = ap->a_vp;
2290	struct nandfs_node *node = VTON(vp);
2291	int error, vi_locked;
2292
2293	/*
2294	 * XXX can vnode go away while we are sleeping?
2295	 */
2296	vi_locked = mtx_owned(&vp->v_interlock);
2297	if (vi_locked)
2298		VI_UNLOCK(vp);
2299	error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev,
2300	    ap->a_flags & LK_NOWAIT);
2301	if (vi_locked && !error)
2302		VI_LOCK(vp);
2303	if (error)
2304		return (error);
2305
2306	error = vop_stdlock(ap);
2307	if (error) {
2308		NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
2309		return (error);
2310	}
2311
2312	return (0);
2313}
2314
2315static int
2316nandfs_vnunlock(struct vop_unlock_args *ap)
2317{
2318	struct vnode *vp = ap->a_vp;
2319	struct nandfs_node *node = VTON(vp);
2320	int error;
2321
2322	error = vop_stdunlock(ap);
2323	if (error)
2324		return (error);
2325
2326	NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
2327
2328	return (0);
2329}
2330
2331/*
2332 * Global vfs data structures
2333 */
2334struct vop_vector nandfs_vnodeops = {
2335	.vop_default =		&default_vnodeops,
2336	.vop_access =		nandfs_access,
2337	.vop_advlock =		nandfs_advlock,
2338	.vop_bmap =		nandfs_bmap,
2339	.vop_close =		nandfs_close,
2340	.vop_create =		nandfs_create,
2341	.vop_fsync =		nandfs_fsync,
2342	.vop_getattr =		nandfs_getattr,
2343	.vop_inactive =		nandfs_inactive,
2344	.vop_cachedlookup =	nandfs_lookup,
2345	.vop_ioctl =		nandfs_ioctl,
2346	.vop_link =		nandfs_link,
2347	.vop_lookup =		vfs_cache_lookup,
2348	.vop_mkdir =		nandfs_mkdir,
2349	.vop_mknod =		nandfs_mknod,
2350	.vop_open =		nandfs_open,
2351	.vop_pathconf =		nandfs_pathconf,
2352	.vop_print =		nandfs_print,
2353	.vop_read =		nandfs_read,
2354	.vop_readdir =		nandfs_readdir,
2355	.vop_readlink =		nandfs_readlink,
2356	.vop_reclaim =		nandfs_reclaim,
2357	.vop_remove =		nandfs_remove,
2358	.vop_rename =		nandfs_rename,
2359	.vop_rmdir =		nandfs_rmdir,
2360	.vop_whiteout =		nandfs_whiteout,
2361	.vop_write =		nandfs_write,
2362	.vop_setattr =		nandfs_setattr,
2363	.vop_strategy =		nandfs_strategy,
2364	.vop_symlink =		nandfs_symlink,
2365	.vop_lock1 =		nandfs_vnlock1,
2366	.vop_unlock =		nandfs_vnunlock,
2367};
2368
2369struct vop_vector nandfs_system_vnodeops = {
2370	.vop_default =		&default_vnodeops,
2371	.vop_close =		nandfs_close,
2372	.vop_inactive =		nandfs_inactive,
2373	.vop_reclaim =		nandfs_reclaim,
2374	.vop_strategy =		nandfs_strategy,
2375	.vop_fsync =		nandfs_fsync,
2376	.vop_bmap =		nandfs_bmap,
2377	.vop_access =		VOP_PANIC,
2378	.vop_advlock =		VOP_PANIC,
2379	.vop_create =		VOP_PANIC,
2380	.vop_getattr =		VOP_PANIC,
2381	.vop_cachedlookup =	VOP_PANIC,
2382	.vop_ioctl =		VOP_PANIC,
2383	.vop_link =		VOP_PANIC,
2384	.vop_lookup =		VOP_PANIC,
2385	.vop_mkdir =		VOP_PANIC,
2386	.vop_mknod =		VOP_PANIC,
2387	.vop_open =		VOP_PANIC,
2388	.vop_pathconf =		VOP_PANIC,
2389	.vop_print =		VOP_PANIC,
2390	.vop_read =		VOP_PANIC,
2391	.vop_readdir =		VOP_PANIC,
2392	.vop_readlink =		VOP_PANIC,
2393	.vop_remove =		VOP_PANIC,
2394	.vop_rename =		VOP_PANIC,
2395	.vop_rmdir =		VOP_PANIC,
2396	.vop_whiteout =		VOP_PANIC,
2397	.vop_write =		VOP_PANIC,
2398	.vop_setattr =		VOP_PANIC,
2399	.vop_symlink =		VOP_PANIC,
2400};
2401
2402static int
2403nandfsfifo_close(struct vop_close_args *ap)
2404{
2405	struct vnode *vp = ap->a_vp;
2406	struct nandfs_node *node = VTON(vp);
2407
2408	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
2409
2410	mtx_lock(&vp->v_interlock);
2411	if (vp->v_usecount > 1)
2412		nandfs_itimes_locked(vp);
2413	mtx_unlock(&vp->v_interlock);
2414
2415	return (fifo_specops.vop_close(ap));
2416}
2417
2418struct vop_vector nandfs_fifoops = {
2419	.vop_default =		&fifo_specops,
2420	.vop_fsync =		VOP_PANIC,
2421	.vop_access =		nandfs_access,
2422	.vop_close =		nandfsfifo_close,
2423	.vop_getattr =		nandfs_getattr,
2424	.vop_inactive =		nandfs_inactive,
2425	.vop_print =		nandfs_print,
2426	.vop_read =		VOP_PANIC,
2427	.vop_reclaim =		nandfs_reclaim,
2428	.vop_setattr =		nandfs_setattr,
2429	.vop_write =		VOP_PANIC,
2430	.vop_lock1 =		nandfs_vnlock1,
2431	.vop_unlock =		nandfs_vnunlock,
2432};
2433
2434int
2435nandfs_vinit(struct vnode *vp, uint64_t ino)
2436{
2437	struct nandfs_node *node;
2438
2439	ASSERT_VOP_LOCKED(vp, __func__);
2440
2441	node = VTON(vp);
2442
2443	/* Check if we're fetching the root */
2444	if (ino == NANDFS_ROOT_INO)
2445		vp->v_vflag |= VV_ROOT;
2446
2447	if (ino != NANDFS_GC_INO)
2448		vp->v_type = IFTOVT(node->nn_inode.i_mode);
2449	else
2450		vp->v_type = VREG;
2451
2452	if (vp->v_type == VFIFO)
2453		vp->v_op = &nandfs_fifoops;
2454
2455	return (0);
2456}
2457