/* nandfs_vnops.c revision 331643 */
1/*-
2 * Copyright (c) 2010-2012 Semihalf
3 * Copyright (c) 2008, 2009 Reinoud Zandijk
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad
27 */
28
29#include <sys/cdefs.h>
30__FBSDID("$FreeBSD: stable/11/sys/fs/nandfs/nandfs_vnops.c 331643 2018-03-27 18:52:27Z dim $");
31
32#include <sys/param.h>
33#include <sys/systm.h>
34#include <sys/conf.h>
35#include <sys/kernel.h>
36#include <sys/lock.h>
37#include <sys/lockf.h>
38#include <sys/malloc.h>
39#include <sys/mount.h>
40#include <sys/mutex.h>
41#include <sys/namei.h>
42#include <sys/sysctl.h>
43#include <sys/unistd.h>
44#include <sys/vnode.h>
45#include <sys/buf.h>
46#include <sys/bio.h>
47#include <sys/fcntl.h>
48#include <sys/dirent.h>
49#include <sys/rwlock.h>
50#include <sys/stat.h>
51#include <sys/priv.h>
52
53#include <vm/vm.h>
54#include <vm/vm_extern.h>
55#include <vm/vm_object.h>
56#include <vm/vnode_pager.h>
57
58#include <machine/_inttypes.h>
59
60#include <fs/nandfs/nandfs_mount.h>
61#include <fs/nandfs/nandfs.h>
62#include <fs/nandfs/nandfs_subr.h>
63
/* Presumably the UMA zone backing nandfs nodes; defined outside this file. */
extern uma_zone_t nandfs_node_zone;
/* Forward declarations of static helpers defined further down in this file. */
static void nandfs_read_filebuf(struct nandfs_node *, struct buf *);
static void nandfs_itimes_locked(struct vnode *);
static int nandfs_truncate(struct vnode *, uint64_t);

static vop_pathconf_t	nandfs_pathconf;

/* NOTE(review): both flags are 0 and unused in the visible part of the file. */
#define UPDATE_CLOSE 0
#define UPDATE_WAIT 0
73
74static int
75nandfs_inactive(struct vop_inactive_args *ap)
76{
77	struct vnode *vp = ap->a_vp;
78	struct nandfs_node *node = VTON(vp);
79	int error = 0;
80
81	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node));
82
83	if (node == NULL) {
84		DPRINTF(NODE, ("%s: inactive NULL node\n", __func__));
85		return (0);
86	}
87
88	if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) {
89		nandfs_truncate(vp, 0);
90		error = nandfs_node_destroy(node);
91		if (error)
92			nandfs_error("%s: destroy node: %p\n", __func__, node);
93		node->nn_flags = 0;
94		vrecycle(vp);
95	}
96
97	return (error);
98}
99
100static int
101nandfs_reclaim(struct vop_reclaim_args *ap)
102{
103	struct vnode *vp = ap->a_vp;
104	struct nandfs_node *nandfs_node = VTON(vp);
105	struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev;
106	uint64_t ino = nandfs_node->nn_ino;
107
108	DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node));
109
110	/* Invalidate all entries to a particular vnode. */
111	cache_purge(vp);
112
113	/* Destroy the vm object and flush associated pages. */
114	vnode_destroy_vobject(vp);
115
116	/* Remove from vfs hash if not system vnode */
117	if (!NANDFS_SYS_NODE(nandfs_node->nn_ino))
118		vfs_hash_remove(vp);
119
120	/* Dispose all node knowledge */
121	nandfs_dispose_node(&nandfs_node);
122
123	if (!NANDFS_SYS_NODE(ino))
124		NANDFS_WRITEUNLOCK(fsdev);
125
126	return (0);
127}
128
129static int
130nandfs_read(struct vop_read_args *ap)
131{
132	struct vnode *vp = ap->a_vp;
133	struct nandfs_node *node = VTON(vp);
134	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
135	struct uio *uio = ap->a_uio;
136	struct buf *bp;
137	uint64_t size;
138	uint32_t blocksize;
139	off_t bytesinfile;
140	ssize_t toread, off;
141	daddr_t lbn;
142	ssize_t resid;
143	int error = 0;
144
145	if (uio->uio_resid == 0)
146		return (0);
147
148	size = node->nn_inode.i_size;
149	if (uio->uio_offset >= size)
150		return (0);
151
152	blocksize = nandfsdev->nd_blocksize;
153	bytesinfile = size - uio->uio_offset;
154
155	resid = omin(uio->uio_resid, bytesinfile);
156
157	while (resid) {
158		lbn = uio->uio_offset / blocksize;
159		off = uio->uio_offset & (blocksize - 1);
160
161		toread = omin(resid, blocksize - off);
162
163		DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n",
164		    (uintmax_t)lbn, toread, blocksize));
165
166		error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
167		if (error) {
168			brelse(bp);
169			break;
170		}
171
172		error = uiomove(bp->b_data + off, toread, uio);
173		if (error) {
174			brelse(bp);
175			break;
176		}
177
178		brelse(bp);
179		resid -= toread;
180	}
181
182	return (error);
183}
184
185static int
186nandfs_write(struct vop_write_args *ap)
187{
188	struct nandfs_device *fsdev;
189	struct nandfs_node *node;
190	struct vnode *vp;
191	struct uio *uio;
192	struct buf *bp;
193	uint64_t file_size, vblk;
194	uint32_t blocksize;
195	ssize_t towrite, off;
196	daddr_t lbn;
197	ssize_t resid;
198	int error, ioflag, modified;
199
200	vp = ap->a_vp;
201	uio = ap->a_uio;
202	ioflag = ap->a_ioflag;
203	node = VTON(vp);
204	fsdev = node->nn_nandfsdev;
205
206	if (nandfs_fs_full(fsdev))
207		return (ENOSPC);
208
209	DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n",
210	    uio->uio_resid, (uintmax_t)uio->uio_offset));
211
212	if (uio->uio_offset < 0)
213		return (EINVAL);
214	if (uio->uio_resid == 0)
215		return (0);
216
217	blocksize = fsdev->nd_blocksize;
218	file_size = node->nn_inode.i_size;
219
220	switch (vp->v_type) {
221	case VREG:
222		if (ioflag & IO_APPEND)
223			uio->uio_offset = file_size;
224		break;
225	case VDIR:
226		return (EISDIR);
227	case VLNK:
228		break;
229	default:
230		panic("%s: bad file type vp: %p", __func__, vp);
231	}
232
233	/* If explicitly asked to append, uio_offset can be wrong? */
234	if (ioflag & IO_APPEND)
235		uio->uio_offset = file_size;
236
237	resid = uio->uio_resid;
238	modified = error = 0;
239
240	while (uio->uio_resid) {
241		lbn = uio->uio_offset / blocksize;
242		off = uio->uio_offset & (blocksize - 1);
243
244		towrite = omin(uio->uio_resid, blocksize - off);
245
246		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n",
247		    __func__, (uintmax_t)lbn, towrite, blocksize));
248
249		error = nandfs_bmap_lookup(node, lbn, &vblk);
250		if (error)
251			break;
252
253		DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) "
254		    "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize,
255		    vblk));
256
257		if (vblk != 0)
258			error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
259		else
260			error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp);
261
262		DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__,
263		    vp, bp, (uintmax_t)lbn));
264		if (error) {
265			if (bp)
266				brelse(bp);
267			break;
268		}
269
270		error = uiomove((char *)bp->b_data + off, (int)towrite, uio);
271		if (error)
272			break;
273
274		error = nandfs_dirty_buf(bp, 0);
275		if (error)
276			break;
277
278		modified++;
279	}
280
281	/* XXX proper handling when only part of file was properly written */
282	if (modified) {
283		if (resid > uio->uio_resid && ap->a_cred &&
284		    ap->a_cred->cr_uid != 0)
285			node->nn_inode.i_mode &= ~(ISUID | ISGID);
286
287		if (file_size < uio->uio_offset + uio->uio_resid) {
288			node->nn_inode.i_size = uio->uio_offset +
289			    uio->uio_resid;
290			node->nn_flags |= IN_CHANGE | IN_UPDATE;
291			vnode_pager_setsize(vp, uio->uio_offset +
292			    uio->uio_resid);
293			nandfs_itimes(vp);
294		}
295	}
296
297	DPRINTF(WRITE, ("%s: return:%d\n", __func__, error));
298
299	return (error);
300}
301
302static int
303nandfs_lookup(struct vop_cachedlookup_args *ap)
304{
305	struct vnode *dvp, **vpp;
306	struct componentname *cnp;
307	struct ucred *cred;
308	struct thread *td;
309	struct nandfs_node *dir_node, *node;
310	struct nandfsmount *nmp;
311	uint64_t ino, off;
312	const char *name;
313	int namelen, nameiop, islastcn, mounted_ro;
314	int error, found;
315
316	DPRINTF(VNCALL, ("%s\n", __func__));
317
318	dvp = ap->a_dvp;
319	vpp = ap->a_vpp;
320	*vpp = NULL;
321
322	cnp = ap->a_cnp;
323	cred = cnp->cn_cred;
324	td = cnp->cn_thread;
325
326	dir_node = VTON(dvp);
327	nmp = dir_node->nn_nmp;
328
329	/* Simplify/clarification flags */
330	nameiop = cnp->cn_nameiop;
331	islastcn = cnp->cn_flags & ISLASTCN;
332	mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY;
333
334	/*
335	 * If requesting a modify on the last path element on a read-only
336	 * filingsystem, reject lookup;
337	 */
338	if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME))
339		return (EROFS);
340
341	if (dir_node->nn_inode.i_links_count == 0)
342		return (ENOENT);
343
344	/*
345	 * Obviously, the file is not (anymore) in the namecache, we have to
346	 * search for it. There are three basic cases: '.', '..' and others.
347	 *
348	 * Following the guidelines of VOP_LOOKUP manpage and tmpfs.
349	 */
350	error = 0;
351	if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) {
352		DPRINTF(LOOKUP, ("\tlookup '.'\n"));
353		/* Special case 1 '.' */
354		VREF(dvp);
355		*vpp = dvp;
356		/* Done */
357	} else if (cnp->cn_flags & ISDOTDOT) {
358		/* Special case 2 '..' */
359		DPRINTF(LOOKUP, ("\tlookup '..'\n"));
360
361		/* Get our node */
362		name = "..";
363		namelen = 2;
364		error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino,
365		    &found, &off);
366		if (error)
367			goto out;
368		if (!found)
369			error = ENOENT;
370
371		/* First unlock parent */
372		VOP_UNLOCK(dvp, 0);
373
374		if (error == 0) {
375			DPRINTF(LOOKUP, ("\tfound '..'\n"));
376			/* Try to create/reuse the node */
377			error = nandfs_get_node(nmp, ino, &node);
378
379			if (!error) {
380				DPRINTF(LOOKUP,
381				    ("\tnode retrieved/created OK\n"));
382				*vpp = NTOV(node);
383			}
384		}
385
386		/* Try to relock parent */
387		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
388	} else {
389		DPRINTF(LOOKUP, ("\tlookup file\n"));
390		/* All other files */
391		/* Look up filename in the directory returning its inode */
392		name = cnp->cn_nameptr;
393		namelen = cnp->cn_namelen;
394		error = nandfs_lookup_name_in_dir(dvp, name, namelen,
395		    &ino, &found, &off);
396		if (error)
397			goto out;
398		if (!found) {
399			DPRINTF(LOOKUP, ("\tNOT found\n"));
400			/*
401			 * UGH, didn't find name. If we're creating or
402			 * renaming on the last name this is OK and we ought
403			 * to return EJUSTRETURN if its allowed to be created.
404			 */
405			error = ENOENT;
406			if ((nameiop == CREATE || nameiop == RENAME) &&
407			    islastcn) {
408				error = VOP_ACCESS(dvp, VWRITE, cred, td);
409				if (!error) {
410					/* keep the component name */
411					cnp->cn_flags |= SAVENAME;
412					error = EJUSTRETURN;
413				}
414			}
415			/* Done */
416		} else {
417			if (ino == NANDFS_WHT_INO)
418				cnp->cn_flags |= ISWHITEOUT;
419
420			if ((cnp->cn_flags & ISWHITEOUT) &&
421			    (nameiop == LOOKUP))
422				return (ENOENT);
423
424			if ((nameiop == DELETE) && islastcn) {
425				if ((cnp->cn_flags & ISWHITEOUT) &&
426				    (cnp->cn_flags & DOWHITEOUT)) {
427					cnp->cn_flags |= SAVENAME;
428					dir_node->nn_diroff = off;
429					return (EJUSTRETURN);
430				}
431
432				error = VOP_ACCESS(dvp, VWRITE, cred,
433				    cnp->cn_thread);
434				if (error)
435					return (error);
436
437				/* Try to create/reuse the node */
438				error = nandfs_get_node(nmp, ino, &node);
439				if (!error) {
440					*vpp = NTOV(node);
441					node->nn_diroff = off;
442				}
443
444				if ((dir_node->nn_inode.i_mode & ISVTX) &&
445				    cred->cr_uid != 0 &&
446				    cred->cr_uid != dir_node->nn_inode.i_uid &&
447				    node->nn_inode.i_uid != cred->cr_uid) {
448					vput(*vpp);
449					*vpp = NULL;
450					return (EPERM);
451				}
452			} else if ((nameiop == RENAME) && islastcn) {
453				error = VOP_ACCESS(dvp, VWRITE, cred,
454				    cnp->cn_thread);
455				if (error)
456					return (error);
457
458				/* Try to create/reuse the node */
459				error = nandfs_get_node(nmp, ino, &node);
460				if (!error) {
461					*vpp = NTOV(node);
462					node->nn_diroff = off;
463				}
464			} else {
465				/* Try to create/reuse the node */
466				error = nandfs_get_node(nmp, ino, &node);
467				if (!error) {
468					*vpp = NTOV(node);
469					node->nn_diroff = off;
470				}
471			}
472		}
473	}
474
475out:
476	/*
477	 * Store result in the cache if requested. If we are creating a file,
478	 * the file might not be found and thus putting it into the namecache
479	 * might be seen as negative caching.
480	 */
481	if ((cnp->cn_flags & MAKEENTRY) != 0)
482		cache_enter(dvp, *vpp, cnp);
483
484	return (error);
485
486}
487
488static int
489nandfs_getattr(struct vop_getattr_args *ap)
490{
491	struct vnode *vp = ap->a_vp;
492	struct vattr *vap = ap->a_vap;
493	struct nandfs_node *node = VTON(vp);
494	struct nandfs_inode *inode = &node->nn_inode;
495
496	DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp));
497	nandfs_itimes(vp);
498
499	/* Basic info */
500	VATTR_NULL(vap);
501	vap->va_atime.tv_sec = inode->i_mtime;
502	vap->va_atime.tv_nsec = inode->i_mtime_nsec;
503	vap->va_mtime.tv_sec = inode->i_mtime;
504	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
505	vap->va_ctime.tv_sec = inode->i_ctime;
506	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
507	vap->va_type = IFTOVT(inode->i_mode);
508	vap->va_mode = inode->i_mode & ~S_IFMT;
509	vap->va_nlink = inode->i_links_count;
510	vap->va_uid = inode->i_uid;
511	vap->va_gid = inode->i_gid;
512	vap->va_rdev = inode->i_special;
513	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
514	vap->va_fileid = node->nn_ino;
515	vap->va_size = inode->i_size;
516	vap->va_blocksize = node->nn_nandfsdev->nd_blocksize;
517	vap->va_gen = 0;
518	vap->va_flags = inode->i_flags;
519	vap->va_bytes = inode->i_blocks * vap->va_blocksize;
520	vap->va_filerev = 0;
521	vap->va_vaflags = 0;
522
523	return (0);
524}
525
526static int
527nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks)
528{
529	struct nandfs_device *nffsdev;
530	struct bufobj *bo;
531	struct buf *bp, *nbp;
532
533	bo = &vp->v_bufobj;
534	nffsdev = VTON(vp)->nn_nandfsdev;
535
536	ASSERT_VOP_LOCKED(vp, "nandfs_truncate");
537restart:
538	BO_LOCK(bo);
539restart_locked:
540	TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
541		if (bp->b_lblkno < nblks)
542			continue;
543		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
544			goto restart_locked;
545
546		bremfree(bp);
547		bp->b_flags |= (B_INVAL | B_RELBUF);
548		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
549		BO_UNLOCK(bo);
550		brelse(bp);
551		BO_LOCK(bo);
552	}
553
554	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
555		if (bp->b_lblkno < nblks)
556			continue;
557		if (BUF_LOCK(bp,
558		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
559		    BO_LOCKPTR(bo)) == ENOLCK)
560			goto restart;
561		bp->b_flags |= (B_INVAL | B_RELBUF);
562		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
563		brelse(bp);
564		nandfs_dirty_bufs_decrement(nffsdev);
565		BO_LOCK(bo);
566	}
567
568	BO_UNLOCK(bo);
569
570	return (0);
571}
572
573static int
574nandfs_truncate(struct vnode *vp, uint64_t newsize)
575{
576	struct nandfs_device *nffsdev;
577	struct nandfs_node *node;
578	struct nandfs_inode *inode;
579	struct buf *bp = NULL;
580	uint64_t oblks, nblks, vblk, size, rest;
581	int error;
582
583	node = VTON(vp);
584	nffsdev = node->nn_nandfsdev;
585	inode = &node->nn_inode;
586
587	/* Calculate end of file */
588	size = inode->i_size;
589
590	if (newsize == size) {
591		node->nn_flags |= IN_CHANGE | IN_UPDATE;
592		nandfs_itimes(vp);
593		return (0);
594	}
595
596	if (newsize > size) {
597		inode->i_size = newsize;
598		vnode_pager_setsize(vp, newsize);
599		node->nn_flags |= IN_CHANGE | IN_UPDATE;
600		nandfs_itimes(vp);
601		return (0);
602	}
603
604	nblks = howmany(newsize, nffsdev->nd_blocksize);
605	oblks = howmany(size, nffsdev->nd_blocksize);
606	rest = newsize % nffsdev->nd_blocksize;
607
608	if (rest) {
609		error = nandfs_bmap_lookup(node, nblks - 1, &vblk);
610		if (error)
611			return (error);
612
613		if (vblk != 0)
614			error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp);
615		else
616			error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp);
617
618		if (error) {
619			if (bp)
620				brelse(bp);
621			return (error);
622		}
623
624		bzero((char *)bp->b_data + rest,
625		    (u_int)(nffsdev->nd_blocksize - rest));
626		error = nandfs_dirty_buf(bp, 0);
627		if (error)
628			return (error);
629	}
630
631	DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks,
632	    nblks));
633
634	error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1);
635	if (error) {
636		if (bp)
637			nandfs_undirty_buf(bp);
638		return (error);
639	}
640
641	error = nandfs_vtruncbuf(vp, nblks);
642	if (error) {
643		if (bp)
644			nandfs_undirty_buf(bp);
645		return (error);
646	}
647
648	inode->i_size = newsize;
649	vnode_pager_setsize(vp, newsize);
650	node->nn_flags |= IN_CHANGE | IN_UPDATE;
651	nandfs_itimes(vp);
652
653	return (error);
654}
655
656static void
657nandfs_itimes_locked(struct vnode *vp)
658{
659	struct nandfs_node *node;
660	struct nandfs_inode *inode;
661	struct timespec ts;
662
663	ASSERT_VI_LOCKED(vp, __func__);
664
665	node = VTON(vp);
666	inode = &node->nn_inode;
667
668	if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
669		return;
670
671	if (((vp->v_mount->mnt_kern_flag &
672	    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
673	    (node->nn_flags & (IN_CHANGE | IN_UPDATE)))
674		node->nn_flags |= IN_MODIFIED;
675
676	vfs_timestamp(&ts);
677	if (node->nn_flags & IN_UPDATE) {
678		inode->i_mtime = ts.tv_sec;
679		inode->i_mtime_nsec = ts.tv_nsec;
680	}
681	if (node->nn_flags & IN_CHANGE) {
682		inode->i_ctime = ts.tv_sec;
683		inode->i_ctime_nsec = ts.tv_nsec;
684	}
685
686	node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
687}
688
/*
 * Apply pending timestamp updates to the node behind vp.
 *
 * Wrapper around nandfs_itimes_locked() that takes and drops the vnode
 * interlock around the call.
 */
void
nandfs_itimes(struct vnode *vp)
{

	VI_LOCK(vp);
	nandfs_itimes_locked(vp);
	VI_UNLOCK(vp);
}
697
698static int
699nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td)
700{
701	struct nandfs_node *node = VTON(vp);
702	struct nandfs_inode *inode = &node->nn_inode;
703	uint16_t nmode;
704	int error = 0;
705
706	DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp,
707	    mode, cred, td));
708	/*
709	 * To modify the permissions on a file, must possess VADMIN
710	 * for that file.
711	 */
712	if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
713		return (error);
714
715	/*
716	 * Privileged processes may set the sticky bit on non-directories,
717	 * as well as set the setgid bit on a file with a group that the
718	 * process is not a member of. Both of these are allowed in
719	 * jail(8).
720	 */
721	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
722		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
723			return (EFTYPE);
724	}
725	if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) {
726		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
727		if (error)
728			return (error);
729	}
730
731	/*
732	 * Deny setting setuid if we are not the file owner.
733	 */
734	if ((mode & ISUID) && inode->i_uid != cred->cr_uid) {
735		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
736		if (error)
737			return (error);
738	}
739
740	nmode = inode->i_mode;
741	nmode &= ~ALLPERMS;
742	nmode |= (mode & ALLPERMS);
743	inode->i_mode = nmode;
744	node->nn_flags |= IN_CHANGE;
745
746	DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode));
747
748	return (error);
749}
750
751static int
752nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
753    struct thread *td)
754{
755	struct nandfs_node *node = VTON(vp);
756	struct nandfs_inode *inode = &node->nn_inode;
757	uid_t ouid;
758	gid_t ogid;
759	int error = 0;
760
761	if (uid == (uid_t)VNOVAL)
762		uid = inode->i_uid;
763	if (gid == (gid_t)VNOVAL)
764		gid = inode->i_gid;
765	/*
766	 * To modify the ownership of a file, must possess VADMIN for that
767	 * file.
768	 */
769	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
770		return (error);
771	/*
772	 * To change the owner of a file, or change the group of a file to a
773	 * group of which we are not a member, the caller must have
774	 * privilege.
775	 */
776	if (((uid != inode->i_uid && uid != cred->cr_uid) ||
777	    (gid != inode->i_gid && !groupmember(gid, cred))) &&
778	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
779		return (error);
780	ogid = inode->i_gid;
781	ouid = inode->i_uid;
782
783	inode->i_gid = gid;
784	inode->i_uid = uid;
785
786	node->nn_flags |= IN_CHANGE;
787	if ((inode->i_mode & (ISUID | ISGID)) &&
788	    (ouid != uid || ogid != gid)) {
789		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0))
790			inode->i_mode &= ~(ISUID | ISGID);
791	}
792	DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp,
793	    cred, td));
794	return (0);
795}
796
797static int
798nandfs_setattr(struct vop_setattr_args *ap)
799{
800	struct vnode *vp = ap->a_vp;
801	struct nandfs_node *node = VTON(vp);
802	struct nandfs_inode *inode = &node->nn_inode;
803	struct vattr *vap = ap->a_vap;
804	struct ucred *cred = ap->a_cred;
805	struct thread *td = curthread;
806	uint32_t flags;
807	int error = 0;
808
809	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
810	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
811	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
812	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
813		DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__));
814		return (EINVAL);
815	}
816
817	if (vap->va_flags != VNOVAL) {
818		DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp,
819		    td, vap->va_flags));
820
821		if (vp->v_mount->mnt_flag & MNT_RDONLY)
822			return (EROFS);
823		/*
824		 * Callers may only modify the file flags on objects they
825		 * have VADMIN rights for.
826		 */
827		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
828			return (error);
829		/*
830		 * Unprivileged processes are not permitted to unset system
831		 * flags, or modify flags if any system flags are set.
832		 * Privileged non-jail processes may not modify system flags
833		 * if securelevel > 0 and any existing system flags are set.
834		 * Privileged jail processes behave like privileged non-jail
835		 * processes if the security.jail.chflags_allowed sysctl is
836		 * is non-zero; otherwise, they behave like unprivileged
837		 * processes.
838		 */
839
840		flags = inode->i_flags;
841		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
842			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
843				error = securelevel_gt(cred, 0);
844				if (error)
845					return (error);
846			}
847			/* Snapshot flag cannot be set or cleared */
848			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
849			    (flags & SF_SNAPSHOT) == 0) ||
850			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
851			    (flags & SF_SNAPSHOT) != 0))
852				return (EPERM);
853
854			inode->i_flags = vap->va_flags;
855		} else {
856			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
857			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
858				return (EPERM);
859
860			flags &= SF_SETTABLE;
861			flags |= (vap->va_flags & UF_SETTABLE);
862			inode->i_flags = flags;
863		}
864		node->nn_flags |= IN_CHANGE;
865		if (vap->va_flags & (IMMUTABLE | APPEND))
866			return (0);
867	}
868	if (inode->i_flags & (IMMUTABLE | APPEND))
869		return (EPERM);
870
871	if (vap->va_size != (u_quad_t)VNOVAL) {
872		DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp, td,
873		    (uintmax_t)vap->va_size));
874
875		switch (vp->v_type) {
876		case VDIR:
877			return (EISDIR);
878		case VLNK:
879		case VREG:
880			if (vp->v_mount->mnt_flag & MNT_RDONLY)
881				return (EROFS);
882			if ((inode->i_flags & SF_SNAPSHOT) != 0)
883				return (EPERM);
884			break;
885		default:
886			return (0);
887		}
888
889		if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize)
890			return (EFBIG);
891
892		KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type));
893		nandfs_truncate(vp, vap->va_size);
894		node->nn_flags |= IN_CHANGE;
895
896		return (0);
897	}
898
899	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
900		if (vp->v_mount->mnt_flag & MNT_RDONLY)
901			return (EROFS);
902		DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
903		    vp, td, vap->va_uid, vap->va_gid));
904		error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
905		if (error)
906			return (error);
907	}
908
909	if (vap->va_mode != (mode_t)VNOVAL) {
910		if (vp->v_mount->mnt_flag & MNT_RDONLY)
911			return (EROFS);
912		DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td,
913		    vap->va_mode));
914
915		error = nandfs_chmod(vp, (int)vap->va_mode, cred, td);
916		if (error)
917			return (error);
918	}
919	if (vap->va_atime.tv_sec != VNOVAL ||
920	    vap->va_mtime.tv_sec != VNOVAL ||
921	    vap->va_birthtime.tv_sec != VNOVAL) {
922		DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n",
923		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
924		    (uintmax_t)vap->va_mtime.tv_sec,
925		    (uintmax_t)vap->va_birthtime.tv_sec));
926
927		if (vap->va_atime.tv_sec != VNOVAL)
928			node->nn_flags |= IN_ACCESS;
929		if (vap->va_mtime.tv_sec != VNOVAL)
930			node->nn_flags |= IN_CHANGE | IN_UPDATE;
931		if (vap->va_birthtime.tv_sec != VNOVAL)
932			node->nn_flags |= IN_MODIFIED;
933		nandfs_itimes(vp);
934		return (0);
935	}
936
937	return (0);
938}
939
940static int
941nandfs_open(struct vop_open_args *ap)
942{
943	struct nandfs_node *node = VTON(ap->a_vp);
944	uint64_t filesize;
945
946	DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode));
947
948	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
949		return (EOPNOTSUPP);
950
951	if ((node->nn_inode.i_flags & APPEND) &&
952	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
953		return (EPERM);
954
955	filesize = node->nn_inode.i_size;
956	vnode_create_vobject(ap->a_vp, filesize, ap->a_td);
957
958	return (0);
959}
960
961static int
962nandfs_close(struct vop_close_args *ap)
963{
964	struct vnode *vp = ap->a_vp;
965	struct nandfs_node *node = VTON(vp);
966
967	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
968
969	mtx_lock(&vp->v_interlock);
970	if (vp->v_usecount > 1)
971		nandfs_itimes_locked(vp);
972	mtx_unlock(&vp->v_interlock);
973
974	return (0);
975}
976
977static int
978nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
979{
980
981	/* Check if we are allowed to write */
982	switch (vap->va_type) {
983	case VDIR:
984	case VLNK:
985	case VREG:
986		/*
987		 * Normal nodes: check if we're on a read-only mounted
988		 * filingsystem and bomb out if we're trying to write.
989		 */
990		if ((mode & VMODIFY_PERMS) && (vp->v_mount->mnt_flag & MNT_RDONLY))
991			return (EROFS);
992		break;
993	case VBLK:
994	case VCHR:
995	case VSOCK:
996	case VFIFO:
997		/*
998		 * Special nodes: even on read-only mounted filingsystems
999		 * these are allowed to be written to if permissions allow.
1000		 */
1001		break;
1002	default:
1003		/* No idea what this is */
1004		return (EINVAL);
1005	}
1006
1007	/* No one may write immutable files */
1008	if ((mode & VWRITE) && (VTON(vp)->nn_inode.i_flags & IMMUTABLE))
1009		return (EPERM);
1010
1011	return (0);
1012}
1013
1014static int
1015nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode,
1016    struct ucred *cred)
1017{
1018
1019	return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, mode,
1020	    cred, NULL));
1021}
1022
1023static int
1024nandfs_advlock(struct vop_advlock_args *ap)
1025{
1026	struct nandfs_node *nvp;
1027	quad_t size;
1028
1029	nvp = VTON(ap->a_vp);
1030	size = nvp->nn_inode.i_size;
1031	return (lf_advlock(ap, &(nvp->nn_lockf), size));
1032}
1033
1034static int
1035nandfs_access(struct vop_access_args *ap)
1036{
1037	struct vnode *vp = ap->a_vp;
1038	accmode_t accmode = ap->a_accmode;
1039	struct ucred *cred = ap->a_cred;
1040	struct vattr vap;
1041	int error;
1042
1043	DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode));
1044
1045	error = VOP_GETATTR(vp, &vap, NULL);
1046	if (error)
1047		return (error);
1048
1049	error = nandfs_check_possible(vp, &vap, accmode);
1050	if (error)
1051		return (error);
1052
1053	error = nandfs_check_permitted(vp, &vap, accmode, cred);
1054
1055	return (error);
1056}
1057
1058static int
1059nandfs_print(struct vop_print_args *ap)
1060{
1061	struct vnode *vp = ap->a_vp;
1062	struct nandfs_node *nvp = VTON(vp);
1063
1064	printf("\tvp=%p, nandfs_node=%p\n", vp, nvp);
1065	printf("nandfs inode %#jx\n", (uintmax_t)nvp->nn_ino);
1066	printf("flags = 0x%b\n", (u_int)nvp->nn_flags, PRINT_NODE_FLAGS);
1067
1068	return (0);
1069}
1070
1071static void
1072nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp)
1073{
1074	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
1075	struct buf *nbp;
1076	nandfs_daddr_t vblk, pblk;
1077	nandfs_lbn_t from;
1078	uint32_t blocksize;
1079	int error = 0;
1080	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
1081
1082	/*
1083	 * Translate all the block sectors into a series of buffers to read
1084	 * asynchronously from the nandfs device. Note that this lookup may
1085	 * induce readin's too.
1086	 */
1087
1088	blocksize = nandfsdev->nd_blocksize;
1089	if (bp->b_bcount / blocksize != 1)
1090		panic("invalid b_count in bp %p\n", bp);
1091
1092	from = bp->b_blkno;
1093
1094	DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx"
1095	    " count %#lx\n", (uintmax_t)node->nn_ino, from,
1096	    bp->b_bcount));
1097
1098	/* Get virtual block numbers for the vnode's buffer span */
1099	error = nandfs_bmap_lookup(node, from, &vblk);
1100	if (error) {
1101		bp->b_error = EINVAL;
1102		bp->b_ioflags |= BIO_ERROR;
1103		bufdone(bp);
1104		return;
1105	}
1106
1107	/* Translate virtual block numbers to physical block numbers */
1108	error = nandfs_vtop(node, vblk, &pblk);
1109	if (error) {
1110		bp->b_error = EINVAL;
1111		bp->b_ioflags |= BIO_ERROR;
1112		bufdone(bp);
1113		return;
1114	}
1115
1116	/* Issue translated blocks */
1117	bp->b_resid = bp->b_bcount;
1118
1119	/* Note virtual block 0 marks not mapped */
1120	if (vblk == 0) {
1121		vfs_bio_clrbuf(bp);
1122		bufdone(bp);
1123		return;
1124	}
1125
1126	nbp = bp;
1127	nbp->b_blkno = pblk * blk2dev;
1128	bp->b_iooffset = dbtob(nbp->b_blkno);
1129	MPASS(bp->b_iooffset >= 0);
1130	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp);
1131	nandfs_vblk_set(bp, vblk);
1132	DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> "
1133	    "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino,
1134	    (uintmax_t)(from), (uintmax_t)vblk,
1135	    (uintmax_t)pblk, nbp));
1136}
1137
1138static void
1139nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp)
1140{
1141	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
1142
1143	bp->b_iooffset = dbtob(bp->b_blkno);
1144	MPASS(bp->b_iooffset >= 0);
1145	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, bp);
1146}
1147
1148static int
1149nandfs_strategy(struct vop_strategy_args *ap)
1150{
1151	struct vnode *vp = ap->a_vp;
1152	struct buf *bp = ap->a_bp;
1153	struct nandfs_node *node = VTON(vp);
1154
1155
1156	/* check if we ought to be here */
1157	KASSERT((vp->v_type != VBLK && vp->v_type != VCHR),
1158	    ("nandfs_strategy on type %d", vp->v_type));
1159
1160	/* Translate if needed and pass on */
1161	if (bp->b_iocmd == BIO_READ) {
1162		nandfs_read_filebuf(node, bp);
1163		return (0);
1164	}
1165
1166	/* Send to segment collector */
1167	nandfs_write_filebuf(node, bp);
1168	return (0);
1169}
1170
1171static int
1172nandfs_readdir(struct vop_readdir_args *ap)
1173{
1174	struct uio *uio = ap->a_uio;
1175	struct vnode *vp = ap->a_vp;
1176	struct nandfs_node *node = VTON(vp);
1177	struct nandfs_dir_entry *ndirent;
1178	struct dirent dirent;
1179	struct buf *bp;
1180	uint64_t file_size, diroffset, transoffset, blkoff;
1181	uint64_t blocknr;
1182	uint32_t blocksize = node->nn_nandfsdev->nd_blocksize;
1183	uint8_t *pos, name_len;
1184	int error;
1185
1186	DPRINTF(READDIR, ("nandfs_readdir called\n"));
1187
1188	if (vp->v_type != VDIR)
1189		return (ENOTDIR);
1190
1191	file_size = node->nn_inode.i_size;
1192	DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n",
1193	    (uintmax_t)file_size, uio->uio_resid ));
1194
1195	/* We are called just as long as we keep on pushing data in */
1196	error = 0;
1197	if ((uio->uio_offset < file_size) &&
1198	    (uio->uio_resid >= sizeof(struct dirent))) {
1199		diroffset = uio->uio_offset;
1200		transoffset = diroffset;
1201
1202		blocknr = diroffset / blocksize;
1203		blkoff = diroffset % blocksize;
1204		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
1205		if (error) {
1206			brelse(bp);
1207			return (EIO);
1208		}
1209		while (diroffset < file_size) {
1210			DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n",
1211			    diroffset));
1212			if (blkoff >= blocksize) {
1213				blkoff = 0; blocknr++;
1214				brelse(bp);
1215				error = nandfs_bread(node, blocknr, NOCRED, 0,
1216				    &bp);
1217				if (error) {
1218					brelse(bp);
1219					return (EIO);
1220				}
1221			}
1222
1223			/* Read in one dirent */
1224			pos = (uint8_t *)bp->b_data + blkoff;
1225			ndirent = (struct nandfs_dir_entry *)pos;
1226
1227			name_len = ndirent->name_len;
1228			memset(&dirent, 0, sizeof(struct dirent));
1229			dirent.d_fileno = ndirent->inode;
1230			if (dirent.d_fileno) {
1231				dirent.d_type = ndirent->file_type;
1232				dirent.d_namlen = name_len;
1233				strncpy(dirent.d_name, ndirent->name, name_len);
1234				dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
1235				DPRINTF(READDIR, ("copying `%*.*s`\n", name_len,
1236				    name_len, dirent.d_name));
1237			}
1238
1239			/*
1240			 * If there isn't enough space in the uio to return a
1241			 * whole dirent, break off read
1242			 */
1243			if (uio->uio_resid < GENERIC_DIRSIZ(&dirent))
1244				break;
1245
1246			/* Transfer */
1247			if (dirent.d_fileno)
1248				uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio);
1249
1250			/* Advance */
1251			diroffset += ndirent->rec_len;
1252			blkoff += ndirent->rec_len;
1253
1254			/* Remember the last entry we transferred */
1255			transoffset = diroffset;
1256		}
1257		brelse(bp);
1258
1259		/* Pass on last transferred offset */
1260		uio->uio_offset = transoffset;
1261	}
1262
1263	if (ap->a_eofflag)
1264		*ap->a_eofflag = (uio->uio_offset >= file_size);
1265
1266	return (error);
1267}
1268
1269static int
1270nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred)
1271{
1272	struct nandfs_node *dnode = VTON(dvp);
1273	struct nandfs_dir_entry *dirent;
1274	uint64_t file_size = dnode->nn_inode.i_size;
1275	uint64_t blockcount = dnode->nn_inode.i_blocks;
1276	uint64_t blocknr;
1277	uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize;
1278	uint32_t limit;
1279	uint32_t off;
1280	uint8_t	*pos;
1281	struct buf *bp;
1282	int error;
1283
1284	DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp,
1285	    (uintmax_t)parentino, cred));
1286
1287	KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp));
1288
1289	blocknr = 0;
1290	while (blocknr < blockcount) {
1291		error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp);
1292		if (error) {
1293			brelse(bp);
1294			return (0);
1295		}
1296
1297		pos = (uint8_t *)bp->b_data;
1298		off = 0;
1299
1300		if (blocknr == (blockcount - 1))
1301			limit = file_size % blocksize;
1302		else
1303			limit = blocksize;
1304
1305		while (off < limit) {
1306			dirent = (struct nandfs_dir_entry *)(pos + off);
1307			off += dirent->rec_len;
1308
1309			if (dirent->inode == 0)
1310				continue;
1311
1312			switch (dirent->name_len) {
1313			case 0:
1314				break;
1315			case 1:
1316				if (dirent->name[0] != '.')
1317					goto notempty;
1318
1319				KASSERT(dirent->inode == dnode->nn_ino,
1320				    (".'s inode does not match dir"));
1321				break;
1322			case 2:
1323				if (dirent->name[0] != '.' &&
1324				    dirent->name[1] != '.')
1325					goto notempty;
1326
1327				KASSERT(dirent->inode == parentino,
1328				    ("..'s inode does not match parent"));
1329				break;
1330			default:
1331				goto notempty;
1332			}
1333		}
1334
1335		brelse(bp);
1336		blocknr++;
1337	}
1338
1339	return (1);
1340notempty:
1341	brelse(bp);
1342	return (0);
1343}
1344
1345static int
1346nandfs_link(struct vop_link_args *ap)
1347{
1348	struct vnode *tdvp = ap->a_tdvp;
1349	struct vnode *vp = ap->a_vp;
1350	struct componentname *cnp = ap->a_cnp;
1351	struct nandfs_node *node = VTON(vp);
1352	struct nandfs_inode *inode = &node->nn_inode;
1353	int error;
1354
1355	if (inode->i_links_count >= LINK_MAX)
1356		return (EMLINK);
1357
1358	if (inode->i_flags & (IMMUTABLE | APPEND))
1359		return (EPERM);
1360
1361	/* Update link count */
1362	inode->i_links_count++;
1363
1364	/* Add dir entry */
1365	error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr,
1366	    cnp->cn_namelen, IFTODT(inode->i_mode));
1367	if (error) {
1368		inode->i_links_count--;
1369	}
1370
1371	node->nn_flags |= IN_CHANGE;
1372	nandfs_itimes(vp);
1373	DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n",
1374	    __func__, tdvp, vp, cnp));
1375
1376	return (0);
1377}
1378
1379static int
1380nandfs_create(struct vop_create_args *ap)
1381{
1382	struct vnode *dvp = ap->a_dvp;
1383	struct vnode **vpp = ap->a_vpp;
1384	struct componentname *cnp = ap->a_cnp;
1385	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1386	struct nandfs_node *dir_node = VTON(dvp);
1387	struct nandfsmount *nmp = dir_node->nn_nmp;
1388	struct nandfs_node *node;
1389	int error;
1390
1391	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
1392
1393	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1394		return (ENOSPC);
1395
1396	/* Create new vnode/inode */
1397	error = nandfs_node_create(nmp, &node, mode);
1398	if (error)
1399		return (error);
1400	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1401	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1402
1403	/* Add new dir entry */
1404	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1405	    cnp->cn_namelen, IFTODT(mode));
1406	if (error) {
1407		if (nandfs_node_destroy(node)) {
1408			nandfs_error("%s: error destroying node %p\n",
1409			    __func__, node);
1410		}
1411		return (error);
1412	}
1413	*vpp = NTOV(node);
1414	if ((cnp->cn_flags & MAKEENTRY) != 0)
1415		cache_enter(dvp, *vpp, cnp);
1416
1417	DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node,
1418	    (uintmax_t)node->nn_ino));
1419	return (0);
1420}
1421
1422static int
1423nandfs_remove(struct vop_remove_args *ap)
1424{
1425	struct vnode *vp = ap->a_vp;
1426	struct vnode *dvp = ap->a_dvp;
1427	struct nandfs_node *node = VTON(vp);
1428	struct nandfs_node *dnode = VTON(dvp);
1429	struct componentname *cnp = ap->a_cnp;
1430
1431	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n",
1432	    __func__, dvp, vp, node, (uintmax_t)node->nn_ino,
1433	    node->nn_inode.i_links_count));
1434
1435	if (vp->v_type == VDIR)
1436		return (EISDIR);
1437
1438	/* Files marked as immutable or append-only cannot be deleted. */
1439	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
1440	    (dnode->nn_inode.i_flags & APPEND))
1441		return (EPERM);
1442
1443	nandfs_remove_dirent(dvp, node, cnp);
1444	node->nn_inode.i_links_count--;
1445	node->nn_flags |= IN_CHANGE;
1446
1447	return (0);
1448}
1449
1450/*
1451 * Check if source directory is in the path of the target directory.
1452 * Target is supplied locked, source is unlocked.
1453 * The target is always vput before returning.
1454 */
1455static int
1456nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest,
1457    struct ucred *cred)
1458{
1459	struct vnode *vp;
1460	int error, rootino;
1461	struct nandfs_dir_entry dirent;
1462
1463	vp = NTOV(dest);
1464	if (src->nn_ino == dest->nn_ino) {
1465		error = EEXIST;
1466		goto out;
1467	}
1468	rootino = NANDFS_ROOT_INO;
1469	error = 0;
1470	if (dest->nn_ino == rootino)
1471		goto out;
1472
1473	for (;;) {
1474		if (vp->v_type != VDIR) {
1475			error = ENOTDIR;
1476			break;
1477		}
1478
1479		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent,
1480		    NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE,
1481		    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED,
1482		    NULL, NULL);
1483		if (error != 0)
1484			break;
1485		if (dirent.name_len != 2 ||
1486		    dirent.name[0] != '.' ||
1487		    dirent.name[1] != '.') {
1488			error = ENOTDIR;
1489			break;
1490		}
1491		if (dirent.inode == src->nn_ino) {
1492			error = EINVAL;
1493			break;
1494		}
1495		if (dirent.inode == rootino)
1496			break;
1497		vput(vp);
1498		if ((error = VFS_VGET(vp->v_mount, dirent.inode,
1499		    LK_EXCLUSIVE, &vp)) != 0) {
1500			vp = NULL;
1501			break;
1502		}
1503	}
1504
1505out:
1506	if (error == ENOTDIR)
1507		printf("checkpath: .. not a directory\n");
1508	if (vp != NULL)
1509		vput(vp);
1510	return (error);
1511}
1512
1513static int
1514nandfs_rename(struct vop_rename_args *ap)
1515{
1516	struct vnode *tvp = ap->a_tvp;
1517	struct vnode *tdvp = ap->a_tdvp;
1518	struct vnode *fvp = ap->a_fvp;
1519	struct vnode *fdvp = ap->a_fdvp;
1520	struct componentname *tcnp = ap->a_tcnp;
1521	struct componentname *fcnp = ap->a_fcnp;
1522	int doingdirectory = 0, oldparent = 0, newparent = 0;
1523	int error = 0;
1524
1525	struct nandfs_node *fdnode, *fnode, *fnode1;
1526	struct nandfs_node *tdnode = VTON(tdvp);
1527	struct nandfs_node *tnode;
1528
1529	uint32_t tdflags, fflags, fdflags;
1530	uint16_t mode;
1531
1532	DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp,
1533	    fvp, tdvp, tvp));
1534
1535	/*
1536	 * Check for cross-device rename.
1537	 */
1538	if ((fvp->v_mount != tdvp->v_mount) ||
1539	    (tvp && (fvp->v_mount != tvp->v_mount))) {
1540		error = EXDEV;
1541abortit:
1542		if (tdvp == tvp)
1543			vrele(tdvp);
1544		else
1545			vput(tdvp);
1546		if (tvp)
1547			vput(tvp);
1548		vrele(fdvp);
1549		vrele(fvp);
1550		return (error);
1551	}
1552
1553	tdflags = tdnode->nn_inode.i_flags;
1554	if (tvp &&
1555	    ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1556	    (tdflags & APPEND))) {
1557		error = EPERM;
1558		goto abortit;
1559	}
1560
1561	/*
1562	 * Renaming a file to itself has no effect.  The upper layers should
1563	 * not call us in that case.  Temporarily just warn if they do.
1564	 */
1565	if (fvp == tvp) {
1566		printf("nandfs_rename: fvp == tvp (can't happen)\n");
1567		error = 0;
1568		goto abortit;
1569	}
1570
1571	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
1572		goto abortit;
1573
1574	fdnode = VTON(fdvp);
1575	fnode = VTON(fvp);
1576
1577	if (fnode->nn_inode.i_links_count >= LINK_MAX) {
1578		VOP_UNLOCK(fvp, 0);
1579		error = EMLINK;
1580		goto abortit;
1581	}
1582
1583	fflags = fnode->nn_inode.i_flags;
1584	fdflags = fdnode->nn_inode.i_flags;
1585
1586	if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1587	    (fdflags & APPEND)) {
1588		VOP_UNLOCK(fvp, 0);
1589		error = EPERM;
1590		goto abortit;
1591	}
1592
1593	mode = fnode->nn_inode.i_mode;
1594	if ((mode & S_IFMT) == S_IFDIR) {
1595		/*
1596		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1597		 */
1598
1599		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1600		    (fdvp == fvp) ||
1601		    ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) ||
1602		    (fnode->nn_flags & IN_RENAME)) {
1603			VOP_UNLOCK(fvp, 0);
1604			error = EINVAL;
1605			goto abortit;
1606		}
1607		fnode->nn_flags |= IN_RENAME;
1608		doingdirectory = 1;
1609		DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__,
1610		    tdvp));
1611		oldparent = fdnode->nn_ino;
1612	}
1613
1614	vrele(fdvp);
1615
1616	tnode = NULL;
1617	if (tvp)
1618		tnode = VTON(tvp);
1619
1620	/*
1621	 * Bump link count on fvp while we are moving stuff around. If we
1622	 * crash before completing the work, the link count may be wrong
1623	 * but correctable.
1624	 */
1625	fnode->nn_inode.i_links_count++;
1626
1627	/* Check for in path moving XXX */
1628	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
1629	VOP_UNLOCK(fvp, 0);
1630	if (oldparent != tdnode->nn_ino)
1631		newparent = tdnode->nn_ino;
1632	if (doingdirectory && newparent) {
1633		if (error)	/* write access check above */
1634			goto bad;
1635		if (tnode != NULL)
1636			vput(tvp);
1637
1638		error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred);
1639		if (error)
1640			goto out;
1641
1642		VREF(tdvp);
1643		error = relookup(tdvp, &tvp, tcnp);
1644		if (error)
1645			goto out;
1646		vrele(tdvp);
1647		tdnode = VTON(tdvp);
1648		tnode = NULL;
1649		if (tvp)
1650			tnode = VTON(tvp);
1651	}
1652
1653	/*
1654	 * If the target doesn't exist, link the target to the source and
1655	 * unlink the source. Otherwise, rewrite the target directory to
1656	 * reference the source and remove the original entry.
1657	 */
1658
1659	if (tvp == NULL) {
1660		/*
1661		 * Account for ".." in new directory.
1662		 */
1663		if (doingdirectory && fdvp != tdvp)
1664			tdnode->nn_inode.i_links_count++;
1665
1666		DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp));
1667		/*
1668		 * Add name in new directory.
1669		 */
1670		error = nandfs_add_dirent(tdvp, fnode->nn_ino, tcnp->cn_nameptr,
1671		    tcnp->cn_namelen, IFTODT(fnode->nn_inode.i_mode));
1672		if (error) {
1673			if (doingdirectory && fdvp != tdvp)
1674				tdnode->nn_inode.i_links_count--;
1675			goto bad;
1676		}
1677
1678		vput(tdvp);
1679	} else {
1680		/*
1681		 * If the parent directory is "sticky", then the user must
1682		 * own the parent directory, or the destination of the rename,
1683		 * otherwise the destination may not be changed (except by
1684		 * root). This implements append-only directories.
1685		 */
1686		if ((tdnode->nn_inode.i_mode & S_ISTXT) &&
1687		    tcnp->cn_cred->cr_uid != 0 &&
1688		    tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid &&
1689		    tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) {
1690			error = EPERM;
1691			goto bad;
1692		}
1693		/*
1694		 * Target must be empty if a directory and have no links
1695		 * to it. Also, ensure source and target are compatible
1696		 * (both directories, or both not directories).
1697		 */
1698		mode = tnode->nn_inode.i_mode;
1699		if ((mode & S_IFMT) == S_IFDIR) {
1700			if (!nandfs_dirempty(tvp, tdnode->nn_ino,
1701			    tcnp->cn_cred)) {
1702				error = ENOTEMPTY;
1703				goto bad;
1704			}
1705			if (!doingdirectory) {
1706				error = ENOTDIR;
1707				goto bad;
1708			}
1709			/*
1710			 * Update name cache since directory is going away.
1711			 */
1712			cache_purge(tdvp);
1713		} else if (doingdirectory) {
1714			error = EISDIR;
1715			goto bad;
1716		}
1717
1718		DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp));
1719		/*
1720		 * Change name tcnp in tdvp to point at fvp.
1721		 */
1722		error = nandfs_update_dirent(tdvp, fnode, tnode);
1723		if (error)
1724			goto bad;
1725
1726		if (doingdirectory && !newparent)
1727			tdnode->nn_inode.i_links_count--;
1728
1729		vput(tdvp);
1730
1731		tnode->nn_inode.i_links_count--;
1732		vput(tvp);
1733		tnode = NULL;
1734	}
1735
1736	/*
1737	 * Unlink the source.
1738	 */
1739	fcnp->cn_flags &= ~MODMASK;
1740	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
1741	VREF(fdvp);
1742	error = relookup(fdvp, &fvp, fcnp);
1743	if (error == 0)
1744		vrele(fdvp);
1745	if (fvp != NULL) {
1746		fnode1 = VTON(fvp);
1747		fdnode = VTON(fdvp);
1748	} else {
1749		/*
1750		 * From name has disappeared.
1751		 */
1752		if (doingdirectory)
1753			panic("nandfs_rename: lost dir entry");
1754		vrele(ap->a_fvp);
1755		return (0);
1756	}
1757
1758	DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode));
1759
1760	/*
1761	 * Ensure that the directory entry still exists and has not
1762	 * changed while the new name has been entered. If the source is
1763	 * a file then the entry may have been unlinked or renamed. In
1764	 * either case there is no further work to be done. If the source
1765	 * is a directory then it cannot have been rmdir'ed; its link
1766	 * count of three would cause a rmdir to fail with ENOTEMPTY.
1767	 * The IN_RENAME flag ensures that it cannot be moved by another
1768	 * rename.
1769	 */
1770	if (fnode != fnode1) {
1771		if (doingdirectory)
1772			panic("nandfs: lost dir entry");
1773	} else {
1774		/*
1775		 * If the source is a directory with a
1776		 * new parent, the link count of the old
1777		 * parent directory must be decremented
1778		 * and ".." set to point to the new parent.
1779		 */
1780		if (doingdirectory && newparent) {
1781			DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n",
1782			    __func__, (uintmax_t) oldparent,
1783			    (uintmax_t) newparent));
1784			error = nandfs_update_parent_dir(fvp, newparent);
1785			if (!error) {
1786				fdnode->nn_inode.i_links_count--;
1787				fdnode->nn_flags |= IN_CHANGE;
1788			}
1789		}
1790		error = nandfs_remove_dirent(fdvp, fnode, fcnp);
1791		if (!error) {
1792			fnode->nn_inode.i_links_count--;
1793			fnode->nn_flags |= IN_CHANGE;
1794		}
1795		fnode->nn_flags &= ~IN_RENAME;
1796	}
1797	if (fdnode)
1798		vput(fdvp);
1799	if (fnode)
1800		vput(fvp);
1801	vrele(ap->a_fvp);
1802	return (error);
1803
1804bad:
1805	DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error));
1806	if (tnode)
1807		vput(NTOV(tnode));
1808	vput(NTOV(tdnode));
1809out:
1810	if (doingdirectory)
1811		fnode->nn_flags &= ~IN_RENAME;
1812	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
1813		fnode->nn_inode.i_links_count--;
1814		fnode->nn_flags |= IN_CHANGE;
1815		fnode->nn_flags &= ~IN_RENAME;
1816		vput(fvp);
1817	} else
1818		vrele(fvp);
1819	return (error);
1820}
1821
1822static int
1823nandfs_mkdir(struct vop_mkdir_args *ap)
1824{
1825	struct vnode *dvp = ap->a_dvp;
1826	struct vnode **vpp = ap->a_vpp;
1827	struct componentname *cnp = ap->a_cnp;
1828	struct nandfs_node *dir_node = VTON(dvp);
1829	struct nandfs_inode *dir_inode = &dir_node->nn_inode;
1830	struct nandfs_node *node;
1831	struct nandfsmount *nmp = dir_node->nn_nmp;
1832	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1833	int error;
1834
1835	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));
1836
1837	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1838		return (ENOSPC);
1839
1840	if (dir_inode->i_links_count >= LINK_MAX)
1841		return (EMLINK);
1842
1843	error = nandfs_node_create(nmp, &node, mode);
1844	if (error)
1845		return (error);
1846
1847	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1848	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1849
1850	*vpp = NTOV(node);
1851
1852	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1853	    cnp->cn_namelen, IFTODT(mode));
1854	if (error) {
1855		vput(*vpp);
1856		return (error);
1857	}
1858
1859	dir_node->nn_inode.i_links_count++;
1860	dir_node->nn_flags |= IN_CHANGE;
1861
1862	error = nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino);
1863	if (error) {
1864		vput(NTOV(node));
1865		return (error);
1866	}
1867
1868	DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node,
1869	    (uintmax_t)node->nn_ino));
1870	return (0);
1871}
1872
1873static int
1874nandfs_mknod(struct vop_mknod_args *ap)
1875{
1876	struct vnode *dvp = ap->a_dvp;
1877	struct vnode **vpp = ap->a_vpp;
1878	struct vattr *vap = ap->a_vap;
1879	uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
1880	struct componentname *cnp = ap->a_cnp;
1881	struct nandfs_node *dir_node = VTON(dvp);
1882	struct nandfsmount *nmp = dir_node->nn_nmp;
1883	struct nandfs_node *node;
1884	int error;
1885
1886	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1887		return (ENOSPC);
1888
1889	error = nandfs_node_create(nmp, &node, mode);
1890	if (error)
1891		return (error);
1892	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1893	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1894	if (vap->va_rdev != VNOVAL)
1895		node->nn_inode.i_special = vap->va_rdev;
1896
1897	*vpp = NTOV(node);
1898
1899	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1900	    cnp->cn_namelen, IFTODT(mode))) {
1901		vput(*vpp);
1902		return (ENOTDIR);
1903	}
1904
1905	node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
1906
1907	return (0);
1908}
1909
1910static int
1911nandfs_symlink(struct vop_symlink_args *ap)
1912{
1913	struct vnode **vpp = ap->a_vpp;
1914	struct vnode *dvp = ap->a_dvp;
1915	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
1916	struct componentname *cnp = ap->a_cnp;
1917	struct nandfs_node *dir_node = VTON(dvp);
1918	struct nandfsmount *nmp = dir_node->nn_nmp;
1919	struct nandfs_node *node;
1920	int len, error;
1921
1922	if (nandfs_fs_full(dir_node->nn_nandfsdev))
1923		return (ENOSPC);
1924
1925	error = nandfs_node_create(nmp, &node, S_IFLNK | mode);
1926	if (error)
1927		return (error);
1928	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
1929	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
1930
1931	*vpp = NTOV(node);
1932
1933	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
1934	    cnp->cn_namelen, IFTODT(mode))) {
1935		vput(*vpp);
1936		return (ENOTDIR);
1937	}
1938
1939
1940	len = strlen(ap->a_target);
1941	error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
1942	    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
1943	    cnp->cn_cred, NOCRED, NULL, NULL);
1944	if (error)
1945		vput(*vpp);
1946
1947	return (error);
1948}
1949
1950static int
1951nandfs_readlink(struct vop_readlink_args *ap)
1952{
1953	struct vnode *vp = ap->a_vp;
1954
1955	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
1956}
1957
1958static int
1959nandfs_rmdir(struct vop_rmdir_args *ap)
1960{
1961	struct vnode *vp = ap->a_vp;
1962	struct vnode *dvp = ap->a_dvp;
1963	struct componentname *cnp = ap->a_cnp;
1964	struct nandfs_node *node, *dnode;
1965	uint32_t dflag, flag;
1966	int error = 0;
1967
1968	node = VTON(vp);
1969	dnode = VTON(dvp);
1970
1971	/* Files marked as immutable or append-only cannot be deleted. */
1972	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
1973	    (dnode->nn_inode.i_flags & APPEND))
1974		return (EPERM);
1975
1976	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__,
1977	    dvp, vp, node, (uintmax_t)node->nn_ino));
1978
1979	if (node->nn_inode.i_links_count < 2)
1980		return (EINVAL);
1981
1982	if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred))
1983		return (ENOTEMPTY);
1984
1985	/* Files marked as immutable or append-only cannot be deleted. */
1986	dflag = dnode->nn_inode.i_flags;
1987	flag = node->nn_inode.i_flags;
1988	if ((dflag & APPEND) ||
1989	    (flag & (NOUNLINK | IMMUTABLE | APPEND))) {
1990		return (EPERM);
1991	}
1992
1993	if (vp->v_mountedhere != 0)
1994		return (EINVAL);
1995
1996	nandfs_remove_dirent(dvp, node, cnp);
1997	dnode->nn_inode.i_links_count -= 1;
1998	dnode->nn_flags |= IN_CHANGE;
1999
2000	cache_purge(dvp);
2001
2002	error = nandfs_truncate(vp, (uint64_t)0);
2003	if (error)
2004		return (error);
2005
2006	node->nn_inode.i_links_count -= 2;
2007	node->nn_flags |= IN_CHANGE;
2008
2009	cache_purge(vp);
2010
2011	return (error);
2012}
2013
2014static int
2015nandfs_fsync(struct vop_fsync_args *ap)
2016{
2017	struct vnode *vp = ap->a_vp;
2018	struct nandfs_node *node = VTON(vp);
2019	int locked;
2020
2021	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
2022	    node, (uintmax_t)node->nn_ino));
2023
2024	/*
2025	 * Start syncing vnode only if inode was modified or
2026	 * there are some dirty buffers
2027	 */
2028	if (VTON(vp)->nn_flags & IN_MODIFIED ||
2029	    vp->v_bufobj.bo_dirty.bv_cnt) {
2030		locked = VOP_ISLOCKED(vp);
2031		VOP_UNLOCK(vp, 0);
2032		nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC);
2033		VOP_LOCK(vp, locked | LK_RETRY);
2034	}
2035
2036	return (0);
2037}
2038
2039static int
2040nandfs_bmap(struct vop_bmap_args *ap)
2041{
2042	struct vnode *vp = ap->a_vp;
2043	struct nandfs_node *nnode = VTON(vp);
2044	struct nandfs_device *nandfsdev = nnode->nn_nandfsdev;
2045	nandfs_daddr_t l2vmap, v2pmap;
2046	int error;
2047	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
2048
2049	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
2050	    nnode, (uintmax_t)nnode->nn_ino));
2051
2052	if (ap->a_bop != NULL)
2053		*ap->a_bop = &nandfsdev->nd_devvp->v_bufobj;
2054	if (ap->a_bnp == NULL)
2055		return (0);
2056	if (ap->a_runp != NULL)
2057		*ap->a_runp = 0;
2058	if (ap->a_runb != NULL)
2059		*ap->a_runb = 0;
2060
2061	/*
2062	 * Translate all the block sectors into a series of buffers to read
2063	 * asynchronously from the nandfs device. Note that this lookup may
2064	 * induce readin's too.
2065	 */
2066
2067	/* Get virtual block numbers for the vnode's buffer span */
2068	error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap);
2069	if (error)
2070		return (-1);
2071
2072	/* Translate virtual block numbers to physical block numbers */
2073	error = nandfs_vtop(nnode, l2vmap, &v2pmap);
2074	if (error)
2075		return (-1);
2076
2077	/* Note virtual block 0 marks not mapped */
2078	if (l2vmap == 0)
2079		*ap->a_bnp = -1;
2080	else
2081		*ap->a_bnp = v2pmap * blk2dev;	/* in DEV_BSIZE */
2082
2083	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n",
2084	    __func__, vp, nnode, (uintmax_t)nnode->nn_ino, (uintmax_t)ap->a_bn,
2085	    (uintmax_t)*ap->a_bnp ));
2086
2087	return (0);
2088}
2089
2090static void
2091nandfs_force_syncer(struct nandfsmount *nmp)
2092{
2093
2094	nmp->nm_flags |= NANDFS_FORCE_SYNCER;
2095	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE);
2096}
2097
2098static int
2099nandfs_ioctl(struct vop_ioctl_args *ap)
2100{
2101	struct vnode *vp = ap->a_vp;
2102	u_long command = ap->a_command;
2103	caddr_t data = ap->a_data;
2104	struct nandfs_node *node = VTON(vp);
2105	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
2106	struct nandfsmount *nmp = node->nn_nmp;
2107	uint64_t *tab, *cno;
2108	struct nandfs_seg_stat *nss;
2109	struct nandfs_cpmode *ncpm;
2110	struct nandfs_argv *nargv;
2111	struct nandfs_cpstat *ncp;
2112	int error;
2113
2114	DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command));
2115
2116	error = priv_check(ap->a_td, PRIV_VFS_MOUNT);
2117	if (error)
2118		return (error);
2119
2120	if (nmp->nm_ronly) {
2121		switch (command) {
2122		case NANDFS_IOCTL_GET_FSINFO:
2123		case NANDFS_IOCTL_GET_SUSTAT:
2124		case NANDFS_IOCTL_GET_CPINFO:
2125		case NANDFS_IOCTL_GET_CPSTAT:
2126		case NANDFS_IOCTL_GET_SUINFO:
2127		case NANDFS_IOCTL_GET_VINFO:
2128		case NANDFS_IOCTL_GET_BDESCS:
2129			break;
2130		default:
2131			return (EROFS);
2132		}
2133	}
2134
2135	switch (command) {
2136	case NANDFS_IOCTL_GET_FSINFO:
2137		error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data);
2138		break;
2139	case NANDFS_IOCTL_GET_SUSTAT:
2140		nss = (struct nandfs_seg_stat *)data;
2141		error = nandfs_get_seg_stat(nandfsdev, nss);
2142		break;
2143	case NANDFS_IOCTL_CHANGE_CPMODE:
2144		ncpm = (struct nandfs_cpmode *)data;
2145		error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm);
2146		nandfs_force_syncer(nmp);
2147		break;
2148	case NANDFS_IOCTL_GET_CPINFO:
2149		nargv = (struct nandfs_argv *)data;
2150		error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv);
2151		break;
2152	case NANDFS_IOCTL_DELETE_CP:
2153		tab = (uint64_t *)data;
2154		error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0], tab[1]);
2155		nandfs_force_syncer(nmp);
2156		break;
2157	case NANDFS_IOCTL_GET_CPSTAT:
2158		ncp = (struct nandfs_cpstat *)data;
2159		error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp);
2160		break;
2161	case NANDFS_IOCTL_GET_SUINFO:
2162		nargv = (struct nandfs_argv *)data;
2163		error = nandfs_get_segment_info_ioctl(nandfsdev, nargv);
2164		break;
2165	case NANDFS_IOCTL_GET_VINFO:
2166		nargv = (struct nandfs_argv *)data;
2167		error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv);
2168		break;
2169	case NANDFS_IOCTL_GET_BDESCS:
2170		nargv = (struct nandfs_argv *)data;
2171		error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv);
2172		break;
2173	case NANDFS_IOCTL_SYNC:
2174		cno = (uint64_t *)data;
2175		nandfs_force_syncer(nmp);
2176		*cno = nandfsdev->nd_last_cno;
2177		error = 0;
2178		break;
2179	case NANDFS_IOCTL_MAKE_SNAP:
2180		cno = (uint64_t *)data;
2181		error = nandfs_make_snap(nandfsdev, cno);
2182		nandfs_force_syncer(nmp);
2183		break;
2184	case NANDFS_IOCTL_DELETE_SNAP:
2185		cno = (uint64_t *)data;
2186		error = nandfs_delete_snap(nandfsdev, *cno);
2187		nandfs_force_syncer(nmp);
2188		break;
2189	default:
2190		error = ENOTTY;
2191		break;
2192	}
2193
2194	return (error);
2195}
2196
2197/*
2198 * Whiteout vnode call
2199 */
2200static int
2201nandfs_whiteout(struct vop_whiteout_args *ap)
2202{
2203	struct vnode *dvp = ap->a_dvp;
2204	struct componentname *cnp = ap->a_cnp;
2205	int error = 0;
2206
2207	switch (ap->a_flags) {
2208	case LOOKUP:
2209		return (0);
2210	case CREATE:
2211		/* Create a new directory whiteout */
2212#ifdef INVARIANTS
2213		if ((cnp->cn_flags & SAVENAME) == 0)
2214			panic("ufs_whiteout: missing name");
2215#endif
2216		error = nandfs_add_dirent(dvp, NANDFS_WHT_INO, cnp->cn_nameptr,
2217		    cnp->cn_namelen, DT_WHT);
2218		break;
2219
2220	case DELETE:
2221		/* Remove an existing directory whiteout */
2222		cnp->cn_flags &= ~DOWHITEOUT;
2223		error = nandfs_remove_dirent(dvp, NULL, cnp);
2224		break;
2225	default:
2226		panic("nandf_whiteout: unknown op: %d", ap->a_flags);
2227	}
2228
2229	return (error);
2230}
2231
2232static int
2233nandfs_pathconf(struct vop_pathconf_args *ap)
2234{
2235	int error;
2236
2237	error = 0;
2238	switch (ap->a_name) {
2239	case _PC_LINK_MAX:
2240		*ap->a_retval = LINK_MAX;
2241		break;
2242	case _PC_NAME_MAX:
2243		*ap->a_retval = NANDFS_NAME_LEN;
2244		break;
2245	case _PC_PIPE_BUF:
2246		if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO)
2247			*ap->a_retval = PIPE_BUF;
2248		else
2249			error = EINVAL;
2250		break;
2251	case _PC_CHOWN_RESTRICTED:
2252		*ap->a_retval = 1;
2253		break;
2254	case _PC_NO_TRUNC:
2255		*ap->a_retval = 1;
2256		break;
2257	case _PC_ACL_EXTENDED:
2258		*ap->a_retval = 0;
2259		break;
2260	case _PC_ALLOC_SIZE_MIN:
2261		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
2262		break;
2263	case _PC_FILESIZEBITS:
2264		*ap->a_retval = 64;
2265		break;
2266	case _PC_REC_INCR_XFER_SIZE:
2267		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2268		break;
2269	case _PC_REC_MAX_XFER_SIZE:
2270		*ap->a_retval = -1; /* means ``unlimited'' */
2271		break;
2272	case _PC_REC_MIN_XFER_SIZE:
2273		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
2274		break;
2275	default:
2276		error = vop_stdpathconf(ap);
2277		break;
2278	}
2279	return (error);
2280}
2281
2282static int
2283nandfs_vnlock1(struct vop_lock1_args *ap)
2284{
2285	struct vnode *vp = ap->a_vp;
2286	struct nandfs_node *node = VTON(vp);
2287	int error, vi_locked;
2288
2289	/*
2290	 * XXX can vnode go away while we are sleeping?
2291	 */
2292	vi_locked = mtx_owned(&vp->v_interlock);
2293	if (vi_locked)
2294		VI_UNLOCK(vp);
2295	error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev,
2296	    ap->a_flags & LK_NOWAIT);
2297	if (vi_locked && !error)
2298		VI_LOCK(vp);
2299	if (error)
2300		return (error);
2301
2302	error = vop_stdlock(ap);
2303	if (error) {
2304		NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
2305		return (error);
2306	}
2307
2308	return (0);
2309}
2310
2311static int
2312nandfs_vnunlock(struct vop_unlock_args *ap)
2313{
2314	struct vnode *vp = ap->a_vp;
2315	struct nandfs_node *node = VTON(vp);
2316	int error;
2317
2318	error = vop_stdunlock(ap);
2319	if (error)
2320		return (error);
2321
2322	NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
2323
2324	return (0);
2325}
2326
2327/*
2328 * Global vfs data structures
2329 */
2330struct vop_vector nandfs_vnodeops = {
2331	.vop_default =		&default_vnodeops,
2332	.vop_access =		nandfs_access,
2333	.vop_advlock =		nandfs_advlock,
2334	.vop_bmap =		nandfs_bmap,
2335	.vop_close =		nandfs_close,
2336	.vop_create =		nandfs_create,
2337	.vop_fsync =		nandfs_fsync,
2338	.vop_getattr =		nandfs_getattr,
2339	.vop_inactive =		nandfs_inactive,
2340	.vop_cachedlookup =	nandfs_lookup,
2341	.vop_ioctl =		nandfs_ioctl,
2342	.vop_link =		nandfs_link,
2343	.vop_lookup =		vfs_cache_lookup,
2344	.vop_mkdir =		nandfs_mkdir,
2345	.vop_mknod =		nandfs_mknod,
2346	.vop_open =		nandfs_open,
2347	.vop_pathconf =		nandfs_pathconf,
2348	.vop_print =		nandfs_print,
2349	.vop_read =		nandfs_read,
2350	.vop_readdir =		nandfs_readdir,
2351	.vop_readlink =		nandfs_readlink,
2352	.vop_reclaim =		nandfs_reclaim,
2353	.vop_remove =		nandfs_remove,
2354	.vop_rename =		nandfs_rename,
2355	.vop_rmdir =		nandfs_rmdir,
2356	.vop_whiteout =		nandfs_whiteout,
2357	.vop_write =		nandfs_write,
2358	.vop_setattr =		nandfs_setattr,
2359	.vop_strategy =		nandfs_strategy,
2360	.vop_symlink =		nandfs_symlink,
2361	.vop_lock1 =		nandfs_vnlock1,
2362	.vop_unlock =		nandfs_vnunlock,
2363};
2364
2365struct vop_vector nandfs_system_vnodeops = {
2366	.vop_default =		&default_vnodeops,
2367	.vop_close =		nandfs_close,
2368	.vop_inactive =		nandfs_inactive,
2369	.vop_reclaim =		nandfs_reclaim,
2370	.vop_strategy =		nandfs_strategy,
2371	.vop_fsync =		nandfs_fsync,
2372	.vop_bmap =		nandfs_bmap,
2373	.vop_access =		VOP_PANIC,
2374	.vop_advlock =		VOP_PANIC,
2375	.vop_create =		VOP_PANIC,
2376	.vop_getattr =		VOP_PANIC,
2377	.vop_cachedlookup =	VOP_PANIC,
2378	.vop_ioctl =		VOP_PANIC,
2379	.vop_link =		VOP_PANIC,
2380	.vop_lookup =		VOP_PANIC,
2381	.vop_mkdir =		VOP_PANIC,
2382	.vop_mknod =		VOP_PANIC,
2383	.vop_open =		VOP_PANIC,
2384	.vop_pathconf =		VOP_PANIC,
2385	.vop_print =		VOP_PANIC,
2386	.vop_read =		VOP_PANIC,
2387	.vop_readdir =		VOP_PANIC,
2388	.vop_readlink =		VOP_PANIC,
2389	.vop_remove =		VOP_PANIC,
2390	.vop_rename =		VOP_PANIC,
2391	.vop_rmdir =		VOP_PANIC,
2392	.vop_whiteout =		VOP_PANIC,
2393	.vop_write =		VOP_PANIC,
2394	.vop_setattr =		VOP_PANIC,
2395	.vop_symlink =		VOP_PANIC,
2396};
2397
2398static int
2399nandfsfifo_close(struct vop_close_args *ap)
2400{
2401	struct vnode *vp = ap->a_vp;
2402	struct nandfs_node *node = VTON(vp);
2403
2404	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));
2405
2406	mtx_lock(&vp->v_interlock);
2407	if (vp->v_usecount > 1)
2408		nandfs_itimes_locked(vp);
2409	mtx_unlock(&vp->v_interlock);
2410
2411	return (fifo_specops.vop_close(ap));
2412}
2413
2414struct vop_vector nandfs_fifoops = {
2415	.vop_default =		&fifo_specops,
2416	.vop_fsync =		VOP_PANIC,
2417	.vop_access =		nandfs_access,
2418	.vop_close =		nandfsfifo_close,
2419	.vop_getattr =		nandfs_getattr,
2420	.vop_inactive =		nandfs_inactive,
2421	.vop_pathconf =		nandfs_pathconf,
2422	.vop_print =		nandfs_print,
2423	.vop_read =		VOP_PANIC,
2424	.vop_reclaim =		nandfs_reclaim,
2425	.vop_setattr =		nandfs_setattr,
2426	.vop_write =		VOP_PANIC,
2427	.vop_lock1 =		nandfs_vnlock1,
2428	.vop_unlock =		nandfs_vnunlock,
2429};
2430
2431int
2432nandfs_vinit(struct vnode *vp, uint64_t ino)
2433{
2434	struct nandfs_node *node;
2435
2436	ASSERT_VOP_LOCKED(vp, __func__);
2437
2438	node = VTON(vp);
2439
2440	/* Check if we're fetching the root */
2441	if (ino == NANDFS_ROOT_INO)
2442		vp->v_vflag |= VV_ROOT;
2443
2444	if (ino != NANDFS_GC_INO)
2445		vp->v_type = IFTOVT(node->nn_inode.i_mode);
2446	else
2447		vp->v_type = VREG;
2448
2449	if (vp->v_type == VFIFO)
2450		vp->v_op = &nandfs_fifoops;
2451
2452	return (0);
2453}
2454