/* tmpfs_vnops.c revision 1.123 */
/*	$NetBSD: tmpfs_vnops.c,v 1.123 2015/07/06 10:07:12 hannken Exp $	*/
2
/*
 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
10 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
19 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
32
/*
 * tmpfs vnode interface.
 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.123 2015/07/06 10:07:12 hannken Exp $");
39
40#include <sys/param.h>
41#include <sys/dirent.h>
42#include <sys/fcntl.h>
43#include <sys/event.h>
44#include <sys/malloc.h>
45#include <sys/namei.h>
46#include <sys/stat.h>
47#include <sys/uio.h>
48#include <sys/unistd.h>
49#include <sys/vnode.h>
50#include <sys/lockf.h>
51#include <sys/kauth.h>
52#include <sys/atomic.h>
53
54#include <uvm/uvm.h>
55
56#include <miscfs/fifofs/fifo.h>
57#include <miscfs/genfs/genfs.h>
58#include <fs/tmpfs/tmpfs_vnops.h>
59#include <fs/tmpfs/tmpfs.h>
60
61/*
62 * vnode operations vector used for files stored in a tmpfs file system.
63 */
64int (**tmpfs_vnodeop_p)(void *);
65const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
66	{ &vop_default_desc,		vn_default_error },
67	{ &vop_lookup_desc,		tmpfs_lookup },
68	{ &vop_create_desc,		tmpfs_create },
69	{ &vop_mknod_desc,		tmpfs_mknod },
70	{ &vop_open_desc,		tmpfs_open },
71	{ &vop_close_desc,		tmpfs_close },
72	{ &vop_access_desc,		tmpfs_access },
73	{ &vop_getattr_desc,		tmpfs_getattr },
74	{ &vop_setattr_desc,		tmpfs_setattr },
75	{ &vop_read_desc,		tmpfs_read },
76	{ &vop_write_desc,		tmpfs_write },
77	{ &vop_fallocate_desc,		genfs_eopnotsupp },
78	{ &vop_fdiscard_desc,		genfs_eopnotsupp },
79	{ &vop_ioctl_desc,		tmpfs_ioctl },
80	{ &vop_fcntl_desc,		tmpfs_fcntl },
81	{ &vop_poll_desc,		tmpfs_poll },
82	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
83	{ &vop_revoke_desc,		tmpfs_revoke },
84	{ &vop_mmap_desc,		tmpfs_mmap },
85	{ &vop_fsync_desc,		tmpfs_fsync },
86	{ &vop_seek_desc,		tmpfs_seek },
87	{ &vop_remove_desc,		tmpfs_remove },
88	{ &vop_link_desc,		tmpfs_link },
89	{ &vop_rename_desc,		tmpfs_rename },
90	{ &vop_mkdir_desc,		tmpfs_mkdir },
91	{ &vop_rmdir_desc,		tmpfs_rmdir },
92	{ &vop_symlink_desc,		tmpfs_symlink },
93	{ &vop_readdir_desc,		tmpfs_readdir },
94	{ &vop_readlink_desc,		tmpfs_readlink },
95	{ &vop_abortop_desc,		tmpfs_abortop },
96	{ &vop_inactive_desc,		tmpfs_inactive },
97	{ &vop_reclaim_desc,		tmpfs_reclaim },
98	{ &vop_lock_desc,		tmpfs_lock },
99	{ &vop_unlock_desc,		tmpfs_unlock },
100	{ &vop_bmap_desc,		tmpfs_bmap },
101	{ &vop_strategy_desc,		tmpfs_strategy },
102	{ &vop_print_desc,		tmpfs_print },
103	{ &vop_pathconf_desc,		tmpfs_pathconf },
104	{ &vop_islocked_desc,		tmpfs_islocked },
105	{ &vop_advlock_desc,		tmpfs_advlock },
106	{ &vop_bwrite_desc,		tmpfs_bwrite },
107	{ &vop_getpages_desc,		tmpfs_getpages },
108	{ &vop_putpages_desc,		tmpfs_putpages },
109	{ &vop_whiteout_desc,		tmpfs_whiteout },
110	{ NULL, NULL }
111};
112
113const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
114	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
115};
116
117/*
118 * tmpfs_lookup: path name traversal routine.
119 *
120 * Arguments: dvp (directory being searched), vpp (result),
121 * cnp (component name - path).
122 *
123 * => Caller holds a reference and lock on dvp.
124 * => We return looked-up vnode (vpp) locked, with a reference held.
125 */
126int
127tmpfs_lookup(void *v)
128{
129	struct vop_lookup_v2_args /* {
130		struct vnode *a_dvp;
131		struct vnode **a_vpp;
132		struct componentname *a_cnp;
133	} */ *ap = v;
134	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
135	struct componentname *cnp = ap->a_cnp;
136	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
137	tmpfs_node_t *dnode, *tnode;
138	tmpfs_dirent_t *de;
139	int cachefound, iswhiteout;
140	int error;
141
142	KASSERT(VOP_ISLOCKED(dvp));
143
144	dnode = VP_TO_TMPFS_DIR(dvp);
145	*vpp = NULL;
146
147	/* Check accessibility of directory. */
148	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
149	if (error) {
150		goto out;
151	}
152
153	/*
154	 * If requesting the last path component on a read-only file system
155	 * with a write operation, deny it.
156	 */
157	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
158	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
159		error = EROFS;
160		goto out;
161	}
162
163	/*
164	 * Avoid doing a linear scan of the directory if the requested
165	 * directory/name couple is already in the cache.
166	 */
167	cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
168				  cnp->cn_nameiop, cnp->cn_flags,
169				  &iswhiteout, vpp);
170	if (iswhiteout) {
171		cnp->cn_flags |= ISWHITEOUT;
172	}
173	if (cachefound && *vpp == NULLVP) {
174		/* Negative cache hit. */
175		error = ENOENT;
176		goto out;
177	} else if (cachefound) {
178		error = 0;
179		goto out;
180	}
181
182	/*
183	 * Treat an unlinked directory as empty (no "." or "..")
184	 */
185	if (dnode->tn_links == 0) {
186		KASSERT(dnode->tn_size == 0);
187		error = ENOENT;
188		goto out;
189	}
190
191	if (cnp->cn_flags & ISDOTDOT) {
192		tmpfs_node_t *pnode;
193
194		/*
195		 * Lookup of ".." case.
196		 */
197		if (lastcn && cnp->cn_nameiop == RENAME) {
198			error = EINVAL;
199			goto out;
200		}
201		KASSERT(dnode->tn_type == VDIR);
202		pnode = dnode->tn_spec.tn_dir.tn_parent;
203		if (pnode == NULL) {
204			error = ENOENT;
205			goto out;
206		}
207
208		error = vcache_get(dvp->v_mount, &pnode, sizeof(pnode), vpp);
209		goto out;
210	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
211		/*
212		 * Lookup of "." case.
213		 */
214		if (lastcn && cnp->cn_nameiop == RENAME) {
215			error = EISDIR;
216			goto out;
217		}
218		vref(dvp);
219		*vpp = dvp;
220		error = 0;
221		goto done;
222	}
223
224	/*
225	 * Other lookup cases: perform directory scan.
226	 */
227	de = tmpfs_dir_lookup(dnode, cnp);
228	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
229		/*
230		 * The entry was not found in the directory.  This is valid
231		 * if we are creating or renaming an entry and are working
232		 * on the last component of the path name.
233		 */
234		if (lastcn && (cnp->cn_nameiop == CREATE ||
235		    cnp->cn_nameiop == RENAME)) {
236			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
237			if (error) {
238				goto out;
239			}
240			error = EJUSTRETURN;
241		} else {
242			error = ENOENT;
243		}
244		if (de) {
245			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
246			cnp->cn_flags |= ISWHITEOUT;
247		}
248		goto done;
249	}
250
251	tnode = de->td_node;
252
253	/*
254	 * If it is not the last path component and found a non-directory
255	 * or non-link entry (which may itself be pointing to a directory),
256	 * raise an error.
257	 */
258	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
259		error = ENOTDIR;
260		goto out;
261	}
262
263	/* Check the permissions. */
264	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
265		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
266		if (error)
267			goto out;
268
269		if ((dnode->tn_mode & S_ISTXT) != 0) {
270			error = kauth_authorize_vnode(cnp->cn_cred,
271			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
272			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
273			    dnode->tn_uid, tnode->tn_uid));
274			if (error) {
275				error = EPERM;
276				goto out;
277			}
278		}
279	}
280
281	/* Get a vnode for the matching entry. */
282	error = vcache_get(dvp->v_mount, &tnode, sizeof(tnode), vpp);
283done:
284	/*
285	 * Cache the result, unless request was for creation (as it does
286	 * not improve the performance).
287	 */
288	if (cnp->cn_nameiop != CREATE) {
289		cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
290			    cnp->cn_flags);
291	}
292out:
293	KASSERT(VOP_ISLOCKED(dvp));
294
295	return error;
296}
297
298int
299tmpfs_create(void *v)
300{
301	struct vop_create_v3_args /* {
302		struct vnode		*a_dvp;
303		struct vnode		**a_vpp;
304		struct componentname	*a_cnp;
305		struct vattr		*a_vap;
306	} */ *ap = v;
307	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
308	struct componentname *cnp = ap->a_cnp;
309	struct vattr *vap = ap->a_vap;
310
311	KASSERT(VOP_ISLOCKED(dvp));
312	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
313	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
314}
315
316int
317tmpfs_mknod(void *v)
318{
319	struct vop_mknod_v3_args /* {
320		struct vnode		*a_dvp;
321		struct vnode		**a_vpp;
322		struct componentname	*a_cnp;
323		struct vattr		*a_vap;
324	} */ *ap = v;
325	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
326	struct componentname *cnp = ap->a_cnp;
327	struct vattr *vap = ap->a_vap;
328	enum vtype vt = vap->va_type;
329
330	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
331		*vpp = NULL;
332		return EINVAL;
333	}
334	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
335}
336
337int
338tmpfs_open(void *v)
339{
340	struct vop_open_args /* {
341		struct vnode	*a_vp;
342		int		a_mode;
343		kauth_cred_t	a_cred;
344	} */ *ap = v;
345	vnode_t *vp = ap->a_vp;
346	mode_t mode = ap->a_mode;
347	tmpfs_node_t *node;
348
349	KASSERT(VOP_ISLOCKED(vp));
350
351	node = VP_TO_TMPFS_NODE(vp);
352
353	/* If the file is marked append-only, deny write requests. */
354	if ((node->tn_flags & APPEND) != 0 &&
355	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
356		return EPERM;
357	}
358	return 0;
359}
360
361int
362tmpfs_close(void *v)
363{
364	struct vop_close_args /* {
365		struct vnode	*a_vp;
366		int		a_fflag;
367		kauth_cred_t	a_cred;
368	} */ *ap = v;
369	vnode_t *vp __diagused = ap->a_vp;
370
371	KASSERT(VOP_ISLOCKED(vp));
372	return 0;
373}
374
375int
376tmpfs_access(void *v)
377{
378	struct vop_access_args /* {
379		struct vnode	*a_vp;
380		int		a_mode;
381		kauth_cred_t	a_cred;
382	} */ *ap = v;
383	vnode_t *vp = ap->a_vp;
384	mode_t mode = ap->a_mode;
385	kauth_cred_t cred = ap->a_cred;
386	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
387	const bool writing = (mode & VWRITE) != 0;
388
389	KASSERT(VOP_ISLOCKED(vp));
390
391	/* Possible? */
392	switch (vp->v_type) {
393	case VDIR:
394	case VLNK:
395	case VREG:
396		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
397			return EROFS;
398		}
399		break;
400	case VBLK:
401	case VCHR:
402	case VSOCK:
403	case VFIFO:
404		break;
405	default:
406		return EINVAL;
407	}
408	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
409		return EPERM;
410	}
411
412	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
413	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
414	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
415}
416
417int
418tmpfs_getattr(void *v)
419{
420	struct vop_getattr_args /* {
421		struct vnode	*a_vp;
422		struct vattr	*a_vap;
423		kauth_cred_t	a_cred;
424	} */ *ap = v;
425	vnode_t *vp = ap->a_vp;
426	struct vattr *vap = ap->a_vap;
427	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
428
429	vattr_null(vap);
430
431	vap->va_type = vp->v_type;
432	vap->va_mode = node->tn_mode;
433	vap->va_nlink = node->tn_links;
434	vap->va_uid = node->tn_uid;
435	vap->va_gid = node->tn_gid;
436	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
437	vap->va_fileid = node->tn_id;
438	vap->va_size = node->tn_size;
439	vap->va_blocksize = PAGE_SIZE;
440	vap->va_atime = node->tn_atime;
441	vap->va_mtime = node->tn_mtime;
442	vap->va_ctime = node->tn_ctime;
443	vap->va_birthtime = node->tn_birthtime;
444	vap->va_gen = TMPFS_NODE_GEN(node);
445	vap->va_flags = node->tn_flags;
446	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
447	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
448	vap->va_bytes = round_page(node->tn_size);
449	vap->va_filerev = VNOVAL;
450	vap->va_vaflags = 0;
451	vap->va_spare = VNOVAL; /* XXX */
452
453	return 0;
454}
455
456int
457tmpfs_setattr(void *v)
458{
459	struct vop_setattr_args /* {
460		struct vnode	*a_vp;
461		struct vattr	*a_vap;
462		kauth_cred_t	a_cred;
463	} */ *ap = v;
464	vnode_t *vp = ap->a_vp;
465	struct vattr *vap = ap->a_vap;
466	kauth_cred_t cred = ap->a_cred;
467	lwp_t *l = curlwp;
468	int error = 0;
469
470	KASSERT(VOP_ISLOCKED(vp));
471
472	/* Abort if any unsettable attribute is given. */
473	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
474	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
475	    vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
476	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
477	    vap->va_bytes != VNOVAL) {
478		return EINVAL;
479	}
480
481	if (error == 0 && vap->va_flags != VNOVAL)
482		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
483
484	if (error == 0 && vap->va_size != VNOVAL)
485		error = tmpfs_chsize(vp, vap->va_size, cred, l);
486
487	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
488		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
489
490	if (error == 0 && vap->va_mode != VNOVAL)
491		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
492
493	const bool chsometime =
494	    vap->va_atime.tv_sec != VNOVAL ||
495	    vap->va_mtime.tv_sec != VNOVAL ||
496	    vap->va_birthtime.tv_sec != VNOVAL;
497	if (error == 0 && chsometime) {
498		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
499		    &vap->va_birthtime, vap->va_vaflags, cred, l);
500	}
501	return error;
502}
503
504int
505tmpfs_read(void *v)
506{
507	struct vop_read_args /* {
508		struct vnode *a_vp;
509		struct uio *a_uio;
510		int a_ioflag;
511		kauth_cred_t a_cred;
512	} */ *ap = v;
513	vnode_t *vp = ap->a_vp;
514	struct uio *uio = ap->a_uio;
515	const int ioflag = ap->a_ioflag;
516	tmpfs_node_t *node;
517	struct uvm_object *uobj;
518	int error;
519
520	KASSERT(VOP_ISLOCKED(vp));
521
522	if (vp->v_type == VDIR) {
523		return EISDIR;
524	}
525	if (uio->uio_offset < 0 || vp->v_type != VREG) {
526		return EINVAL;
527	}
528
529	/* Note: reading zero bytes should not update atime. */
530	if (uio->uio_resid == 0) {
531		return 0;
532	}
533
534	node = VP_TO_TMPFS_NODE(vp);
535	uobj = node->tn_spec.tn_reg.tn_aobj;
536	error = 0;
537
538	while (error == 0 && uio->uio_resid > 0) {
539		vsize_t len;
540
541		if (node->tn_size <= uio->uio_offset) {
542			break;
543		}
544		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
545		if (len == 0) {
546			break;
547		}
548		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
549		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
550	}
551
552	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
553	return error;
554}
555
556int
557tmpfs_write(void *v)
558{
559	struct vop_write_args /* {
560		struct vnode	*a_vp;
561		struct uio	*a_uio;
562		int		a_ioflag;
563		kauth_cred_t	a_cred;
564	} */ *ap = v;
565	vnode_t *vp = ap->a_vp;
566	struct uio *uio = ap->a_uio;
567	const int ioflag = ap->a_ioflag;
568	tmpfs_node_t *node;
569	struct uvm_object *uobj;
570	off_t oldsize;
571	int error;
572
573	KASSERT(VOP_ISLOCKED(vp));
574
575	node = VP_TO_TMPFS_NODE(vp);
576	oldsize = node->tn_size;
577
578	if (uio->uio_offset < 0 || vp->v_type != VREG) {
579		error = EINVAL;
580		goto out;
581	}
582	if (uio->uio_resid == 0) {
583		error = 0;
584		goto out;
585	}
586	if (ioflag & IO_APPEND) {
587		uio->uio_offset = node->tn_size;
588	}
589
590	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
591		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
592		if (error)
593			goto out;
594	}
595
596	uobj = node->tn_spec.tn_reg.tn_aobj;
597	error = 0;
598	while (error == 0 && uio->uio_resid > 0) {
599		vsize_t len;
600
601		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
602		if (len == 0) {
603			break;
604		}
605		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
606		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
607	}
608	if (error) {
609		(void)tmpfs_reg_resize(vp, oldsize);
610	}
611
612	tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
613	VN_KNOTE(vp, NOTE_WRITE);
614out:
615	if (error) {
616		KASSERT(oldsize == node->tn_size);
617	} else {
618		KASSERT(uio->uio_resid == 0);
619	}
620	return error;
621}
622
623int
624tmpfs_fsync(void *v)
625{
626	struct vop_fsync_args /* {
627		struct vnode *a_vp;
628		kauth_cred_t a_cred;
629		int a_flags;
630		off_t a_offlo;
631		off_t a_offhi;
632		struct lwp *a_l;
633	} */ *ap = v;
634	vnode_t *vp __diagused = ap->a_vp;
635
636	/* Nothing to do.  Should be up to date. */
637	KASSERT(VOP_ISLOCKED(vp));
638	return 0;
639}
640
641/*
642 * tmpfs_remove: unlink a file.
643 *
644 * => Both directory (dvp) and file (vp) are locked.
645 * => We unlock and drop the reference on both.
646 */
647int
648tmpfs_remove(void *v)
649{
650	struct vop_remove_args /* {
651		struct vnode *a_dvp;
652		struct vnode *a_vp;
653		struct componentname *a_cnp;
654	} */ *ap = v;
655	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
656	tmpfs_node_t *dnode, *node;
657	tmpfs_dirent_t *de;
658	int error;
659
660	KASSERT(VOP_ISLOCKED(dvp));
661	KASSERT(VOP_ISLOCKED(vp));
662
663	if (vp->v_type == VDIR) {
664		error = EPERM;
665		goto out;
666	}
667	dnode = VP_TO_TMPFS_DIR(dvp);
668	node = VP_TO_TMPFS_NODE(vp);
669
670	/*
671	 * Files marked as immutable or append-only cannot be deleted.
672	 * Likewise, files residing on directories marked as append-only
673	 * cannot be deleted.
674	 */
675	if (node->tn_flags & (IMMUTABLE | APPEND)) {
676		error = EPERM;
677		goto out;
678	}
679	if (dnode->tn_flags & APPEND) {
680		error = EPERM;
681		goto out;
682	}
683
684	/* Lookup the directory entry (check the cached hint first). */
685	de = tmpfs_dir_cached(node);
686	if (de == NULL) {
687		struct componentname *cnp = ap->a_cnp;
688		de = tmpfs_dir_lookup(dnode, cnp);
689	}
690	KASSERT(de && de->td_node == node);
691
692	/*
693	 * Remove the entry from the directory (drops the link count) and
694	 * destroy it or replace with a whiteout.
695	 *
696	 * Note: the inode referred by it will not be destroyed until the
697	 * vnode is reclaimed/recycled.
698	 */
699
700	tmpfs_dir_detach(dnode, de);
701
702	if (ap->a_cnp->cn_flags & DOWHITEOUT)
703		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
704	else
705		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
706
707	if (node->tn_links > 0) {
708		/* We removed a hard link. */
709		tmpfs_update(vp, TMPFS_UPDATE_CTIME);
710	}
711	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
712	error = 0;
713out:
714	/* Drop the references and unlock the vnodes. */
715	vput(vp);
716	if (dvp == vp) {
717		vrele(dvp);
718	} else {
719		vput(dvp);
720	}
721	return error;
722}
723
724/*
725 * tmpfs_link: create a hard link.
726 */
727int
728tmpfs_link(void *v)
729{
730	struct vop_link_v2_args /* {
731		struct vnode *a_dvp;
732		struct vnode *a_vp;
733		struct componentname *a_cnp;
734	} */ *ap = v;
735	vnode_t *dvp = ap->a_dvp;
736	vnode_t *vp = ap->a_vp;
737	struct componentname *cnp = ap->a_cnp;
738	tmpfs_node_t *dnode, *node;
739	tmpfs_dirent_t *de;
740	int error;
741
742	KASSERT(dvp != vp);
743	KASSERT(VOP_ISLOCKED(dvp));
744	KASSERT(vp->v_type != VDIR);
745	KASSERT(dvp->v_mount == vp->v_mount);
746
747	dnode = VP_TO_TMPFS_DIR(dvp);
748	node = VP_TO_TMPFS_NODE(vp);
749
750	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
751
752	/* Check for maximum number of links limit. */
753	if (node->tn_links == LINK_MAX) {
754		error = EMLINK;
755		goto out;
756	}
757	KASSERT(node->tn_links < LINK_MAX);
758
759	/* We cannot create links of files marked immutable or append-only. */
760	if (node->tn_flags & (IMMUTABLE | APPEND)) {
761		error = EPERM;
762		goto out;
763	}
764
765	/* Allocate a new directory entry to represent the inode. */
766	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
767	    cnp->cn_nameptr, cnp->cn_namelen, &de);
768	if (error) {
769		goto out;
770	}
771
772	/*
773	 * Insert the entry into the directory.
774	 * It will increase the inode link count.
775	 */
776	tmpfs_dir_attach(dnode, de, node);
777	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
778
779	/* Update the timestamps and trigger the event. */
780	if (node->tn_vnode) {
781		VN_KNOTE(node->tn_vnode, NOTE_LINK);
782	}
783	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
784	error = 0;
785out:
786	VOP_UNLOCK(vp);
787	return error;
788}
789
790int
791tmpfs_mkdir(void *v)
792{
793	struct vop_mkdir_v3_args /* {
794		struct vnode		*a_dvp;
795		struct vnode		**a_vpp;
796		struct componentname	*a_cnp;
797		struct vattr		*a_vap;
798	} */ *ap = v;
799	vnode_t *dvp = ap->a_dvp;
800	vnode_t **vpp = ap->a_vpp;
801	struct componentname *cnp = ap->a_cnp;
802	struct vattr *vap = ap->a_vap;
803
804	KASSERT(vap->va_type == VDIR);
805	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
806}
807
808int
809tmpfs_rmdir(void *v)
810{
811	struct vop_rmdir_args /* {
812		struct vnode		*a_dvp;
813		struct vnode		*a_vp;
814		struct componentname	*a_cnp;
815	} */ *ap = v;
816	vnode_t *dvp = ap->a_dvp;
817	vnode_t *vp = ap->a_vp;
818	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
819	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
820	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
821	tmpfs_dirent_t *de;
822	int error = 0;
823
824	KASSERT(VOP_ISLOCKED(dvp));
825	KASSERT(VOP_ISLOCKED(vp));
826
827	/*
828	 * Directories with more than two entries ('.' and '..') cannot be
829	 * removed.  There may be whiteout entries, which we will destroy.
830	 */
831	if (node->tn_size > 0) {
832		/*
833		 * If never had whiteout entries, the directory is certainly
834		 * not empty.  Otherwise, scan for any non-whiteout entry.
835		 */
836		if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
837			error = ENOTEMPTY;
838			goto out;
839		}
840		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
841			if (de->td_node != TMPFS_NODE_WHITEOUT) {
842				error = ENOTEMPTY;
843				goto out;
844			}
845		}
846		KASSERT(error == 0);
847	}
848
849	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
850
851	/* Lookup the directory entry (check the cached hint first). */
852	de = tmpfs_dir_cached(node);
853	if (de == NULL) {
854		struct componentname *cnp = ap->a_cnp;
855		de = tmpfs_dir_lookup(dnode, cnp);
856	}
857	KASSERT(de && de->td_node == node);
858
859	/* Check flags to see if we are allowed to remove the directory. */
860	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
861		error = EPERM;
862		goto out;
863	}
864
865	/* Decrement the link count for the virtual '.' entry. */
866	node->tn_links--;
867
868	/* Detach the directory entry from the directory. */
869	tmpfs_dir_detach(dnode, de);
870
871	/* Purge the cache for parent. */
872	cache_purge(dvp);
873
874	/*
875	 * Destroy the directory entry or replace it with a whiteout.
876	 *
877	 * Note: the inode referred by it will not be destroyed until the
878	 * vnode is reclaimed.
879	 */
880	if (ap->a_cnp->cn_flags & DOWHITEOUT)
881		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
882	else
883		tmpfs_free_dirent(tmp, de);
884
885	/* Destroy the whiteout entries from the node. */
886	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
887		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
888		tmpfs_dir_detach(node, de);
889		tmpfs_free_dirent(tmp, de);
890	}
891	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
892
893	KASSERT(node->tn_size == 0);
894	KASSERT(node->tn_links == 0);
895out:
896	/* Release the nodes. */
897	vput(dvp);
898	vput(vp);
899	return error;
900}
901
902int
903tmpfs_symlink(void *v)
904{
905	struct vop_symlink_v3_args /* {
906		struct vnode		*a_dvp;
907		struct vnode		**a_vpp;
908		struct componentname	*a_cnp;
909		struct vattr		*a_vap;
910		char			*a_target;
911	} */ *ap = v;
912	vnode_t *dvp = ap->a_dvp;
913	vnode_t **vpp = ap->a_vpp;
914	struct componentname *cnp = ap->a_cnp;
915	struct vattr *vap = ap->a_vap;
916	char *target = ap->a_target;
917
918	KASSERT(vap->va_type == VLNK);
919	return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
920}
921
922int
923tmpfs_readdir(void *v)
924{
925	struct vop_readdir_args /* {
926		struct vnode	*a_vp;
927		struct uio	*a_uio;
928		kauth_cred_t	a_cred;
929		int		*a_eofflag;
930		off_t		**a_cookies;
931		int		*ncookies;
932	} */ *ap = v;
933	vnode_t *vp = ap->a_vp;
934	struct uio *uio = ap->a_uio;
935	int *eofflag = ap->a_eofflag;
936	off_t **cookies = ap->a_cookies;
937	int *ncookies = ap->a_ncookies;
938	off_t startoff, cnt;
939	tmpfs_node_t *node;
940	int error;
941
942	KASSERT(VOP_ISLOCKED(vp));
943
944	/* This operation only makes sense on directory nodes. */
945	if (vp->v_type != VDIR) {
946		return ENOTDIR;
947	}
948	node = VP_TO_TMPFS_DIR(vp);
949	startoff = uio->uio_offset;
950	cnt = 0;
951
952	/*
953	 * Retrieve the directory entries, unless it is being destroyed.
954	 */
955	if (node->tn_links) {
956		error = tmpfs_dir_getdents(node, uio, &cnt);
957	} else {
958		error = 0;
959	}
960
961	if (eofflag != NULL) {
962		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
963	}
964	if (error || cookies == NULL || ncookies == NULL) {
965		return error;
966	}
967
968	/* Update NFS-related variables, if any. */
969	tmpfs_dirent_t *de = NULL;
970	off_t i, off = startoff;
971
972	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
973	*ncookies = cnt;
974
975	for (i = 0; i < cnt; i++) {
976		KASSERT(off != TMPFS_DIRSEQ_EOF);
977		if (off != TMPFS_DIRSEQ_DOT) {
978			if (off == TMPFS_DIRSEQ_DOTDOT) {
979				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
980			} else if (de != NULL) {
981				de = TAILQ_NEXT(de, td_entries);
982			} else {
983				de = tmpfs_dir_lookupbyseq(node, off);
984				KASSERT(de != NULL);
985				de = TAILQ_NEXT(de, td_entries);
986			}
987			if (de == NULL) {
988				off = TMPFS_DIRSEQ_EOF;
989			} else {
990				off = tmpfs_dir_getseq(node, de);
991			}
992		} else {
993			off = TMPFS_DIRSEQ_DOTDOT;
994		}
995		(*cookies)[i] = off;
996	}
997	KASSERT(uio->uio_offset == off);
998	return error;
999}
1000
1001int
1002tmpfs_readlink(void *v)
1003{
1004	struct vop_readlink_args /* {
1005		struct vnode	*a_vp;
1006		struct uio	*a_uio;
1007		kauth_cred_t	a_cred;
1008	} */ *ap = v;
1009	vnode_t *vp = ap->a_vp;
1010	struct uio *uio = ap->a_uio;
1011	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1012	int error;
1013
1014	KASSERT(VOP_ISLOCKED(vp));
1015	KASSERT(uio->uio_offset == 0);
1016	KASSERT(vp->v_type == VLNK);
1017
1018	/* Note: readlink(2) returns the path without NUL terminator. */
1019	if (node->tn_size > 0) {
1020		error = uiomove(node->tn_spec.tn_lnk.tn_link,
1021		    MIN(node->tn_size, uio->uio_resid), uio);
1022	} else {
1023		error = 0;
1024	}
1025	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1026
1027	return error;
1028}
1029
1030int
1031tmpfs_inactive(void *v)
1032{
1033	struct vop_inactive_args /* {
1034		struct vnode *a_vp;
1035		bool *a_recycle;
1036	} */ *ap = v;
1037	vnode_t *vp = ap->a_vp;
1038	tmpfs_node_t *node;
1039
1040	KASSERT(VOP_ISLOCKED(vp));
1041
1042	node = VP_TO_TMPFS_NODE(vp);
1043	if (node->tn_links == 0) {
1044		/*
1045		 * Mark node as dead by setting its generation to zero.
1046		 */
1047		atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1048		*ap->a_recycle = true;
1049	} else {
1050		*ap->a_recycle = false;
1051	}
1052	VOP_UNLOCK(vp);
1053
1054	return 0;
1055}
1056
1057int
1058tmpfs_reclaim(void *v)
1059{
1060	struct vop_reclaim_args /* {
1061		struct vnode *a_vp;
1062	} */ *ap = v;
1063	vnode_t *vp = ap->a_vp;
1064	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1065	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1066
1067	/* Disassociate inode from vnode. */
1068	node->tn_vnode = NULL;
1069	vcache_remove(vp->v_mount, &node, sizeof(node));
1070	vp->v_data = NULL;
1071
1072	/* If inode is not referenced, i.e. no links, then destroy it. */
1073	if (node->tn_links == 0)
1074		tmpfs_free_node(tmp, node);
1075	return 0;
1076}
1077
1078int
1079tmpfs_pathconf(void *v)
1080{
1081	struct vop_pathconf_args /* {
1082		struct vnode	*a_vp;
1083		int		a_name;
1084		register_t	*a_retval;
1085	} */ *ap = v;
1086	const int name = ap->a_name;
1087	register_t *retval = ap->a_retval;
1088	int error = 0;
1089
1090	switch (name) {
1091	case _PC_LINK_MAX:
1092		*retval = LINK_MAX;
1093		break;
1094	case _PC_NAME_MAX:
1095		*retval = TMPFS_MAXNAMLEN;
1096		break;
1097	case _PC_PATH_MAX:
1098		*retval = PATH_MAX;
1099		break;
1100	case _PC_PIPE_BUF:
1101		*retval = PIPE_BUF;
1102		break;
1103	case _PC_CHOWN_RESTRICTED:
1104		*retval = 1;
1105		break;
1106	case _PC_NO_TRUNC:
1107		*retval = 1;
1108		break;
1109	case _PC_SYNC_IO:
1110		*retval = 1;
1111		break;
1112	case _PC_FILESIZEBITS:
1113		*retval = sizeof(off_t) * CHAR_BIT;
1114		break;
1115	default:
1116		error = EINVAL;
1117	}
1118	return error;
1119}
1120
1121int
1122tmpfs_advlock(void *v)
1123{
1124	struct vop_advlock_args /* {
1125		struct vnode	*a_vp;
1126		void *		a_id;
1127		int		a_op;
1128		struct flock	*a_fl;
1129		int		a_flags;
1130	} */ *ap = v;
1131	vnode_t *vp = ap->a_vp;
1132	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1133
1134	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1135}
1136
1137int
1138tmpfs_getpages(void *v)
1139{
1140	struct vop_getpages_args /* {
1141		struct vnode *a_vp;
1142		voff_t a_offset;
1143		struct vm_page **a_m;
1144		int *a_count;
1145		int a_centeridx;
1146		vm_prot_t a_access_type;
1147		int a_advice;
1148		int a_flags;
1149	} */ * const ap = v;
1150	vnode_t *vp = ap->a_vp;
1151	const voff_t offset = ap->a_offset;
1152	struct vm_page **pgs = ap->a_m;
1153	const int centeridx = ap->a_centeridx;
1154	const vm_prot_t access_type = ap->a_access_type;
1155	const int advice = ap->a_advice;
1156	const int flags = ap->a_flags;
1157	int error, npages = *ap->a_count;
1158	tmpfs_node_t *node;
1159	struct uvm_object *uobj;
1160
1161	KASSERT(vp->v_type == VREG);
1162	KASSERT(mutex_owned(vp->v_interlock));
1163
1164	node = VP_TO_TMPFS_NODE(vp);
1165	uobj = node->tn_spec.tn_reg.tn_aobj;
1166
1167	/*
1168	 * Currently, PGO_PASTEOF is not supported.
1169	 */
1170	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1171		if ((flags & PGO_LOCKED) == 0)
1172			mutex_exit(vp->v_interlock);
1173		return EINVAL;
1174	}
1175
1176	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1177		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1178	}
1179
1180	if ((flags & PGO_LOCKED) != 0)
1181		return EBUSY;
1182
1183	if ((flags & PGO_NOTIMESTAMP) == 0) {
1184		u_int tflags = 0;
1185
1186		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1187			tflags |= TMPFS_UPDATE_ATIME;
1188
1189		if ((access_type & VM_PROT_WRITE) != 0) {
1190			tflags |= TMPFS_UPDATE_MTIME;
1191			if (vp->v_mount->mnt_flag & MNT_RELATIME)
1192				tflags |= TMPFS_UPDATE_ATIME;
1193		}
1194		tmpfs_update(vp, tflags);
1195	}
1196
1197	/*
1198	 * Invoke the pager.
1199	 *
1200	 * Clean the array of pages before.  XXX: PR/32166
1201	 * Note that vnode lock is shared with underlying UVM object.
1202	 */
1203	if (pgs) {
1204		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1205	}
1206	KASSERT(vp->v_interlock == uobj->vmobjlock);
1207
1208	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1209	    access_type, advice, flags | PGO_ALLPAGES);
1210
1211#if defined(DEBUG)
1212	if (!error && pgs) {
1213		for (int i = 0; i < npages; i++) {
1214			KASSERT(pgs[i] != NULL);
1215		}
1216	}
1217#endif
1218	return error;
1219}
1220
1221int
1222tmpfs_putpages(void *v)
1223{
1224	struct vop_putpages_args /* {
1225		struct vnode *a_vp;
1226		voff_t a_offlo;
1227		voff_t a_offhi;
1228		int a_flags;
1229	} */ * const ap = v;
1230	vnode_t *vp = ap->a_vp;
1231	const voff_t offlo = ap->a_offlo;
1232	const voff_t offhi = ap->a_offhi;
1233	const int flags = ap->a_flags;
1234	tmpfs_node_t *node;
1235	struct uvm_object *uobj;
1236	int error;
1237
1238	KASSERT(mutex_owned(vp->v_interlock));
1239
1240	if (vp->v_type != VREG) {
1241		mutex_exit(vp->v_interlock);
1242		return 0;
1243	}
1244
1245	node = VP_TO_TMPFS_NODE(vp);
1246	uobj = node->tn_spec.tn_reg.tn_aobj;
1247
1248	KASSERT(vp->v_interlock == uobj->vmobjlock);
1249	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1250
1251	/* XXX mtime */
1252
1253	return error;
1254}
1255
1256int
1257tmpfs_whiteout(void *v)
1258{
1259	struct vop_whiteout_args /* {
1260		struct vnode		*a_dvp;
1261		struct componentname	*a_cnp;
1262		int			a_flags;
1263	} */ *ap = v;
1264	vnode_t *dvp = ap->a_dvp;
1265	struct componentname *cnp = ap->a_cnp;
1266	const int flags = ap->a_flags;
1267	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1268	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1269	tmpfs_dirent_t *de;
1270	int error;
1271
1272	switch (flags) {
1273	case LOOKUP:
1274		break;
1275	case CREATE:
1276		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1277		    cnp->cn_namelen, &de);
1278		if (error)
1279			return error;
1280		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1281		break;
1282	case DELETE:
1283		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1284		de = tmpfs_dir_lookup(dnode, cnp);
1285		if (de == NULL)
1286			return ENOENT;
1287		tmpfs_dir_detach(dnode, de);
1288		tmpfs_free_dirent(tmp, de);
1289		break;
1290	}
1291	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1292	return 0;
1293}
1294
1295int
1296tmpfs_print(void *v)
1297{
1298	struct vop_print_args /* {
1299		struct vnode	*a_vp;
1300	} */ *ap = v;
1301	vnode_t *vp = ap->a_vp;
1302	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1303
1304	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1305	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1306	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1307	    node->tn_gid, (uintmax_t)node->tn_size);
1308	if (vp->v_type == VFIFO) {
1309		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1310	}
1311	printf("\n");
1312	return 0;
1313}
1314