tmpfs_vnops.c revision 1.118
1/*	$NetBSD: tmpfs_vnops.c,v 1.118 2014/02/27 16:51:38 hannken Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.118 2014/02/27 16:51:38 hannken Exp $");
39
40#include <sys/param.h>
41#include <sys/dirent.h>
42#include <sys/fcntl.h>
43#include <sys/event.h>
44#include <sys/malloc.h>
45#include <sys/namei.h>
46#include <sys/stat.h>
47#include <sys/uio.h>
48#include <sys/unistd.h>
49#include <sys/vnode.h>
50#include <sys/lockf.h>
51#include <sys/kauth.h>
52#include <sys/atomic.h>
53
54#include <uvm/uvm.h>
55
56#include <miscfs/fifofs/fifo.h>
57#include <miscfs/genfs/genfs.h>
58#include <fs/tmpfs/tmpfs_vnops.h>
59#include <fs/tmpfs/tmpfs.h>
60
/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * tmpfs_vnodeop_p is the vector pointer that tmpfs_vnodeop_opv_desc
 * refers to.  tmpfs_vnodeop_entries pairs each vnode operation descriptor
 * with the tmpfs routine that implements it; the vop_default_desc entry
 * names vn_default_error (presumably used for any operation not listed
 * here -- confirm against the vnode framework).  The table ends with a
 * { NULL, NULL } sentinel.
 */
int (**tmpfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_lookup_desc,		tmpfs_lookup },
	{ &vop_create_desc,		tmpfs_create },
	{ &vop_mknod_desc,		tmpfs_mknod },
	{ &vop_open_desc,		tmpfs_open },
	{ &vop_close_desc,		tmpfs_close },
	{ &vop_access_desc,		tmpfs_access },
	{ &vop_getattr_desc,		tmpfs_getattr },
	{ &vop_setattr_desc,		tmpfs_setattr },
	{ &vop_read_desc,		tmpfs_read },
	{ &vop_write_desc,		tmpfs_write },
	{ &vop_ioctl_desc,		tmpfs_ioctl },
	{ &vop_fcntl_desc,		tmpfs_fcntl },
	{ &vop_poll_desc,		tmpfs_poll },
	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
	{ &vop_revoke_desc,		tmpfs_revoke },
	{ &vop_mmap_desc,		tmpfs_mmap },
	{ &vop_fsync_desc,		tmpfs_fsync },
	{ &vop_seek_desc,		tmpfs_seek },
	{ &vop_remove_desc,		tmpfs_remove },
	{ &vop_link_desc,		tmpfs_link },
	{ &vop_rename_desc,		tmpfs_rename },
	{ &vop_mkdir_desc,		tmpfs_mkdir },
	{ &vop_rmdir_desc,		tmpfs_rmdir },
	{ &vop_symlink_desc,		tmpfs_symlink },
	{ &vop_readdir_desc,		tmpfs_readdir },
	{ &vop_readlink_desc,		tmpfs_readlink },
	{ &vop_abortop_desc,		tmpfs_abortop },
	{ &vop_inactive_desc,		tmpfs_inactive },
	{ &vop_reclaim_desc,		tmpfs_reclaim },
	{ &vop_lock_desc,		tmpfs_lock },
	{ &vop_unlock_desc,		tmpfs_unlock },
	{ &vop_bmap_desc,		tmpfs_bmap },
	{ &vop_strategy_desc,		tmpfs_strategy },
	{ &vop_print_desc,		tmpfs_print },
	{ &vop_pathconf_desc,		tmpfs_pathconf },
	{ &vop_islocked_desc,		tmpfs_islocked },
	{ &vop_advlock_desc,		tmpfs_advlock },
	{ &vop_bwrite_desc,		tmpfs_bwrite },
	{ &vop_getpages_desc,		tmpfs_getpages },
	{ &vop_putpages_desc,		tmpfs_putpages },
	{ &vop_whiteout_desc,		tmpfs_whiteout },
	{ NULL, NULL }
};
110
/*
 * Descriptor tying the tmpfs operations-vector pointer to the table of
 * (operation, handler) entries defined above in this file.
 */
const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
};
114
/*
 * tmpfs_lookup: path name traversal routine.
 *
 * Arguments: dvp (directory being searched), vpp (result),
 * cnp (component name - path).
 *
 * => Caller holds a reference and lock on dvp; dvp is locked again
 *    (or still) when we return.
 * => On success the looked-up vnode (*vpp) is returned with a reference
 *    held and, unless it is dvp itself, unlocked (see the "out" label).
 * => Returns 0 on success, EJUSTRETURN when the last component does not
 *    exist but may be created/renamed into, or an errno value
 *    (ENOENT, EROFS, EINVAL, EISDIR, ENOTDIR, EPERM, or whatever
 *    VOP_ACCESS() / tmpfs_vnode_get() report).
 * => ISWHITEOUT is set in cnp->cn_flags when a whiteout is found.
 */
int
tmpfs_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
	tmpfs_node_t *dnode, *tnode;
	tmpfs_dirent_t *de;
	int cachefound, iswhiteout;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));

	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* Check accessibility of directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
	if (error) {
		goto out;
	}

	/*
	 * If requesting the last path component on a read-only file system
	 * with a write operation, deny it.
	 */
	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/*
	 * Avoid doing a linear scan of the directory if the requested
	 * directory/name couple is already in the cache.
	 */
	cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
				  cnp->cn_nameiop, cnp->cn_flags,
				  &iswhiteout, vpp);
	if (iswhiteout) {
		cnp->cn_flags |= ISWHITEOUT;
	}
	/*
	 * Cache hits skip the VOP_UNLOCK(*vpp) at "out"; presumably
	 * cache_lookup() already hands back an unlocked vnode -- confirm.
	 */
	if (cachefound && *vpp == NULLVP) {
		/* Negative cache hit. */
		error = ENOENT;
		goto out_unlocked;
	} else if (cachefound) {
		error = 0;
		goto out_unlocked;
	}

	/*
	 * Treat an unlinked directory as empty (no "." or "..")
	 */
	if (dnode->tn_links == 0) {
		KASSERT(dnode->tn_size == 0);
		error = ENOENT;
		goto out;
	}

	if (cnp->cn_flags & ISDOTDOT) {
		tmpfs_node_t *pnode;

		/*
		 * Lookup of ".." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EINVAL;
			goto out;
		}
		KASSERT(dnode->tn_type == VDIR);
		pnode = dnode->tn_spec.tn_dir.tn_parent;
		if (pnode == NULL) {
			error = ENOENT;
			goto out;
		}

		/*
		 * Lock the parent tn_vlock before releasing the vnode lock,
		 * and thus prevent parent from disappearing.
		 */
		mutex_enter(&pnode->tn_vlock);
		VOP_UNLOCK(dvp);

		/*
		 * Get a vnode of the '..' entry and re-acquire the lock.
		 * Release the tn_vlock.
		 * (No mutex_exit() appears here, so tmpfs_vnode_get() is
		 * presumably what drops tn_vlock -- confirm.)
		 * Note that this path jumps to "out" and therefore does not
		 * enter the result into the name cache.
		 */
		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		goto out;

	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/*
		 * Lookup of "." case.
		 * The result is dvp itself with an extra reference; it stays
		 * locked because "out" only unlocks *vpp when it differs
		 * from dvp.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EISDIR;
			goto out;
		}
		vref(dvp);
		*vpp = dvp;
		error = 0;
		goto done;
	}

	/*
	 * Other lookup cases: perform directory scan.
	 */
	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
		/*
		 * The entry was not found in the directory.  This is valid
		 * if we are creating or renaming an entry and are working
		 * on the last component of the path name.
		 */
		if (lastcn && (cnp->cn_nameiop == CREATE ||
		    cnp->cn_nameiop == RENAME)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
			if (error) {
				goto out;
			}
			error = EJUSTRETURN;
		} else {
			error = ENOENT;
		}
		/* A whiteout entry counts as "not found" but is reported. */
		if (de) {
			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
			cnp->cn_flags |= ISWHITEOUT;
		}
		/* *vpp is still NULL here, so "done" enters a negative entry. */
		goto done;
	}

	tnode = de->td_node;

	/*
	 * If it is not the last path component and found a non-directory
	 * or non-link entry (which may itself be pointing to a directory),
	 * raise an error.
	 */
	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
		error = ENOTDIR;
		goto out;
	}

	/* Check the permissions. */
	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		if (error)
			goto out;

		/*
		 * Sticky directory: deletion additionally requires the
		 * kauth check below; any failure is reported as EPERM.
		 */
		if ((dnode->tn_mode & S_ISTXT) != 0) {
			error = kauth_authorize_vnode(cnp->cn_cred,
			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
			    dnode->tn_uid, tnode->tn_uid));
			if (error) {
				error = EPERM;
				goto out;
			}
		}
	}

	/* Get a vnode for the matching entry. */
	mutex_enter(&tnode->tn_vlock);
	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
done:
	/*
	 * Cache the result, unless request was for creation (as it does
	 * not improve the performance).
	 */
	if (cnp->cn_nameiop != CREATE) {
		cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
			    cnp->cn_flags);
	}
out:
	/* Hand the result back unlocked, except when it is dvp itself. */
	if (error == 0 && *vpp != dvp)
		VOP_UNLOCK(*vpp);
out_unlocked:
	KASSERT(VOP_ISLOCKED(dvp));

	return error;
}
312
313int
314tmpfs_create(void *v)
315{
316	struct vop_create_v3_args /* {
317		struct vnode		*a_dvp;
318		struct vnode		**a_vpp;
319		struct componentname	*a_cnp;
320		struct vattr		*a_vap;
321	} */ *ap = v;
322	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
323	struct componentname *cnp = ap->a_cnp;
324	struct vattr *vap = ap->a_vap;
325
326	KASSERT(VOP_ISLOCKED(dvp));
327	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
328	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
329}
330
331int
332tmpfs_mknod(void *v)
333{
334	struct vop_mknod_v3_args /* {
335		struct vnode		*a_dvp;
336		struct vnode		**a_vpp;
337		struct componentname	*a_cnp;
338		struct vattr		*a_vap;
339	} */ *ap = v;
340	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
341	struct componentname *cnp = ap->a_cnp;
342	struct vattr *vap = ap->a_vap;
343	enum vtype vt = vap->va_type;
344
345	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
346		vput(dvp);
347		return EINVAL;
348	}
349	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
350}
351
352int
353tmpfs_open(void *v)
354{
355	struct vop_open_args /* {
356		struct vnode	*a_vp;
357		int		a_mode;
358		kauth_cred_t	a_cred;
359	} */ *ap = v;
360	vnode_t *vp = ap->a_vp;
361	mode_t mode = ap->a_mode;
362	tmpfs_node_t *node;
363
364	KASSERT(VOP_ISLOCKED(vp));
365
366	node = VP_TO_TMPFS_NODE(vp);
367
368	/* If the file is marked append-only, deny write requests. */
369	if ((node->tn_flags & APPEND) != 0 &&
370	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
371		return EPERM;
372	}
373	return 0;
374}
375
376int
377tmpfs_close(void *v)
378{
379	struct vop_close_args /* {
380		struct vnode	*a_vp;
381		int		a_fflag;
382		kauth_cred_t	a_cred;
383	} */ *ap = v;
384	vnode_t *vp __diagused = ap->a_vp;
385
386	KASSERT(VOP_ISLOCKED(vp));
387	return 0;
388}
389
390int
391tmpfs_access(void *v)
392{
393	struct vop_access_args /* {
394		struct vnode	*a_vp;
395		int		a_mode;
396		kauth_cred_t	a_cred;
397	} */ *ap = v;
398	vnode_t *vp = ap->a_vp;
399	mode_t mode = ap->a_mode;
400	kauth_cred_t cred = ap->a_cred;
401	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
402	const bool writing = (mode & VWRITE) != 0;
403
404	KASSERT(VOP_ISLOCKED(vp));
405
406	/* Possible? */
407	switch (vp->v_type) {
408	case VDIR:
409	case VLNK:
410	case VREG:
411		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
412			return EROFS;
413		}
414		break;
415	case VBLK:
416	case VCHR:
417	case VSOCK:
418	case VFIFO:
419		break;
420	default:
421		return EINVAL;
422	}
423	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
424		return EPERM;
425	}
426
427	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
428	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
429	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
430}
431
432int
433tmpfs_getattr(void *v)
434{
435	struct vop_getattr_args /* {
436		struct vnode	*a_vp;
437		struct vattr	*a_vap;
438		kauth_cred_t	a_cred;
439	} */ *ap = v;
440	vnode_t *vp = ap->a_vp;
441	struct vattr *vap = ap->a_vap;
442	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
443
444	vattr_null(vap);
445
446	vap->va_type = vp->v_type;
447	vap->va_mode = node->tn_mode;
448	vap->va_nlink = node->tn_links;
449	vap->va_uid = node->tn_uid;
450	vap->va_gid = node->tn_gid;
451	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
452	vap->va_fileid = node->tn_id;
453	vap->va_size = node->tn_size;
454	vap->va_blocksize = PAGE_SIZE;
455	vap->va_atime = node->tn_atime;
456	vap->va_mtime = node->tn_mtime;
457	vap->va_ctime = node->tn_ctime;
458	vap->va_birthtime = node->tn_birthtime;
459	vap->va_gen = TMPFS_NODE_GEN(node);
460	vap->va_flags = node->tn_flags;
461	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
462	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
463	vap->va_bytes = round_page(node->tn_size);
464	vap->va_filerev = VNOVAL;
465	vap->va_vaflags = 0;
466	vap->va_spare = VNOVAL; /* XXX */
467
468	return 0;
469}
470
471int
472tmpfs_setattr(void *v)
473{
474	struct vop_setattr_args /* {
475		struct vnode	*a_vp;
476		struct vattr	*a_vap;
477		kauth_cred_t	a_cred;
478	} */ *ap = v;
479	vnode_t *vp = ap->a_vp;
480	struct vattr *vap = ap->a_vap;
481	kauth_cred_t cred = ap->a_cred;
482	lwp_t *l = curlwp;
483	int error = 0;
484
485	KASSERT(VOP_ISLOCKED(vp));
486
487	/* Abort if any unsettable attribute is given. */
488	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
489	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
490	    vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
491	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
492	    vap->va_bytes != VNOVAL) {
493		return EINVAL;
494	}
495
496	if (error == 0 && vap->va_flags != VNOVAL)
497		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
498
499	if (error == 0 && vap->va_size != VNOVAL)
500		error = tmpfs_chsize(vp, vap->va_size, cred, l);
501
502	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
503		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
504
505	if (error == 0 && vap->va_mode != VNOVAL)
506		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
507
508	const bool chsometime =
509	    vap->va_atime.tv_sec != VNOVAL ||
510	    vap->va_mtime.tv_sec != VNOVAL ||
511	    vap->va_birthtime.tv_sec != VNOVAL;
512	if (error == 0 && chsometime) {
513		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
514		    &vap->va_birthtime, vap->va_vaflags, cred, l);
515	}
516	return error;
517}
518
519int
520tmpfs_read(void *v)
521{
522	struct vop_read_args /* {
523		struct vnode *a_vp;
524		struct uio *a_uio;
525		int a_ioflag;
526		kauth_cred_t a_cred;
527	} */ *ap = v;
528	vnode_t *vp = ap->a_vp;
529	struct uio *uio = ap->a_uio;
530	const int ioflag = ap->a_ioflag;
531	tmpfs_node_t *node;
532	struct uvm_object *uobj;
533	int error;
534
535	KASSERT(VOP_ISLOCKED(vp));
536
537	if (vp->v_type == VDIR) {
538		return EISDIR;
539	}
540	if (uio->uio_offset < 0 || vp->v_type != VREG) {
541		return EINVAL;
542	}
543
544	/* Note: reading zero bytes should not update atime. */
545	if (uio->uio_resid == 0) {
546		return 0;
547	}
548
549	node = VP_TO_TMPFS_NODE(vp);
550	uobj = node->tn_spec.tn_reg.tn_aobj;
551	error = 0;
552
553	while (error == 0 && uio->uio_resid > 0) {
554		vsize_t len;
555
556		if (node->tn_size <= uio->uio_offset) {
557			break;
558		}
559		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
560		if (len == 0) {
561			break;
562		}
563		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
564		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
565	}
566
567	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
568	return error;
569}
570
/*
 * tmpfs_write: write data to a regular file.
 *
 * => vp must be locked by the caller.
 * => Returns EINVAL for a negative offset or a non-regular vnode.
 * => A zero-length request returns 0 without changing anything.
 * => With IO_APPEND the write position is forced to the current end of
 *    file.
 * => If the write extends past the current size the file is resized
 *    first; should the copy then fail, the file is resized back to its
 *    original size.
 * => Once the copy has been attempted, mtime/ctime are marked for
 *    update and a NOTE_WRITE kevent is posted -- in the failure case
 *    as well.
 */
int
tmpfs_write(void *v)
{
	struct vop_write_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		int		a_ioflag;
		kauth_cred_t	a_cred;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	const int ioflag = ap->a_ioflag;
	tmpfs_node_t *node;
	struct uvm_object *uobj;
	off_t oldsize;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);
	/* Remembered so that a failed write can be rolled back. */
	oldsize = node->tn_size;

	if (uio->uio_offset < 0 || vp->v_type != VREG) {
		error = EINVAL;
		goto out;
	}
	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}
	if (ioflag & IO_APPEND) {
		uio->uio_offset = node->tn_size;
	}

	/*
	 * Grow the file up front, so that the copy loop below can bound
	 * each transfer by the (new) file size.
	 */
	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
		if (error)
			goto out;
	}

	uobj = node->tn_spec.tn_reg.tn_aobj;
	error = 0;
	while (error == 0 && uio->uio_resid > 0) {
		vsize_t len;

		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
		if (len == 0) {
			break;
		}
		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
	}
	if (error) {
		/* Undo the extension; the result is deliberately ignored. */
		(void)tmpfs_reg_resize(vp, oldsize);
	}

	tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
	VN_KNOTE(vp, NOTE_WRITE);
out:
	/*
	 * On failure the size must be unchanged; on success everything
	 * requested must have been consumed.
	 */
	if (error) {
		KASSERT(oldsize == node->tn_size);
	} else {
		KASSERT(uio->uio_resid == 0);
	}
	return error;
}
637
638int
639tmpfs_fsync(void *v)
640{
641	struct vop_fsync_args /* {
642		struct vnode *a_vp;
643		kauth_cred_t a_cred;
644		int a_flags;
645		off_t a_offlo;
646		off_t a_offhi;
647		struct lwp *a_l;
648	} */ *ap = v;
649	vnode_t *vp __diagused = ap->a_vp;
650
651	/* Nothing to do.  Should be up to date. */
652	KASSERT(VOP_ISLOCKED(vp));
653	return 0;
654}
655
/*
 * tmpfs_remove: unlink a file.
 *
 * => Both directory (dvp) and file (vp) are locked.
 * => We unlock and drop the reference on both, on success and on error.
 * => Returns EPERM if vp is a directory, if the file is immutable or
 *    append-only, or if the directory is append-only; 0 otherwise.
 * => With DOWHITEOUT set in the component name flags, the directory
 *    entry is re-attached as a whiteout instead of being freed.
 */
int
tmpfs_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
	tmpfs_node_t *dnode, *node;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(VOP_ISLOCKED(vp));

	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * Files marked as immutable or append-only cannot be deleted.
	 * Likewise, files residing on directories marked as append-only
	 * cannot be deleted.
	 */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}
	if (dnode->tn_flags & APPEND) {
		error = EPERM;
		goto out;
	}

	/* Lookup the directory entry (check the cached hint first). */
	de = tmpfs_dir_cached(node);
	if (de == NULL) {
		struct componentname *cnp = ap->a_cnp;
		de = tmpfs_dir_lookup(dnode, cnp);
	}
	KASSERT(de && de->td_node == node);

	/*
	 * Remove the entry from the directory (drops the link count) and
	 * destroy it or replace with a whiteout.
	 *
	 * Note: the inode referred by it will not be destroyed until the
	 * vnode is reclaimed/recycled.
	 */

	tmpfs_dir_detach(dnode, de);

	if (ap->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
	else
		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);

	/* Other names still refer to the node: only its ctime changes. */
	if (node->tn_links > 0) {
		/* We removed a hard link. */
		tmpfs_update(vp, TMPFS_UPDATE_CTIME);
	}
	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
	error = 0;
out:
	/* Drop the references and unlock the vnodes. */
	vput(vp);
	/* If both are the same vnode it has been unlocked by the vput(). */
	if (dvp == vp) {
		vrele(dvp);
	} else {
		vput(dvp);
	}
	return error;
}
738
739/*
740 * tmpfs_link: create a hard link.
741 */
742int
743tmpfs_link(void *v)
744{
745	struct vop_link_args /* {
746		struct vnode *a_dvp;
747		struct vnode *a_vp;
748		struct componentname *a_cnp;
749	} */ *ap = v;
750	vnode_t *dvp = ap->a_dvp;
751	vnode_t *vp = ap->a_vp;
752	struct componentname *cnp = ap->a_cnp;
753	tmpfs_node_t *dnode, *node;
754	tmpfs_dirent_t *de;
755	int error;
756
757	KASSERT(dvp != vp);
758	KASSERT(VOP_ISLOCKED(dvp));
759	KASSERT(vp->v_type != VDIR);
760	KASSERT(dvp->v_mount == vp->v_mount);
761
762	dnode = VP_TO_TMPFS_DIR(dvp);
763	node = VP_TO_TMPFS_NODE(vp);
764
765	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
766
767	/* Check for maximum number of links limit. */
768	if (node->tn_links == LINK_MAX) {
769		error = EMLINK;
770		goto out;
771	}
772	KASSERT(node->tn_links < LINK_MAX);
773
774	/* We cannot create links of files marked immutable or append-only. */
775	if (node->tn_flags & (IMMUTABLE | APPEND)) {
776		error = EPERM;
777		goto out;
778	}
779
780	/* Allocate a new directory entry to represent the inode. */
781	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
782	    cnp->cn_nameptr, cnp->cn_namelen, &de);
783	if (error) {
784		goto out;
785	}
786
787	/*
788	 * Insert the entry into the directory.
789	 * It will increase the inode link count.
790	 */
791	tmpfs_dir_attach(dnode, de, node);
792	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
793
794	/* Update the timestamps and trigger the event. */
795	if (node->tn_vnode) {
796		VN_KNOTE(node->tn_vnode, NOTE_LINK);
797	}
798	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
799	error = 0;
800out:
801	VOP_UNLOCK(vp);
802	vput(dvp);
803	return error;
804}
805
806int
807tmpfs_mkdir(void *v)
808{
809	struct vop_mkdir_v3_args /* {
810		struct vnode		*a_dvp;
811		struct vnode		**a_vpp;
812		struct componentname	*a_cnp;
813		struct vattr		*a_vap;
814	} */ *ap = v;
815	vnode_t *dvp = ap->a_dvp;
816	vnode_t **vpp = ap->a_vpp;
817	struct componentname *cnp = ap->a_cnp;
818	struct vattr *vap = ap->a_vap;
819
820	KASSERT(vap->va_type == VDIR);
821	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
822}
823
/*
 * tmpfs_rmdir: remove a directory.
 *
 * => Both the parent directory (dvp) and the directory being removed
 *    (vp) are locked by the caller.
 * => Both are unlocked and released here, on success and on error.
 * => Returns ENOTEMPTY if the directory holds any entry that is not a
 *    whiteout, EPERM if flags on the parent (APPEND) or on the
 *    directory itself (IMMUTABLE or APPEND) forbid the removal, and 0
 *    on success.
 * => With DOWHITEOUT set in the component name flags, the directory
 *    entry in the parent is replaced by a whiteout instead of freed.
 */
int
tmpfs_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode		*a_dvp;
		struct vnode		*a_vp;
		struct componentname	*a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp;
	vnode_t *vp = ap->a_vp;
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
	tmpfs_dirent_t *de;
	int error = 0;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(VOP_ISLOCKED(vp));

	/*
	 * Directories with more than two entries ('.' and '..') cannot be
	 * removed.  There may be whiteout entries, which we will destroy.
	 */
	if (node->tn_size > 0) {
		/*
		 * If never had whiteout entries, the directory is certainly
		 * not empty.  Otherwise, scan for any non-whiteout entry.
		 */
		if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
			error = ENOTEMPTY;
			goto out;
		}
		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
			if (de->td_node != TMPFS_NODE_WHITEOUT) {
				error = ENOTEMPTY;
				goto out;
			}
		}
		KASSERT(error == 0);
	}

	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);

	/* Lookup the directory entry (check the cached hint first). */
	de = tmpfs_dir_cached(node);
	if (de == NULL) {
		struct componentname *cnp = ap->a_cnp;
		de = tmpfs_dir_lookup(dnode, cnp);
	}
	KASSERT(de && de->td_node == node);

	/* Check flags to see if we are allowed to remove the directory. */
	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Decrement the link count for the virtual '.' entry. */
	node->tn_links--;

	/* Detach the directory entry from the directory. */
	tmpfs_dir_detach(dnode, de);

	/* Purge the cache for parent. */
	cache_purge(dvp);

	/*
	 * Destroy the directory entry or replace it with a whiteout.
	 *
	 * Note: the inode referred by it will not be destroyed until the
	 * vnode is reclaimed.
	 */
	if (ap->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
	else
		tmpfs_free_dirent(tmp, de);

	/* Destroy the whiteout entries from the node. */
	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
		tmpfs_dir_detach(node, de);
		tmpfs_free_dirent(tmp, de);
	}
	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);

	/* The removed directory must now be empty and unreferenced. */
	KASSERT(node->tn_size == 0);
	KASSERT(node->tn_links == 0);
out:
	/* Release the nodes. */
	vput(dvp);
	vput(vp);
	return error;
}
917
918int
919tmpfs_symlink(void *v)
920{
921	struct vop_symlink_v3_args /* {
922		struct vnode		*a_dvp;
923		struct vnode		**a_vpp;
924		struct componentname	*a_cnp;
925		struct vattr		*a_vap;
926		char			*a_target;
927	} */ *ap = v;
928	vnode_t *dvp = ap->a_dvp;
929	vnode_t **vpp = ap->a_vpp;
930	struct componentname *cnp = ap->a_cnp;
931	struct vattr *vap = ap->a_vap;
932	char *target = ap->a_target;
933
934	KASSERT(vap->va_type == VLNK);
935	return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
936}
937
/*
 * tmpfs_readdir: read directory entries.
 *
 * => vp must be locked by the caller.
 * => Returns ENOTDIR if vp is not a directory, otherwise the status of
 *    tmpfs_dir_getdents() (0 for a directory with no links left, which
 *    is treated as having no entries).
 * => If a_eofflag is not NULL it is set to non-zero when the read
 *    succeeded and the offset reached TMPFS_DIRSEQ_EOF.
 * => If both a_cookies and a_ncookies are not NULL and the read
 *    succeeded, an array with one offset cookie per returned entry is
 *    allocated with malloc(M_TEMP) and handed back; nothing in this
 *    function frees it, so that is left to the caller.
 */
int
tmpfs_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		kauth_cred_t	a_cred;
		int		*a_eofflag;
		off_t		**a_cookies;
		int		*a_ncookies;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	off_t startoff, cnt;
	tmpfs_node_t *node;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}
	node = VP_TO_TMPFS_DIR(vp);
	/* Remember where we started: the cookie loop replays from here. */
	startoff = uio->uio_offset;
	cnt = 0;

	/*
	 * Retrieve the directory entries, unless it is being destroyed.
	 */
	if (node->tn_links) {
		error = tmpfs_dir_getdents(node, uio, &cnt);
	} else {
		error = 0;
	}

	if (eofflag != NULL) {
		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
	}
	if (error || cookies == NULL || ncookies == NULL) {
		return error;
	}

	/* Update NFS-related variables, if any. */
	tmpfs_dirent_t *de = NULL;
	off_t i, off = startoff;

	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
	*ncookies = cnt;

	/*
	 * Walk forward from the starting offset, recording for each of the
	 * cnt entries the offset that follows it: "." is followed by "..",
	 * ".." by the first real entry, and each real entry by its
	 * successor (or EOF after the last one).
	 */
	for (i = 0; i < cnt; i++) {
		KASSERT(off != TMPFS_DIRSEQ_EOF);
		if (off != TMPFS_DIRSEQ_DOT) {
			if (off == TMPFS_DIRSEQ_DOTDOT) {
				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			} else if (de != NULL) {
				de = TAILQ_NEXT(de, td_entries);
			} else {
				/* First step: locate the entry at startoff. */
				de = tmpfs_dir_lookupbyseq(node, off);
				KASSERT(de != NULL);
				de = TAILQ_NEXT(de, td_entries);
			}
			if (de == NULL) {
				off = TMPFS_DIRSEQ_EOF;
			} else {
				off = tmpfs_dir_getseq(node, de);
			}
		} else {
			off = TMPFS_DIRSEQ_DOTDOT;
		}
		(*cookies)[i] = off;
	}
	/* The replay must end exactly where the read itself stopped. */
	KASSERT(uio->uio_offset == off);
	return error;
}
1016
1017int
1018tmpfs_readlink(void *v)
1019{
1020	struct vop_readlink_args /* {
1021		struct vnode	*a_vp;
1022		struct uio	*a_uio;
1023		kauth_cred_t	a_cred;
1024	} */ *ap = v;
1025	vnode_t *vp = ap->a_vp;
1026	struct uio *uio = ap->a_uio;
1027	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1028	int error;
1029
1030	KASSERT(VOP_ISLOCKED(vp));
1031	KASSERT(uio->uio_offset == 0);
1032	KASSERT(vp->v_type == VLNK);
1033
1034	/* Note: readlink(2) returns the path without NUL terminator. */
1035	if (node->tn_size > 0) {
1036		error = uiomove(node->tn_spec.tn_lnk.tn_link,
1037		    MIN(node->tn_size - 1, uio->uio_resid), uio);
1038	} else {
1039		error = 0;
1040	}
1041	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1042
1043	return error;
1044}
1045
1046int
1047tmpfs_inactive(void *v)
1048{
1049	struct vop_inactive_args /* {
1050		struct vnode *a_vp;
1051		bool *a_recycle;
1052	} */ *ap = v;
1053	vnode_t *vp = ap->a_vp;
1054	tmpfs_node_t *node;
1055
1056	KASSERT(VOP_ISLOCKED(vp));
1057
1058	node = VP_TO_TMPFS_NODE(vp);
1059	if (node->tn_links == 0) {
1060		/*
1061		 * Mark node as dead by setting its generation to zero.
1062		 */
1063		atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1064		*ap->a_recycle = true;
1065	} else {
1066		*ap->a_recycle = false;
1067	}
1068	VOP_UNLOCK(vp);
1069
1070	return 0;
1071}
1072
1073int
1074tmpfs_reclaim(void *v)
1075{
1076	struct vop_reclaim_args /* {
1077		struct vnode *a_vp;
1078	} */ *ap = v;
1079	vnode_t *vp = ap->a_vp;
1080	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1081	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1082	bool recycle;
1083
1084	mutex_enter(&node->tn_vlock);
1085
1086	/* Disassociate inode from vnode. */
1087	node->tn_vnode = NULL;
1088	vp->v_data = NULL;
1089
1090	/* If inode is not referenced, i.e. no links, then destroy it. */
1091	recycle = node->tn_links == 0 && TMPFS_NODE_RECLAIMING(node) == 0;
1092
1093	mutex_exit(&node->tn_vlock);
1094
1095	if (recycle) {
1096		tmpfs_free_node(tmp, node);
1097	}
1098	return 0;
1099}
1100
1101int
1102tmpfs_pathconf(void *v)
1103{
1104	struct vop_pathconf_args /* {
1105		struct vnode	*a_vp;
1106		int		a_name;
1107		register_t	*a_retval;
1108	} */ *ap = v;
1109	const int name = ap->a_name;
1110	register_t *retval = ap->a_retval;
1111	int error = 0;
1112
1113	switch (name) {
1114	case _PC_LINK_MAX:
1115		*retval = LINK_MAX;
1116		break;
1117	case _PC_NAME_MAX:
1118		*retval = TMPFS_MAXNAMLEN;
1119		break;
1120	case _PC_PATH_MAX:
1121		*retval = PATH_MAX;
1122		break;
1123	case _PC_PIPE_BUF:
1124		*retval = PIPE_BUF;
1125		break;
1126	case _PC_CHOWN_RESTRICTED:
1127		*retval = 1;
1128		break;
1129	case _PC_NO_TRUNC:
1130		*retval = 1;
1131		break;
1132	case _PC_SYNC_IO:
1133		*retval = 1;
1134		break;
1135	case _PC_FILESIZEBITS:
1136		*retval = sizeof(off_t) * CHAR_BIT;
1137		break;
1138	default:
1139		error = EINVAL;
1140	}
1141	return error;
1142}
1143
1144int
1145tmpfs_advlock(void *v)
1146{
1147	struct vop_advlock_args /* {
1148		struct vnode	*a_vp;
1149		void *		a_id;
1150		int		a_op;
1151		struct flock	*a_fl;
1152		int		a_flags;
1153	} */ *ap = v;
1154	vnode_t *vp = ap->a_vp;
1155	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1156
1157	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1158}
1159
1160int
1161tmpfs_getpages(void *v)
1162{
1163	struct vop_getpages_args /* {
1164		struct vnode *a_vp;
1165		voff_t a_offset;
1166		struct vm_page **a_m;
1167		int *a_count;
1168		int a_centeridx;
1169		vm_prot_t a_access_type;
1170		int a_advice;
1171		int a_flags;
1172	} */ * const ap = v;
1173	vnode_t *vp = ap->a_vp;
1174	const voff_t offset = ap->a_offset;
1175	struct vm_page **pgs = ap->a_m;
1176	const int centeridx = ap->a_centeridx;
1177	const vm_prot_t access_type = ap->a_access_type;
1178	const int advice = ap->a_advice;
1179	const int flags = ap->a_flags;
1180	int error, npages = *ap->a_count;
1181	tmpfs_node_t *node;
1182	struct uvm_object *uobj;
1183
1184	KASSERT(vp->v_type == VREG);
1185	KASSERT(mutex_owned(vp->v_interlock));
1186
1187	node = VP_TO_TMPFS_NODE(vp);
1188	uobj = node->tn_spec.tn_reg.tn_aobj;
1189
1190	/*
1191	 * Currently, PGO_PASTEOF is not supported.
1192	 */
1193	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1194		if ((flags & PGO_LOCKED) == 0)
1195			mutex_exit(vp->v_interlock);
1196		return EINVAL;
1197	}
1198
1199	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1200		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1201	}
1202
1203	if ((flags & PGO_LOCKED) != 0)
1204		return EBUSY;
1205
1206	if ((flags & PGO_NOTIMESTAMP) == 0) {
1207		u_int tflags = 0;
1208
1209		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1210			tflags |= TMPFS_UPDATE_ATIME;
1211
1212		if ((access_type & VM_PROT_WRITE) != 0) {
1213			tflags |= TMPFS_UPDATE_MTIME;
1214			if (vp->v_mount->mnt_flag & MNT_RELATIME)
1215				tflags |= TMPFS_UPDATE_ATIME;
1216		}
1217		tmpfs_update(vp, tflags);
1218	}
1219
1220	/*
1221	 * Invoke the pager.
1222	 *
1223	 * Clean the array of pages before.  XXX: PR/32166
1224	 * Note that vnode lock is shared with underlying UVM object.
1225	 */
1226	if (pgs) {
1227		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1228	}
1229	KASSERT(vp->v_interlock == uobj->vmobjlock);
1230
1231	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1232	    access_type, advice, flags | PGO_ALLPAGES);
1233
1234#if defined(DEBUG)
1235	if (!error && pgs) {
1236		for (int i = 0; i < npages; i++) {
1237			KASSERT(pgs[i] != NULL);
1238		}
1239	}
1240#endif
1241	return error;
1242}
1243
1244int
1245tmpfs_putpages(void *v)
1246{
1247	struct vop_putpages_args /* {
1248		struct vnode *a_vp;
1249		voff_t a_offlo;
1250		voff_t a_offhi;
1251		int a_flags;
1252	} */ * const ap = v;
1253	vnode_t *vp = ap->a_vp;
1254	const voff_t offlo = ap->a_offlo;
1255	const voff_t offhi = ap->a_offhi;
1256	const int flags = ap->a_flags;
1257	tmpfs_node_t *node;
1258	struct uvm_object *uobj;
1259	int error;
1260
1261	KASSERT(mutex_owned(vp->v_interlock));
1262
1263	if (vp->v_type != VREG) {
1264		mutex_exit(vp->v_interlock);
1265		return 0;
1266	}
1267
1268	node = VP_TO_TMPFS_NODE(vp);
1269	uobj = node->tn_spec.tn_reg.tn_aobj;
1270
1271	KASSERT(vp->v_interlock == uobj->vmobjlock);
1272	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1273
1274	/* XXX mtime */
1275
1276	return error;
1277}
1278
1279int
1280tmpfs_whiteout(void *v)
1281{
1282	struct vop_whiteout_args /* {
1283		struct vnode		*a_dvp;
1284		struct componentname	*a_cnp;
1285		int			a_flags;
1286	} */ *ap = v;
1287	vnode_t *dvp = ap->a_dvp;
1288	struct componentname *cnp = ap->a_cnp;
1289	const int flags = ap->a_flags;
1290	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1291	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1292	tmpfs_dirent_t *de;
1293	int error;
1294
1295	switch (flags) {
1296	case LOOKUP:
1297		break;
1298	case CREATE:
1299		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1300		    cnp->cn_namelen, &de);
1301		if (error)
1302			return error;
1303		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1304		break;
1305	case DELETE:
1306		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1307		de = tmpfs_dir_lookup(dnode, cnp);
1308		if (de == NULL)
1309			return ENOENT;
1310		tmpfs_dir_detach(dnode, de);
1311		tmpfs_free_dirent(tmp, de);
1312		break;
1313	}
1314	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1315	return 0;
1316}
1317
1318int
1319tmpfs_print(void *v)
1320{
1321	struct vop_print_args /* {
1322		struct vnode	*a_vp;
1323	} */ *ap = v;
1324	vnode_t *vp = ap->a_vp;
1325	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1326
1327	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1328	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1329	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1330	    node->tn_gid, (uintmax_t)node->tn_size);
1331	if (vp->v_type == VFIFO) {
1332		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1333	}
1334	printf("\n");
1335	return 0;
1336}
1337