tmpfs_vnops.c revision 1.113
1/*	$NetBSD: tmpfs_vnops.c,v 1.113 2014/01/17 10:55:02 hannken Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.113 2014/01/17 10:55:02 hannken Exp $");
39
40#include <sys/param.h>
41#include <sys/dirent.h>
42#include <sys/fcntl.h>
43#include <sys/event.h>
44#include <sys/malloc.h>
45#include <sys/namei.h>
46#include <sys/stat.h>
47#include <sys/uio.h>
48#include <sys/unistd.h>
49#include <sys/vnode.h>
50#include <sys/lockf.h>
51#include <sys/kauth.h>
52#include <sys/atomic.h>
53
54#include <uvm/uvm.h>
55
56#include <miscfs/fifofs/fifo.h>
57#include <miscfs/genfs/genfs.h>
58#include <fs/tmpfs/tmpfs_vnops.h>
59#include <fs/tmpfs/tmpfs.h>
60
61/*
62 * vnode operations vector used for files stored in a tmpfs file system.
63 */
64int (**tmpfs_vnodeop_p)(void *);
65const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
66	{ &vop_default_desc,		vn_default_error },
67	{ &vop_lookup_desc,		tmpfs_lookup },
68	{ &vop_create_desc,		tmpfs_create },
69	{ &vop_mknod_desc,		tmpfs_mknod },
70	{ &vop_open_desc,		tmpfs_open },
71	{ &vop_close_desc,		tmpfs_close },
72	{ &vop_access_desc,		tmpfs_access },
73	{ &vop_getattr_desc,		tmpfs_getattr },
74	{ &vop_setattr_desc,		tmpfs_setattr },
75	{ &vop_read_desc,		tmpfs_read },
76	{ &vop_write_desc,		tmpfs_write },
77	{ &vop_ioctl_desc,		tmpfs_ioctl },
78	{ &vop_fcntl_desc,		tmpfs_fcntl },
79	{ &vop_poll_desc,		tmpfs_poll },
80	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
81	{ &vop_revoke_desc,		tmpfs_revoke },
82	{ &vop_mmap_desc,		tmpfs_mmap },
83	{ &vop_fsync_desc,		tmpfs_fsync },
84	{ &vop_seek_desc,		tmpfs_seek },
85	{ &vop_remove_desc,		tmpfs_remove },
86	{ &vop_link_desc,		tmpfs_link },
87	{ &vop_rename_desc,		tmpfs_rename },
88	{ &vop_mkdir_desc,		tmpfs_mkdir },
89	{ &vop_rmdir_desc,		tmpfs_rmdir },
90	{ &vop_symlink_desc,		tmpfs_symlink },
91	{ &vop_readdir_desc,		tmpfs_readdir },
92	{ &vop_readlink_desc,		tmpfs_readlink },
93	{ &vop_abortop_desc,		tmpfs_abortop },
94	{ &vop_inactive_desc,		tmpfs_inactive },
95	{ &vop_reclaim_desc,		tmpfs_reclaim },
96	{ &vop_lock_desc,		tmpfs_lock },
97	{ &vop_unlock_desc,		tmpfs_unlock },
98	{ &vop_bmap_desc,		tmpfs_bmap },
99	{ &vop_strategy_desc,		tmpfs_strategy },
100	{ &vop_print_desc,		tmpfs_print },
101	{ &vop_pathconf_desc,		tmpfs_pathconf },
102	{ &vop_islocked_desc,		tmpfs_islocked },
103	{ &vop_advlock_desc,		tmpfs_advlock },
104	{ &vop_bwrite_desc,		tmpfs_bwrite },
105	{ &vop_getpages_desc,		tmpfs_getpages },
106	{ &vop_putpages_desc,		tmpfs_putpages },
107	{ &vop_whiteout_desc,		tmpfs_whiteout },
108	{ NULL, NULL }
109};
110
111const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
112	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
113};
114
115/*
116 * tmpfs_lookup: path name traversal routine.
117 *
118 * Arguments: dvp (directory being searched), vpp (result),
119 * cnp (component name - path).
120 *
121 * => Caller holds a reference and lock on dvp.
122 * => We return looked-up vnode (vpp) locked, with a reference held.
123 */
124int
125tmpfs_lookup(void *v)
126{
127	struct vop_lookup_args /* {
128		struct vnode *a_dvp;
129		struct vnode **a_vpp;
130		struct componentname *a_cnp;
131	} */ *ap = v;
132	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
133	struct componentname *cnp = ap->a_cnp;
134	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
135	tmpfs_node_t *dnode, *tnode;
136	tmpfs_dirent_t *de;
137	int cachefound, iswhiteout;
138	int error;
139
140	KASSERT(VOP_ISLOCKED(dvp));
141
142	dnode = VP_TO_TMPFS_DIR(dvp);
143	*vpp = NULL;
144
145	/* Check accessibility of directory. */
146	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
147	if (error) {
148		goto out;
149	}
150
151	/*
152	 * If requesting the last path component on a read-only file system
153	 * with a write operation, deny it.
154	 */
155	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
156	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
157		error = EROFS;
158		goto out;
159	}
160
161	/*
162	 * Avoid doing a linear scan of the directory if the requested
163	 * directory/name couple is already in the cache.
164	 */
165	cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
166				  cnp->cn_nameiop, cnp->cn_flags,
167				  &iswhiteout, vpp);
168	if (iswhiteout) {
169		cnp->cn_flags |= ISWHITEOUT;
170	}
171	if (cachefound && *vpp == NULLVP) {
172		/* Negative cache hit. */
173		error = ENOENT;
174		goto out;
175	} else if (cachefound) {
176		error = 0;
177		goto out;
178	}
179
180	/*
181	 * Treat an unlinked directory as empty (no "." or "..")
182	 */
183	if (dnode->tn_links == 0) {
184		KASSERT(dnode->tn_size == 0);
185		error = ENOENT;
186		goto out;
187	}
188
189	if (cnp->cn_flags & ISDOTDOT) {
190		tmpfs_node_t *pnode;
191
192		/*
193		 * Lookup of ".." case.
194		 */
195		if (lastcn && cnp->cn_nameiop == RENAME) {
196			error = EINVAL;
197			goto out;
198		}
199		KASSERT(dnode->tn_type == VDIR);
200		pnode = dnode->tn_spec.tn_dir.tn_parent;
201		if (pnode == NULL) {
202			error = ENOENT;
203			goto out;
204		}
205
206		/*
207		 * Lock the parent tn_vlock before releasing the vnode lock,
208		 * and thus prevent parent from disappearing.
209		 */
210		mutex_enter(&pnode->tn_vlock);
211		VOP_UNLOCK(dvp);
212
213		/*
214		 * Get a vnode of the '..' entry and re-acquire the lock.
215		 * Release the tn_vlock.
216		 */
217		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
218		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
219		goto out;
220
221	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
222		/*
223		 * Lookup of "." case.
224		 */
225		if (lastcn && cnp->cn_nameiop == RENAME) {
226			error = EISDIR;
227			goto out;
228		}
229		vref(dvp);
230		*vpp = dvp;
231		error = 0;
232		goto done;
233	}
234
235	/*
236	 * Other lookup cases: perform directory scan.
237	 */
238	de = tmpfs_dir_lookup(dnode, cnp);
239	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
240		/*
241		 * The entry was not found in the directory.  This is valid
242		 * if we are creating or renaming an entry and are working
243		 * on the last component of the path name.
244		 */
245		if (lastcn && (cnp->cn_nameiop == CREATE ||
246		    cnp->cn_nameiop == RENAME)) {
247			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
248			if (error) {
249				goto out;
250			}
251			error = EJUSTRETURN;
252		} else {
253			error = ENOENT;
254		}
255		if (de) {
256			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
257			cnp->cn_flags |= ISWHITEOUT;
258		}
259		goto done;
260	}
261
262	tnode = de->td_node;
263
264	/*
265	 * If it is not the last path component and found a non-directory
266	 * or non-link entry (which may itself be pointing to a directory),
267	 * raise an error.
268	 */
269	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
270		error = ENOTDIR;
271		goto out;
272	}
273
274	/* Check the permissions. */
275	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
276		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
277		if (error)
278			goto out;
279
280		if ((dnode->tn_mode & S_ISTXT) != 0) {
281			error = kauth_authorize_vnode(cnp->cn_cred,
282			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
283			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
284			    dnode->tn_uid, tnode->tn_uid));
285			if (error) {
286				error = EPERM;
287				goto out;
288			}
289		}
290	}
291
292	/* Get a vnode for the matching entry. */
293	mutex_enter(&tnode->tn_vlock);
294	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
295done:
296	/*
297	 * Cache the result, unless request was for creation (as it does
298	 * not improve the performance).
299	 */
300	if (cnp->cn_nameiop != CREATE) {
301		cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
302			    cnp->cn_flags);
303	}
304out:
305	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
306	KASSERT(VOP_ISLOCKED(dvp));
307
308	return error;
309}
310
311int
312tmpfs_create(void *v)
313{
314	struct vop_create_v2_args /* {
315		struct vnode		*a_dvp;
316		struct vnode		**a_vpp;
317		struct componentname	*a_cnp;
318		struct vattr		*a_vap;
319	} */ *ap = v;
320	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
321	struct componentname *cnp = ap->a_cnp;
322	struct vattr *vap = ap->a_vap;
323
324	KASSERT(VOP_ISLOCKED(dvp));
325	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
326	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
327}
328
329int
330tmpfs_mknod(void *v)
331{
332	struct vop_mknod_v2_args /* {
333		struct vnode		*a_dvp;
334		struct vnode		**a_vpp;
335		struct componentname	*a_cnp;
336		struct vattr		*a_vap;
337	} */ *ap = v;
338	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
339	struct componentname *cnp = ap->a_cnp;
340	struct vattr *vap = ap->a_vap;
341	enum vtype vt = vap->va_type;
342
343	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
344		vput(dvp);
345		return EINVAL;
346	}
347	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
348}
349
350int
351tmpfs_open(void *v)
352{
353	struct vop_open_args /* {
354		struct vnode	*a_vp;
355		int		a_mode;
356		kauth_cred_t	a_cred;
357	} */ *ap = v;
358	vnode_t *vp = ap->a_vp;
359	mode_t mode = ap->a_mode;
360	tmpfs_node_t *node;
361
362	KASSERT(VOP_ISLOCKED(vp));
363
364	node = VP_TO_TMPFS_NODE(vp);
365
366	/* If the file is marked append-only, deny write requests. */
367	if ((node->tn_flags & APPEND) != 0 &&
368	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
369		return EPERM;
370	}
371	return 0;
372}
373
374int
375tmpfs_close(void *v)
376{
377	struct vop_close_args /* {
378		struct vnode	*a_vp;
379		int		a_fflag;
380		kauth_cred_t	a_cred;
381	} */ *ap = v;
382	vnode_t *vp __diagused = ap->a_vp;
383
384	KASSERT(VOP_ISLOCKED(vp));
385	return 0;
386}
387
388int
389tmpfs_access(void *v)
390{
391	struct vop_access_args /* {
392		struct vnode	*a_vp;
393		int		a_mode;
394		kauth_cred_t	a_cred;
395	} */ *ap = v;
396	vnode_t *vp = ap->a_vp;
397	mode_t mode = ap->a_mode;
398	kauth_cred_t cred = ap->a_cred;
399	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
400	const bool writing = (mode & VWRITE) != 0;
401
402	KASSERT(VOP_ISLOCKED(vp));
403
404	/* Possible? */
405	switch (vp->v_type) {
406	case VDIR:
407	case VLNK:
408	case VREG:
409		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
410			return EROFS;
411		}
412		break;
413	case VBLK:
414	case VCHR:
415	case VSOCK:
416	case VFIFO:
417		break;
418	default:
419		return EINVAL;
420	}
421	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
422		return EPERM;
423	}
424
425	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
426	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
427	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
428}
429
430int
431tmpfs_getattr(void *v)
432{
433	struct vop_getattr_args /* {
434		struct vnode	*a_vp;
435		struct vattr	*a_vap;
436		kauth_cred_t	a_cred;
437	} */ *ap = v;
438	vnode_t *vp = ap->a_vp;
439	struct vattr *vap = ap->a_vap;
440	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
441
442	vattr_null(vap);
443
444	vap->va_type = vp->v_type;
445	vap->va_mode = node->tn_mode;
446	vap->va_nlink = node->tn_links;
447	vap->va_uid = node->tn_uid;
448	vap->va_gid = node->tn_gid;
449	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
450	vap->va_fileid = node->tn_id;
451	vap->va_size = node->tn_size;
452	vap->va_blocksize = PAGE_SIZE;
453	vap->va_atime = node->tn_atime;
454	vap->va_mtime = node->tn_mtime;
455	vap->va_ctime = node->tn_ctime;
456	vap->va_birthtime = node->tn_birthtime;
457	vap->va_gen = TMPFS_NODE_GEN(node);
458	vap->va_flags = node->tn_flags;
459	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
460	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
461	vap->va_bytes = round_page(node->tn_size);
462	vap->va_filerev = VNOVAL;
463	vap->va_vaflags = 0;
464	vap->va_spare = VNOVAL; /* XXX */
465
466	return 0;
467}
468
469int
470tmpfs_setattr(void *v)
471{
472	struct vop_setattr_args /* {
473		struct vnode	*a_vp;
474		struct vattr	*a_vap;
475		kauth_cred_t	a_cred;
476	} */ *ap = v;
477	vnode_t *vp = ap->a_vp;
478	struct vattr *vap = ap->a_vap;
479	kauth_cred_t cred = ap->a_cred;
480	lwp_t *l = curlwp;
481	int error = 0;
482
483	KASSERT(VOP_ISLOCKED(vp));
484
485	/* Abort if any unsettable attribute is given. */
486	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
487	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
488	    vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
489	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
490	    vap->va_bytes != VNOVAL) {
491		return EINVAL;
492	}
493
494	if (error == 0 && vap->va_flags != VNOVAL)
495		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
496
497	if (error == 0 && vap->va_size != VNOVAL)
498		error = tmpfs_chsize(vp, vap->va_size, cred, l);
499
500	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
501		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
502
503	if (error == 0 && vap->va_mode != VNOVAL)
504		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
505
506	const bool chsometime =
507	    vap->va_atime.tv_sec != VNOVAL ||
508	    vap->va_mtime.tv_sec != VNOVAL ||
509	    vap->va_birthtime.tv_sec != VNOVAL;
510	if (error == 0 && chsometime) {
511		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
512		    &vap->va_birthtime, vap->va_vaflags, cred, l);
513	}
514	return error;
515}
516
517int
518tmpfs_read(void *v)
519{
520	struct vop_read_args /* {
521		struct vnode *a_vp;
522		struct uio *a_uio;
523		int a_ioflag;
524		kauth_cred_t a_cred;
525	} */ *ap = v;
526	vnode_t *vp = ap->a_vp;
527	struct uio *uio = ap->a_uio;
528	const int ioflag = ap->a_ioflag;
529	tmpfs_node_t *node;
530	struct uvm_object *uobj;
531	int error;
532
533	KASSERT(VOP_ISLOCKED(vp));
534
535	if (vp->v_type != VREG) {
536		return EISDIR;
537	}
538	if (uio->uio_offset < 0) {
539		return EINVAL;
540	}
541
542	/* Note: reading zero bytes should not update atime. */
543	if (uio->uio_resid == 0) {
544		return 0;
545	}
546
547	node = VP_TO_TMPFS_NODE(vp);
548	uobj = node->tn_spec.tn_reg.tn_aobj;
549	error = 0;
550
551	while (error == 0 && uio->uio_resid > 0) {
552		vsize_t len;
553
554		if (node->tn_size <= uio->uio_offset) {
555			break;
556		}
557		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
558		if (len == 0) {
559			break;
560		}
561		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
562		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
563	}
564
565	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
566	return error;
567}
568
569int
570tmpfs_write(void *v)
571{
572	struct vop_write_args /* {
573		struct vnode	*a_vp;
574		struct uio	*a_uio;
575		int		a_ioflag;
576		kauth_cred_t	a_cred;
577	} */ *ap = v;
578	vnode_t *vp = ap->a_vp;
579	struct uio *uio = ap->a_uio;
580	const int ioflag = ap->a_ioflag;
581	tmpfs_node_t *node;
582	struct uvm_object *uobj;
583	off_t oldsize;
584	int error;
585
586	KASSERT(VOP_ISLOCKED(vp));
587
588	node = VP_TO_TMPFS_NODE(vp);
589	oldsize = node->tn_size;
590
591	if (uio->uio_offset < 0 || vp->v_type != VREG) {
592		error = EINVAL;
593		goto out;
594	}
595	if (uio->uio_resid == 0) {
596		error = 0;
597		goto out;
598	}
599	if (ioflag & IO_APPEND) {
600		uio->uio_offset = node->tn_size;
601	}
602
603	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
604		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
605		if (error)
606			goto out;
607	}
608
609	uobj = node->tn_spec.tn_reg.tn_aobj;
610	error = 0;
611	while (error == 0 && uio->uio_resid > 0) {
612		vsize_t len;
613
614		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
615		if (len == 0) {
616			break;
617		}
618		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
619		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
620	}
621	if (error) {
622		(void)tmpfs_reg_resize(vp, oldsize);
623	}
624
625	tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
626	VN_KNOTE(vp, NOTE_WRITE);
627out:
628	if (error) {
629		KASSERT(oldsize == node->tn_size);
630	} else {
631		KASSERT(uio->uio_resid == 0);
632	}
633	return error;
634}
635
636int
637tmpfs_fsync(void *v)
638{
639	struct vop_fsync_args /* {
640		struct vnode *a_vp;
641		kauth_cred_t a_cred;
642		int a_flags;
643		off_t a_offlo;
644		off_t a_offhi;
645		struct lwp *a_l;
646	} */ *ap = v;
647	vnode_t *vp __diagused = ap->a_vp;
648
649	/* Nothing to do.  Should be up to date. */
650	KASSERT(VOP_ISLOCKED(vp));
651	return 0;
652}
653
654/*
655 * tmpfs_remove: unlink a file.
656 *
657 * => Both directory (dvp) and file (vp) are locked.
658 * => We unlock and drop the reference on both.
659 */
660int
661tmpfs_remove(void *v)
662{
663	struct vop_remove_args /* {
664		struct vnode *a_dvp;
665		struct vnode *a_vp;
666		struct componentname *a_cnp;
667	} */ *ap = v;
668	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
669	tmpfs_node_t *dnode, *node;
670	tmpfs_dirent_t *de;
671	int error;
672
673	KASSERT(VOP_ISLOCKED(dvp));
674	KASSERT(VOP_ISLOCKED(vp));
675
676	if (vp->v_type == VDIR) {
677		error = EPERM;
678		goto out;
679	}
680	dnode = VP_TO_TMPFS_DIR(dvp);
681	node = VP_TO_TMPFS_NODE(vp);
682
683	/*
684	 * Files marked as immutable or append-only cannot be deleted.
685	 * Likewise, files residing on directories marked as append-only
686	 * cannot be deleted.
687	 */
688	if (node->tn_flags & (IMMUTABLE | APPEND)) {
689		error = EPERM;
690		goto out;
691	}
692	if (dnode->tn_flags & APPEND) {
693		error = EPERM;
694		goto out;
695	}
696
697	/* Lookup the directory entry (check the cached hint first). */
698	de = tmpfs_dir_cached(node);
699	if (de == NULL) {
700		struct componentname *cnp = ap->a_cnp;
701		de = tmpfs_dir_lookup(dnode, cnp);
702	}
703	KASSERT(de && de->td_node == node);
704
705	/*
706	 * Remove the entry from the directory (drops the link count) and
707	 * destroy it or replace with a whiteout.
708	 *
709	 * Note: the inode referred by it will not be destroyed until the
710	 * vnode is reclaimed/recycled.
711	 */
712
713	tmpfs_dir_detach(dnode, de);
714
715	if (ap->a_cnp->cn_flags & DOWHITEOUT)
716		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
717	else
718		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
719
720	if (node->tn_links > 0) {
721		/* We removed a hard link. */
722		tmpfs_update(vp, TMPFS_UPDATE_CTIME);
723	}
724	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
725	error = 0;
726out:
727	/* Drop the references and unlock the vnodes. */
728	vput(vp);
729	if (dvp == vp) {
730		vrele(dvp);
731	} else {
732		vput(dvp);
733	}
734	return error;
735}
736
737/*
738 * tmpfs_link: create a hard link.
739 */
740int
741tmpfs_link(void *v)
742{
743	struct vop_link_args /* {
744		struct vnode *a_dvp;
745		struct vnode *a_vp;
746		struct componentname *a_cnp;
747	} */ *ap = v;
748	vnode_t *dvp = ap->a_dvp;
749	vnode_t *vp = ap->a_vp;
750	struct componentname *cnp = ap->a_cnp;
751	tmpfs_node_t *dnode, *node;
752	tmpfs_dirent_t *de;
753	int error;
754
755	KASSERT(dvp != vp);
756	KASSERT(VOP_ISLOCKED(dvp));
757	KASSERT(vp->v_type != VDIR);
758	KASSERT(dvp->v_mount == vp->v_mount);
759
760	dnode = VP_TO_TMPFS_DIR(dvp);
761	node = VP_TO_TMPFS_NODE(vp);
762
763	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
764
765	/* Check for maximum number of links limit. */
766	if (node->tn_links == LINK_MAX) {
767		error = EMLINK;
768		goto out;
769	}
770	KASSERT(node->tn_links < LINK_MAX);
771
772	/* We cannot create links of files marked immutable or append-only. */
773	if (node->tn_flags & (IMMUTABLE | APPEND)) {
774		error = EPERM;
775		goto out;
776	}
777
778	/* Allocate a new directory entry to represent the inode. */
779	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
780	    cnp->cn_nameptr, cnp->cn_namelen, &de);
781	if (error) {
782		goto out;
783	}
784
785	/*
786	 * Insert the entry into the directory.
787	 * It will increase the inode link count.
788	 */
789	tmpfs_dir_attach(dnode, de, node);
790	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
791
792	/* Update the timestamps and trigger the event. */
793	if (node->tn_vnode) {
794		VN_KNOTE(node->tn_vnode, NOTE_LINK);
795	}
796	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
797	error = 0;
798out:
799	VOP_UNLOCK(vp);
800	vput(dvp);
801	return error;
802}
803
804int
805tmpfs_mkdir(void *v)
806{
807	struct vop_mkdir_v2_args /* {
808		struct vnode		*a_dvp;
809		struct vnode		**a_vpp;
810		struct componentname	*a_cnp;
811		struct vattr		*a_vap;
812	} */ *ap = v;
813	vnode_t *dvp = ap->a_dvp;
814	vnode_t **vpp = ap->a_vpp;
815	struct componentname *cnp = ap->a_cnp;
816	struct vattr *vap = ap->a_vap;
817
818	KASSERT(vap->va_type == VDIR);
819	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
820}
821
822int
823tmpfs_rmdir(void *v)
824{
825	struct vop_rmdir_args /* {
826		struct vnode		*a_dvp;
827		struct vnode		*a_vp;
828		struct componentname	*a_cnp;
829	} */ *ap = v;
830	vnode_t *dvp = ap->a_dvp;
831	vnode_t *vp = ap->a_vp;
832	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
833	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
834	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
835	tmpfs_dirent_t *de;
836	int error = 0;
837
838	KASSERT(VOP_ISLOCKED(dvp));
839	KASSERT(VOP_ISLOCKED(vp));
840
841	/*
842	 * Directories with more than two entries ('.' and '..') cannot be
843	 * removed.  There may be whiteout entries, which we will destroy.
844	 */
845	if (node->tn_size > 0) {
846		/*
847		 * If never had whiteout entries, the directory is certainly
848		 * not empty.  Otherwise, scan for any non-whiteout entry.
849		 */
850		if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
851			error = ENOTEMPTY;
852			goto out;
853		}
854		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
855			if (de->td_node != TMPFS_NODE_WHITEOUT) {
856				error = ENOTEMPTY;
857				goto out;
858			}
859		}
860		KASSERT(error == 0);
861	}
862
863	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
864
865	/* Lookup the directory entry (check the cached hint first). */
866	de = tmpfs_dir_cached(node);
867	if (de == NULL) {
868		struct componentname *cnp = ap->a_cnp;
869		de = tmpfs_dir_lookup(dnode, cnp);
870	}
871	KASSERT(de && de->td_node == node);
872
873	/* Check flags to see if we are allowed to remove the directory. */
874	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
875		error = EPERM;
876		goto out;
877	}
878
879	/* Decrement the link count for the virtual '.' entry. */
880	node->tn_links--;
881
882	/* Detach the directory entry from the directory. */
883	tmpfs_dir_detach(dnode, de);
884
885	/* Purge the cache for parent. */
886	cache_purge(dvp);
887
888	/*
889	 * Destroy the directory entry or replace it with a whiteout.
890	 *
891	 * Note: the inode referred by it will not be destroyed until the
892	 * vnode is reclaimed.
893	 */
894	if (ap->a_cnp->cn_flags & DOWHITEOUT)
895		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
896	else
897		tmpfs_free_dirent(tmp, de);
898
899	/* Destroy the whiteout entries from the node. */
900	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
901		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
902		tmpfs_dir_detach(node, de);
903		tmpfs_free_dirent(tmp, de);
904	}
905	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
906
907	KASSERT(node->tn_size == 0);
908	KASSERT(node->tn_links == 0);
909out:
910	/* Release the nodes. */
911	vput(dvp);
912	vput(vp);
913	return error;
914}
915
916int
917tmpfs_symlink(void *v)
918{
919	struct vop_symlink_v2_args /* {
920		struct vnode		*a_dvp;
921		struct vnode		**a_vpp;
922		struct componentname	*a_cnp;
923		struct vattr		*a_vap;
924		char			*a_target;
925	} */ *ap = v;
926	vnode_t *dvp = ap->a_dvp;
927	vnode_t **vpp = ap->a_vpp;
928	struct componentname *cnp = ap->a_cnp;
929	struct vattr *vap = ap->a_vap;
930	char *target = ap->a_target;
931
932	KASSERT(vap->va_type == VLNK);
933	return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
934}
935
936int
937tmpfs_readdir(void *v)
938{
939	struct vop_readdir_args /* {
940		struct vnode	*a_vp;
941		struct uio	*a_uio;
942		kauth_cred_t	a_cred;
943		int		*a_eofflag;
944		off_t		**a_cookies;
945		int		*ncookies;
946	} */ *ap = v;
947	vnode_t *vp = ap->a_vp;
948	struct uio *uio = ap->a_uio;
949	int *eofflag = ap->a_eofflag;
950	off_t **cookies = ap->a_cookies;
951	int *ncookies = ap->a_ncookies;
952	off_t startoff, cnt;
953	tmpfs_node_t *node;
954	int error;
955
956	KASSERT(VOP_ISLOCKED(vp));
957
958	/* This operation only makes sense on directory nodes. */
959	if (vp->v_type != VDIR) {
960		return ENOTDIR;
961	}
962	node = VP_TO_TMPFS_DIR(vp);
963	startoff = uio->uio_offset;
964	cnt = 0;
965
966	/*
967	 * Retrieve the directory entries, unless it is being destroyed.
968	 */
969	if (node->tn_links) {
970		error = tmpfs_dir_getdents(node, uio, &cnt);
971	} else {
972		error = 0;
973	}
974
975	if (eofflag != NULL) {
976		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
977	}
978	if (error || cookies == NULL || ncookies == NULL) {
979		return error;
980	}
981
982	/* Update NFS-related variables, if any. */
983	tmpfs_dirent_t *de = NULL;
984	off_t i, off = startoff;
985
986	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
987	*ncookies = cnt;
988
989	for (i = 0; i < cnt; i++) {
990		KASSERT(off != TMPFS_DIRSEQ_EOF);
991		if (off != TMPFS_DIRSEQ_DOT) {
992			if (off == TMPFS_DIRSEQ_DOTDOT) {
993				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
994			} else if (de != NULL) {
995				de = TAILQ_NEXT(de, td_entries);
996			} else {
997				de = tmpfs_dir_lookupbyseq(node, off);
998				KASSERT(de != NULL);
999				de = TAILQ_NEXT(de, td_entries);
1000			}
1001			if (de == NULL) {
1002				off = TMPFS_DIRSEQ_EOF;
1003			} else {
1004				off = tmpfs_dir_getseq(node, de);
1005			}
1006		} else {
1007			off = TMPFS_DIRSEQ_DOTDOT;
1008		}
1009		(*cookies)[i] = off;
1010	}
1011	KASSERT(uio->uio_offset == off);
1012	return error;
1013}
1014
1015int
1016tmpfs_readlink(void *v)
1017{
1018	struct vop_readlink_args /* {
1019		struct vnode	*a_vp;
1020		struct uio	*a_uio;
1021		kauth_cred_t	a_cred;
1022	} */ *ap = v;
1023	vnode_t *vp = ap->a_vp;
1024	struct uio *uio = ap->a_uio;
1025	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1026	int error;
1027
1028	KASSERT(VOP_ISLOCKED(vp));
1029	KASSERT(uio->uio_offset == 0);
1030	KASSERT(vp->v_type == VLNK);
1031
1032	/* Note: readlink(2) returns the path without NUL terminator. */
1033	if (node->tn_size > 0) {
1034		error = uiomove(node->tn_spec.tn_lnk.tn_link,
1035		    MIN(node->tn_size - 1, uio->uio_resid), uio);
1036	} else {
1037		error = 0;
1038	}
1039	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1040
1041	return error;
1042}
1043
1044int
1045tmpfs_inactive(void *v)
1046{
1047	struct vop_inactive_args /* {
1048		struct vnode *a_vp;
1049		bool *a_recycle;
1050	} */ *ap = v;
1051	vnode_t *vp = ap->a_vp;
1052	tmpfs_node_t *node;
1053
1054	KASSERT(VOP_ISLOCKED(vp));
1055
1056	node = VP_TO_TMPFS_NODE(vp);
1057	if (node->tn_links == 0) {
1058		/*
1059		 * Mark node as dead by setting its generation to zero.
1060		 */
1061		atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1062		*ap->a_recycle = true;
1063	} else {
1064		*ap->a_recycle = false;
1065	}
1066	VOP_UNLOCK(vp);
1067
1068	return 0;
1069}
1070
1071int
1072tmpfs_reclaim(void *v)
1073{
1074	struct vop_reclaim_args /* {
1075		struct vnode *a_vp;
1076	} */ *ap = v;
1077	vnode_t *vp = ap->a_vp;
1078	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1079	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1080	bool recycle;
1081
1082	mutex_enter(&node->tn_vlock);
1083	VOP_LOCK(vp, LK_EXCLUSIVE);
1084
1085	/* Disassociate inode from vnode. */
1086	node->tn_vnode = NULL;
1087	vp->v_data = NULL;
1088
1089	/* If inode is not referenced, i.e. no links, then destroy it. */
1090	recycle = node->tn_links == 0 && TMPFS_NODE_RECLAIMING(node) == 0;
1091
1092	VOP_UNLOCK(vp);
1093	mutex_exit(&node->tn_vlock);
1094
1095	if (recycle) {
1096		tmpfs_free_node(tmp, node);
1097	}
1098	return 0;
1099}
1100
1101int
1102tmpfs_pathconf(void *v)
1103{
1104	struct vop_pathconf_args /* {
1105		struct vnode	*a_vp;
1106		int		a_name;
1107		register_t	*a_retval;
1108	} */ *ap = v;
1109	const int name = ap->a_name;
1110	register_t *retval = ap->a_retval;
1111	int error = 0;
1112
1113	switch (name) {
1114	case _PC_LINK_MAX:
1115		*retval = LINK_MAX;
1116		break;
1117	case _PC_NAME_MAX:
1118		*retval = TMPFS_MAXNAMLEN;
1119		break;
1120	case _PC_PATH_MAX:
1121		*retval = PATH_MAX;
1122		break;
1123	case _PC_PIPE_BUF:
1124		*retval = PIPE_BUF;
1125		break;
1126	case _PC_CHOWN_RESTRICTED:
1127		*retval = 1;
1128		break;
1129	case _PC_NO_TRUNC:
1130		*retval = 1;
1131		break;
1132	case _PC_SYNC_IO:
1133		*retval = 1;
1134		break;
1135	case _PC_FILESIZEBITS:
1136		*retval = sizeof(off_t) * CHAR_BIT;
1137		break;
1138	default:
1139		error = EINVAL;
1140	}
1141	return error;
1142}
1143
1144int
1145tmpfs_advlock(void *v)
1146{
1147	struct vop_advlock_args /* {
1148		struct vnode	*a_vp;
1149		void *		a_id;
1150		int		a_op;
1151		struct flock	*a_fl;
1152		int		a_flags;
1153	} */ *ap = v;
1154	vnode_t *vp = ap->a_vp;
1155	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1156
1157	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1158}
1159
1160int
1161tmpfs_getpages(void *v)
1162{
1163	struct vop_getpages_args /* {
1164		struct vnode *a_vp;
1165		voff_t a_offset;
1166		struct vm_page **a_m;
1167		int *a_count;
1168		int a_centeridx;
1169		vm_prot_t a_access_type;
1170		int a_advice;
1171		int a_flags;
1172	} */ * const ap = v;
1173	vnode_t *vp = ap->a_vp;
1174	const voff_t offset = ap->a_offset;
1175	struct vm_page **pgs = ap->a_m;
1176	const int centeridx = ap->a_centeridx;
1177	const vm_prot_t access_type = ap->a_access_type;
1178	const int advice = ap->a_advice;
1179	const int flags = ap->a_flags;
1180	int error, npages = *ap->a_count;
1181	tmpfs_node_t *node;
1182	struct uvm_object *uobj;
1183
1184	KASSERT(vp->v_type == VREG);
1185	KASSERT(mutex_owned(vp->v_interlock));
1186
1187	node = VP_TO_TMPFS_NODE(vp);
1188	uobj = node->tn_spec.tn_reg.tn_aobj;
1189
1190	/*
1191	 * Currently, PGO_PASTEOF is not supported.
1192	 */
1193	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1194		if ((flags & PGO_LOCKED) == 0)
1195			mutex_exit(vp->v_interlock);
1196		return EINVAL;
1197	}
1198
1199	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1200		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1201	}
1202
1203	if ((flags & PGO_LOCKED) != 0)
1204		return EBUSY;
1205
1206	if ((flags & PGO_NOTIMESTAMP) == 0) {
1207		u_int tflags = 0;
1208
1209		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1210			tflags |= TMPFS_UPDATE_ATIME;
1211
1212		if ((access_type & VM_PROT_WRITE) != 0) {
1213			tflags |= TMPFS_UPDATE_MTIME;
1214			if (vp->v_mount->mnt_flag & MNT_RELATIME)
1215				tflags |= TMPFS_UPDATE_ATIME;
1216		}
1217		tmpfs_update(vp, tflags);
1218	}
1219
1220	/*
1221	 * Invoke the pager.
1222	 *
1223	 * Clean the array of pages before.  XXX: PR/32166
1224	 * Note that vnode lock is shared with underlying UVM object.
1225	 */
1226	if (pgs) {
1227		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1228	}
1229	KASSERT(vp->v_interlock == uobj->vmobjlock);
1230
1231	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1232	    access_type, advice, flags | PGO_ALLPAGES);
1233
1234#if defined(DEBUG)
1235	if (!error && pgs) {
1236		for (int i = 0; i < npages; i++) {
1237			KASSERT(pgs[i] != NULL);
1238		}
1239	}
1240#endif
1241	return error;
1242}
1243
1244int
1245tmpfs_putpages(void *v)
1246{
1247	struct vop_putpages_args /* {
1248		struct vnode *a_vp;
1249		voff_t a_offlo;
1250		voff_t a_offhi;
1251		int a_flags;
1252	} */ * const ap = v;
1253	vnode_t *vp = ap->a_vp;
1254	const voff_t offlo = ap->a_offlo;
1255	const voff_t offhi = ap->a_offhi;
1256	const int flags = ap->a_flags;
1257	tmpfs_node_t *node;
1258	struct uvm_object *uobj;
1259	int error;
1260
1261	KASSERT(mutex_owned(vp->v_interlock));
1262
1263	if (vp->v_type != VREG) {
1264		mutex_exit(vp->v_interlock);
1265		return 0;
1266	}
1267
1268	node = VP_TO_TMPFS_NODE(vp);
1269	uobj = node->tn_spec.tn_reg.tn_aobj;
1270
1271	KASSERT(vp->v_interlock == uobj->vmobjlock);
1272	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1273
1274	/* XXX mtime */
1275
1276	return error;
1277}
1278
1279int
1280tmpfs_whiteout(void *v)
1281{
1282	struct vop_whiteout_args /* {
1283		struct vnode		*a_dvp;
1284		struct componentname	*a_cnp;
1285		int			a_flags;
1286	} */ *ap = v;
1287	vnode_t *dvp = ap->a_dvp;
1288	struct componentname *cnp = ap->a_cnp;
1289	const int flags = ap->a_flags;
1290	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1291	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1292	tmpfs_dirent_t *de;
1293	int error;
1294
1295	switch (flags) {
1296	case LOOKUP:
1297		break;
1298	case CREATE:
1299		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1300		    cnp->cn_namelen, &de);
1301		if (error)
1302			return error;
1303		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1304		break;
1305	case DELETE:
1306		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1307		de = tmpfs_dir_lookup(dnode, cnp);
1308		if (de == NULL)
1309			return ENOENT;
1310		tmpfs_dir_detach(dnode, de);
1311		tmpfs_free_dirent(tmp, de);
1312		break;
1313	}
1314	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1315	return 0;
1316}
1317
1318int
1319tmpfs_print(void *v)
1320{
1321	struct vop_print_args /* {
1322		struct vnode	*a_vp;
1323	} */ *ap = v;
1324	vnode_t *vp = ap->a_vp;
1325	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1326
1327	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1328	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1329	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1330	    node->tn_gid, (uintmax_t)node->tn_size);
1331	if (vp->v_type == VFIFO) {
1332		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1333	}
1334	printf("\n");
1335	return 0;
1336}
1337