tmpfs_vnops.c revision 1.130
1/*	$NetBSD: tmpfs_vnops.c,v 1.130 2017/03/30 09:09:26 hannken Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.130 2017/03/30 09:09:26 hannken Exp $");
39
40#include <sys/param.h>
41#include <sys/dirent.h>
42#include <sys/fcntl.h>
43#include <sys/event.h>
44#include <sys/malloc.h>
45#include <sys/namei.h>
46#include <sys/stat.h>
47#include <sys/uio.h>
48#include <sys/unistd.h>
49#include <sys/vnode.h>
50#include <sys/lockf.h>
51#include <sys/kauth.h>
52#include <sys/atomic.h>
53
54#include <uvm/uvm.h>
55
56#include <miscfs/fifofs/fifo.h>
57#include <miscfs/genfs/genfs.h>
58#include <fs/tmpfs/tmpfs_vnops.h>
59#include <fs/tmpfs/tmpfs.h>
60
/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * tmpfs_vnodeop_entries pairs each vnode operation descriptor with the
 * function that implements it; the list is terminated by a
 * { NULL, NULL } sentinel.  tmpfs_vnodeop_opv_desc ties that list to the
 * tmpfs_vnodeop_p pointer declared here (the pointer is not assigned in
 * this file).
 *
 * The vop_default_desc entry names vn_default_error -- presumably the
 * handler for operations not listed; confirm against the vnode layer.
 * fallocate and fdiscard are routed to genfs_eopnotsupp.
 */
int (**tmpfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_lookup_desc,		tmpfs_lookup },
	{ &vop_create_desc,		tmpfs_create },
	{ &vop_mknod_desc,		tmpfs_mknod },
	{ &vop_open_desc,		tmpfs_open },
	{ &vop_close_desc,		tmpfs_close },
	{ &vop_access_desc,		tmpfs_access },
	{ &vop_getattr_desc,		tmpfs_getattr },
	{ &vop_setattr_desc,		tmpfs_setattr },
	{ &vop_read_desc,		tmpfs_read },
	{ &vop_write_desc,		tmpfs_write },
	{ &vop_fallocate_desc,		genfs_eopnotsupp },
	{ &vop_fdiscard_desc,		genfs_eopnotsupp },
	{ &vop_ioctl_desc,		tmpfs_ioctl },
	{ &vop_fcntl_desc,		tmpfs_fcntl },
	{ &vop_poll_desc,		tmpfs_poll },
	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
	{ &vop_revoke_desc,		tmpfs_revoke },
	{ &vop_mmap_desc,		tmpfs_mmap },
	{ &vop_fsync_desc,		tmpfs_fsync },
	{ &vop_seek_desc,		tmpfs_seek },
	{ &vop_remove_desc,		tmpfs_remove },
	{ &vop_link_desc,		tmpfs_link },
	{ &vop_rename_desc,		tmpfs_rename },
	{ &vop_mkdir_desc,		tmpfs_mkdir },
	{ &vop_rmdir_desc,		tmpfs_rmdir },
	{ &vop_symlink_desc,		tmpfs_symlink },
	{ &vop_readdir_desc,		tmpfs_readdir },
	{ &vop_readlink_desc,		tmpfs_readlink },
	{ &vop_abortop_desc,		tmpfs_abortop },
	{ &vop_inactive_desc,		tmpfs_inactive },
	{ &vop_reclaim_desc,		tmpfs_reclaim },
	{ &vop_lock_desc,		tmpfs_lock },
	{ &vop_unlock_desc,		tmpfs_unlock },
	{ &vop_bmap_desc,		tmpfs_bmap },
	{ &vop_strategy_desc,		tmpfs_strategy },
	{ &vop_print_desc,		tmpfs_print },
	{ &vop_pathconf_desc,		tmpfs_pathconf },
	{ &vop_islocked_desc,		tmpfs_islocked },
	{ &vop_advlock_desc,		tmpfs_advlock },
	{ &vop_bwrite_desc,		tmpfs_bwrite },
	{ &vop_getpages_desc,		tmpfs_getpages },
	{ &vop_putpages_desc,		tmpfs_putpages },
	{ &vop_whiteout_desc,		tmpfs_whiteout },
	{ NULL, NULL }
};

/* Binds the operations list above to the tmpfs_vnodeop_p vector pointer. */
const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
};
116
/*
 * tmpfs_lookup: path name traversal routine.
 *
 * Arguments: dvp (directory being searched), vpp (result),
 * cnp (component name - path).
 *
 * => Caller holds a reference and lock on dvp (the lock is asserted on
 *    entry and again on exit).
 * => On success *vpp carries a reference (taken by vref() for "." or
 *    obtained through vcache_get()/cache_lookup() otherwise).
 * => NOTE(review): no explicit lock is taken on *vpp in this function;
 *    confirm whether the vop_lookup_v2 contract expects it unlocked.
 *
 * Returns 0 on success, EJUSTRETURN when the last component is missing
 * but the operation is CREATE or RENAME and the directory is writable,
 * ENOENT when the name does not exist, or another errno (EROFS, EINVAL,
 * EISDIR, ENOTDIR, EPERM, or whatever VOP_ACCESS()/vcache_get() return).
 */
int
tmpfs_lookup(void *v)
{
	struct vop_lookup_v2_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
	tmpfs_node_t *dnode, *tnode;
	tmpfs_dirent_t *de;
	int cachefound, iswhiteout;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));

	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* Check accessibility of directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
	if (error) {
		goto out;
	}

	/*
	 * If requesting the last path component on a read-only file system
	 * with a write operation, deny it.
	 */
	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/*
	 * Avoid doing a linear scan of the directory if the requested
	 * directory/name couple is already in the cache.
	 */
	cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
				  cnp->cn_nameiop, cnp->cn_flags,
				  &iswhiteout, vpp);
	if (iswhiteout) {
		cnp->cn_flags |= ISWHITEOUT;
	}
	if (cachefound && *vpp == NULLVP) {
		/* Negative cache hit. */
		error = ENOENT;
		goto out;
	} else if (cachefound) {
		/* Positive cache hit: *vpp was filled in by cache_lookup(). */
		error = 0;
		goto out;
	}

	/*
	 * Treat an unlinked directory as empty (no "." or "..")
	 */
	if (dnode->tn_links == 0) {
		KASSERT(dnode->tn_size == 0);
		error = ENOENT;
		goto out;
	}

	if (cnp->cn_flags & ISDOTDOT) {
		tmpfs_node_t *pnode;

		/*
		 * Lookup of ".." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EINVAL;
			goto out;
		}
		KASSERT(dnode->tn_type == VDIR);
		pnode = dnode->tn_spec.tn_dir.tn_parent;
		if (pnode == NULL) {
			error = ENOENT;
			goto out;
		}

		/*
		 * Fetch the parent's vnode, keyed by the node pointer.
		 * Note: this path jumps to "out", so the result is not
		 * entered into the name cache.
		 */
		error = vcache_get(dvp->v_mount, &pnode, sizeof(pnode), vpp);
		goto out;
	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/*
		 * Lookup of "." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EISDIR;
			goto out;
		}
		/* The result is the directory itself; take an extra reference. */
		vref(dvp);
		*vpp = dvp;
		error = 0;
		goto done;
	}

	/*
	 * Other lookup cases: perform directory scan.
	 */
	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
		/*
		 * The entry was not found in the directory.  This is valid
		 * if we are creating or renaming an entry and are working
		 * on the last component of the path name.
		 */
		if (lastcn && (cnp->cn_nameiop == CREATE ||
		    cnp->cn_nameiop == RENAME)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
			if (error) {
				goto out;
			}
			error = EJUSTRETURN;
		} else {
			error = ENOENT;
		}
		/* A whiteout entry counts as "not found" but is reported. */
		if (de) {
			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
			cnp->cn_flags |= ISWHITEOUT;
		}
		/* *vpp is still NULL here, so "done" records a negative entry. */
		goto done;
	}

	tnode = de->td_node;

	/*
	 * If it is not the last path component and found a non-directory
	 * or non-link entry (which may itself be pointing to a directory),
	 * raise an error.
	 */
	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
		error = ENOTDIR;
		goto out;
	}

	/* Check the permissions. */
	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		if (error)
			goto out;

		/*
		 * Sticky directory: deletion additionally requires passing
		 * the genfs_can_sticky() ownership check.  Any failure is
		 * reported as EPERM.
		 */
		if ((dnode->tn_mode & S_ISTXT) != 0) {
			error = kauth_authorize_vnode(cnp->cn_cred,
			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
			    dnode->tn_uid, tnode->tn_uid));
			if (error) {
				error = EPERM;
				goto out;
			}
		}
	}

	/* Get a vnode for the matching entry. */
	error = vcache_get(dvp->v_mount, &tnode, sizeof(tnode), vpp);
done:
	/*
	 * Cache the result, unless request was for creation (as it does
	 * not improve the performance).
	 */
	if (cnp->cn_nameiop != CREATE) {
		cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
			    cnp->cn_flags);
	}
out:
	KASSERT(VOP_ISLOCKED(dvp));

	return error;
}
297
298int
299tmpfs_create(void *v)
300{
301	struct vop_create_v3_args /* {
302		struct vnode		*a_dvp;
303		struct vnode		**a_vpp;
304		struct componentname	*a_cnp;
305		struct vattr		*a_vap;
306	} */ *ap = v;
307	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
308	struct componentname *cnp = ap->a_cnp;
309	struct vattr *vap = ap->a_vap;
310
311	KASSERT(VOP_ISLOCKED(dvp));
312	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
313	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
314}
315
316int
317tmpfs_mknod(void *v)
318{
319	struct vop_mknod_v3_args /* {
320		struct vnode		*a_dvp;
321		struct vnode		**a_vpp;
322		struct componentname	*a_cnp;
323		struct vattr		*a_vap;
324	} */ *ap = v;
325	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
326	struct componentname *cnp = ap->a_cnp;
327	struct vattr *vap = ap->a_vap;
328	enum vtype vt = vap->va_type;
329
330	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
331		*vpp = NULL;
332		return EINVAL;
333	}
334	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
335}
336
337int
338tmpfs_open(void *v)
339{
340	struct vop_open_args /* {
341		struct vnode	*a_vp;
342		int		a_mode;
343		kauth_cred_t	a_cred;
344	} */ *ap = v;
345	vnode_t *vp = ap->a_vp;
346	mode_t mode = ap->a_mode;
347	tmpfs_node_t *node;
348
349	KASSERT(VOP_ISLOCKED(vp));
350
351	node = VP_TO_TMPFS_NODE(vp);
352
353	/* If the file is marked append-only, deny write requests. */
354	if ((node->tn_flags & APPEND) != 0 &&
355	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
356		return EPERM;
357	}
358	return 0;
359}
360
361int
362tmpfs_close(void *v)
363{
364	struct vop_close_args /* {
365		struct vnode	*a_vp;
366		int		a_fflag;
367		kauth_cred_t	a_cred;
368	} */ *ap = v;
369	vnode_t *vp __diagused = ap->a_vp;
370
371	KASSERT(VOP_ISLOCKED(vp));
372	return 0;
373}
374
375int
376tmpfs_access(void *v)
377{
378	struct vop_access_args /* {
379		struct vnode	*a_vp;
380		int		a_mode;
381		kauth_cred_t	a_cred;
382	} */ *ap = v;
383	vnode_t *vp = ap->a_vp;
384	mode_t mode = ap->a_mode;
385	kauth_cred_t cred = ap->a_cred;
386	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
387	const bool writing = (mode & VWRITE) != 0;
388
389	KASSERT(VOP_ISLOCKED(vp));
390
391	/* Possible? */
392	switch (vp->v_type) {
393	case VDIR:
394	case VLNK:
395	case VREG:
396		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
397			return EROFS;
398		}
399		break;
400	case VBLK:
401	case VCHR:
402	case VSOCK:
403	case VFIFO:
404		break;
405	default:
406		return EINVAL;
407	}
408	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
409		return EPERM;
410	}
411
412	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
413	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
414	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
415}
416
417int
418tmpfs_getattr(void *v)
419{
420	struct vop_getattr_args /* {
421		struct vnode	*a_vp;
422		struct vattr	*a_vap;
423		kauth_cred_t	a_cred;
424	} */ *ap = v;
425	vnode_t *vp = ap->a_vp;
426	struct vattr *vap = ap->a_vap;
427	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
428
429	vattr_null(vap);
430
431	vap->va_type = vp->v_type;
432	vap->va_mode = node->tn_mode;
433	vap->va_nlink = node->tn_links;
434	vap->va_uid = node->tn_uid;
435	vap->va_gid = node->tn_gid;
436	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
437	vap->va_fileid = node->tn_id;
438	vap->va_size = node->tn_size;
439	vap->va_blocksize = PAGE_SIZE;
440	vap->va_atime = node->tn_atime;
441	vap->va_mtime = node->tn_mtime;
442	vap->va_ctime = node->tn_ctime;
443	vap->va_birthtime = node->tn_birthtime;
444	vap->va_gen = TMPFS_NODE_GEN(node);
445	vap->va_flags = node->tn_flags;
446	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
447	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
448	vap->va_bytes = round_page(node->tn_size);
449	vap->va_filerev = VNOVAL;
450	vap->va_vaflags = 0;
451	vap->va_spare = VNOVAL; /* XXX */
452
453	return 0;
454}
455
456int
457tmpfs_setattr(void *v)
458{
459	struct vop_setattr_args /* {
460		struct vnode	*a_vp;
461		struct vattr	*a_vap;
462		kauth_cred_t	a_cred;
463	} */ *ap = v;
464	vnode_t *vp = ap->a_vp;
465	struct vattr *vap = ap->a_vap;
466	kauth_cred_t cred = ap->a_cred;
467	lwp_t *l = curlwp;
468	int error = 0;
469
470	KASSERT(VOP_ISLOCKED(vp));
471
472	/* Abort if any unsettable attribute is given. */
473	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
474	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
475	    vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
476	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
477	    vap->va_bytes != VNOVAL) {
478		return EINVAL;
479	}
480
481	if (error == 0 && vap->va_flags != VNOVAL)
482		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
483
484	if (error == 0 && vap->va_size != VNOVAL)
485		error = tmpfs_chsize(vp, vap->va_size, cred, l);
486
487	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
488		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
489
490	if (error == 0 && vap->va_mode != VNOVAL)
491		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
492
493	const bool chsometime =
494	    vap->va_atime.tv_sec != VNOVAL ||
495	    vap->va_mtime.tv_sec != VNOVAL ||
496	    vap->va_birthtime.tv_sec != VNOVAL;
497	if (error == 0 && chsometime) {
498		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
499		    &vap->va_birthtime, vap->va_vaflags, cred, l);
500	}
501	return error;
502}
503
504int
505tmpfs_read(void *v)
506{
507	struct vop_read_args /* {
508		struct vnode *a_vp;
509		struct uio *a_uio;
510		int a_ioflag;
511		kauth_cred_t a_cred;
512	} */ *ap = v;
513	vnode_t *vp = ap->a_vp;
514	struct uio *uio = ap->a_uio;
515	const int ioflag = ap->a_ioflag;
516	tmpfs_node_t *node;
517	struct uvm_object *uobj;
518	int error;
519
520	KASSERT(VOP_ISLOCKED(vp));
521
522	if (vp->v_type == VDIR) {
523		return EISDIR;
524	}
525	if (uio->uio_offset < 0 || vp->v_type != VREG) {
526		return EINVAL;
527	}
528
529	/* Note: reading zero bytes should not update atime. */
530	if (uio->uio_resid == 0) {
531		return 0;
532	}
533
534	node = VP_TO_TMPFS_NODE(vp);
535	uobj = node->tn_spec.tn_reg.tn_aobj;
536	error = 0;
537
538	while (error == 0 && uio->uio_resid > 0) {
539		vsize_t len;
540
541		if (node->tn_size <= uio->uio_offset) {
542			break;
543		}
544		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
545		if (len == 0) {
546			break;
547		}
548		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
549		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
550	}
551
552	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
553	return error;
554}
555
/*
 * tmpfs_write: copy data from the caller's uio into a regular file.
 *
 * The file is grown with tmpfs_reg_resize() before copying when the
 * write extends past the current size.  If the copy fails, the file is
 * resized back to the size it had on entry.
 *
 * Returns EROFS on a read-only mount, EINVAL for a negative offset or a
 * non-regular vnode, the tmpfs_reg_resize() error if growing fails, the
 * ubc_uiomove() error if copying fails, or 0 on success (including a
 * zero-length write).
 */
int
tmpfs_write(void *v)
{
	struct vop_write_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		int		a_ioflag;
		kauth_cred_t	a_cred;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	const int ioflag = ap->a_ioflag;
	tmpfs_node_t *node;
	struct uvm_object *uobj;
	off_t oldsize;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);
	/* Remember the size on entry so a failed write can be rolled back. */
	oldsize = node->tn_size;

	if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
		error = EROFS;
		goto out;
	}

	if (uio->uio_offset < 0 || vp->v_type != VREG) {
		error = EINVAL;
		goto out;
	}
	if (uio->uio_resid == 0) {
		error = 0;
		goto out;
	}
	/* Append mode: always start writing at the current end of file. */
	if (ioflag & IO_APPEND) {
		uio->uio_offset = node->tn_size;
	}

	/* Grow the file up front if the write extends past its end. */
	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
		if (error)
			goto out;
	}

	uobj = node->tn_spec.tn_reg.tn_aobj;
	error = 0;
	while (error == 0 && uio->uio_resid > 0) {
		vsize_t len;

		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
		if (len == 0) {
			break;
		}
		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
	}
	/* Copy failed: undo the resize; the result of that is ignored. */
	if (error) {
		(void)tmpfs_reg_resize(vp, oldsize);
	}

	/* Timestamps and the write event are posted even if the copy failed. */
	tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
	VN_KNOTE(vp, NOTE_WRITE);
out:
	/*
	 * Invariants: a failed write leaves the size unchanged, and a
	 * successful one has consumed the whole request.
	 */
	if (error) {
		KASSERT(oldsize == node->tn_size);
	} else {
		KASSERT(uio->uio_resid == 0);
	}
	return error;
}
627
628int
629tmpfs_fsync(void *v)
630{
631	struct vop_fsync_args /* {
632		struct vnode *a_vp;
633		kauth_cred_t a_cred;
634		int a_flags;
635		off_t a_offlo;
636		off_t a_offhi;
637		struct lwp *a_l;
638	} */ *ap = v;
639	vnode_t *vp __diagused = ap->a_vp;
640
641	/* Nothing to do.  Should be up to date. */
642	KASSERT(VOP_ISLOCKED(vp));
643	return 0;
644}
645
/*
 * tmpfs_remove: unlink a file.
 *
 * => Both directory (dvp) and file (vp) are locked.
 * => We unlock and drop the reference on both, on success and on error.
 *
 * Returns EPERM if vp is a directory, if the file is IMMUTABLE or
 * APPEND, or if the directory is APPEND; 0 otherwise.
 */
int
tmpfs_remove(void *v)
{
	struct vop_remove_args /* {
		struct vnode *a_dvp;
		struct vnode *a_vp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
	tmpfs_node_t *dnode, *node;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(VOP_ISLOCKED(vp));

	/* Directories are not removed through this operation. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * Files marked as immutable or append-only cannot be deleted.
	 * Likewise, files residing on directories marked as append-only
	 * cannot be deleted.
	 */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}
	if (dnode->tn_flags & APPEND) {
		error = EPERM;
		goto out;
	}

	/* Lookup the directory entry (check the cached hint first). */
	de = tmpfs_dir_cached(node);
	if (de == NULL) {
		struct componentname *cnp = ap->a_cnp;
		de = tmpfs_dir_lookup(dnode, cnp);
	}
	KASSERT(de && de->td_node == node);

	/*
	 * Remove the entry from the directory (drops the link count) and
	 * destroy it or replace with a whiteout.
	 *
	 * Note: the inode referred by it will not be destroyed until the
	 * vnode is reclaimed/recycled.
	 */

	tmpfs_dir_detach(dnode, de);

	/* With DOWHITEOUT the same dirent is re-attached as a whiteout. */
	if (ap->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
	else
		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);

	if (node->tn_links > 0) {
		/* We removed a hard link. */
		tmpfs_update(vp, TMPFS_UPDATE_CTIME);
	}
	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
	error = 0;
out:
	/* Drop the references and unlock the vnodes. */
	vput(vp);
	/* If both are the same vnode it was already unlocked by vput(vp). */
	if (dvp == vp) {
		vrele(dvp);
	} else {
		vput(dvp);
	}
	return error;
}
728
729/*
730 * tmpfs_link: create a hard link.
731 */
732int
733tmpfs_link(void *v)
734{
735	struct vop_link_v2_args /* {
736		struct vnode *a_dvp;
737		struct vnode *a_vp;
738		struct componentname *a_cnp;
739	} */ *ap = v;
740	vnode_t *dvp = ap->a_dvp;
741	vnode_t *vp = ap->a_vp;
742	struct componentname *cnp = ap->a_cnp;
743	tmpfs_node_t *dnode, *node;
744	tmpfs_dirent_t *de;
745	int error;
746
747	KASSERT(dvp != vp);
748	KASSERT(VOP_ISLOCKED(dvp));
749	KASSERT(vp->v_type != VDIR);
750	KASSERT(dvp->v_mount == vp->v_mount);
751
752	dnode = VP_TO_TMPFS_DIR(dvp);
753	node = VP_TO_TMPFS_NODE(vp);
754
755	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
756
757	/* Check for maximum number of links limit. */
758	if (node->tn_links == LINK_MAX) {
759		error = EMLINK;
760		goto out;
761	}
762	KASSERT(node->tn_links < LINK_MAX);
763
764	/* We cannot create links of files marked immutable or append-only. */
765	if (node->tn_flags & (IMMUTABLE | APPEND)) {
766		error = EPERM;
767		goto out;
768	}
769
770	/* Allocate a new directory entry to represent the inode. */
771	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
772	    cnp->cn_nameptr, cnp->cn_namelen, &de);
773	if (error) {
774		goto out;
775	}
776
777	/*
778	 * Insert the entry into the directory.
779	 * It will increase the inode link count.
780	 */
781	tmpfs_dir_attach(dnode, de, node);
782	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
783
784	/* Update the timestamps and trigger the event. */
785	if (node->tn_vnode) {
786		VN_KNOTE(node->tn_vnode, NOTE_LINK);
787	}
788	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
789	error = 0;
790out:
791	VOP_UNLOCK(vp);
792	return error;
793}
794
795int
796tmpfs_mkdir(void *v)
797{
798	struct vop_mkdir_v3_args /* {
799		struct vnode		*a_dvp;
800		struct vnode		**a_vpp;
801		struct componentname	*a_cnp;
802		struct vattr		*a_vap;
803	} */ *ap = v;
804	vnode_t *dvp = ap->a_dvp;
805	vnode_t **vpp = ap->a_vpp;
806	struct componentname *cnp = ap->a_cnp;
807	struct vattr *vap = ap->a_vap;
808
809	KASSERT(vap->va_type == VDIR);
810	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
811}
812
/*
 * tmpfs_rmdir: remove the directory vp from its parent directory dvp.
 *
 * => Both dvp and vp are locked on entry (asserted below).
 * => Both are released with vput() before returning, on success and on
 *    error.
 *
 * Returns ENOTEMPTY if the directory holds any non-whiteout entry,
 * EPERM if the parent is APPEND or the directory itself is IMMUTABLE or
 * APPEND, and 0 on success.
 */
int
tmpfs_rmdir(void *v)
{
	struct vop_rmdir_args /* {
		struct vnode		*a_dvp;
		struct vnode		*a_vp;
		struct componentname	*a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp;
	vnode_t *vp = ap->a_vp;
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
	tmpfs_dirent_t *de;
	int error = 0;

	KASSERT(VOP_ISLOCKED(dvp));
	KASSERT(VOP_ISLOCKED(vp));

	/*
	 * Directories with more than two entries ('.' and '..') cannot be
	 * removed.  There may be whiteout entries, which we will destroy.
	 */
	if (node->tn_size > 0) {
		/*
		 * If never had whiteout entries, the directory is certainly
		 * not empty.  Otherwise, scan for any non-whiteout entry.
		 */
		if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
			error = ENOTEMPTY;
			goto out;
		}
		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
			if (de->td_node != TMPFS_NODE_WHITEOUT) {
				error = ENOTEMPTY;
				goto out;
			}
		}
		KASSERT(error == 0);
	}

	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);

	/* Lookup the directory entry (check the cached hint first). */
	de = tmpfs_dir_cached(node);
	if (de == NULL) {
		struct componentname *cnp = ap->a_cnp;
		de = tmpfs_dir_lookup(dnode, cnp);
	}
	KASSERT(de && de->td_node == node);

	/* Check flags to see if we are allowed to remove the directory. */
	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Decrement the link count for the virtual '.' entry. */
	node->tn_links--;

	/* Detach the directory entry from the directory. */
	tmpfs_dir_detach(dnode, de);

	/* Purge the cache for parent. */
	cache_purge(dvp);

	/*
	 * Destroy the directory entry or replace it with a whiteout.
	 *
	 * Note: the inode referred by it will not be destroyed until the
	 * vnode is reclaimed.
	 */
	if (ap->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
	else
		tmpfs_free_dirent(tmp, de);

	/* Destroy the whiteout entries from the node. */
	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
		tmpfs_dir_detach(node, de);
		tmpfs_free_dirent(tmp, de);
	}
	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);

	/* The removed directory must now be empty and fully unlinked. */
	KASSERT(node->tn_size == 0);
	KASSERT(node->tn_links == 0);
out:
	/* Release the nodes. */
	vput(dvp);
	vput(vp);
	return error;
}
906
907int
908tmpfs_symlink(void *v)
909{
910	struct vop_symlink_v3_args /* {
911		struct vnode		*a_dvp;
912		struct vnode		**a_vpp;
913		struct componentname	*a_cnp;
914		struct vattr		*a_vap;
915		char			*a_target;
916	} */ *ap = v;
917	vnode_t *dvp = ap->a_dvp;
918	vnode_t **vpp = ap->a_vpp;
919	struct componentname *cnp = ap->a_cnp;
920	struct vattr *vap = ap->a_vap;
921	char *target = ap->a_target;
922
923	KASSERT(vap->va_type == VLNK);
924	return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
925}
926
/*
 * tmpfs_readdir: read directory entries into the caller's uio and,
 * when requested, build the matching array of seek cookies.
 *
 * Returns ENOTDIR if vp is not a directory, otherwise the result of
 * tmpfs_dir_getdents() (0 for a directory with no links left).
 * *a_eofflag, if supplied, is set when the read succeeded and the uio
 * offset reached TMPFS_DIRSEQ_EOF.  If both a_cookies and a_ncookies
 * are supplied and no error occurred, *a_cookies receives an M_TEMP
 * array of cnt offsets -- presumably freed by the caller; confirm.
 */
int
tmpfs_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		kauth_cred_t	a_cred;
		int		*a_eofflag;
		off_t		**a_cookies;
		int		*a_ncookies;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	off_t startoff, cnt;
	tmpfs_node_t *node;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}
	node = VP_TO_TMPFS_DIR(vp);
	/* Remember where the read started; cookies are rebuilt from here. */
	startoff = uio->uio_offset;
	cnt = 0;

	/*
	 * Retrieve the directory entries, unless it is being destroyed.
	 */
	if (node->tn_links) {
		error = tmpfs_dir_getdents(node, uio, &cnt);
	} else {
		error = 0;
	}

	if (eofflag != NULL) {
		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
	}
	if (error || cookies == NULL || ncookies == NULL) {
		return error;
	}

	/* Update NFS-related variables, if any. */
	tmpfs_dirent_t *de = NULL;
	off_t i, off = startoff;

	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
	*ncookies = cnt;

	/*
	 * Walk the directory again from startoff, recording for each of
	 * the cnt entries the offset that follows it.
	 */
	for (i = 0; i < cnt; i++) {
		KASSERT(off != TMPFS_DIRSEQ_EOF);
		if (off != TMPFS_DIRSEQ_DOT) {
			if (off == TMPFS_DIRSEQ_DOTDOT) {
				/* After "..": the first real entry. */
				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			} else if (de != NULL) {
				de = TAILQ_NEXT(de, td_entries);
			} else {
				/* Started mid-directory: locate the entry. */
				de = tmpfs_dir_lookupbyseq(node, off);
				KASSERT(de != NULL);
				de = TAILQ_NEXT(de, td_entries);
			}
			if (de == NULL) {
				off = TMPFS_DIRSEQ_EOF;
			} else {
				off = tmpfs_dir_getseq(node, de);
			}
		} else {
			/* After ".": the next entry is "..". */
			off = TMPFS_DIRSEQ_DOTDOT;
		}
		(*cookies)[i] = off;
	}
	/* The rebuilt position must match where getdents left the uio. */
	KASSERT(uio->uio_offset == off);
	return error;
}
1005
1006int
1007tmpfs_readlink(void *v)
1008{
1009	struct vop_readlink_args /* {
1010		struct vnode	*a_vp;
1011		struct uio	*a_uio;
1012		kauth_cred_t	a_cred;
1013	} */ *ap = v;
1014	vnode_t *vp = ap->a_vp;
1015	struct uio *uio = ap->a_uio;
1016	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1017	int error;
1018
1019	KASSERT(VOP_ISLOCKED(vp));
1020	KASSERT(uio->uio_offset == 0);
1021	KASSERT(vp->v_type == VLNK);
1022
1023	/* Note: readlink(2) returns the path without NUL terminator. */
1024	if (node->tn_size > 0) {
1025		error = uiomove(node->tn_spec.tn_lnk.tn_link,
1026		    MIN(node->tn_size, uio->uio_resid), uio);
1027	} else {
1028		error = 0;
1029	}
1030	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1031
1032	return error;
1033}
1034
1035int
1036tmpfs_inactive(void *v)
1037{
1038	struct vop_inactive_args /* {
1039		struct vnode *a_vp;
1040		bool *a_recycle;
1041	} */ *ap = v;
1042	vnode_t *vp = ap->a_vp;
1043	tmpfs_node_t *node;
1044
1045	KASSERT(VOP_ISLOCKED(vp));
1046
1047	node = VP_TO_TMPFS_NODE(vp);
1048	if (node->tn_links == 0) {
1049		/*
1050		 * Mark node as dead by setting its generation to zero.
1051		 */
1052		atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1053		*ap->a_recycle = true;
1054	} else {
1055		*ap->a_recycle = false;
1056	}
1057	VOP_UNLOCK(vp);
1058
1059	return 0;
1060}
1061
1062int
1063tmpfs_reclaim(void *v)
1064{
1065	struct vop_reclaim_args /* {
1066		struct vnode *a_vp;
1067	} */ *ap = v;
1068	vnode_t *vp = ap->a_vp;
1069	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1070	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1071
1072	/* Disassociate inode from vnode. */
1073	node->tn_vnode = NULL;
1074	vp->v_data = NULL;
1075
1076	/* If inode is not referenced, i.e. no links, then destroy it. */
1077	if (node->tn_links == 0)
1078		tmpfs_free_node(tmp, node);
1079	return 0;
1080}
1081
1082int
1083tmpfs_pathconf(void *v)
1084{
1085	struct vop_pathconf_args /* {
1086		struct vnode	*a_vp;
1087		int		a_name;
1088		register_t	*a_retval;
1089	} */ *ap = v;
1090	const int name = ap->a_name;
1091	register_t *retval = ap->a_retval;
1092	int error = 0;
1093
1094	switch (name) {
1095	case _PC_LINK_MAX:
1096		*retval = LINK_MAX;
1097		break;
1098	case _PC_NAME_MAX:
1099		*retval = TMPFS_MAXNAMLEN;
1100		break;
1101	case _PC_PATH_MAX:
1102		*retval = PATH_MAX;
1103		break;
1104	case _PC_PIPE_BUF:
1105		*retval = PIPE_BUF;
1106		break;
1107	case _PC_CHOWN_RESTRICTED:
1108		*retval = 1;
1109		break;
1110	case _PC_NO_TRUNC:
1111		*retval = 1;
1112		break;
1113	case _PC_SYNC_IO:
1114		*retval = 1;
1115		break;
1116	case _PC_FILESIZEBITS:
1117		*retval = sizeof(off_t) * CHAR_BIT;
1118		break;
1119	default:
1120		error = EINVAL;
1121	}
1122	return error;
1123}
1124
1125int
1126tmpfs_advlock(void *v)
1127{
1128	struct vop_advlock_args /* {
1129		struct vnode	*a_vp;
1130		void *		a_id;
1131		int		a_op;
1132		struct flock	*a_fl;
1133		int		a_flags;
1134	} */ *ap = v;
1135	vnode_t *vp = ap->a_vp;
1136	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1137
1138	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1139}
1140
1141int
1142tmpfs_getpages(void *v)
1143{
1144	struct vop_getpages_args /* {
1145		struct vnode *a_vp;
1146		voff_t a_offset;
1147		struct vm_page **a_m;
1148		int *a_count;
1149		int a_centeridx;
1150		vm_prot_t a_access_type;
1151		int a_advice;
1152		int a_flags;
1153	} */ * const ap = v;
1154	vnode_t *vp = ap->a_vp;
1155	const voff_t offset = ap->a_offset;
1156	struct vm_page **pgs = ap->a_m;
1157	const int centeridx = ap->a_centeridx;
1158	const vm_prot_t access_type = ap->a_access_type;
1159	const int advice = ap->a_advice;
1160	const int flags = ap->a_flags;
1161	int error, npages = *ap->a_count;
1162	tmpfs_node_t *node;
1163	struct uvm_object *uobj;
1164
1165	KASSERT(vp->v_type == VREG);
1166	KASSERT(mutex_owned(vp->v_interlock));
1167
1168	/*
1169	 * Currently, PGO_PASTEOF is not supported.
1170	 */
1171	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1172		if ((flags & PGO_LOCKED) == 0)
1173			mutex_exit(vp->v_interlock);
1174		return EINVAL;
1175	}
1176
1177	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1178		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1179	}
1180
1181	if ((flags & PGO_LOCKED) != 0)
1182		return EBUSY;
1183
1184	if (vdead_check(vp, VDEAD_NOWAIT) != 0)
1185		return ENOENT;
1186
1187	node = VP_TO_TMPFS_NODE(vp);
1188	uobj = node->tn_spec.tn_reg.tn_aobj;
1189
1190	if ((flags & PGO_NOTIMESTAMP) == 0) {
1191		u_int tflags = 0;
1192
1193		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1194			tflags |= TMPFS_UPDATE_ATIME;
1195
1196		if ((access_type & VM_PROT_WRITE) != 0) {
1197			tflags |= TMPFS_UPDATE_MTIME;
1198			if (vp->v_mount->mnt_flag & MNT_RELATIME)
1199				tflags |= TMPFS_UPDATE_ATIME;
1200		}
1201		tmpfs_update(vp, tflags);
1202	}
1203
1204	/*
1205	 * Invoke the pager.
1206	 *
1207	 * Clean the array of pages before.  XXX: PR/32166
1208	 * Note that vnode lock is shared with underlying UVM object.
1209	 */
1210	if (pgs) {
1211		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1212	}
1213	KASSERT(vp->v_interlock == uobj->vmobjlock);
1214
1215	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1216	    access_type, advice, flags | PGO_ALLPAGES);
1217
1218#if defined(DEBUG)
1219	if (!error && pgs) {
1220		for (int i = 0; i < npages; i++) {
1221			KASSERT(pgs[i] != NULL);
1222		}
1223	}
1224#endif
1225	return error;
1226}
1227
1228int
1229tmpfs_putpages(void *v)
1230{
1231	struct vop_putpages_args /* {
1232		struct vnode *a_vp;
1233		voff_t a_offlo;
1234		voff_t a_offhi;
1235		int a_flags;
1236	} */ * const ap = v;
1237	vnode_t *vp = ap->a_vp;
1238	const voff_t offlo = ap->a_offlo;
1239	const voff_t offhi = ap->a_offhi;
1240	const int flags = ap->a_flags;
1241	tmpfs_node_t *node;
1242	struct uvm_object *uobj;
1243	int error;
1244
1245	KASSERT(mutex_owned(vp->v_interlock));
1246
1247	if (vp->v_type != VREG) {
1248		mutex_exit(vp->v_interlock);
1249		return 0;
1250	}
1251
1252	node = VP_TO_TMPFS_NODE(vp);
1253	uobj = node->tn_spec.tn_reg.tn_aobj;
1254
1255	KASSERT(vp->v_interlock == uobj->vmobjlock);
1256	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1257
1258	/* XXX mtime */
1259
1260	return error;
1261}
1262
1263int
1264tmpfs_whiteout(void *v)
1265{
1266	struct vop_whiteout_args /* {
1267		struct vnode		*a_dvp;
1268		struct componentname	*a_cnp;
1269		int			a_flags;
1270	} */ *ap = v;
1271	vnode_t *dvp = ap->a_dvp;
1272	struct componentname *cnp = ap->a_cnp;
1273	const int flags = ap->a_flags;
1274	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1275	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1276	tmpfs_dirent_t *de;
1277	int error;
1278
1279	switch (flags) {
1280	case LOOKUP:
1281		break;
1282	case CREATE:
1283		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1284		    cnp->cn_namelen, &de);
1285		if (error)
1286			return error;
1287		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1288		break;
1289	case DELETE:
1290		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1291		de = tmpfs_dir_lookup(dnode, cnp);
1292		if (de == NULL)
1293			return ENOENT;
1294		tmpfs_dir_detach(dnode, de);
1295		tmpfs_free_dirent(tmp, de);
1296		break;
1297	}
1298	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1299	return 0;
1300}
1301
1302int
1303tmpfs_print(void *v)
1304{
1305	struct vop_print_args /* {
1306		struct vnode	*a_vp;
1307	} */ *ap = v;
1308	vnode_t *vp = ap->a_vp;
1309	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1310
1311	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1312	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1313	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1314	    node->tn_gid, (uintmax_t)node->tn_size);
1315	if (vp->v_type == VFIFO) {
1316		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1317	}
1318	printf("\n");
1319	return 0;
1320}
1321