tmpfs_vnops.c revision 1.132
1/*	$NetBSD: tmpfs_vnops.c,v 1.132 2017/04/26 03:02:48 riastradh Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.132 2017/04/26 03:02:48 riastradh Exp $");
39
40#include <sys/param.h>
41#include <sys/dirent.h>
42#include <sys/fcntl.h>
43#include <sys/event.h>
44#include <sys/malloc.h>
45#include <sys/namei.h>
46#include <sys/stat.h>
47#include <sys/uio.h>
48#include <sys/unistd.h>
49#include <sys/vnode.h>
50#include <sys/lockf.h>
51#include <sys/kauth.h>
52#include <sys/atomic.h>
53
54#include <uvm/uvm.h>
55
56#include <miscfs/fifofs/fifo.h>
57#include <miscfs/genfs/genfs.h>
58#include <fs/tmpfs/tmpfs_vnops.h>
59#include <fs/tmpfs/tmpfs.h>
60
/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * tmpfs_vnodeop_entries pairs each vnode operation descriptor with the
 * function that handles it.  vop_default_desc is mapped to
 * vn_default_error (presumably the fallback for operations that are not
 * listed -- confirm against the vnode operations framework), and
 * fallocate/fdiscard are mapped to genfs_eopnotsupp.  The { NULL, NULL }
 * pair is the last element of the table.
 */
int (**tmpfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_lookup_desc,		tmpfs_lookup },
	{ &vop_create_desc,		tmpfs_create },
	{ &vop_mknod_desc,		tmpfs_mknod },
	{ &vop_open_desc,		tmpfs_open },
	{ &vop_close_desc,		tmpfs_close },
	{ &vop_access_desc,		tmpfs_access },
	{ &vop_getattr_desc,		tmpfs_getattr },
	{ &vop_setattr_desc,		tmpfs_setattr },
	{ &vop_read_desc,		tmpfs_read },
	{ &vop_write_desc,		tmpfs_write },
	{ &vop_fallocate_desc,		genfs_eopnotsupp },
	{ &vop_fdiscard_desc,		genfs_eopnotsupp },
	{ &vop_ioctl_desc,		tmpfs_ioctl },
	{ &vop_fcntl_desc,		tmpfs_fcntl },
	{ &vop_poll_desc,		tmpfs_poll },
	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
	{ &vop_revoke_desc,		tmpfs_revoke },
	{ &vop_mmap_desc,		tmpfs_mmap },
	{ &vop_fsync_desc,		tmpfs_fsync },
	{ &vop_seek_desc,		tmpfs_seek },
	{ &vop_remove_desc,		tmpfs_remove },
	{ &vop_link_desc,		tmpfs_link },
	{ &vop_rename_desc,		tmpfs_rename },
	{ &vop_mkdir_desc,		tmpfs_mkdir },
	{ &vop_rmdir_desc,		tmpfs_rmdir },
	{ &vop_symlink_desc,		tmpfs_symlink },
	{ &vop_readdir_desc,		tmpfs_readdir },
	{ &vop_readlink_desc,		tmpfs_readlink },
	{ &vop_abortop_desc,		tmpfs_abortop },
	{ &vop_inactive_desc,		tmpfs_inactive },
	{ &vop_reclaim_desc,		tmpfs_reclaim },
	{ &vop_lock_desc,		tmpfs_lock },
	{ &vop_unlock_desc,		tmpfs_unlock },
	{ &vop_bmap_desc,		tmpfs_bmap },
	{ &vop_strategy_desc,		tmpfs_strategy },
	{ &vop_print_desc,		tmpfs_print },
	{ &vop_pathconf_desc,		tmpfs_pathconf },
	{ &vop_islocked_desc,		tmpfs_islocked },
	{ &vop_advlock_desc,		tmpfs_advlock },
	{ &vop_bwrite_desc,		tmpfs_bwrite },
	{ &vop_getpages_desc,		tmpfs_getpages },
	{ &vop_putpages_desc,		tmpfs_putpages },
	{ &vop_whiteout_desc,		tmpfs_whiteout },
	{ NULL, NULL }
};

/*
 * Descriptor tying the vector pointer above to its table of entries.
 */
const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
};
116
117/*
118 * tmpfs_lookup: path name traversal routine.
119 *
120 * Arguments: dvp (directory being searched), vpp (result),
121 * cnp (component name - path).
122 *
123 * => Caller holds a reference and lock on dvp.
124 * => We return looked-up vnode (vpp) locked, with a reference held.
125 */
126int
127tmpfs_lookup(void *v)
128{
129	struct vop_lookup_v2_args /* {
130		struct vnode *a_dvp;
131		struct vnode **a_vpp;
132		struct componentname *a_cnp;
133	} */ *ap = v;
134	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
135	struct componentname *cnp = ap->a_cnp;
136	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
137	tmpfs_node_t *dnode, *tnode;
138	tmpfs_dirent_t *de;
139	int cachefound, iswhiteout;
140	int error;
141
142	KASSERT(VOP_ISLOCKED(dvp));
143
144	dnode = VP_TO_TMPFS_DIR(dvp);
145	*vpp = NULL;
146
147	/* Check accessibility of directory. */
148	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
149	if (error) {
150		goto out;
151	}
152
153	/*
154	 * If requesting the last path component on a read-only file system
155	 * with a write operation, deny it.
156	 */
157	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
158	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
159		error = EROFS;
160		goto out;
161	}
162
163	/*
164	 * Avoid doing a linear scan of the directory if the requested
165	 * directory/name couple is already in the cache.
166	 */
167	cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
168				  cnp->cn_nameiop, cnp->cn_flags,
169				  &iswhiteout, vpp);
170	if (iswhiteout) {
171		cnp->cn_flags |= ISWHITEOUT;
172	}
173	if (cachefound && *vpp == NULLVP) {
174		/* Negative cache hit. */
175		error = ENOENT;
176		goto out;
177	} else if (cachefound) {
178		error = 0;
179		goto out;
180	}
181
182	/*
183	 * Treat an unlinked directory as empty (no "." or "..")
184	 */
185	if (dnode->tn_links == 0) {
186		KASSERT(dnode->tn_size == 0);
187		error = ENOENT;
188		goto out;
189	}
190
191	if (cnp->cn_flags & ISDOTDOT) {
192		tmpfs_node_t *pnode;
193
194		/*
195		 * Lookup of ".." case.
196		 */
197		if (lastcn && cnp->cn_nameiop == RENAME) {
198			error = EINVAL;
199			goto out;
200		}
201		KASSERT(dnode->tn_type == VDIR);
202		pnode = dnode->tn_spec.tn_dir.tn_parent;
203		if (pnode == NULL) {
204			error = ENOENT;
205			goto out;
206		}
207
208		error = vcache_get(dvp->v_mount, &pnode, sizeof(pnode), vpp);
209		goto out;
210	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
211		/*
212		 * Lookup of "." case.
213		 */
214		if (lastcn && cnp->cn_nameiop == RENAME) {
215			error = EISDIR;
216			goto out;
217		}
218		vref(dvp);
219		*vpp = dvp;
220		error = 0;
221		goto done;
222	}
223
224	/*
225	 * Other lookup cases: perform directory scan.
226	 */
227	de = tmpfs_dir_lookup(dnode, cnp);
228	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
229		/*
230		 * The entry was not found in the directory.  This is valid
231		 * if we are creating or renaming an entry and are working
232		 * on the last component of the path name.
233		 */
234		if (lastcn && (cnp->cn_nameiop == CREATE ||
235		    cnp->cn_nameiop == RENAME)) {
236			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
237			if (error) {
238				goto out;
239			}
240			error = EJUSTRETURN;
241		} else {
242			error = ENOENT;
243		}
244		if (de) {
245			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
246			cnp->cn_flags |= ISWHITEOUT;
247		}
248		goto done;
249	}
250
251	tnode = de->td_node;
252
253	/*
254	 * If it is not the last path component and found a non-directory
255	 * or non-link entry (which may itself be pointing to a directory),
256	 * raise an error.
257	 */
258	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
259		error = ENOTDIR;
260		goto out;
261	}
262
263	/* Check the permissions. */
264	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
265		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
266		if (error)
267			goto out;
268
269		if ((dnode->tn_mode & S_ISTXT) != 0) {
270			error = kauth_authorize_vnode(cnp->cn_cred,
271			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
272			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
273			    dnode->tn_uid, tnode->tn_uid));
274			if (error) {
275				error = EPERM;
276				goto out;
277			}
278		}
279	}
280
281	/* Get a vnode for the matching entry. */
282	error = vcache_get(dvp->v_mount, &tnode, sizeof(tnode), vpp);
283done:
284	/*
285	 * Cache the result, unless request was for creation (as it does
286	 * not improve the performance).
287	 */
288	if (cnp->cn_nameiop != CREATE) {
289		cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
290			    cnp->cn_flags);
291	}
292out:
293	KASSERT(VOP_ISLOCKED(dvp));
294
295	return error;
296}
297
298int
299tmpfs_create(void *v)
300{
301	struct vop_create_v3_args /* {
302		struct vnode		*a_dvp;
303		struct vnode		**a_vpp;
304		struct componentname	*a_cnp;
305		struct vattr		*a_vap;
306	} */ *ap = v;
307	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
308	struct componentname *cnp = ap->a_cnp;
309	struct vattr *vap = ap->a_vap;
310
311	KASSERT(VOP_ISLOCKED(dvp));
312	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
313	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
314}
315
316int
317tmpfs_mknod(void *v)
318{
319	struct vop_mknod_v3_args /* {
320		struct vnode		*a_dvp;
321		struct vnode		**a_vpp;
322		struct componentname	*a_cnp;
323		struct vattr		*a_vap;
324	} */ *ap = v;
325	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
326	struct componentname *cnp = ap->a_cnp;
327	struct vattr *vap = ap->a_vap;
328	enum vtype vt = vap->va_type;
329
330	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
331		*vpp = NULL;
332		return EINVAL;
333	}
334	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
335}
336
337int
338tmpfs_open(void *v)
339{
340	struct vop_open_args /* {
341		struct vnode	*a_vp;
342		int		a_mode;
343		kauth_cred_t	a_cred;
344	} */ *ap = v;
345	vnode_t *vp = ap->a_vp;
346	mode_t mode = ap->a_mode;
347	tmpfs_node_t *node;
348
349	KASSERT(VOP_ISLOCKED(vp));
350
351	node = VP_TO_TMPFS_NODE(vp);
352
353	/* If the file is marked append-only, deny write requests. */
354	if ((node->tn_flags & APPEND) != 0 &&
355	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
356		return EPERM;
357	}
358	return 0;
359}
360
361int
362tmpfs_close(void *v)
363{
364	struct vop_close_args /* {
365		struct vnode	*a_vp;
366		int		a_fflag;
367		kauth_cred_t	a_cred;
368	} */ *ap = v;
369	vnode_t *vp __diagused = ap->a_vp;
370
371	KASSERT(VOP_ISLOCKED(vp));
372	return 0;
373}
374
375int
376tmpfs_access(void *v)
377{
378	struct vop_access_args /* {
379		struct vnode	*a_vp;
380		int		a_mode;
381		kauth_cred_t	a_cred;
382	} */ *ap = v;
383	vnode_t *vp = ap->a_vp;
384	mode_t mode = ap->a_mode;
385	kauth_cred_t cred = ap->a_cred;
386	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
387	const bool writing = (mode & VWRITE) != 0;
388
389	KASSERT(VOP_ISLOCKED(vp));
390
391	/* Possible? */
392	switch (vp->v_type) {
393	case VDIR:
394	case VLNK:
395	case VREG:
396		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
397			return EROFS;
398		}
399		break;
400	case VBLK:
401	case VCHR:
402	case VSOCK:
403	case VFIFO:
404		break;
405	default:
406		return EINVAL;
407	}
408	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
409		return EPERM;
410	}
411
412	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
413	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
414	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
415}
416
417int
418tmpfs_getattr(void *v)
419{
420	struct vop_getattr_args /* {
421		struct vnode	*a_vp;
422		struct vattr	*a_vap;
423		kauth_cred_t	a_cred;
424	} */ *ap = v;
425	vnode_t *vp = ap->a_vp;
426	struct vattr *vap = ap->a_vap;
427	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
428
429	vattr_null(vap);
430
431	vap->va_type = vp->v_type;
432	vap->va_mode = node->tn_mode;
433	vap->va_nlink = node->tn_links;
434	vap->va_uid = node->tn_uid;
435	vap->va_gid = node->tn_gid;
436	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
437	vap->va_fileid = node->tn_id;
438	vap->va_size = node->tn_size;
439	vap->va_blocksize = PAGE_SIZE;
440	vap->va_atime = node->tn_atime;
441	vap->va_mtime = node->tn_mtime;
442	vap->va_ctime = node->tn_ctime;
443	vap->va_birthtime = node->tn_birthtime;
444	vap->va_gen = TMPFS_NODE_GEN(node);
445	vap->va_flags = node->tn_flags;
446	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
447	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
448	vap->va_bytes = round_page(node->tn_size);
449	vap->va_filerev = VNOVAL;
450	vap->va_vaflags = 0;
451	vap->va_spare = VNOVAL; /* XXX */
452
453	return 0;
454}
455
456int
457tmpfs_setattr(void *v)
458{
459	struct vop_setattr_args /* {
460		struct vnode	*a_vp;
461		struct vattr	*a_vap;
462		kauth_cred_t	a_cred;
463	} */ *ap = v;
464	vnode_t *vp = ap->a_vp;
465	struct vattr *vap = ap->a_vap;
466	kauth_cred_t cred = ap->a_cred;
467	lwp_t *l = curlwp;
468	int error = 0;
469
470	KASSERT(VOP_ISLOCKED(vp));
471
472	/* Abort if any unsettable attribute is given. */
473	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
474	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
475	    vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
476	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
477	    vap->va_bytes != VNOVAL) {
478		return EINVAL;
479	}
480
481	if (error == 0 && vap->va_flags != VNOVAL)
482		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
483
484	if (error == 0 && vap->va_size != VNOVAL)
485		error = tmpfs_chsize(vp, vap->va_size, cred, l);
486
487	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
488		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
489
490	if (error == 0 && vap->va_mode != VNOVAL)
491		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
492
493	const bool chsometime =
494	    vap->va_atime.tv_sec != VNOVAL ||
495	    vap->va_mtime.tv_sec != VNOVAL ||
496	    vap->va_birthtime.tv_sec != VNOVAL;
497	if (error == 0 && chsometime) {
498		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
499		    &vap->va_birthtime, vap->va_vaflags, cred, l);
500	}
501	return error;
502}
503
504int
505tmpfs_read(void *v)
506{
507	struct vop_read_args /* {
508		struct vnode *a_vp;
509		struct uio *a_uio;
510		int a_ioflag;
511		kauth_cred_t a_cred;
512	} */ *ap = v;
513	vnode_t *vp = ap->a_vp;
514	struct uio *uio = ap->a_uio;
515	const int ioflag = ap->a_ioflag;
516	tmpfs_node_t *node;
517	struct uvm_object *uobj;
518	int error;
519
520	KASSERT(VOP_ISLOCKED(vp));
521
522	if (vp->v_type == VDIR) {
523		return EISDIR;
524	}
525	if (uio->uio_offset < 0 || vp->v_type != VREG) {
526		return EINVAL;
527	}
528
529	/* Note: reading zero bytes should not update atime. */
530	if (uio->uio_resid == 0) {
531		return 0;
532	}
533
534	node = VP_TO_TMPFS_NODE(vp);
535	uobj = node->tn_spec.tn_reg.tn_aobj;
536	error = 0;
537
538	while (error == 0 && uio->uio_resid > 0) {
539		vsize_t len;
540
541		if (node->tn_size <= uio->uio_offset) {
542			break;
543		}
544		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
545		if (len == 0) {
546			break;
547		}
548		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
549		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
550	}
551
552	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
553	return error;
554}
555
556int
557tmpfs_write(void *v)
558{
559	struct vop_write_args /* {
560		struct vnode	*a_vp;
561		struct uio	*a_uio;
562		int		a_ioflag;
563		kauth_cred_t	a_cred;
564	} */ *ap = v;
565	vnode_t *vp = ap->a_vp;
566	struct uio *uio = ap->a_uio;
567	const int ioflag = ap->a_ioflag;
568	tmpfs_node_t *node;
569	struct uvm_object *uobj;
570	off_t oldsize;
571	int error;
572
573	KASSERT(VOP_ISLOCKED(vp));
574
575	node = VP_TO_TMPFS_NODE(vp);
576	oldsize = node->tn_size;
577
578	if ((vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
579		error = EROFS;
580		goto out;
581	}
582
583	if (uio->uio_offset < 0 || vp->v_type != VREG) {
584		error = EINVAL;
585		goto out;
586	}
587	if (uio->uio_resid == 0) {
588		error = 0;
589		goto out;
590	}
591	if (ioflag & IO_APPEND) {
592		uio->uio_offset = node->tn_size;
593	}
594
595	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
596		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
597		if (error)
598			goto out;
599	}
600
601	uobj = node->tn_spec.tn_reg.tn_aobj;
602	error = 0;
603	while (error == 0 && uio->uio_resid > 0) {
604		vsize_t len;
605
606		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
607		if (len == 0) {
608			break;
609		}
610		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
611		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
612	}
613	if (error) {
614		(void)tmpfs_reg_resize(vp, oldsize);
615	}
616
617	tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
618	VN_KNOTE(vp, NOTE_WRITE);
619out:
620	if (error) {
621		KASSERT(oldsize == node->tn_size);
622	} else {
623		KASSERT(uio->uio_resid == 0);
624	}
625	return error;
626}
627
628int
629tmpfs_fsync(void *v)
630{
631	struct vop_fsync_args /* {
632		struct vnode *a_vp;
633		kauth_cred_t a_cred;
634		int a_flags;
635		off_t a_offlo;
636		off_t a_offhi;
637		struct lwp *a_l;
638	} */ *ap = v;
639	vnode_t *vp __diagused = ap->a_vp;
640
641	/* Nothing to do.  Should be up to date. */
642	KASSERT(VOP_ISLOCKED(vp));
643	return 0;
644}
645
646/*
647 * tmpfs_remove: unlink a file.
648 *
649 * => Both directory (dvp) and file (vp) are locked.
650 * => We unlock and drop the reference on both.
651 */
652int
653tmpfs_remove(void *v)
654{
655	struct vop_remove_v2_args /* {
656		struct vnode *a_dvp;
657		struct vnode *a_vp;
658		struct componentname *a_cnp;
659	} */ *ap = v;
660	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
661	tmpfs_node_t *dnode, *node;
662	tmpfs_dirent_t *de;
663	int error;
664
665	KASSERT(VOP_ISLOCKED(dvp));
666	KASSERT(VOP_ISLOCKED(vp));
667
668	if (vp->v_type == VDIR) {
669		error = EPERM;
670		goto out;
671	}
672	dnode = VP_TO_TMPFS_DIR(dvp);
673	node = VP_TO_TMPFS_NODE(vp);
674
675	/*
676	 * Files marked as immutable or append-only cannot be deleted.
677	 * Likewise, files residing on directories marked as append-only
678	 * cannot be deleted.
679	 */
680	if (node->tn_flags & (IMMUTABLE | APPEND)) {
681		error = EPERM;
682		goto out;
683	}
684	if (dnode->tn_flags & APPEND) {
685		error = EPERM;
686		goto out;
687	}
688
689	/* Lookup the directory entry (check the cached hint first). */
690	de = tmpfs_dir_cached(node);
691	if (de == NULL) {
692		struct componentname *cnp = ap->a_cnp;
693		de = tmpfs_dir_lookup(dnode, cnp);
694	}
695	KASSERT(de && de->td_node == node);
696
697	/*
698	 * Remove the entry from the directory (drops the link count) and
699	 * destroy it or replace with a whiteout.
700	 *
701	 * Note: the inode referred by it will not be destroyed until the
702	 * vnode is reclaimed/recycled.
703	 */
704
705	tmpfs_dir_detach(dnode, de);
706
707	if (ap->a_cnp->cn_flags & DOWHITEOUT)
708		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
709	else
710		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
711
712	if (node->tn_links > 0) {
713		/* We removed a hard link. */
714		tmpfs_update(vp, TMPFS_UPDATE_CTIME);
715	}
716	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
717	error = 0;
718out:
719	/* Drop the reference and unlock the node. */
720	if (dvp == vp) {
721		vrele(vp);
722	} else {
723		vput(vp);
724	}
725	return error;
726}
727
728/*
729 * tmpfs_link: create a hard link.
730 */
731int
732tmpfs_link(void *v)
733{
734	struct vop_link_v2_args /* {
735		struct vnode *a_dvp;
736		struct vnode *a_vp;
737		struct componentname *a_cnp;
738	} */ *ap = v;
739	vnode_t *dvp = ap->a_dvp;
740	vnode_t *vp = ap->a_vp;
741	struct componentname *cnp = ap->a_cnp;
742	tmpfs_node_t *dnode, *node;
743	tmpfs_dirent_t *de;
744	int error;
745
746	KASSERT(dvp != vp);
747	KASSERT(VOP_ISLOCKED(dvp));
748	KASSERT(vp->v_type != VDIR);
749	KASSERT(dvp->v_mount == vp->v_mount);
750
751	dnode = VP_TO_TMPFS_DIR(dvp);
752	node = VP_TO_TMPFS_NODE(vp);
753
754	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
755
756	/* Check for maximum number of links limit. */
757	if (node->tn_links == LINK_MAX) {
758		error = EMLINK;
759		goto out;
760	}
761	KASSERT(node->tn_links < LINK_MAX);
762
763	/* We cannot create links of files marked immutable or append-only. */
764	if (node->tn_flags & (IMMUTABLE | APPEND)) {
765		error = EPERM;
766		goto out;
767	}
768
769	/* Allocate a new directory entry to represent the inode. */
770	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
771	    cnp->cn_nameptr, cnp->cn_namelen, &de);
772	if (error) {
773		goto out;
774	}
775
776	/*
777	 * Insert the entry into the directory.
778	 * It will increase the inode link count.
779	 */
780	tmpfs_dir_attach(dnode, de, node);
781	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
782
783	/* Update the timestamps and trigger the event. */
784	if (node->tn_vnode) {
785		VN_KNOTE(node->tn_vnode, NOTE_LINK);
786	}
787	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
788	error = 0;
789out:
790	VOP_UNLOCK(vp);
791	return error;
792}
793
794int
795tmpfs_mkdir(void *v)
796{
797	struct vop_mkdir_v3_args /* {
798		struct vnode		*a_dvp;
799		struct vnode		**a_vpp;
800		struct componentname	*a_cnp;
801		struct vattr		*a_vap;
802	} */ *ap = v;
803	vnode_t *dvp = ap->a_dvp;
804	vnode_t **vpp = ap->a_vpp;
805	struct componentname *cnp = ap->a_cnp;
806	struct vattr *vap = ap->a_vap;
807
808	KASSERT(vap->va_type == VDIR);
809	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
810}
811
812int
813tmpfs_rmdir(void *v)
814{
815	struct vop_rmdir_v2_args /* {
816		struct vnode		*a_dvp;
817		struct vnode		*a_vp;
818		struct componentname	*a_cnp;
819	} */ *ap = v;
820	vnode_t *dvp = ap->a_dvp;
821	vnode_t *vp = ap->a_vp;
822	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
823	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
824	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
825	tmpfs_dirent_t *de;
826	int error = 0;
827
828	KASSERT(VOP_ISLOCKED(dvp));
829	KASSERT(VOP_ISLOCKED(vp));
830
831	/*
832	 * Directories with more than two entries ('.' and '..') cannot be
833	 * removed.  There may be whiteout entries, which we will destroy.
834	 */
835	if (node->tn_size > 0) {
836		/*
837		 * If never had whiteout entries, the directory is certainly
838		 * not empty.  Otherwise, scan for any non-whiteout entry.
839		 */
840		if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
841			error = ENOTEMPTY;
842			goto out;
843		}
844		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
845			if (de->td_node != TMPFS_NODE_WHITEOUT) {
846				error = ENOTEMPTY;
847				goto out;
848			}
849		}
850		KASSERT(error == 0);
851	}
852
853	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
854
855	/* Lookup the directory entry (check the cached hint first). */
856	de = tmpfs_dir_cached(node);
857	if (de == NULL) {
858		struct componentname *cnp = ap->a_cnp;
859		de = tmpfs_dir_lookup(dnode, cnp);
860	}
861	KASSERT(de && de->td_node == node);
862
863	/* Check flags to see if we are allowed to remove the directory. */
864	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
865		error = EPERM;
866		goto out;
867	}
868
869	/* Decrement the link count for the virtual '.' entry. */
870	node->tn_links--;
871
872	/* Detach the directory entry from the directory. */
873	tmpfs_dir_detach(dnode, de);
874
875	/* Purge the cache for parent. */
876	cache_purge(dvp);
877
878	/*
879	 * Destroy the directory entry or replace it with a whiteout.
880	 *
881	 * Note: the inode referred by it will not be destroyed until the
882	 * vnode is reclaimed.
883	 */
884	if (ap->a_cnp->cn_flags & DOWHITEOUT)
885		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
886	else
887		tmpfs_free_dirent(tmp, de);
888
889	/* Destroy the whiteout entries from the node. */
890	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
891		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
892		tmpfs_dir_detach(node, de);
893		tmpfs_free_dirent(tmp, de);
894	}
895	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
896
897	KASSERT(node->tn_size == 0);
898	KASSERT(node->tn_links == 0);
899out:
900	/* Release the node. */
901	KASSERT(dvp != vp);
902	vput(vp);
903	return error;
904}
905
906int
907tmpfs_symlink(void *v)
908{
909	struct vop_symlink_v3_args /* {
910		struct vnode		*a_dvp;
911		struct vnode		**a_vpp;
912		struct componentname	*a_cnp;
913		struct vattr		*a_vap;
914		char			*a_target;
915	} */ *ap = v;
916	vnode_t *dvp = ap->a_dvp;
917	vnode_t **vpp = ap->a_vpp;
918	struct componentname *cnp = ap->a_cnp;
919	struct vattr *vap = ap->a_vap;
920	char *target = ap->a_target;
921
922	KASSERT(vap->va_type == VLNK);
923	return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
924}
925
926int
927tmpfs_readdir(void *v)
928{
929	struct vop_readdir_args /* {
930		struct vnode	*a_vp;
931		struct uio	*a_uio;
932		kauth_cred_t	a_cred;
933		int		*a_eofflag;
934		off_t		**a_cookies;
935		int		*ncookies;
936	} */ *ap = v;
937	vnode_t *vp = ap->a_vp;
938	struct uio *uio = ap->a_uio;
939	int *eofflag = ap->a_eofflag;
940	off_t **cookies = ap->a_cookies;
941	int *ncookies = ap->a_ncookies;
942	off_t startoff, cnt;
943	tmpfs_node_t *node;
944	int error;
945
946	KASSERT(VOP_ISLOCKED(vp));
947
948	/* This operation only makes sense on directory nodes. */
949	if (vp->v_type != VDIR) {
950		return ENOTDIR;
951	}
952	node = VP_TO_TMPFS_DIR(vp);
953	startoff = uio->uio_offset;
954	cnt = 0;
955
956	/*
957	 * Retrieve the directory entries, unless it is being destroyed.
958	 */
959	if (node->tn_links) {
960		error = tmpfs_dir_getdents(node, uio, &cnt);
961	} else {
962		error = 0;
963	}
964
965	if (eofflag != NULL) {
966		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
967	}
968	if (error || cookies == NULL || ncookies == NULL) {
969		return error;
970	}
971
972	/* Update NFS-related variables, if any. */
973	tmpfs_dirent_t *de = NULL;
974	off_t i, off = startoff;
975
976	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
977	*ncookies = cnt;
978
979	for (i = 0; i < cnt; i++) {
980		KASSERT(off != TMPFS_DIRSEQ_EOF);
981		if (off != TMPFS_DIRSEQ_DOT) {
982			if (off == TMPFS_DIRSEQ_DOTDOT) {
983				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
984			} else if (de != NULL) {
985				de = TAILQ_NEXT(de, td_entries);
986			} else {
987				de = tmpfs_dir_lookupbyseq(node, off);
988				KASSERT(de != NULL);
989				de = TAILQ_NEXT(de, td_entries);
990			}
991			if (de == NULL) {
992				off = TMPFS_DIRSEQ_EOF;
993			} else {
994				off = tmpfs_dir_getseq(node, de);
995			}
996		} else {
997			off = TMPFS_DIRSEQ_DOTDOT;
998		}
999		(*cookies)[i] = off;
1000	}
1001	KASSERT(uio->uio_offset == off);
1002	return error;
1003}
1004
1005int
1006tmpfs_readlink(void *v)
1007{
1008	struct vop_readlink_args /* {
1009		struct vnode	*a_vp;
1010		struct uio	*a_uio;
1011		kauth_cred_t	a_cred;
1012	} */ *ap = v;
1013	vnode_t *vp = ap->a_vp;
1014	struct uio *uio = ap->a_uio;
1015	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1016	int error;
1017
1018	KASSERT(VOP_ISLOCKED(vp));
1019	KASSERT(uio->uio_offset == 0);
1020	KASSERT(vp->v_type == VLNK);
1021
1022	/* Note: readlink(2) returns the path without NUL terminator. */
1023	if (node->tn_size > 0) {
1024		error = uiomove(node->tn_spec.tn_lnk.tn_link,
1025		    MIN(node->tn_size, uio->uio_resid), uio);
1026	} else {
1027		error = 0;
1028	}
1029	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1030
1031	return error;
1032}
1033
1034int
1035tmpfs_inactive(void *v)
1036{
1037	struct vop_inactive_v2_args /* {
1038		struct vnode *a_vp;
1039		bool *a_recycle;
1040	} */ *ap = v;
1041	vnode_t *vp = ap->a_vp;
1042	tmpfs_node_t *node;
1043
1044	KASSERT(VOP_ISLOCKED(vp));
1045
1046	node = VP_TO_TMPFS_NODE(vp);
1047	if (node->tn_links == 0) {
1048		/*
1049		 * Mark node as dead by setting its generation to zero.
1050		 */
1051		atomic_and_32(&node->tn_gen, ~TMPFS_NODE_GEN_MASK);
1052		*ap->a_recycle = true;
1053	} else {
1054		*ap->a_recycle = false;
1055	}
1056
1057	return 0;
1058}
1059
1060int
1061tmpfs_reclaim(void *v)
1062{
1063	struct vop_reclaim_args /* {
1064		struct vnode *a_vp;
1065	} */ *ap = v;
1066	vnode_t *vp = ap->a_vp;
1067	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1068	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1069
1070	/* Disassociate inode from vnode. */
1071	node->tn_vnode = NULL;
1072	vp->v_data = NULL;
1073
1074	/* If inode is not referenced, i.e. no links, then destroy it. */
1075	if (node->tn_links == 0)
1076		tmpfs_free_node(tmp, node);
1077	return 0;
1078}
1079
1080int
1081tmpfs_pathconf(void *v)
1082{
1083	struct vop_pathconf_args /* {
1084		struct vnode	*a_vp;
1085		int		a_name;
1086		register_t	*a_retval;
1087	} */ *ap = v;
1088	const int name = ap->a_name;
1089	register_t *retval = ap->a_retval;
1090	int error = 0;
1091
1092	switch (name) {
1093	case _PC_LINK_MAX:
1094		*retval = LINK_MAX;
1095		break;
1096	case _PC_NAME_MAX:
1097		*retval = TMPFS_MAXNAMLEN;
1098		break;
1099	case _PC_PATH_MAX:
1100		*retval = PATH_MAX;
1101		break;
1102	case _PC_PIPE_BUF:
1103		*retval = PIPE_BUF;
1104		break;
1105	case _PC_CHOWN_RESTRICTED:
1106		*retval = 1;
1107		break;
1108	case _PC_NO_TRUNC:
1109		*retval = 1;
1110		break;
1111	case _PC_SYNC_IO:
1112		*retval = 1;
1113		break;
1114	case _PC_FILESIZEBITS:
1115		*retval = sizeof(off_t) * CHAR_BIT;
1116		break;
1117	default:
1118		error = EINVAL;
1119	}
1120	return error;
1121}
1122
1123int
1124tmpfs_advlock(void *v)
1125{
1126	struct vop_advlock_args /* {
1127		struct vnode	*a_vp;
1128		void *		a_id;
1129		int		a_op;
1130		struct flock	*a_fl;
1131		int		a_flags;
1132	} */ *ap = v;
1133	vnode_t *vp = ap->a_vp;
1134	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1135
1136	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1137}
1138
1139int
1140tmpfs_getpages(void *v)
1141{
1142	struct vop_getpages_args /* {
1143		struct vnode *a_vp;
1144		voff_t a_offset;
1145		struct vm_page **a_m;
1146		int *a_count;
1147		int a_centeridx;
1148		vm_prot_t a_access_type;
1149		int a_advice;
1150		int a_flags;
1151	} */ * const ap = v;
1152	vnode_t *vp = ap->a_vp;
1153	const voff_t offset = ap->a_offset;
1154	struct vm_page **pgs = ap->a_m;
1155	const int centeridx = ap->a_centeridx;
1156	const vm_prot_t access_type = ap->a_access_type;
1157	const int advice = ap->a_advice;
1158	const int flags = ap->a_flags;
1159	int error, npages = *ap->a_count;
1160	tmpfs_node_t *node;
1161	struct uvm_object *uobj;
1162
1163	KASSERT(vp->v_type == VREG);
1164	KASSERT(mutex_owned(vp->v_interlock));
1165
1166	/*
1167	 * Currently, PGO_PASTEOF is not supported.
1168	 */
1169	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1170		if ((flags & PGO_LOCKED) == 0)
1171			mutex_exit(vp->v_interlock);
1172		return EINVAL;
1173	}
1174
1175	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1176		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1177	}
1178
1179	if ((flags & PGO_LOCKED) != 0)
1180		return EBUSY;
1181
1182	if (vdead_check(vp, VDEAD_NOWAIT) != 0)
1183		return ENOENT;
1184
1185	node = VP_TO_TMPFS_NODE(vp);
1186	uobj = node->tn_spec.tn_reg.tn_aobj;
1187
1188	if ((flags & PGO_NOTIMESTAMP) == 0) {
1189		u_int tflags = 0;
1190
1191		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1192			tflags |= TMPFS_UPDATE_ATIME;
1193
1194		if ((access_type & VM_PROT_WRITE) != 0) {
1195			tflags |= TMPFS_UPDATE_MTIME;
1196			if (vp->v_mount->mnt_flag & MNT_RELATIME)
1197				tflags |= TMPFS_UPDATE_ATIME;
1198		}
1199		tmpfs_update(vp, tflags);
1200	}
1201
1202	/*
1203	 * Invoke the pager.
1204	 *
1205	 * Clean the array of pages before.  XXX: PR/32166
1206	 * Note that vnode lock is shared with underlying UVM object.
1207	 */
1208	if (pgs) {
1209		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1210	}
1211	KASSERT(vp->v_interlock == uobj->vmobjlock);
1212
1213	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1214	    access_type, advice, flags | PGO_ALLPAGES);
1215
1216#if defined(DEBUG)
1217	if (!error && pgs) {
1218		for (int i = 0; i < npages; i++) {
1219			KASSERT(pgs[i] != NULL);
1220		}
1221	}
1222#endif
1223	return error;
1224}
1225
1226int
1227tmpfs_putpages(void *v)
1228{
1229	struct vop_putpages_args /* {
1230		struct vnode *a_vp;
1231		voff_t a_offlo;
1232		voff_t a_offhi;
1233		int a_flags;
1234	} */ * const ap = v;
1235	vnode_t *vp = ap->a_vp;
1236	const voff_t offlo = ap->a_offlo;
1237	const voff_t offhi = ap->a_offhi;
1238	const int flags = ap->a_flags;
1239	tmpfs_node_t *node;
1240	struct uvm_object *uobj;
1241	int error;
1242
1243	KASSERT(mutex_owned(vp->v_interlock));
1244
1245	if (vp->v_type != VREG) {
1246		mutex_exit(vp->v_interlock);
1247		return 0;
1248	}
1249
1250	node = VP_TO_TMPFS_NODE(vp);
1251	uobj = node->tn_spec.tn_reg.tn_aobj;
1252
1253	KASSERT(vp->v_interlock == uobj->vmobjlock);
1254	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1255
1256	/* XXX mtime */
1257
1258	return error;
1259}
1260
1261int
1262tmpfs_whiteout(void *v)
1263{
1264	struct vop_whiteout_args /* {
1265		struct vnode		*a_dvp;
1266		struct componentname	*a_cnp;
1267		int			a_flags;
1268	} */ *ap = v;
1269	vnode_t *dvp = ap->a_dvp;
1270	struct componentname *cnp = ap->a_cnp;
1271	const int flags = ap->a_flags;
1272	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1273	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1274	tmpfs_dirent_t *de;
1275	int error;
1276
1277	switch (flags) {
1278	case LOOKUP:
1279		break;
1280	case CREATE:
1281		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1282		    cnp->cn_namelen, &de);
1283		if (error)
1284			return error;
1285		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1286		break;
1287	case DELETE:
1288		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1289		de = tmpfs_dir_lookup(dnode, cnp);
1290		if (de == NULL)
1291			return ENOENT;
1292		tmpfs_dir_detach(dnode, de);
1293		tmpfs_free_dirent(tmp, de);
1294		break;
1295	}
1296	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1297	return 0;
1298}
1299
1300int
1301tmpfs_print(void *v)
1302{
1303	struct vop_print_args /* {
1304		struct vnode	*a_vp;
1305	} */ *ap = v;
1306	vnode_t *vp = ap->a_vp;
1307	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1308
1309	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1310	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1311	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1312	    node->tn_gid, (uintmax_t)node->tn_size);
1313	if (vp->v_type == VFIFO) {
1314		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1315	}
1316	printf("\n");
1317	return 0;
1318}
1319