tmpfs_vnops.c revision 1.107
1/*	$NetBSD: tmpfs_vnops.c,v 1.107 2013/11/23 16:35:32 rmind Exp $	*/
2
3/*
4 * Copyright (c) 2005, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36
37#include <sys/cdefs.h>
38__KERNEL_RCSID(0, "$NetBSD: tmpfs_vnops.c,v 1.107 2013/11/23 16:35:32 rmind Exp $");
39
40#include <sys/param.h>
41#include <sys/dirent.h>
42#include <sys/fcntl.h>
43#include <sys/event.h>
44#include <sys/malloc.h>
45#include <sys/namei.h>
46#include <sys/stat.h>
47#include <sys/uio.h>
48#include <sys/unistd.h>
49#include <sys/vnode.h>
50#include <sys/lockf.h>
51#include <sys/kauth.h>
52
53#include <uvm/uvm.h>
54
55#include <miscfs/fifofs/fifo.h>
56#include <miscfs/genfs/genfs.h>
57#include <fs/tmpfs/tmpfs_vnops.h>
58#include <fs/tmpfs/tmpfs.h>
59
/*
 * vnode operations vector used for files stored in a tmpfs file system.
 *
 * tmpfs_vnodeop_entries pairs each vnode operation descriptor with the
 * tmpfs handler for it; operations without an entry of their own are
 * routed to the vop_default_desc handler (vn_default_error).  The table
 * is terminated by a { NULL, NULL } entry.
 *
 * NOTE(review): several handlers named here (e.g. tmpfs_ioctl, tmpfs_lock,
 * tmpfs_rename) are not defined in the visible part of this file;
 * presumably they come from <fs/tmpfs/tmpfs_vnops.h> or another
 * translation unit -- confirm there.
 */
int (**tmpfs_vnodeop_p)(void *);
const struct vnodeopv_entry_desc tmpfs_vnodeop_entries[] = {
	{ &vop_default_desc,		vn_default_error },
	{ &vop_lookup_desc,		tmpfs_lookup },
	{ &vop_create_desc,		tmpfs_create },
	{ &vop_mknod_desc,		tmpfs_mknod },
	{ &vop_open_desc,		tmpfs_open },
	{ &vop_close_desc,		tmpfs_close },
	{ &vop_access_desc,		tmpfs_access },
	{ &vop_getattr_desc,		tmpfs_getattr },
	{ &vop_setattr_desc,		tmpfs_setattr },
	{ &vop_read_desc,		tmpfs_read },
	{ &vop_write_desc,		tmpfs_write },
	{ &vop_ioctl_desc,		tmpfs_ioctl },
	{ &vop_fcntl_desc,		tmpfs_fcntl },
	{ &vop_poll_desc,		tmpfs_poll },
	{ &vop_kqfilter_desc,		tmpfs_kqfilter },
	{ &vop_revoke_desc,		tmpfs_revoke },
	{ &vop_mmap_desc,		tmpfs_mmap },
	{ &vop_fsync_desc,		tmpfs_fsync },
	{ &vop_seek_desc,		tmpfs_seek },
	{ &vop_remove_desc,		tmpfs_remove },
	{ &vop_link_desc,		tmpfs_link },
	{ &vop_rename_desc,		tmpfs_rename },
	{ &vop_mkdir_desc,		tmpfs_mkdir },
	{ &vop_rmdir_desc,		tmpfs_rmdir },
	{ &vop_symlink_desc,		tmpfs_symlink },
	{ &vop_readdir_desc,		tmpfs_readdir },
	{ &vop_readlink_desc,		tmpfs_readlink },
	{ &vop_abortop_desc,		tmpfs_abortop },
	{ &vop_inactive_desc,		tmpfs_inactive },
	{ &vop_reclaim_desc,		tmpfs_reclaim },
	{ &vop_lock_desc,		tmpfs_lock },
	{ &vop_unlock_desc,		tmpfs_unlock },
	{ &vop_bmap_desc,		tmpfs_bmap },
	{ &vop_strategy_desc,		tmpfs_strategy },
	{ &vop_print_desc,		tmpfs_print },
	{ &vop_pathconf_desc,		tmpfs_pathconf },
	{ &vop_islocked_desc,		tmpfs_islocked },
	{ &vop_advlock_desc,		tmpfs_advlock },
	{ &vop_bwrite_desc,		tmpfs_bwrite },
	{ &vop_getpages_desc,		tmpfs_getpages },
	{ &vop_putpages_desc,		tmpfs_putpages },
	{ &vop_whiteout_desc,		tmpfs_whiteout },
	{ NULL, NULL }
};

/*
 * Descriptor tying the vector pointer above to its table of entries.
 */
const struct vnodeopv_desc tmpfs_vnodeop_opv_desc = {
	&tmpfs_vnodeop_p, tmpfs_vnodeop_entries
};
113
/*
 * tmpfs_lookup: path name traversal routine.
 *
 * Arguments: dvp (directory being searched), vpp (result),
 * cnp (component name - path).
 *
 * => Caller holds a reference and lock on dvp.
 * => We return looked-up vnode (vpp) locked, with a reference held.
 *
 * Returns 0 when a vnode was found, EJUSTRETURN when the last component
 * is missing but the operation is CREATE or RENAME (and the directory is
 * writable), or an errno: the VOP_ACCESS() result, EROFS, ENOENT, EINVAL,
 * EISDIR, ENOTDIR, EPERM, or whatever tmpfs_vnode_get() reports.
 *
 * Two exit labels are used: "done" records the result in the name cache
 * before returning, "out" returns without touching the cache.
 */
int
tmpfs_lookup(void *v)
{
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap = v;
	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	const bool lastcn = (cnp->cn_flags & ISLASTCN) != 0;
	tmpfs_node_t *dnode, *tnode;
	tmpfs_dirent_t *de;
	int cachefound, iswhiteout;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));

	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* Check accessibility of directory. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred);
	if (error) {
		goto out;
	}

	/*
	 * If requesting the last path component on a read-only file system
	 * with a write operation, deny it.
	 */
	if (lastcn && (dvp->v_mount->mnt_flag & MNT_RDONLY) != 0 &&
	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = EROFS;
		goto out;
	}

	/*
	 * Avoid doing a linear scan of the directory if the requested
	 * directory/name couple is already in the cache.
	 *
	 * NOTE(review): iswhiteout is read unconditionally below, so this
	 * relies on cache_lookup() always storing a value through it --
	 * confirm against the cache_lookup() implementation.
	 */
	cachefound = cache_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
				  cnp->cn_nameiop, cnp->cn_flags,
				  &iswhiteout, vpp);
	if (iswhiteout) {
		cnp->cn_flags |= ISWHITEOUT;
	}
	if (cachefound && *vpp == NULLVP) {
		/* Negative cache hit. */
		error = ENOENT;
		goto out;
	} else if (cachefound) {
		/* Positive cache hit: *vpp was filled in by cache_lookup(). */
		error = 0;
		goto out;
	}

	if (cnp->cn_flags & ISDOTDOT) {
		tmpfs_node_t *pnode;

		/*
		 * Lookup of ".." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EINVAL;
			goto out;
		}
		KASSERT(dnode->tn_type == VDIR);
		pnode = dnode->tn_spec.tn_dir.tn_parent;
		if (pnode == NULL) {
			error = ENOENT;
			goto out;
		}

		/*
		 * Lock the parent tn_vlock before releasing the vnode lock,
		 * and thus prevents parent from disappearing.
		 */
		mutex_enter(&pnode->tn_vlock);
		VOP_UNLOCK(dvp);

		/*
		 * Get a vnode of the '..' entry and re-acquire the lock.
		 * Release the tn_vlock.
		 *
		 * (tn_vlock is not released in this function; the release
		 * is expected to happen inside tmpfs_vnode_get().)
		 */
		error = tmpfs_vnode_get(dvp->v_mount, pnode, vpp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		/* The ".." result bypasses cache_enter() by going to "out". */
		goto out;

	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/*
		 * Lookup of "." case.
		 */
		if (lastcn && cnp->cn_nameiop == RENAME) {
			error = EISDIR;
			goto out;
		}
		/* The result is dvp itself: take one more reference on it. */
		vref(dvp);
		*vpp = dvp;
		error = 0;
		goto done;
	}

	/*
	 * Other lookup cases: perform directory scan.
	 */
	de = tmpfs_dir_lookup(dnode, cnp);
	if (de == NULL || de->td_node == TMPFS_NODE_WHITEOUT) {
		/*
		 * The entry was not found in the directory.  This is valid
		 * if we are creating or renaming an entry and are working
		 * on the last component of the path name.
		 */
		if (lastcn && (cnp->cn_nameiop == CREATE ||
		    cnp->cn_nameiop == RENAME)) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
			if (error) {
				goto out;
			}
			error = EJUSTRETURN;
		} else {
			error = ENOENT;
		}
		if (de) {
			KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
			cnp->cn_flags |= ISWHITEOUT;
		}
		/* *vpp is still NULL here, so "done" caches a miss. */
		goto done;
	}

	tnode = de->td_node;

	/*
	 * If it is not the last path component and found a non-directory
	 * or non-link entry (which may itself be pointing to a directory),
	 * raise an error.
	 */
	if (!lastcn && tnode->tn_type != VDIR && tnode->tn_type != VLNK) {
		error = ENOTDIR;
		goto out;
	}

	/* Check the permissions. */
	if (lastcn && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred);
		if (error)
			goto out;

		/*
		 * Sticky directory: ask kauth whether the caller may delete
		 * the entry; any refusal is reported as EPERM.
		 */
		if ((dnode->tn_mode & S_ISTXT) != 0) {
			error = kauth_authorize_vnode(cnp->cn_cred,
			    KAUTH_VNODE_DELETE, tnode->tn_vnode,
			    dnode->tn_vnode, genfs_can_sticky(cnp->cn_cred,
			    dnode->tn_uid, tnode->tn_uid));
			if (error) {
				error = EPERM;
				goto out;
			}
		}
	}

	/*
	 * Get a vnode for the matching entry.  As in the ".." case, the
	 * node's tn_vlock is taken here and not released in this function.
	 */
	mutex_enter(&tnode->tn_vlock);
	error = tmpfs_vnode_get(dvp->v_mount, tnode, vpp);
done:
	/*
	 * Cache the result, unless request was for creation (as it does
	 * not improve the performance).
	 */
	if (cnp->cn_nameiop != CREATE) {
		cache_enter(dvp, *vpp, cnp->cn_nameptr, cnp->cn_namelen,
			    cnp->cn_flags);
	}
out:
	/* Either an error is being returned or *vpp is a locked vnode. */
	KASSERT((*vpp && VOP_ISLOCKED(*vpp)) || error);
	KASSERT(VOP_ISLOCKED(dvp));

	return error;
}
300
301int
302tmpfs_create(void *v)
303{
304	struct vop_create_args /* {
305		struct vnode		*a_dvp;
306		struct vnode		**a_vpp;
307		struct componentname	*a_cnp;
308		struct vattr		*a_vap;
309	} */ *ap = v;
310	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
311	struct componentname *cnp = ap->a_cnp;
312	struct vattr *vap = ap->a_vap;
313
314	KASSERT(VOP_ISLOCKED(dvp));
315	KASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
316	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
317}
318
319int
320tmpfs_mknod(void *v)
321{
322	struct vop_mknod_args /* {
323		struct vnode		*a_dvp;
324		struct vnode		**a_vpp;
325		struct componentname	*a_cnp;
326		struct vattr		*a_vap;
327	} */ *ap = v;
328	vnode_t *dvp = ap->a_dvp, **vpp = ap->a_vpp;
329	struct componentname *cnp = ap->a_cnp;
330	struct vattr *vap = ap->a_vap;
331	enum vtype vt = vap->va_type;
332
333	if (vt != VBLK && vt != VCHR && vt != VFIFO) {
334		vput(dvp);
335		return EINVAL;
336	}
337	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
338}
339
340int
341tmpfs_open(void *v)
342{
343	struct vop_open_args /* {
344		struct vnode	*a_vp;
345		int		a_mode;
346		kauth_cred_t	a_cred;
347	} */ *ap = v;
348	vnode_t *vp = ap->a_vp;
349	mode_t mode = ap->a_mode;
350	tmpfs_node_t *node;
351
352	KASSERT(VOP_ISLOCKED(vp));
353
354	node = VP_TO_TMPFS_NODE(vp);
355	if (node->tn_links < 1) {
356		/*
357		 * The file is still active, but all its names have been
358		 * removed (e.g. by a "rmdir $(pwd)").  It cannot be opened
359		 * any more, as it is about to be destroyed.
360		 */
361		return ENOENT;
362	}
363
364	/* If the file is marked append-only, deny write requests. */
365	if ((node->tn_flags & APPEND) != 0 &&
366	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
367		return EPERM;
368	}
369	return 0;
370}
371
372int
373tmpfs_close(void *v)
374{
375	struct vop_close_args /* {
376		struct vnode	*a_vp;
377		int		a_fflag;
378		kauth_cred_t	a_cred;
379	} */ *ap = v;
380	vnode_t *vp = ap->a_vp;
381
382	KASSERT(VOP_ISLOCKED(vp));
383	return 0;
384}
385
386int
387tmpfs_access(void *v)
388{
389	struct vop_access_args /* {
390		struct vnode	*a_vp;
391		int		a_mode;
392		kauth_cred_t	a_cred;
393	} */ *ap = v;
394	vnode_t *vp = ap->a_vp;
395	mode_t mode = ap->a_mode;
396	kauth_cred_t cred = ap->a_cred;
397	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
398	const bool writing = (mode & VWRITE) != 0;
399
400	KASSERT(VOP_ISLOCKED(vp));
401
402	/* Possible? */
403	switch (vp->v_type) {
404	case VDIR:
405	case VLNK:
406	case VREG:
407		if (writing && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
408			return EROFS;
409		}
410		break;
411	case VBLK:
412	case VCHR:
413	case VSOCK:
414	case VFIFO:
415		break;
416	default:
417		return EINVAL;
418	}
419	if (writing && (node->tn_flags & IMMUTABLE) != 0) {
420		return EPERM;
421	}
422
423	return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(mode,
424	    vp->v_type, node->tn_mode), vp, NULL, genfs_can_access(vp->v_type,
425	    node->tn_mode, node->tn_uid, node->tn_gid, mode, cred));
426}
427
428int
429tmpfs_getattr(void *v)
430{
431	struct vop_getattr_args /* {
432		struct vnode	*a_vp;
433		struct vattr	*a_vap;
434		kauth_cred_t	a_cred;
435	} */ *ap = v;
436	vnode_t *vp = ap->a_vp;
437	struct vattr *vap = ap->a_vap;
438	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
439
440	vattr_null(vap);
441
442	vap->va_type = vp->v_type;
443	vap->va_mode = node->tn_mode;
444	vap->va_nlink = node->tn_links;
445	vap->va_uid = node->tn_uid;
446	vap->va_gid = node->tn_gid;
447	vap->va_fsid = vp->v_mount->mnt_stat.f_fsidx.__fsid_val[0];
448	vap->va_fileid = node->tn_id;
449	vap->va_size = node->tn_size;
450	vap->va_blocksize = PAGE_SIZE;
451	vap->va_atime = node->tn_atime;
452	vap->va_mtime = node->tn_mtime;
453	vap->va_ctime = node->tn_ctime;
454	vap->va_birthtime = node->tn_birthtime;
455	vap->va_gen = TMPFS_NODE_GEN(node);
456	vap->va_flags = node->tn_flags;
457	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
458	    node->tn_spec.tn_dev.tn_rdev : VNOVAL;
459	vap->va_bytes = round_page(node->tn_size);
460	vap->va_filerev = VNOVAL;
461	vap->va_vaflags = 0;
462	vap->va_spare = VNOVAL; /* XXX */
463
464	return 0;
465}
466
467int
468tmpfs_setattr(void *v)
469{
470	struct vop_setattr_args /* {
471		struct vnode	*a_vp;
472		struct vattr	*a_vap;
473		kauth_cred_t	a_cred;
474	} */ *ap = v;
475	vnode_t *vp = ap->a_vp;
476	struct vattr *vap = ap->a_vap;
477	kauth_cred_t cred = ap->a_cred;
478	lwp_t *l = curlwp;
479	int error = 0;
480
481	KASSERT(VOP_ISLOCKED(vp));
482
483	/* Abort if any unsettable attribute is given. */
484	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
485	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
486	    vap->va_blocksize != VNOVAL || vap->va_ctime.tv_sec != VNOVAL ||
487	    vap->va_gen != VNOVAL || vap->va_rdev != VNOVAL ||
488	    vap->va_bytes != VNOVAL) {
489		return EINVAL;
490	}
491
492	if (error == 0 && vap->va_flags != VNOVAL)
493		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
494
495	if (error == 0 && vap->va_size != VNOVAL)
496		error = tmpfs_chsize(vp, vap->va_size, cred, l);
497
498	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
499		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, l);
500
501	if (error == 0 && vap->va_mode != VNOVAL)
502		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
503
504	const bool chsometime =
505	    vap->va_atime.tv_sec != VNOVAL ||
506	    vap->va_mtime.tv_sec != VNOVAL ||
507	    vap->va_birthtime.tv_sec != VNOVAL;
508	if (error == 0 && chsometime) {
509		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
510		    &vap->va_birthtime, vap->va_vaflags, cred, l);
511	}
512	return error;
513}
514
515int
516tmpfs_read(void *v)
517{
518	struct vop_read_args /* {
519		struct vnode *a_vp;
520		struct uio *a_uio;
521		int a_ioflag;
522		kauth_cred_t a_cred;
523	} */ *ap = v;
524	vnode_t *vp = ap->a_vp;
525	struct uio *uio = ap->a_uio;
526	const int ioflag = ap->a_ioflag;
527	tmpfs_node_t *node;
528	struct uvm_object *uobj;
529	int error;
530
531	KASSERT(VOP_ISLOCKED(vp));
532
533	if (vp->v_type != VREG) {
534		return EISDIR;
535	}
536	if (uio->uio_offset < 0) {
537		return EINVAL;
538	}
539
540	/* Note: reading zero bytes should not update atime. */
541	if (uio->uio_resid == 0) {
542		return 0;
543	}
544
545	node = VP_TO_TMPFS_NODE(vp);
546	uobj = node->tn_spec.tn_reg.tn_aobj;
547	error = 0;
548
549	while (error == 0 && uio->uio_resid > 0) {
550		vsize_t len;
551
552		if (node->tn_size <= uio->uio_offset) {
553			break;
554		}
555		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
556		if (len == 0) {
557			break;
558		}
559		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
560		    UBC_READ | UBC_PARTIALOK | UBC_UNMAP_FLAG(vp));
561	}
562
563	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
564	return error;
565}
566
567int
568tmpfs_write(void *v)
569{
570	struct vop_write_args /* {
571		struct vnode	*a_vp;
572		struct uio	*a_uio;
573		int		a_ioflag;
574		kauth_cred_t	a_cred;
575	} */ *ap = v;
576	vnode_t *vp = ap->a_vp;
577	struct uio *uio = ap->a_uio;
578	const int ioflag = ap->a_ioflag;
579	tmpfs_node_t *node;
580	struct uvm_object *uobj;
581	off_t oldsize;
582	int error;
583
584	KASSERT(VOP_ISLOCKED(vp));
585
586	node = VP_TO_TMPFS_NODE(vp);
587	oldsize = node->tn_size;
588
589	if (uio->uio_offset < 0 || vp->v_type != VREG) {
590		error = EINVAL;
591		goto out;
592	}
593	if (uio->uio_resid == 0) {
594		error = 0;
595		goto out;
596	}
597	if (ioflag & IO_APPEND) {
598		uio->uio_offset = node->tn_size;
599	}
600
601	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
602		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
603		if (error)
604			goto out;
605	}
606
607	uobj = node->tn_spec.tn_reg.tn_aobj;
608	error = 0;
609	while (error == 0 && uio->uio_resid > 0) {
610		vsize_t len;
611
612		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
613		if (len == 0) {
614			break;
615		}
616		error = ubc_uiomove(uobj, uio, len, IO_ADV_DECODE(ioflag),
617		    UBC_WRITE | UBC_UNMAP_FLAG(vp));
618	}
619	if (error) {
620		(void)tmpfs_reg_resize(vp, oldsize);
621	}
622
623	tmpfs_update(vp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
624	VN_KNOTE(vp, NOTE_WRITE);
625out:
626	if (error) {
627		KASSERT(oldsize == node->tn_size);
628	} else {
629		KASSERT(uio->uio_resid == 0);
630	}
631	return error;
632}
633
634int
635tmpfs_fsync(void *v)
636{
637	struct vop_fsync_args /* {
638		struct vnode *a_vp;
639		kauth_cred_t a_cred;
640		int a_flags;
641		off_t a_offlo;
642		off_t a_offhi;
643		struct lwp *a_l;
644	} */ *ap = v;
645	vnode_t *vp = ap->a_vp;
646
647	/* Nothing to do.  Should be up to date. */
648	KASSERT(VOP_ISLOCKED(vp));
649	return 0;
650}
651
652/*
653 * tmpfs_remove: unlink a file.
654 *
655 * => Both directory (dvp) and file (vp) are locked.
656 * => We unlock and drop the reference on both.
657 */
658int
659tmpfs_remove(void *v)
660{
661	struct vop_remove_args /* {
662		struct vnode *a_dvp;
663		struct vnode *a_vp;
664		struct componentname *a_cnp;
665	} */ *ap = v;
666	vnode_t *dvp = ap->a_dvp, *vp = ap->a_vp;
667	tmpfs_node_t *dnode, *node;
668	tmpfs_dirent_t *de;
669	int error;
670
671	KASSERT(VOP_ISLOCKED(dvp));
672	KASSERT(VOP_ISLOCKED(vp));
673
674	if (vp->v_type == VDIR) {
675		error = EPERM;
676		goto out;
677	}
678	dnode = VP_TO_TMPFS_DIR(dvp);
679	node = VP_TO_TMPFS_NODE(vp);
680
681	/*
682	 * Files marked as immutable or append-only cannot be deleted.
683	 * Likewise, files residing on directories marked as append-only
684	 * cannot be deleted.
685	 */
686	if (node->tn_flags & (IMMUTABLE | APPEND)) {
687		error = EPERM;
688		goto out;
689	}
690	if (dnode->tn_flags & APPEND) {
691		error = EPERM;
692		goto out;
693	}
694
695	/* Lookup the directory entry (check the cached hint first). */
696	de = tmpfs_dir_cached(node);
697	if (de == NULL) {
698		struct componentname *cnp = ap->a_cnp;
699		de = tmpfs_dir_lookup(dnode, cnp);
700	}
701	KASSERT(de && de->td_node == node);
702
703	/*
704	 * Remove the entry from the directory (drops the link count) and
705	 * destroy it or replace with a whiteout.
706	 *
707	 * Note: the inode referred by it will not be destroyed until the
708	 * vnode is reclaimed/recycled.
709	 */
710
711	tmpfs_dir_detach(dnode, de);
712
713	if (ap->a_cnp->cn_flags & DOWHITEOUT)
714		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
715	else
716		tmpfs_free_dirent(VFS_TO_TMPFS(vp->v_mount), de);
717
718	if (node->tn_links > 0) {
719		/* We removed a hard link. */
720		tmpfs_update(vp, TMPFS_UPDATE_CTIME);
721	}
722	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
723	error = 0;
724out:
725	/* Drop the references and unlock the vnodes. */
726	vput(vp);
727	if (dvp == vp) {
728		vrele(dvp);
729	} else {
730		vput(dvp);
731	}
732	return error;
733}
734
735/*
736 * tmpfs_link: create a hard link.
737 */
738int
739tmpfs_link(void *v)
740{
741	struct vop_link_args /* {
742		struct vnode *a_dvp;
743		struct vnode *a_vp;
744		struct componentname *a_cnp;
745	} */ *ap = v;
746	vnode_t *dvp = ap->a_dvp;
747	vnode_t *vp = ap->a_vp;
748	struct componentname *cnp = ap->a_cnp;
749	tmpfs_node_t *dnode, *node;
750	tmpfs_dirent_t *de;
751	int error;
752
753	KASSERT(dvp != vp);
754	KASSERT(VOP_ISLOCKED(dvp));
755	KASSERT(vp->v_type != VDIR);
756	KASSERT(dvp->v_mount == vp->v_mount);
757
758	dnode = VP_TO_TMPFS_DIR(dvp);
759	node = VP_TO_TMPFS_NODE(vp);
760
761	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
762
763	/* Check for maximum number of links limit. */
764	if (node->tn_links == LINK_MAX) {
765		error = EMLINK;
766		goto out;
767	}
768	KASSERT(node->tn_links < LINK_MAX);
769
770	/* We cannot create links of files marked immutable or append-only. */
771	if (node->tn_flags & (IMMUTABLE | APPEND)) {
772		error = EPERM;
773		goto out;
774	}
775
776	/* Allocate a new directory entry to represent the inode. */
777	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount),
778	    cnp->cn_nameptr, cnp->cn_namelen, &de);
779	if (error) {
780		goto out;
781	}
782
783	/*
784	 * Insert the entry into the directory.
785	 * It will increase the inode link count.
786	 */
787	tmpfs_dir_attach(dnode, de, node);
788	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
789
790	/* Update the timestamps and trigger the event. */
791	if (node->tn_vnode) {
792		VN_KNOTE(node->tn_vnode, NOTE_LINK);
793	}
794	tmpfs_update(vp, TMPFS_UPDATE_CTIME);
795	error = 0;
796out:
797	VOP_UNLOCK(vp);
798	vput(dvp);
799	return error;
800}
801
802int
803tmpfs_mkdir(void *v)
804{
805	struct vop_mkdir_args /* {
806		struct vnode		*a_dvp;
807		struct vnode		**a_vpp;
808		struct componentname	*a_cnp;
809		struct vattr		*a_vap;
810	} */ *ap = v;
811	vnode_t *dvp = ap->a_dvp;
812	vnode_t **vpp = ap->a_vpp;
813	struct componentname *cnp = ap->a_cnp;
814	struct vattr *vap = ap->a_vap;
815
816	KASSERT(vap->va_type == VDIR);
817	return tmpfs_construct_node(dvp, vpp, vap, cnp, NULL);
818}
819
820int
821tmpfs_rmdir(void *v)
822{
823	struct vop_rmdir_args /* {
824		struct vnode		*a_dvp;
825		struct vnode		*a_vp;
826		struct componentname	*a_cnp;
827	} */ *ap = v;
828	vnode_t *dvp = ap->a_dvp;
829	vnode_t *vp = ap->a_vp;
830	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
831	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
832	tmpfs_node_t *node = VP_TO_TMPFS_DIR(vp);
833	tmpfs_dirent_t *de;
834	int error = 0;
835
836	KASSERT(VOP_ISLOCKED(dvp));
837	KASSERT(VOP_ISLOCKED(vp));
838	KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
839
840	/*
841	 * Directories with more than two entries ('.' and '..') cannot be
842	 * removed.  There may be whiteout entries, which we will destroy.
843	 */
844	if (node->tn_size > 0) {
845		/*
846		 * If never had whiteout entries, the directory is certainly
847		 * not empty.  Otherwise, scan for any non-whiteout entry.
848		 */
849		if ((node->tn_gen & TMPFS_WHITEOUT_BIT) == 0) {
850			error = ENOTEMPTY;
851			goto out;
852		}
853		TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
854			if (de->td_node != TMPFS_NODE_WHITEOUT) {
855				error = ENOTEMPTY;
856				goto out;
857			}
858		}
859		KASSERT(error == 0);
860	}
861
862	/* Lookup the directory entry (check the cached hint first). */
863	de = tmpfs_dir_cached(node);
864	if (de == NULL) {
865		struct componentname *cnp = ap->a_cnp;
866		de = tmpfs_dir_lookup(dnode, cnp);
867	}
868	KASSERT(de && de->td_node == node);
869
870	/* Check flags to see if we are allowed to remove the directory. */
871	if (dnode->tn_flags & APPEND || node->tn_flags & (IMMUTABLE | APPEND)) {
872		error = EPERM;
873		goto out;
874	}
875
876	/* Decrement the link count for the virtual '.' entry. */
877	node->tn_links--;
878
879	/* Detach the directory entry from the directory. */
880	tmpfs_dir_detach(dnode, de);
881
882	/* Purge the cache for parent. */
883	cache_purge(dvp);
884
885	/*
886	 * Destroy the directory entry or replace it with a whiteout.
887	 *
888	 * Note: the inode referred by it will not be destroyed until the
889	 * vnode is reclaimed.
890	 */
891	if (ap->a_cnp->cn_flags & DOWHITEOUT)
892		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
893	else
894		tmpfs_free_dirent(tmp, de);
895
896	/* Destroy the whiteout entries from the node. */
897	while ((de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir)) != NULL) {
898		KASSERT(de->td_node == TMPFS_NODE_WHITEOUT);
899		tmpfs_dir_detach(node, de);
900		tmpfs_free_dirent(tmp, de);
901	}
902	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
903
904	KASSERT(node->tn_size == 0);
905	KASSERT(node->tn_links == 0);
906out:
907	/* Release the nodes. */
908	vput(dvp);
909	vput(vp);
910	return error;
911}
912
913int
914tmpfs_symlink(void *v)
915{
916	struct vop_symlink_args /* {
917		struct vnode		*a_dvp;
918		struct vnode		**a_vpp;
919		struct componentname	*a_cnp;
920		struct vattr		*a_vap;
921		char			*a_target;
922	} */ *ap = v;
923	vnode_t *dvp = ap->a_dvp;
924	vnode_t **vpp = ap->a_vpp;
925	struct componentname *cnp = ap->a_cnp;
926	struct vattr *vap = ap->a_vap;
927	char *target = ap->a_target;
928
929	KASSERT(vap->va_type == VLNK);
930	return tmpfs_construct_node(dvp, vpp, vap, cnp, target);
931}
932
/*
 * tmpfs_readdir: read directory entries from a_vp into a_uio.
 *
 * Returns ENOTDIR if a_vp is not a directory, otherwise the result of
 * tmpfs_dir_getdents() (or 0 if the directory has no links left).
 *
 * If a_eofflag is non-NULL it is set to true when no error occurred and
 * the uio offset reached TMPFS_DIRSEQ_EOF.  If both a_cookies and
 * a_ncookies are non-NULL and no error occurred, an array with one
 * cookie per returned entry is allocated with malloc(M_TEMP) and handed
 * back through them.
 */
int
tmpfs_readdir(void *v)
{
	struct vop_readdir_args /* {
		struct vnode	*a_vp;
		struct uio	*a_uio;
		kauth_cred_t	a_cred;
		int		*a_eofflag;
		off_t		**a_cookies;
		int		*a_ncookies;
	} */ *ap = v;
	vnode_t *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	off_t startoff, cnt;
	tmpfs_node_t *node;
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}
	node = VP_TO_TMPFS_DIR(vp);
	/* Remember where the read started; the cookie loop replays from it. */
	startoff = uio->uio_offset;
	cnt = 0;

	/*
	 * Retrieve the directory entries, unless it is being destroyed.
	 */
	if (node->tn_links) {
		error = tmpfs_dir_getdents(node, uio, &cnt);
	} else {
		error = 0;
	}

	if (eofflag != NULL) {
		*eofflag = !error && uio->uio_offset == TMPFS_DIRSEQ_EOF;
	}
	if (error || cookies == NULL || ncookies == NULL) {
		return error;
	}

	/* Update NFS-related variables, if any. */
	tmpfs_dirent_t *de = NULL;
	off_t i, off = startoff;

	*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
	*ncookies = cnt;

	/*
	 * Walk the same cnt entries again, starting at startoff.  For each
	 * one, the cookie stored is the sequence number of the entry that
	 * follows it (or TMPFS_DIRSEQ_EOF after the last one).
	 */
	for (i = 0; i < cnt; i++) {
		KASSERT(off != TMPFS_DIRSEQ_EOF);
		if (off != TMPFS_DIRSEQ_DOT) {
			if (off == TMPFS_DIRSEQ_DOTDOT) {
				/* After "..": the first real entry. */
				de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
			} else if (de != NULL) {
				/* Already iterating: advance. */
				de = TAILQ_NEXT(de, td_entries);
			} else {
				/*
				 * Started in the middle of the directory:
				 * find the entry at 'off' and step past it.
				 */
				de = tmpfs_dir_lookupbyseq(node, off);
				KASSERT(de != NULL);
				de = TAILQ_NEXT(de, td_entries);
			}
			if (de == NULL) {
				off = TMPFS_DIRSEQ_EOF;
			} else {
				off = tmpfs_dir_getseq(node, de);
			}
		} else {
			/* After ".": the ".." entry. */
			off = TMPFS_DIRSEQ_DOTDOT;
		}
		(*cookies)[i] = off;
	}
	/* The replay must end exactly where the uio offset ended up. */
	KASSERT(uio->uio_offset == off);
	return error;
}
1011
1012int
1013tmpfs_readlink(void *v)
1014{
1015	struct vop_readlink_args /* {
1016		struct vnode	*a_vp;
1017		struct uio	*a_uio;
1018		kauth_cred_t	a_cred;
1019	} */ *ap = v;
1020	vnode_t *vp = ap->a_vp;
1021	struct uio *uio = ap->a_uio;
1022	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1023	int error;
1024
1025	KASSERT(VOP_ISLOCKED(vp));
1026	KASSERT(uio->uio_offset == 0);
1027	KASSERT(vp->v_type == VLNK);
1028
1029	/* Note: readlink(2) returns the path without NUL terminator. */
1030	if (node->tn_size > 0) {
1031		error = uiomove(node->tn_spec.tn_lnk.tn_link,
1032		    MIN(node->tn_size - 1, uio->uio_resid), uio);
1033	} else {
1034		error = 0;
1035	}
1036	tmpfs_update(vp, TMPFS_UPDATE_ATIME);
1037
1038	return error;
1039}
1040
1041int
1042tmpfs_inactive(void *v)
1043{
1044	struct vop_inactive_args /* {
1045		struct vnode *a_vp;
1046		bool *a_recycle;
1047	} */ *ap = v;
1048	vnode_t *vp = ap->a_vp;
1049	tmpfs_node_t *node;
1050
1051	KASSERT(VOP_ISLOCKED(vp));
1052
1053	node = VP_TO_TMPFS_NODE(vp);
1054	*ap->a_recycle = (node->tn_links == 0);
1055	VOP_UNLOCK(vp);
1056
1057	return 0;
1058}
1059
1060int
1061tmpfs_reclaim(void *v)
1062{
1063	struct vop_reclaim_args /* {
1064		struct vnode *a_vp;
1065	} */ *ap = v;
1066	vnode_t *vp = ap->a_vp;
1067	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
1068	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1069	bool racing;
1070
1071	/* Disassociate inode from vnode. */
1072	mutex_enter(&node->tn_vlock);
1073	node->tn_vnode = NULL;
1074	vp->v_data = NULL;
1075	/* Check if tmpfs_vnode_get() is racing with us. */
1076	racing = TMPFS_NODE_RECLAIMING(node);
1077	mutex_exit(&node->tn_vlock);
1078
1079	/*
1080	 * If inode is not referenced, i.e. no links, then destroy it.
1081	 * Note: if racing - inode is about to get a new vnode, leave it.
1082	 */
1083	if (node->tn_links == 0 && !racing) {
1084		tmpfs_free_node(tmp, node);
1085	}
1086	return 0;
1087}
1088
1089int
1090tmpfs_pathconf(void *v)
1091{
1092	struct vop_pathconf_args /* {
1093		struct vnode	*a_vp;
1094		int		a_name;
1095		register_t	*a_retval;
1096	} */ *ap = v;
1097	const int name = ap->a_name;
1098	register_t *retval = ap->a_retval;
1099	int error = 0;
1100
1101	switch (name) {
1102	case _PC_LINK_MAX:
1103		*retval = LINK_MAX;
1104		break;
1105	case _PC_NAME_MAX:
1106		*retval = TMPFS_MAXNAMLEN;
1107		break;
1108	case _PC_PATH_MAX:
1109		*retval = PATH_MAX;
1110		break;
1111	case _PC_PIPE_BUF:
1112		*retval = PIPE_BUF;
1113		break;
1114	case _PC_CHOWN_RESTRICTED:
1115		*retval = 1;
1116		break;
1117	case _PC_NO_TRUNC:
1118		*retval = 1;
1119		break;
1120	case _PC_SYNC_IO:
1121		*retval = 1;
1122		break;
1123	case _PC_FILESIZEBITS:
1124		*retval = sizeof(off_t) * CHAR_BIT;
1125		break;
1126	default:
1127		error = EINVAL;
1128	}
1129	return error;
1130}
1131
1132int
1133tmpfs_advlock(void *v)
1134{
1135	struct vop_advlock_args /* {
1136		struct vnode	*a_vp;
1137		void *		a_id;
1138		int		a_op;
1139		struct flock	*a_fl;
1140		int		a_flags;
1141	} */ *ap = v;
1142	vnode_t *vp = ap->a_vp;
1143	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1144
1145	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1146}
1147
1148int
1149tmpfs_getpages(void *v)
1150{
1151	struct vop_getpages_args /* {
1152		struct vnode *a_vp;
1153		voff_t a_offset;
1154		struct vm_page **a_m;
1155		int *a_count;
1156		int a_centeridx;
1157		vm_prot_t a_access_type;
1158		int a_advice;
1159		int a_flags;
1160	} */ * const ap = v;
1161	vnode_t *vp = ap->a_vp;
1162	const voff_t offset = ap->a_offset;
1163	struct vm_page **pgs = ap->a_m;
1164	const int centeridx = ap->a_centeridx;
1165	const vm_prot_t access_type = ap->a_access_type;
1166	const int advice = ap->a_advice;
1167	const int flags = ap->a_flags;
1168	int error, npages = *ap->a_count;
1169	tmpfs_node_t *node;
1170	struct uvm_object *uobj;
1171
1172	KASSERT(vp->v_type == VREG);
1173	KASSERT(mutex_owned(vp->v_interlock));
1174
1175	node = VP_TO_TMPFS_NODE(vp);
1176	uobj = node->tn_spec.tn_reg.tn_aobj;
1177
1178	/*
1179	 * Currently, PGO_PASTEOF is not supported.
1180	 */
1181	if (vp->v_size <= offset + (centeridx << PAGE_SHIFT)) {
1182		if ((flags & PGO_LOCKED) == 0)
1183			mutex_exit(vp->v_interlock);
1184		return EINVAL;
1185	}
1186
1187	if (vp->v_size < offset + (npages << PAGE_SHIFT)) {
1188		npages = (round_page(vp->v_size) - offset) >> PAGE_SHIFT;
1189	}
1190
1191	if ((flags & PGO_LOCKED) != 0)
1192		return EBUSY;
1193
1194	if ((flags & PGO_NOTIMESTAMP) == 0) {
1195		u_int tflags = 0;
1196
1197		if ((vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
1198			tflags |= TMPFS_UPDATE_ATIME;
1199
1200		if ((access_type & VM_PROT_WRITE) != 0) {
1201			tflags |= TMPFS_UPDATE_MTIME;
1202			if (vp->v_mount->mnt_flag & MNT_RELATIME)
1203				tflags |= TMPFS_UPDATE_ATIME;
1204		}
1205		tmpfs_update(vp, tflags);
1206	}
1207
1208	/*
1209	 * Invoke the pager.
1210	 *
1211	 * Clean the array of pages before.  XXX: PR/32166
1212	 * Note that vnode lock is shared with underlying UVM object.
1213	 */
1214	if (pgs) {
1215		memset(pgs, 0, sizeof(struct vm_pages *) * npages);
1216	}
1217	KASSERT(vp->v_interlock == uobj->vmobjlock);
1218
1219	error = (*uobj->pgops->pgo_get)(uobj, offset, pgs, &npages, centeridx,
1220	    access_type, advice, flags | PGO_ALLPAGES);
1221
1222#if defined(DEBUG)
1223	if (!error && pgs) {
1224		for (int i = 0; i < npages; i++) {
1225			KASSERT(pgs[i] != NULL);
1226		}
1227	}
1228#endif
1229	return error;
1230}
1231
1232int
1233tmpfs_putpages(void *v)
1234{
1235	struct vop_putpages_args /* {
1236		struct vnode *a_vp;
1237		voff_t a_offlo;
1238		voff_t a_offhi;
1239		int a_flags;
1240	} */ * const ap = v;
1241	vnode_t *vp = ap->a_vp;
1242	const voff_t offlo = ap->a_offlo;
1243	const voff_t offhi = ap->a_offhi;
1244	const int flags = ap->a_flags;
1245	tmpfs_node_t *node;
1246	struct uvm_object *uobj;
1247	int error;
1248
1249	KASSERT(mutex_owned(vp->v_interlock));
1250
1251	if (vp->v_type != VREG) {
1252		mutex_exit(vp->v_interlock);
1253		return 0;
1254	}
1255
1256	node = VP_TO_TMPFS_NODE(vp);
1257	uobj = node->tn_spec.tn_reg.tn_aobj;
1258
1259	KASSERT(vp->v_interlock == uobj->vmobjlock);
1260	error = (*uobj->pgops->pgo_put)(uobj, offlo, offhi, flags);
1261
1262	/* XXX mtime */
1263
1264	return error;
1265}
1266
1267int
1268tmpfs_whiteout(void *v)
1269{
1270	struct vop_whiteout_args /* {
1271		struct vnode		*a_dvp;
1272		struct componentname	*a_cnp;
1273		int			a_flags;
1274	} */ *ap = v;
1275	vnode_t *dvp = ap->a_dvp;
1276	struct componentname *cnp = ap->a_cnp;
1277	const int flags = ap->a_flags;
1278	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
1279	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp);
1280	tmpfs_dirent_t *de;
1281	int error;
1282
1283	switch (flags) {
1284	case LOOKUP:
1285		break;
1286	case CREATE:
1287		error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr,
1288		    cnp->cn_namelen, &de);
1289		if (error)
1290			return error;
1291		tmpfs_dir_attach(dnode, de, TMPFS_NODE_WHITEOUT);
1292		break;
1293	case DELETE:
1294		cnp->cn_flags &= ~DOWHITEOUT; /* when in doubt, cargo cult */
1295		de = tmpfs_dir_lookup(dnode, cnp);
1296		if (de == NULL)
1297			return ENOENT;
1298		tmpfs_dir_detach(dnode, de);
1299		tmpfs_free_dirent(tmp, de);
1300		break;
1301	}
1302	tmpfs_update(dvp, TMPFS_UPDATE_MTIME | TMPFS_UPDATE_CTIME);
1303	return 0;
1304}
1305
1306int
1307tmpfs_print(void *v)
1308{
1309	struct vop_print_args /* {
1310		struct vnode	*a_vp;
1311	} */ *ap = v;
1312	vnode_t *vp = ap->a_vp;
1313	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
1314
1315	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n"
1316	    "\tmode 0%o, owner %d, group %d, size %" PRIdMAX,
1317	    node, node->tn_flags, node->tn_links, node->tn_mode, node->tn_uid,
1318	    node->tn_gid, (uintmax_t)node->tn_size);
1319	if (vp->v_type == VFIFO) {
1320		VOCALL(fifo_vnodeop_p, VOFFSET(vop_print), v);
1321	}
1322	printf("\n");
1323	return 0;
1324}
1325