tmpfs_vnops.c revision 171029
1/*	$NetBSD: tmpfs_vnops.c,v 1.20 2006/01/26 20:07:34 jmmv Exp $	*/
2
3/*
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *        This product includes software developed by the NetBSD
22 *        Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 *    contributors may be used to endorse or promote products derived
25 *    from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * tmpfs vnode interface.
42 */
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vnops.c 171029 2007-06-25 18:46:13Z delphij $");
45
46#include <sys/param.h>
47#include <sys/fcntl.h>
48#include <sys/lockf.h>
49#include <sys/namei.h>
50#include <sys/priv.h>
51#include <sys/proc.h>
52#include <sys/resourcevar.h>
53#include <sys/stat.h>
54#include <sys/systm.h>
55#include <sys/unistd.h>
56#include <sys/vnode.h>
57
58#include <vm/vm.h>
59#include <vm/vm_object.h>
60#include <vm/vm_page.h>
61#include <vm/vm_pager.h>
62#include <sys/sched.h>
63#include <sys/sf_buf.h>
64#include <machine/_inttypes.h>
65
66#include <fs/fifofs/fifo.h>
67#include <fs/tmpfs/tmpfs_vnops.h>
68#include <fs/tmpfs/tmpfs.h>
69
70/* --------------------------------------------------------------------- */
71
72/*
73 * vnode operations vector used for files stored in a tmpfs file system.
74 */
75struct vop_vector tmpfs_vnodeop_entries = {
76	.vop_default =			&default_vnodeops,
77	.vop_lookup =			vfs_cache_lookup,
78	.vop_cachedlookup =		tmpfs_lookup,
79	.vop_create =			tmpfs_create,
80	.vop_mknod =			tmpfs_mknod,
81	.vop_open =			tmpfs_open,
82	.vop_close =			tmpfs_close,
83	.vop_access =			tmpfs_access,
84	.vop_getattr =			tmpfs_getattr,
85	.vop_setattr =			tmpfs_setattr,
86	.vop_read =			tmpfs_read,
87	.vop_write =			tmpfs_write,
88	.vop_fsync =			tmpfs_fsync,
89	.vop_remove =			tmpfs_remove,
90	.vop_link =			tmpfs_link,
91	.vop_rename =			tmpfs_rename,
92	.vop_mkdir =			tmpfs_mkdir,
93	.vop_rmdir =			tmpfs_rmdir,
94	.vop_symlink = 			tmpfs_symlink,
95	.vop_readdir =			tmpfs_readdir,
96	.vop_readlink =			tmpfs_readlink,
97	.vop_inactive =			tmpfs_inactive,
98	.vop_reclaim =			tmpfs_reclaim,
99	.vop_print =			tmpfs_print,
100	.vop_pathconf =			tmpfs_pathconf,
101	.vop_advlock =			tmpfs_advlock,
102	.vop_bmap =			VOP_EOPNOTSUPP,
103};
104
105/* --------------------------------------------------------------------- */
106
/*
 * Cached-lookup handler: resolves the component described by v->a_cnp
 * inside the directory v->a_dvp and stores the resulting vnode in
 * *v->a_vpp (which is reset to NULLVP on entry).
 *
 * Returns 0 when a vnode was produced; EJUSTRETURN when the name does
 * not exist but this is the last component of a CREATE or RENAME and
 * the directory is writable; ENOENT when the name does not exist
 * otherwise; ENOTDIR when a non-final component is neither a directory
 * nor a symbolic link; EPERM when the S_ISTXT check below rejects a
 * DELETE or RENAME; or the error reported by VOP_ACCESS or
 * tmpfs_alloc_vp.
 */
107int
108tmpfs_lookup(struct vop_cachedlookup_args *v)
109{
110	struct vnode *dvp = v->a_dvp;
111	struct vnode **vpp = v->a_vpp;
112	struct componentname *cnp = v->a_cnp;
113	struct thread *td = cnp->cn_thread;
114
115	int error;
116	struct tmpfs_dirent *de;
117	struct tmpfs_node *dnode;
118
119	dnode = VP_TO_TMPFS_DIR(dvp);
120	*vpp = NULLVP;
121
122	/* Check that the caller may search (VEXEC) the directory as a
	 * first step. */
123	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
124	if (error != 0)
125		goto out;
126
127	/* We cannot be requesting the parent directory of the root node. */
128	MPASS(IMPLIES(dnode->tn_type == VDIR &&
129	    dnode->tn_dir.tn_parent == dnode,
130	    !(cnp->cn_flags & ISDOTDOT)));
131
132	if (cnp->cn_flags & ISDOTDOT) {
		/* dvp is unlocked while the parent's vnode is obtained and
		 * is locked again afterwards, whatever tmpfs_alloc_vp
		 * returned. */
133		VOP_UNLOCK(dvp, 0, td);
134
135		/* Allocate a new vnode on the matching entry. */
136		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent, vpp, td);
137
138		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
139
140		dnode->tn_dir.tn_parent->tn_lookup_dirent = NULL;
141	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		/* "." resolves to dvp itself; take an extra reference. */
142		VREF(dvp);
143		*vpp = dvp;
144		dnode->tn_lookup_dirent = NULL;
145		error = 0;
146	} else {
147		de = tmpfs_dir_lookup(dnode, cnp);
148		if (de == NULL) {
149			/* The entry was not found in the directory.
150			 * This is OK if we are creating or renaming an
151			 * entry and are working on the last component of
152			 * the path name. */
153			if ((cnp->cn_flags & ISLASTCN) &&
154			    (cnp->cn_nameiop == CREATE || \
155			    cnp->cn_nameiop == RENAME)) {
156				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
157				    cnp->cn_thread);
158				if (error != 0)
159					goto out;
160
161				/* Keep the component name in the buffer for
162				 * future uses. */
163				cnp->cn_flags |= SAVENAME;
164
165				error = EJUSTRETURN;
166			} else
167				error = ENOENT;
168		} else {
169			struct tmpfs_node *tnode;
170
171			/* The entry was found, so get its associated
172			 * tmpfs_node. */
173			tnode = de->td_node;
174
175			/* If we are not at the last path component and
176			 * found a non-directory or non-link entry (which
177			 * may itself be pointing to a directory), raise
178			 * an error. */
179			if ((tnode->tn_type != VDIR &&
180			    tnode->tn_type != VLNK) &&
181			    !(cnp->cn_flags & ISLASTCN)) {
182				error = ENOTDIR;
183				goto out;
184			}
185
186			/* If we are deleting or renaming the entry, keep
187			 * track of its tmpfs_dirent so that it can be
188			 * easily deleted later. */
189			if ((cnp->cn_flags & ISLASTCN) &&
190			    (cnp->cn_nameiop == DELETE ||
191			    cnp->cn_nameiop == RENAME)) {
192				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
193				    cnp->cn_thread);
194				if (error != 0)
195					goto out;
196
197				/* Allocate a new vnode on the matching entry. */
198				error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp, td);
199				if (error != 0)
200					goto out;
201
				/* When the directory has S_ISTXT set, refuse
				 * the operation unless the caller passes the
				 * VADMIN check on the directory or on the
				 * entry itself; the new vnode is released
				 * again on refusal. */
202				if ((dnode->tn_mode & S_ISTXT) &&
203				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) &&
204				  VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) {
205					error = EPERM;
206					vput(*vpp);
207					*vpp = NULL;
208					goto out;
209				}
210				tnode->tn_lookup_dirent = de;
211				cnp->cn_flags |= SAVENAME;
212			}
213			else
214				error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp, td);
215
216		}
217	}
218
219	/* Store the result of this lookup in the cache.  Avoid this if the
220	 * request was for creation, as it does not improve timings on
221	 * empirical tests.  Note that this point is also reached with a
	 * non-zero error (and *vpp still NULLVP) from the branches above
	 * that do not jump to 'out'. */
222	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE)
223		cache_enter(dvp, *vpp, cnp);
224
225out:
226	/* If there were no errors, *vpp cannot be null and it must be
227	 * locked. */
228	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp, td)));
229
230	return error;
231}
232
233/* --------------------------------------------------------------------- */
234
235int
236tmpfs_create(struct vop_create_args *v)
237{
238	struct vnode *dvp = v->a_dvp;
239	struct vnode **vpp = v->a_vpp;
240	struct componentname *cnp = v->a_cnp;
241	struct vattr *vap = v->a_vap;
242
243	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
244
245	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
246}
247/* --------------------------------------------------------------------- */
248
249int
250tmpfs_mknod(struct vop_mknod_args *v)
251{
252	struct vnode *dvp = v->a_dvp;
253	struct vnode **vpp = v->a_vpp;
254	struct componentname *cnp = v->a_cnp;
255	struct vattr *vap = v->a_vap;
256
257	if (vap->va_type != VBLK && vap->va_type != VCHR &&
258	    vap->va_type != VFIFO)
259		return EINVAL;
260
261	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
262}
263
264/* --------------------------------------------------------------------- */
265
266int
267tmpfs_open(struct vop_open_args *v)
268{
269	struct vnode *vp = v->a_vp;
270	int mode = v->a_mode;
271
272	int error;
273	struct tmpfs_node *node;
274
275	MPASS(VOP_ISLOCKED(vp, v->a_td));
276
277	node = VP_TO_TMPFS_NODE(vp);
278
279	/* The file is still active but all its names have been removed
280	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
281	 * it is about to die. */
282	if (node->tn_links < 1)
283		return (ENOENT);
284
285	/* If the file is marked append-only, deny write requests. */
286	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
287		error = EPERM;
288	else {
289		error = 0;
290		vnode_create_vobject(vp, node->tn_size, v->a_td);
291	}
292
293	MPASS(VOP_ISLOCKED(vp, v->a_td));
294	return error;
295}
296
297/* --------------------------------------------------------------------- */
298
299int
300tmpfs_close(struct vop_close_args *v)
301{
302	struct vnode *vp = v->a_vp;
303
304	struct tmpfs_node *node;
305
306	MPASS(VOP_ISLOCKED(vp, v->a_td));
307
308	node = VP_TO_TMPFS_NODE(vp);
309
310	if (node->tn_links > 0) {
311		/* Update node times.  No need to do it if the node has
312		 * been deleted, because it will vanish after we return. */
313		tmpfs_update(vp);
314	}
315
316	return 0;
317}
318
319/* --------------------------------------------------------------------- */
320
321int
322tmpfs_access(struct vop_access_args *v)
323{
324	struct vnode *vp = v->a_vp;
325	int mode = v->a_mode;
326	struct ucred *cred = v->a_cred;
327
328	int error;
329	struct tmpfs_node *node;
330
331	MPASS(VOP_ISLOCKED(vp, v->a_td));
332
333	node = VP_TO_TMPFS_NODE(vp);
334
335	switch (vp->v_type) {
336	case VDIR:
337		/* FALLTHROUGH */
338	case VLNK:
339		/* FALLTHROUGH */
340	case VREG:
341		if (mode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
342			error = EROFS;
343			goto out;
344		}
345		break;
346
347	case VBLK:
348		/* FALLTHROUGH */
349	case VCHR:
350		/* FALLTHROUGH */
351	case VSOCK:
352		/* FALLTHROUGH */
353	case VFIFO:
354		break;
355
356	default:
357		error = EINVAL;
358		goto out;
359	}
360
361	if (mode & VWRITE && node->tn_flags & IMMUTABLE) {
362		error = EPERM;
363		goto out;
364	}
365
366	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
367	    node->tn_gid, mode, cred, NULL);
368
369out:
370	MPASS(VOP_ISLOCKED(vp, v->a_td));
371
372	return error;
373}
374
375/* --------------------------------------------------------------------- */
376
377int
378tmpfs_getattr(struct vop_getattr_args *v)
379{
380	struct vnode *vp = v->a_vp;
381	struct vattr *vap = v->a_vap;
382
383	struct tmpfs_node *node;
384
385	node = VP_TO_TMPFS_NODE(vp);
386
387	VATTR_NULL(vap);
388
389	tmpfs_update(vp);
390
391	vap->va_type = vp->v_type;
392	vap->va_mode = node->tn_mode;
393	vap->va_nlink = node->tn_links;
394	vap->va_uid = node->tn_uid;
395	vap->va_gid = node->tn_gid;
396	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
397	vap->va_fileid = node->tn_id;
398	vap->va_size = node->tn_size;
399	vap->va_blocksize = PAGE_SIZE;
400	vap->va_atime = node->tn_atime;
401	vap->va_mtime = node->tn_mtime;
402	vap->va_ctime = node->tn_ctime;
403	vap->va_birthtime = node->tn_birthtime;
404	vap->va_gen = node->tn_gen;
405	vap->va_flags = node->tn_flags;
406	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
407		node->tn_rdev : VNOVAL;
408	vap->va_bytes = round_page(node->tn_size);
409	vap->va_filerev = VNOVAL;
410	vap->va_vaflags = 0;
411	vap->va_spare = VNOVAL; /* XXX */
412
413	return 0;
414}
415
416/* --------------------------------------------------------------------- */
417
418/* XXX Should this operation be atomic?  I think it should, but code in
419 * XXX other places (e.g., ufs) doesn't seem to be... */
420int
421tmpfs_setattr(struct vop_setattr_args *v)
422{
423	struct vnode *vp = v->a_vp;
424	struct vattr *vap = v->a_vap;
425	struct ucred *cred = v->a_cred;
426	struct thread *l = v->a_td;
427
428	int error;
429
430	MPASS(VOP_ISLOCKED(vp, l));
431
432	error = 0;
433
434	/* Abort if any unsettable attribute is given. */
435	if (vap->va_type != VNON ||
436	    vap->va_nlink != VNOVAL ||
437	    vap->va_fsid != VNOVAL ||
438	    vap->va_fileid != VNOVAL ||
439	    vap->va_blocksize != VNOVAL ||
440	    vap->va_gen != VNOVAL ||
441	    vap->va_rdev != VNOVAL ||
442	    vap->va_bytes != VNOVAL)
443		error = EINVAL;
444
445	if (error == 0 && (vap->va_flags != VNOVAL))
446		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
447
448	if (error == 0 && (vap->va_size != VNOVAL))
449		error = tmpfs_chsize(vp, vap->va_size, cred, l);
450
451	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
452		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred,
453		    l);
454
455	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
456		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
457
458	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
459	    vap->va_atime.tv_nsec != VNOVAL) ||
460	    (vap->va_mtime.tv_sec != VNOVAL &&
461	    vap->va_mtime.tv_nsec != VNOVAL) ||
462	    (vap->va_birthtime.tv_sec != VNOVAL &&
463	    vap->va_birthtime.tv_nsec != VNOVAL)))
464		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
465			&vap->va_birthtime, vap->va_vaflags, cred, l);
466
467	/* Update the node times.  We give preference to the error codes
468	 * generated by this function rather than the ones that may arise
469	 * from tmpfs_update. */
470	tmpfs_update(vp);
471
472	MPASS(VOP_ISLOCKED(vp, l));
473
474	return error;
475}
476
477/* --------------------------------------------------------------------- */
/*
 * Moves data between the pages of the VM object uobj and uio, at most
 * one page per loop iteration.  The direction is taken from
 * uio->uio_rw.  The loop ends when uio has no residual bytes left,
 * when uio->uio_offset reaches node->tn_size, or on the first
 * uiomove() error.
 *
 * Returns 0, or the error reported by uiomove().  The tmp argument is
 * not referenced in the body.
 */
478static int
479tmpfs_uio_xfer(struct tmpfs_mount *tmp, struct tmpfs_node *node,
480    struct uio *uio, vm_object_t uobj)
481{
482	struct sf_buf *sf;
483	vm_pindex_t idx;
484	vm_offset_t d;
485	vm_page_t m;
486	size_t len;
487	int error = 0;
488
489	/* uobj is locked and unlocked by this function itself. */
490
491	VM_OBJECT_LOCK(uobj);
492	vm_object_pip_add(uobj, 1);
493	while (error == 0 && uio->uio_resid > 0) {
		/* Never transfer at or beyond the current file size. */
494		if (node->tn_size <= uio->uio_offset)
495			break;
496
497		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
498		if (len == 0)
499			break;
500
		/* idx is the page index holding the current offset, d the
		 * byte offset inside that page; len is clipped so the
		 * copy stays within this page. */
501		idx = OFF_TO_IDX(uio->uio_offset);
502		d = uio->uio_offset - IDX_TO_OFF(idx);
503		len = MIN(len, (PAGE_SIZE - d));
504		m = vm_page_grab(uobj, idx, VM_ALLOC_WIRED | VM_ALLOC_ZERO |
505				VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
		/* On reads, zero whatever part of the page is not valid
		 * before it is copied out. */
506		if (uio->uio_rw == UIO_READ && m->valid != VM_PAGE_BITS_ALL)
507			vm_page_zero_invalid(m, TRUE);
		/* The object lock is dropped around the copy; the page is
		 * reached through a temporary sf_buf mapping. */
508		VM_OBJECT_UNLOCK(uobj);
509		sched_pin();
510		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
511		error = uiomove((void *)(sf_buf_kva(sf) + d), len, uio);
512		sf_buf_free(sf);
513		sched_unpin();
514		VM_OBJECT_LOCK(uobj);
515		vm_page_lock_queues();
		/* After a successful write, mark the written range valid,
		 * zero the rest of the invalid area and dirty the page. */
516		if (error == 0 && uio->uio_rw == UIO_WRITE) {
517			vm_page_set_validclean(m, d, len);
518			vm_page_zero_invalid(m, TRUE);
519			vm_page_dirty(m);
520		}
521		vm_page_unwire(m, 0);
522		vm_page_activate(m);
523		vm_page_wakeup(m);
524		vm_page_unlock_queues();
525	}
526	vm_object_pip_subtract(uobj, 1);
527	VM_OBJECT_UNLOCK(uobj);
528	return error;
529}
530
531int
532tmpfs_read(struct vop_read_args *v)
533{
534	struct vnode *vp = v->a_vp;
535	struct uio *uio = v->a_uio;
536
537	struct tmpfs_node *node;
538	vm_object_t uobj;
539
540	int error;
541
542	node = VP_TO_TMPFS_NODE(vp);
543
544	if (vp->v_type != VREG) {
545		error = EISDIR;
546		goto out;
547	}
548
549	if (uio->uio_offset < 0) {
550		error = EINVAL;
551		goto out;
552	}
553
554	node->tn_status |= TMPFS_NODE_ACCESSED;
555
556	uobj = node->tn_reg.tn_aobj;
557	error = tmpfs_uio_xfer(VFS_TO_TMPFS(vp->v_mount), node, uio, uobj);
558
559out:
560
561	return error;
562}
563
564/* --------------------------------------------------------------------- */
565
566int
567tmpfs_write(struct vop_write_args *v)
568{
569	struct vnode *vp = v->a_vp;
570	struct uio *uio = v->a_uio;
571	int ioflag = v->a_ioflag;
572	struct thread *td = uio->uio_td;
573
574	boolean_t extended;
575	int error;
576	off_t oldsize;
577	struct tmpfs_node *node;
578	vm_object_t uobj;
579
580	node = VP_TO_TMPFS_NODE(vp);
581	oldsize = node->tn_size;
582
583	if (uio->uio_offset < 0 || vp->v_type != VREG) {
584		error = EINVAL;
585		goto out;
586	}
587
588	if (uio->uio_resid == 0) {
589		error = 0;
590		goto out;
591	}
592
593	if (ioflag & IO_APPEND)
594		uio->uio_offset = node->tn_size;
595
596	if (uio->uio_offset + uio->uio_resid >
597	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
598		return (EFBIG);
599
600	if (vp->v_type == VREG && td != NULL) {
601		PROC_LOCK(td->td_proc);
602		if (uio->uio_offset + uio->uio_resid >
603		  lim_cur(td->td_proc, RLIMIT_FSIZE)) {
604			psignal(td->td_proc, SIGXFSZ);
605			PROC_UNLOCK(td->td_proc);
606			return (EFBIG);
607		}
608		PROC_UNLOCK(td->td_proc);
609	}
610
611	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
612	if (extended) {
613		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
614		if (error != 0)
615			goto out;
616	}
617
618	uobj = node->tn_reg.tn_aobj;
619	error = tmpfs_uio_xfer(VFS_TO_TMPFS(vp->v_mount), node, uio, uobj);
620
621	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
622	    (extended ? TMPFS_NODE_CHANGED : 0);
623
624	if (node->tn_mode & (S_ISUID | S_ISGID)) {
625		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0))
626			node->tn_mode &= ~(S_ISUID | S_ISGID);
627	}
628
629	if (error != 0)
630		(void)tmpfs_reg_resize(vp, oldsize);
631
632out:
633	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
634	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
635
636	return error;
637}
638
639/* --------------------------------------------------------------------- */
640
641int
642tmpfs_fsync(struct vop_fsync_args *v)
643{
644	struct vnode *vp = v->a_vp;
645
646	MPASS(VOP_ISLOCKED(vp, v->a_td));
647
648	tmpfs_update(vp);
649
650	return 0;
651}
652
653/* --------------------------------------------------------------------- */
654
655int
656tmpfs_remove(struct vop_remove_args *v)
657{
658	struct vnode *dvp = v->a_dvp;
659	struct vnode *vp = v->a_vp;
660
661	int error;
662	struct tmpfs_dirent *de;
663	struct tmpfs_mount *tmp;
664	struct tmpfs_node *dnode;
665	struct tmpfs_node *node;
666
667	MPASS(VOP_ISLOCKED(dvp, v->a_cnp->cn_thread));
668	MPASS(VOP_ISLOCKED(vp, v->a_cnp->cn_thread));
669
670	if (vp->v_type == VDIR) {
671		error = EISDIR;
672		goto out;
673	}
674
675	dnode = VP_TO_TMPFS_DIR(dvp);
676	node = VP_TO_TMPFS_NODE(vp);
677	tmp = VFS_TO_TMPFS(vp->v_mount);
678	de = node->tn_lookup_dirent;
679	MPASS(de != NULL);
680
681	/* Files marked as immutable or append-only cannot be deleted. */
682	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
683	    (dnode->tn_flags & APPEND)) {
684		error = EPERM;
685		goto out;
686	}
687
688	/* Remove the entry from the directory; as it is a file, we do not
689	 * have to change the number of hard links of the directory. */
690	tmpfs_dir_detach(dvp, de);
691
692	/* Free the directory entry we just deleted.  Note that the node
693	 * referred by it will not be removed until the vnode is really
694	 * reclaimed. */
695	tmpfs_free_dirent(tmp, de, TRUE);
696
697	if (node->tn_links > 0)
698		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
699	    TMPFS_NODE_MODIFIED;
700	error = 0;
701
702out:
703
704	return error;
705}
706
707/* --------------------------------------------------------------------- */
708
709int
710tmpfs_link(struct vop_link_args *v)
711{
712	struct vnode *dvp = v->a_tdvp;
713	struct vnode *vp = v->a_vp;
714	struct componentname *cnp = v->a_cnp;
715
716	int error;
717	struct tmpfs_dirent *de;
718	struct tmpfs_node *dnode;
719	struct tmpfs_node *node;
720
721	MPASS(VOP_ISLOCKED(dvp, cnp->cn_thread));
722	MPASS(cnp->cn_flags & HASBUF);
723	MPASS(dvp != vp); /* XXX When can this be false? */
724
725	dnode = VP_TO_TMPFS_DIR(dvp);
726	node = VP_TO_TMPFS_NODE(vp);
727
728	/* XXX: Why aren't the following two tests done by the caller? */
729
730	/* Hard links of directories are forbidden. */
731	if (vp->v_type == VDIR) {
732		error = EPERM;
733		goto out;
734	}
735
736	/* Cannot create cross-device links. */
737	if (dvp->v_mount != vp->v_mount) {
738		error = EXDEV;
739		goto out;
740	}
741
742	/* Ensure that we do not overflow the maximum number of links imposed
743	 * by the system. */
744	MPASS(node->tn_links <= LINK_MAX);
745	if (node->tn_links == LINK_MAX) {
746		error = EMLINK;
747		goto out;
748	}
749
750	/* We cannot create links of files marked immutable or append-only. */
751	if (node->tn_flags & (IMMUTABLE | APPEND)) {
752		error = EPERM;
753		goto out;
754	}
755
756	/* Allocate a new directory entry to represent the node. */
757	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
758	    cnp->cn_nameptr, cnp->cn_namelen, &de);
759	if (error != 0)
760		goto out;
761
762	/* Insert the new directory entry into the appropriate directory. */
763	tmpfs_dir_attach(dvp, de);
764
765	/* vp link count has changed, so update node times. */
766	node->tn_status |= TMPFS_NODE_CHANGED;
767	tmpfs_update(vp);
768
769	error = 0;
770out:
771
772	return error;
773}
774
775/* --------------------------------------------------------------------- */
776
777int
778tmpfs_rename(struct vop_rename_args *v)
779{
780	struct vnode *fdvp = v->a_fdvp;
781	struct vnode *fvp = v->a_fvp;
782	struct componentname *fcnp = v->a_fcnp;
783	struct vnode *tdvp = v->a_tdvp;
784	struct vnode *tvp = v->a_tvp;
785	struct componentname *tcnp = v->a_tcnp;
786	struct tmpfs_node *tnode = 0; /* pacify gcc */
787
788	char *newname;
789	int error;
790	struct tmpfs_dirent *de;
791	struct tmpfs_mount *tmp;
792	struct tmpfs_node *fdnode;
793	struct tmpfs_node *fnode;
794	struct tmpfs_node *tdnode;
795
796	MPASS(VOP_ISLOCKED(tdvp, tcnp->cn_thread));
797	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp, tcnp->cn_thread)));
798	MPASS(fcnp->cn_flags & HASBUF);
799	MPASS(tcnp->cn_flags & HASBUF);
800
801	fdnode = VP_TO_TMPFS_DIR(fdvp);
802	fnode = VP_TO_TMPFS_NODE(fvp);
803	de = fnode->tn_lookup_dirent;
804
805	/* Disallow cross-device renames.
806	 * XXX Why isn't this done by the caller? */
807	if (fvp->v_mount != tdvp->v_mount ||
808	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
809		error = EXDEV;
810		goto out;
811	}
812
813	tmp = VFS_TO_TMPFS(tdvp->v_mount);
814	tdnode = VP_TO_TMPFS_DIR(tdvp);
815
816	/* If source and target are the same file, there is nothing to do. */
817	if (fvp == tvp) {
818		error = 0;
819		goto out;
820	}
821
822	/* Avoid manipulating '.' and '..' entries. */
823	if (de == NULL) {
824		MPASS(fvp->v_type == VDIR);
825		error = EINVAL;
826		goto out;
827	}
828	MPASS(de->td_node == fnode);
829
830	/* If re-naming a directory to another preexisting directory
831	 * ensure that the target directory is empty so that its
832	 * removal causes no side effects.
833	 * Kern_rename gurantees the destination to be a directory
834	 * if the source is one. */
835	if (tvp != NULL) {
836		tnode = VP_TO_TMPFS_NODE(tvp);
837
838		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
839		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
840			error = EPERM;
841			goto out;
842		}
843
844	    	if ((de->td_node->tn_type == VDIR) && (tnode->tn_size > 0)) {
845			error = ENOTEMPTY;
846			goto out;
847		}
848	}
849
850	/* If we need to move the directory between entries, lock the
851	 * source so that we can safely operate on it. */
852	if (fdnode != tdnode) {
853		error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, tcnp->cn_thread);
854		if (error != 0)
855			goto out;
856	}
857
858	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
859	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
860		error = EPERM;
861		goto out_locked;
862	}
863
864	/* Ensure that we have enough memory to hold the new name, if it
865	 * has to be changed. */
866	if (fcnp->cn_namelen != tcnp->cn_namelen ||
867	    memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
868		newname = tmpfs_str_zone_alloc(&tmp->tm_str_pool, M_WAITOK,
869		    tcnp->cn_namelen);
870		if (newname == NULL) {
871			error = ENOSPC;
872			goto out_locked;
873		}
874	} else
875		newname = NULL;
876
877	/* If the node is being moved to another directory, we have to do
878	 * the move. */
879	if (fdnode != tdnode) {
880		/* In case we are moving a directory, we have to adjust its
881		 * parent to point to the new parent. */
882		if (de->td_node->tn_type == VDIR) {
883			struct tmpfs_node *n;
884
885			/* Ensure the target directory is not a child of the
886			 * directory being moved.  Otherwise, we'd end up
887			 * with stale nodes. */
888			n = tdnode;
889			while (n != n->tn_dir.tn_parent) {
890				if (n == fnode) {
891					error = EINVAL;
892					if (newname != NULL)
893						tmpfs_str_zone_free(&tmp->tm_str_pool,
894						    newname, tcnp->cn_namelen);
895					goto out_locked;
896				}
897				n = n->tn_dir.tn_parent;
898			}
899
900			/* Adjust the parent pointer. */
901			TMPFS_VALIDATE_DIR(fnode);
902			de->td_node->tn_dir.tn_parent = tdnode;
903
904			/* As a result of changing the target of the '..'
905			 * entry, the link count of the source and target
906			 * directories has to be adjusted. */
907			fdnode->tn_links--;
908			tdnode->tn_links++;
909		}
910
911		/* Do the move: just remove the entry from the source directory
912		 * and insert it into the target one. */
913		tmpfs_dir_detach(fdvp, de);
914		tmpfs_dir_attach(tdvp, de);
915	}
916
917	/* If the name has changed, we need to make it effective by changing
918	 * it in the directory entry. */
919	if (newname != NULL) {
920		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
921
922		tmpfs_str_zone_free(&tmp->tm_str_pool, de->td_name,
923		    de->td_namelen);
924		de->td_namelen = (uint16_t)tcnp->cn_namelen;
925		memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
926		de->td_name = newname;
927
928		fnode->tn_status |= TMPFS_NODE_CHANGED;
929		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
930	}
931
932	/* If we are overwriting an entry, we have to remove the old one
933	 * from the target directory. */
934	if (tvp != NULL) {
935		/* Remove the old entry from the target directory. */
936		de = tnode->tn_lookup_dirent;
937		tmpfs_dir_detach(tdvp, de);
938
939		/* Free the directory entry we just deleted.  Note that the
940		 * node referred by it will not be removed until the vnode is
941		 * really reclaimed. */
942		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
943	}
944
945	error = 0;
946
947out_locked:
948	if (fdnode != tdnode)
949		VOP_UNLOCK(fdvp, 0, tcnp->cn_thread);
950
951out:
952	/* Release target nodes. */
953	/* XXX: I don't understand when tdvp can be the same as tvp, but
954	 * other code takes care of this... */
955	if (tdvp == tvp)
956		vrele(tdvp);
957	else
958		vput(tdvp);
959	if (tvp != NULL)
960		vput(tvp);
961
962	/* Release source nodes. */
963	vrele(fdvp);
964	vrele(fvp);
965
966	return error;
967}
968
969/* --------------------------------------------------------------------- */
970
971int
972tmpfs_mkdir(struct vop_mkdir_args *v)
973{
974	struct vnode *dvp = v->a_dvp;
975	struct vnode **vpp = v->a_vpp;
976	struct componentname *cnp = v->a_cnp;
977	struct vattr *vap = v->a_vap;
978
979	MPASS(vap->va_type == VDIR);
980
981	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
982}
983
984/* --------------------------------------------------------------------- */
985
986int
987tmpfs_rmdir(struct vop_rmdir_args *v)
988{
989	struct vnode *dvp = v->a_dvp;
990	struct vnode *vp = v->a_vp;
991
992	int error;
993	struct tmpfs_dirent *de;
994	struct tmpfs_mount *tmp;
995	struct tmpfs_node *dnode;
996	struct tmpfs_node *node;
997
998	MPASS(VOP_ISLOCKED(dvp, v->a_cnp->cn_thread));
999	MPASS(VOP_ISLOCKED(vp, v->a_cnp->cn_thread));
1000
1001	tmp = VFS_TO_TMPFS(dvp->v_mount);
1002	dnode = VP_TO_TMPFS_DIR(dvp);
1003	node = VP_TO_TMPFS_DIR(vp);
1004
1005
1006	/* Directories with more than two entries ('.' and '..') cannot be
1007	  * removed. */
1008	 if (node->tn_size > 0) {
1009		 error = ENOTEMPTY;
1010		 goto out;
1011	 }
1012
1013	if ((dnode->tn_flags & APPEND)
1014	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1015		error = EPERM;
1016		goto out;
1017	}
1018
1019	/* This invariant holds only if we are not trying to remove "..".
1020	  * We checked for that above so this is safe now. */
1021	MPASS(node->tn_dir.tn_parent == dnode);
1022
1023	/* Get the directory entry associated with node (vp).  This was
1024	 * filled by tmpfs_lookup while looking up the entry. */
1025	de = node->tn_lookup_dirent;
1026	MPASS(TMPFS_DIRENT_MATCHES(de,
1027	    v->a_cnp->cn_nameptr,
1028	    v->a_cnp->cn_namelen));
1029
1030	/* Check flags to see if we are allowed to remove the directory. */
1031	if (dnode->tn_flags & APPEND
1032		|| node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1033		error = EPERM;
1034		goto out;
1035	}
1036
1037	/* Detach the directory entry from the directory (dnode). */
1038	tmpfs_dir_detach(dvp, de);
1039
1040	node->tn_links--;
1041	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1042	    TMPFS_NODE_MODIFIED;
1043	node->tn_dir.tn_parent->tn_links--;
1044	node->tn_dir.tn_parent->tn_status |= TMPFS_NODE_ACCESSED | \
1045	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1046
1047	cache_purge(dvp);
1048	cache_purge(vp);
1049
1050	/* Free the directory entry we just deleted.  Note that the node
1051	 * referred by it will not be removed until the vnode is really
1052	 * reclaimed. */
1053	tmpfs_free_dirent(tmp, de, TRUE);
1054
1055	/* Release the deleted vnode (will destroy the node, notify
1056	 * interested parties and clean it from the cache). */
1057
1058	dnode->tn_status |= TMPFS_NODE_CHANGED;
1059	tmpfs_update(dvp);
1060
1061	error = 0;
1062
1063out:
1064	return error;
1065}
1066
1067/* --------------------------------------------------------------------- */
1068
1069int
1070tmpfs_symlink(struct vop_symlink_args *v)
1071{
1072	struct vnode *dvp = v->a_dvp;
1073	struct vnode **vpp = v->a_vpp;
1074	struct componentname *cnp = v->a_cnp;
1075	struct vattr *vap = v->a_vap;
1076	char *target = v->a_target;
1077
1078#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1079	MPASS(vap->va_type == VLNK);
1080#else
1081	vap->va_type = VLNK;
1082#endif
1083
1084	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1085}
1086
1087/* --------------------------------------------------------------------- */
1088
1089int
1090tmpfs_readdir(struct vop_readdir_args *v)
1091{
1092	struct vnode *vp = v->a_vp;
1093	struct uio *uio = v->a_uio;
1094	int *eofflag = v->a_eofflag;
1095	u_long **cookies = v->a_cookies;
1096	int *ncookies = v->a_ncookies;
1097
1098	int error;
1099	off_t startoff;
1100	off_t cnt;
1101	struct tmpfs_node *node;
1102
1103	/* This operation only makes sense on directory nodes. */
1104	if (vp->v_type != VDIR) {
1105		error = ENOTDIR;
1106		goto out;
1107	}
1108
1109	node = VP_TO_TMPFS_DIR(vp);
1110
1111	startoff = uio->uio_offset;
1112
1113	cnt = 0;
1114	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1115		error = tmpfs_dir_getdotdent(node, uio);
1116		if (error == -1) {
1117			error = 0;
1118			goto outok;
1119		} else if (error != 0)
1120			goto outok;
1121		cnt++;
1122	}
1123
1124	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
1125		error = tmpfs_dir_getdotdotdent(node, uio);
1126		if (error == -1) {
1127			error = 0;
1128			goto outok;
1129		} else if (error != 0)
1130			goto outok;
1131		cnt++;
1132	}
1133
1134	error = tmpfs_dir_getdents(node, uio, &cnt);
1135	if (error == -1)
1136		error = 0;
1137	MPASS(error >= 0);
1138
1139outok:
1140	/* This label assumes that startoff has been
1141	 * initialized.  If the compiler didn't spit out warnings, we'd
1142	 * simply make this one be 'out' and drop 'outok'. */
1143
1144	if (eofflag != NULL)
1145		*eofflag =
1146		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1147
1148	/* Update NFS-related variables. */
1149	if (error == 0 && cookies != NULL && ncookies != NULL) {
1150		off_t i;
1151		off_t off = startoff;
1152		struct tmpfs_dirent *de = NULL;
1153
1154		*ncookies = cnt;
1155		*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1156
1157		for (i = 0; i < cnt; i++) {
1158			MPASS(off != TMPFS_DIRCOOKIE_EOF);
1159			if (off == TMPFS_DIRCOOKIE_DOT) {
1160				off = TMPFS_DIRCOOKIE_DOTDOT;
1161			} else {
1162				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1163					de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
1164				} else if (de != NULL) {
1165					de = TAILQ_NEXT(de, td_entries);
1166				} else {
1167					de = tmpfs_dir_lookupbycookie(node,
1168					    off);
1169					MPASS(de != NULL);
1170					de = TAILQ_NEXT(de, td_entries);
1171				}
1172				if (de == NULL) {
1173					off = TMPFS_DIRCOOKIE_EOF;
1174				} else {
1175					off = TMPFS_DIRCOOKIE(de);
1176				}
1177			}
1178
1179			(*cookies)[i] = off;
1180		}
1181		MPASS(uio->uio_offset == off);
1182	}
1183
1184out:
1185	return error;
1186}
1187
1188/* --------------------------------------------------------------------- */
1189
1190int
1191tmpfs_readlink(struct vop_readlink_args *v)
1192{
1193	struct vnode *vp = v->a_vp;
1194	struct uio *uio = v->a_uio;
1195
1196	int error;
1197	struct tmpfs_node *node;
1198
1199	MPASS(uio->uio_offset == 0);
1200	MPASS(vp->v_type == VLNK);
1201
1202	node = VP_TO_TMPFS_NODE(vp);
1203
1204	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1205	    uio);
1206	node->tn_status |= TMPFS_NODE_ACCESSED;
1207
1208	return error;
1209}
1210
1211/* --------------------------------------------------------------------- */
1212
1213int
1214tmpfs_inactive(struct vop_inactive_args *v)
1215{
1216	struct vnode *vp = v->a_vp;
1217	struct thread *l = v->a_td;
1218
1219	struct tmpfs_node *node;
1220
1221	MPASS(VOP_ISLOCKED(vp, l));
1222
1223	node = VP_TO_TMPFS_NODE(vp);
1224
1225	if (node->tn_links == 0)
1226		vrecycle(vp, l);
1227
1228	return 0;
1229}
1230
1231/* --------------------------------------------------------------------- */
1232
1233int
1234tmpfs_reclaim(struct vop_reclaim_args *v)
1235{
1236	struct vnode *vp = v->a_vp;
1237
1238	struct tmpfs_mount *tmp;
1239	struct tmpfs_node *node;
1240
1241	node = VP_TO_TMPFS_NODE(vp);
1242	tmp = VFS_TO_TMPFS(vp->v_mount);
1243
1244	vnode_destroy_vobject(vp);
1245	cache_purge(vp);
1246	tmpfs_free_vp(vp);
1247
1248	/* If the node referenced by this vnode was deleted by the user,
1249	 * we must free its associated data structures (now that the vnode
1250	 * is being reclaimed). */
1251	if (node->tn_links == 0)
1252		tmpfs_free_node(tmp, node);
1253
1254	MPASS(vp->v_data == NULL);
1255	return 0;
1256}
1257
1258/* --------------------------------------------------------------------- */
1259
1260int
1261tmpfs_print(struct vop_print_args *v)
1262{
1263	struct vnode *vp = v->a_vp;
1264
1265	struct tmpfs_node *node;
1266
1267	node = VP_TO_TMPFS_NODE(vp);
1268
1269	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1270	    node, node->tn_flags, node->tn_links);
1271	printf("\tmode 0%o, owner %d, group %d, size %" PRIdMAX
1272	    ", status 0x%x\n",
1273	    node->tn_mode, node->tn_uid, node->tn_gid,
1274	    (uintmax_t)node->tn_size, node->tn_status);
1275
1276	if (vp->v_type == VFIFO)
1277		fifo_printinfo(vp);
1278
1279	printf("\n");
1280
1281	return 0;
1282}
1283
1284/* --------------------------------------------------------------------- */
1285
1286int
1287tmpfs_pathconf(struct vop_pathconf_args *v)
1288{
1289	int name = v->a_name;
1290	register_t *retval = v->a_retval;
1291
1292	int error;
1293
1294	error = 0;
1295
1296	switch (name) {
1297	case _PC_LINK_MAX:
1298		*retval = LINK_MAX;
1299		break;
1300
1301	case _PC_NAME_MAX:
1302		*retval = NAME_MAX;
1303		break;
1304
1305	case _PC_PATH_MAX:
1306		*retval = PATH_MAX;
1307		break;
1308
1309	case _PC_PIPE_BUF:
1310		*retval = PIPE_BUF;
1311		break;
1312
1313	case _PC_CHOWN_RESTRICTED:
1314		*retval = 1;
1315		break;
1316
1317	case _PC_NO_TRUNC:
1318		*retval = 1;
1319		break;
1320
1321	case _PC_SYNC_IO:
1322		*retval = 1;
1323		break;
1324
1325	case _PC_FILESIZEBITS:
1326		*retval = 0; /* XXX Don't know which value should I return. */
1327		break;
1328
1329	default:
1330		error = EINVAL;
1331	}
1332
1333	return error;
1334}
1335
1336/* --------------------------------------------------------------------- */
1337
1338int
1339tmpfs_advlock(struct vop_advlock_args *v)
1340{
1341	struct vnode *vp = v->a_vp;
1342
1343	struct tmpfs_node *node;
1344
1345	node = VP_TO_TMPFS_NODE(vp);
1346
1347	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1348}
1349
1350/* --------------------------------------------------------------------- */
1351
1352int
1353tmpfs_vptofh(struct vop_vptofh_args *ap)
1354{
1355	struct tmpfs_fid *tfhp;
1356	struct tmpfs_node *node;
1357
1358	tfhp = (struct tmpfs_fid *)ap->a_fhp;
1359	node = VP_TO_TMPFS_NODE(ap->a_vp);
1360
1361	tfhp->tf_len = sizeof(struct tmpfs_fid);
1362	tfhp->tf_id = node->tn_id;
1363	tfhp->tf_gen = node->tn_gen;
1364
1365	return (0);
1366}
1367