tmpfs_vnops.c revision 170903
1/*	$NetBSD: tmpfs_vnops.c,v 1.20 2006/01/26 20:07:34 jmmv Exp $	*/
2
3/*
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *        This product includes software developed by the NetBSD
22 *        Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 *    contributors may be used to endorse or promote products derived
25 *    from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40/*
41 * tmpfs vnode interface.
42 */
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vnops.c 170903 2007-06-18 01:43:13Z delphij $");
45
46#include <sys/param.h>
47#include <sys/fcntl.h>
48#include <sys/lockf.h>
49#include <sys/namei.h>
50#include <sys/priv.h>
51#include <sys/proc.h>
52#include <sys/resourcevar.h>
53#include <sys/stat.h>
54#include <sys/systm.h>
55#include <sys/unistd.h>
56#include <sys/vnode.h>
57
58#include <vm/vm.h>
59#include <vm/vm_object.h>
60#include <vm/vm_page.h>
61#include <vm/vm_pager.h>
62#include <sys/sched.h>
63#include <sys/sf_buf.h>
64#include <machine/_inttypes.h>
65
66#include <fs/fifofs/fifo.h>
67#include <fs/tmpfs/tmpfs_vnops.h>
68#include <fs/tmpfs/tmpfs.h>
69
70/* --------------------------------------------------------------------- */
71
72/*
73 * vnode operations vector used for files stored in a tmpfs file system.
74 */
75struct vop_vector tmpfs_vnodeop_entries = {
76	.vop_default =			&default_vnodeops,
77	.vop_lookup =			vfs_cache_lookup,
78	.vop_cachedlookup =		tmpfs_lookup,
79	.vop_create =			tmpfs_create,
80	.vop_mknod =			tmpfs_mknod,
81	.vop_open =			tmpfs_open,
82	.vop_close =			tmpfs_close,
83	.vop_access =			tmpfs_access,
84	.vop_getattr =			tmpfs_getattr,
85	.vop_setattr =			tmpfs_setattr,
86	.vop_read =			tmpfs_read,
87	.vop_write =			tmpfs_write,
88	.vop_fsync =			tmpfs_fsync,
89	.vop_remove =			tmpfs_remove,
90	.vop_link =			tmpfs_link,
91	.vop_rename =			tmpfs_rename,
92	.vop_mkdir =			tmpfs_mkdir,
93	.vop_rmdir =			tmpfs_rmdir,
94	.vop_symlink = 			tmpfs_symlink,
95	.vop_readdir =			tmpfs_readdir,
96	.vop_readlink =			tmpfs_readlink,
97	.vop_inactive =			tmpfs_inactive,
98	.vop_reclaim =			tmpfs_reclaim,
99	.vop_print =			tmpfs_print,
100	.vop_pathconf =			tmpfs_pathconf,
101	.vop_advlock =			tmpfs_advlock,
102	.vop_bmap =			VOP_EOPNOTSUPP,
103};
104
105/* --------------------------------------------------------------------- */
106
107int
108tmpfs_lookup(struct vop_cachedlookup_args *v)
109{
110	struct vnode *dvp = v->a_dvp;
111	struct vnode **vpp = v->a_vpp;
112	struct componentname *cnp = v->a_cnp;
113	struct thread *td = cnp->cn_thread;
114
115	int error;
116	struct tmpfs_dirent *de;
117	struct tmpfs_node *dnode;
118
119	dnode = VP_TO_TMPFS_DIR(dvp);
120	*vpp = NULLVP;
121
122	/* Check accessibility of requested node as a first step. */
123	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
124	if (error != 0)
125		goto out;
126
127	/* We cannot be requesting the parent directory of the root node. */
128	MPASS(IMPLIES(dnode->tn_type == VDIR &&
129	    dnode->tn_dir.tn_parent == dnode,
130	    !(cnp->cn_flags & ISDOTDOT)));
131
132	if (cnp->cn_flags & ISDOTDOT) {
133		VOP_UNLOCK(dvp, 0, td);
134
135		/* Allocate a new vnode on the matching entry. */
136		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent, vpp, td);
137
138		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
139
140		dnode->tn_dir.tn_parent->tn_lookup_dirent = NULL;
141	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
142		VREF(dvp);
143		*vpp = dvp;
144		dnode->tn_lookup_dirent = NULL;
145		error = 0;
146	} else {
147		de = tmpfs_dir_lookup(dnode, cnp);
148		if (de == NULL) {
149			/* The entry was not found in the directory.
150			 * This is OK if we are creating or renaming an
151			 * entry and are working on the last component of
152			 * the path name. */
153			if ((cnp->cn_flags & ISLASTCN) &&
154			    (cnp->cn_nameiop == CREATE || \
155			    cnp->cn_nameiop == RENAME)) {
156				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
157				    cnp->cn_thread);
158				if (error != 0)
159					goto out;
160
161				/* Keep the component name in the buffer for
162				 * future uses. */
163				cnp->cn_flags |= SAVENAME;
164
165				error = EJUSTRETURN;
166			} else
167				error = ENOENT;
168		} else {
169			struct tmpfs_node *tnode;
170
171			/* The entry was found, so get its associated
172			 * tmpfs_node. */
173			tnode = de->td_node;
174
175			/* If we are not at the last path component and
176			 * found a non-directory or non-link entry (which
177			 * may itself be pointing to a directory), raise
178			 * an error. */
179			if ((tnode->tn_type != VDIR &&
180			    tnode->tn_type != VLNK) &&
181			    !(cnp->cn_flags & ISLASTCN)) {
182				error = ENOTDIR;
183				goto out;
184			}
185
186			/* If we are deleting or renaming the entry, keep
187			 * track of its tmpfs_dirent so that it can be
188			 * easily deleted later. */
189			if ((cnp->cn_flags & ISLASTCN) &&
190			    (cnp->cn_nameiop == DELETE ||
191			    cnp->cn_nameiop == RENAME)) {
192				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
193				    cnp->cn_thread);
194				if (error != 0)
195					goto out;
196
197				/* Allocate a new vnode on the matching entry. */
198				error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp, td);
199				if (error != 0)
200					goto out;
201
202				if ((dnode->tn_mode & S_ISTXT) &&
203				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) &&
204				  VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) {
205					error = EPERM;
206					vput(*vpp);
207					*vpp = NULL;
208					goto out;
209				}
210				tnode->tn_lookup_dirent = de;
211				cnp->cn_flags |= SAVENAME;
212			}
213			else
214				error = tmpfs_alloc_vp(dvp->v_mount, tnode, vpp, td);
215
216		}
217	}
218
219	/* Store the result of this lookup in the cache.  Avoid this if the
220	 * request was for creation, as it does not improve timings on
221	 * emprical tests. */
222	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE)
223		cache_enter(dvp, *vpp, cnp);
224
225out:
226	/* If there were no errors, *vpp cannot be null and it must be
227	 * locked. */
228	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp, td)));
229
230	return error;
231}
232
233/* --------------------------------------------------------------------- */
234
235int
236tmpfs_create(struct vop_create_args *v)
237{
238	struct vnode *dvp = v->a_dvp;
239	struct vnode **vpp = v->a_vpp;
240	struct componentname *cnp = v->a_cnp;
241	struct vattr *vap = v->a_vap;
242
243	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
244
245	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
246}
247/* --------------------------------------------------------------------- */
248
249int
250tmpfs_mknod(struct vop_mknod_args *v)
251{
252	struct vnode *dvp = v->a_dvp;
253	struct vnode **vpp = v->a_vpp;
254	struct componentname *cnp = v->a_cnp;
255	struct vattr *vap = v->a_vap;
256
257	if (vap->va_type != VBLK && vap->va_type != VCHR &&
258	    vap->va_type != VFIFO)
259		return EINVAL;
260
261	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
262}
263
264/* --------------------------------------------------------------------- */
265
266int
267tmpfs_open(struct vop_open_args *v)
268{
269	struct vnode *vp = v->a_vp;
270	int mode = v->a_mode;
271
272	int error;
273	struct tmpfs_node *node;
274
275	MPASS(VOP_ISLOCKED(vp, v->a_td));
276
277	node = VP_TO_TMPFS_NODE(vp);
278
279	/* The file is still active but all its names have been removed
280	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
281	 * it is about to die. */
282	if (node->tn_links < 1)
283		return (ENOENT);
284
285	/* If the file is marked append-only, deny write requests. */
286	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
287		error = EPERM;
288	else {
289		error = 0;
290		vnode_create_vobject(vp, node->tn_size, v->a_td);
291	}
292
293	MPASS(VOP_ISLOCKED(vp, v->a_td));
294	return error;
295}
296
297/* --------------------------------------------------------------------- */
298
299int
300tmpfs_close(struct vop_close_args *v)
301{
302	struct vnode *vp = v->a_vp;
303
304	struct tmpfs_node *node;
305
306	MPASS(VOP_ISLOCKED(vp, v->a_td));
307
308	node = VP_TO_TMPFS_NODE(vp);
309
310	if (node->tn_links > 0) {
311		/* Update node times.  No need to do it if the node has
312		 * been deleted, because it will vanish after we return. */
313		tmpfs_update(vp);
314	}
315
316	return 0;
317}
318
319/* --------------------------------------------------------------------- */
320
321int
322tmpfs_access(struct vop_access_args *v)
323{
324	struct vnode *vp = v->a_vp;
325	int mode = v->a_mode;
326	struct ucred *cred = v->a_cred;
327
328	int error;
329	struct tmpfs_node *node;
330
331	MPASS(VOP_ISLOCKED(vp, v->a_td));
332
333	node = VP_TO_TMPFS_NODE(vp);
334
335	switch (vp->v_type) {
336	case VDIR:
337		/* FALLTHROUGH */
338	case VLNK:
339		/* FALLTHROUGH */
340	case VREG:
341		if (mode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
342			error = EROFS;
343			goto out;
344		}
345		break;
346
347	case VBLK:
348		/* FALLTHROUGH */
349	case VCHR:
350		/* FALLTHROUGH */
351	case VSOCK:
352		/* FALLTHROUGH */
353	case VFIFO:
354		break;
355
356	default:
357		error = EINVAL;
358		goto out;
359	}
360
361	if (mode & VWRITE && node->tn_flags & IMMUTABLE) {
362		error = EPERM;
363		goto out;
364	}
365
366	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
367	    node->tn_gid, mode, cred, NULL);
368
369out:
370	MPASS(VOP_ISLOCKED(vp, v->a_td));
371
372	return error;
373}
374
375/* --------------------------------------------------------------------- */
376
377int
378tmpfs_getattr(struct vop_getattr_args *v)
379{
380	struct vnode *vp = v->a_vp;
381	struct vattr *vap = v->a_vap;
382
383	struct tmpfs_node *node;
384
385	node = VP_TO_TMPFS_NODE(vp);
386
387	VATTR_NULL(vap);
388
389	tmpfs_update(vp);
390
391	vap->va_type = vp->v_type;
392	vap->va_mode = node->tn_mode;
393	vap->va_nlink = node->tn_links;
394	vap->va_uid = node->tn_uid;
395	vap->va_gid = node->tn_gid;
396	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
397	vap->va_fileid = node->tn_id;
398	vap->va_size = node->tn_size;
399	vap->va_blocksize = PAGE_SIZE;
400	vap->va_atime = node->tn_atime;
401	vap->va_mtime = node->tn_mtime;
402	vap->va_ctime = node->tn_ctime;
403	vap->va_birthtime = node->tn_birthtime;
404	vap->va_gen = node->tn_gen;
405	vap->va_flags = node->tn_flags;
406	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
407		node->tn_rdev : VNOVAL;
408	vap->va_bytes = round_page(node->tn_size);
409	vap->va_filerev = VNOVAL;
410	vap->va_vaflags = 0;
411	vap->va_spare = VNOVAL; /* XXX */
412
413	return 0;
414}
415
416/* --------------------------------------------------------------------- */
417
418/* XXX Should this operation be atomic?  I think it should, but code in
419 * XXX other places (e.g., ufs) doesn't seem to be... */
420int
421tmpfs_setattr(struct vop_setattr_args *v)
422{
423	struct vnode *vp = v->a_vp;
424	struct vattr *vap = v->a_vap;
425	struct ucred *cred = v->a_cred;
426	struct thread *l = v->a_td;
427
428	int error;
429
430	MPASS(VOP_ISLOCKED(vp, l));
431
432	error = 0;
433
434	/* Abort if any unsettable attribute is given. */
435	if (vap->va_type != VNON ||
436	    vap->va_nlink != VNOVAL ||
437	    vap->va_fsid != VNOVAL ||
438	    vap->va_fileid != VNOVAL ||
439	    vap->va_blocksize != VNOVAL ||
440	    vap->va_gen != VNOVAL ||
441	    vap->va_rdev != VNOVAL ||
442	    vap->va_bytes != VNOVAL)
443		error = EINVAL;
444
445	if (error == 0 && (vap->va_flags != VNOVAL))
446		error = tmpfs_chflags(vp, vap->va_flags, cred, l);
447
448	if (error == 0 && (vap->va_size != VNOVAL))
449		error = tmpfs_chsize(vp, vap->va_size, cred, l);
450
451	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
452		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred,
453		    l);
454
455	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
456		error = tmpfs_chmod(vp, vap->va_mode, cred, l);
457
458	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
459	    vap->va_atime.tv_nsec != VNOVAL) ||
460	    (vap->va_mtime.tv_sec != VNOVAL &&
461	    vap->va_mtime.tv_nsec != VNOVAL) ||
462	    (vap->va_birthtime.tv_sec != VNOVAL &&
463	    vap->va_birthtime.tv_nsec != VNOVAL)))
464		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
465			&vap->va_birthtime, vap->va_vaflags, cred, l);
466
467	/* Update the node times.  We give preference to the error codes
468	 * generated by this function rather than the ones that may arise
469	 * from tmpfs_update. */
470	tmpfs_update(vp);
471
472	MPASS(VOP_ISLOCKED(vp, l));
473
474	return error;
475}
476
477/* --------------------------------------------------------------------- */
/*
 * Move data between the caller's uio and the VM object backing a tmpfs
 * regular file, at most one page per iteration.  The direction comes from
 * uio->uio_rw.  The loop ends when the uio is exhausted, when the uio
 * offset reaches node->tn_size, or on the first uiomove() failure.
 *
 * Returns 0, or the error reported by uiomove().  The tmp argument is not
 * referenced by the body.
 */
static int
tmpfs_uio_xfer(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    struct uio *uio, vm_object_t uobj)
{
	struct sf_buf *sf;
	vm_pindex_t idx;
	vm_offset_t d;
	vm_page_t m;
	size_t len;
	int error = 0;

	/* The object lock is taken here, released around every uiomove()
	 * and released for good before returning. */

	VM_OBJECT_LOCK(uobj);
	vm_object_pip_add(uobj, 1);
	while (error == 0 && uio->uio_resid > 0) {
		/* Never transfer at or beyond the current file size. */
		if (node->tn_size <= uio->uio_offset)
			break;

		len = MIN(node->tn_size - uio->uio_offset, uio->uio_resid);
		if (len == 0)
			break;

		/* idx is the page index within the object, d the byte offset
		 * inside that page; clamp len so that one iteration never
		 * crosses a page boundary. */
		idx = OFF_TO_IDX(uio->uio_offset);
		d = uio->uio_offset - IDX_TO_OFF(idx);
		len = MIN(len, (PAGE_SIZE - d));
		m = vm_page_grab(uobj, idx, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
		/* For reads of a page that is not fully valid, ask the pager
		 * for it and, if that fails, zero its invalid parts. */
		if (uio->uio_rw == UIO_READ && m->valid != VM_PAGE_BITS_ALL)
			if (vm_pager_get_pages(uobj, &m, 1, 0) != VM_PAGER_OK)
				vm_page_zero_invalid(m, TRUE);
		/* Hold the page while the object lock is dropped for the
		 * copy. */
		vm_page_lock_queues();
		vm_page_hold(m);
		vm_page_unlock_queues();
		VM_OBJECT_UNLOCK(uobj);
		/* Copy through an sf_buf mapping of the page; the thread is
		 * pinned for the lifetime of the mapping (presumably needed
		 * by SFB_CPUPRIVATE -- confirm). */
		sched_pin();
		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
		error = uiomove((void *)(sf_buf_kva(sf) + d), len, uio);
		sf_buf_free(sf);
		sched_unpin();
		VM_OBJECT_LOCK(uobj);
		vm_page_lock_queues();
		/* After a successful write, mark the written range valid,
		 * zero what is still invalid and dirty the page. */
		if (error == 0 && uio->uio_rw == UIO_WRITE) {
			vm_page_set_validclean(m, d, len);
			vm_page_zero_invalid(m, TRUE);
			vm_page_dirty(m);
		}
		vm_page_unhold(m);
		vm_page_activate(m);
		vm_page_wakeup(m);
		vm_page_unlock_queues();
	}
	vm_object_pip_subtract(uobj, 1);
	VM_OBJECT_UNLOCK(uobj);
	return error;
}
533
534int
535tmpfs_read(struct vop_read_args *v)
536{
537	struct vnode *vp = v->a_vp;
538	struct uio *uio = v->a_uio;
539
540	struct tmpfs_node *node;
541	vm_object_t uobj;
542
543	int error;
544
545	node = VP_TO_TMPFS_NODE(vp);
546
547	if (vp->v_type != VREG) {
548		error = EISDIR;
549		goto out;
550	}
551
552	if (uio->uio_offset < 0) {
553		error = EINVAL;
554		goto out;
555	}
556
557	node->tn_status |= TMPFS_NODE_ACCESSED;
558
559	uobj = node->tn_reg.tn_aobj;
560	error = tmpfs_uio_xfer(VFS_TO_TMPFS(vp->v_mount), node, uio, uobj);
561
562out:
563
564	return error;
565}
566
567/* --------------------------------------------------------------------- */
568
569int
570tmpfs_write(struct vop_write_args *v)
571{
572	struct vnode *vp = v->a_vp;
573	struct uio *uio = v->a_uio;
574	int ioflag = v->a_ioflag;
575	struct thread *td = uio->uio_td;
576
577	boolean_t extended;
578	int error;
579	off_t oldsize;
580	struct tmpfs_node *node;
581	vm_object_t uobj;
582
583	node = VP_TO_TMPFS_NODE(vp);
584	oldsize = node->tn_size;
585
586	if (uio->uio_offset < 0 || vp->v_type != VREG) {
587		error = EINVAL;
588		goto out;
589	}
590
591	if (uio->uio_resid == 0) {
592		error = 0;
593		goto out;
594	}
595
596	if (ioflag & IO_APPEND)
597		uio->uio_offset = node->tn_size;
598
599	if (uio->uio_offset + uio->uio_resid >
600	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
601		return (EFBIG);
602
603	if (vp->v_type == VREG && td != NULL) {
604		PROC_LOCK(td->td_proc);
605		if (uio->uio_offset + uio->uio_resid >
606		  lim_cur(td->td_proc, RLIMIT_FSIZE)) {
607			psignal(td->td_proc, SIGXFSZ);
608			PROC_UNLOCK(td->td_proc);
609			return (EFBIG);
610		}
611		PROC_UNLOCK(td->td_proc);
612	}
613
614	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
615	if (extended) {
616		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid);
617		if (error != 0)
618			goto out;
619	}
620
621	uobj = node->tn_reg.tn_aobj;
622	error = tmpfs_uio_xfer(VFS_TO_TMPFS(vp->v_mount), node, uio, uobj);
623
624	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
625	    (extended ? TMPFS_NODE_CHANGED : 0);
626
627	if (node->tn_mode & (S_ISUID | S_ISGID)) {
628		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0))
629			node->tn_mode &= ~(S_ISUID | S_ISGID);
630	}
631
632	if (error != 0)
633		(void)tmpfs_reg_resize(vp, oldsize);
634
635out:
636	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
637	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
638
639	return error;
640}
641
642/* --------------------------------------------------------------------- */
643
644int
645tmpfs_fsync(struct vop_fsync_args *v)
646{
647	struct vnode *vp = v->a_vp;
648
649	MPASS(VOP_ISLOCKED(vp, v->a_td));
650
651	tmpfs_update(vp);
652
653	return 0;
654}
655
656/* --------------------------------------------------------------------- */
657
658int
659tmpfs_remove(struct vop_remove_args *v)
660{
661	struct vnode *dvp = v->a_dvp;
662	struct vnode *vp = v->a_vp;
663
664	int error;
665	struct tmpfs_dirent *de;
666	struct tmpfs_mount *tmp;
667	struct tmpfs_node *dnode;
668	struct tmpfs_node *node;
669
670	MPASS(VOP_ISLOCKED(dvp, v->a_cnp->cn_thread));
671	MPASS(VOP_ISLOCKED(vp, v->a_cnp->cn_thread));
672
673	if (vp->v_type == VDIR) {
674		error = EISDIR;
675		goto out;
676	}
677
678	dnode = VP_TO_TMPFS_DIR(dvp);
679	node = VP_TO_TMPFS_NODE(vp);
680	tmp = VFS_TO_TMPFS(vp->v_mount);
681	de = node->tn_lookup_dirent;
682	MPASS(de != NULL);
683
684	/* Files marked as immutable or append-only cannot be deleted. */
685	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
686	    (dnode->tn_flags & APPEND)) {
687		error = EPERM;
688		goto out;
689	}
690
691	/* Remove the entry from the directory; as it is a file, we do not
692	 * have to change the number of hard links of the directory. */
693	tmpfs_dir_detach(dvp, de);
694
695	/* Free the directory entry we just deleted.  Note that the node
696	 * referred by it will not be removed until the vnode is really
697	 * reclaimed. */
698	tmpfs_free_dirent(tmp, de, TRUE);
699
700	if (node->tn_links > 0)
701		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
702	    TMPFS_NODE_MODIFIED;
703	error = 0;
704
705out:
706
707	return error;
708}
709
710/* --------------------------------------------------------------------- */
711
712int
713tmpfs_link(struct vop_link_args *v)
714{
715	struct vnode *dvp = v->a_tdvp;
716	struct vnode *vp = v->a_vp;
717	struct componentname *cnp = v->a_cnp;
718
719	int error;
720	struct tmpfs_dirent *de;
721	struct tmpfs_node *dnode;
722	struct tmpfs_node *node;
723
724	MPASS(VOP_ISLOCKED(dvp, cnp->cn_thread));
725	MPASS(cnp->cn_flags & HASBUF);
726	MPASS(dvp != vp); /* XXX When can this be false? */
727
728	dnode = VP_TO_TMPFS_DIR(dvp);
729	node = VP_TO_TMPFS_NODE(vp);
730
731	/* XXX: Why aren't the following two tests done by the caller? */
732
733	/* Hard links of directories are forbidden. */
734	if (vp->v_type == VDIR) {
735		error = EPERM;
736		goto out;
737	}
738
739	/* Cannot create cross-device links. */
740	if (dvp->v_mount != vp->v_mount) {
741		error = EXDEV;
742		goto out;
743	}
744
745	/* Ensure that we do not overflow the maximum number of links imposed
746	 * by the system. */
747	MPASS(node->tn_links <= LINK_MAX);
748	if (node->tn_links == LINK_MAX) {
749		error = EMLINK;
750		goto out;
751	}
752
753	/* We cannot create links of files marked immutable or append-only. */
754	if (node->tn_flags & (IMMUTABLE | APPEND)) {
755		error = EPERM;
756		goto out;
757	}
758
759	/* Allocate a new directory entry to represent the node. */
760	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
761	    cnp->cn_nameptr, cnp->cn_namelen, &de);
762	if (error != 0)
763		goto out;
764
765	/* Insert the new directory entry into the appropriate directory. */
766	tmpfs_dir_attach(dvp, de);
767
768	/* vp link count has changed, so update node times. */
769	node->tn_status |= TMPFS_NODE_CHANGED;
770	tmpfs_update(vp);
771
772	error = 0;
773out:
774
775	return error;
776}
777
778/* --------------------------------------------------------------------- */
779
/*
 * VOP_RENAME entry point: renames the entry for v->a_fvp (found in
 * directory v->a_fdvp under the name v->a_fcnp) to the name v->a_tcnp in
 * directory v->a_tdvp, replacing v->a_tvp when it is not NULL.
 *
 * Every path leaves through the "out" label, which releases tdvp, tvp (if
 * any), fdvp and fvp regardless of the outcome.
 *
 * Return values produced here:
 *   0          - success, or source and target are the same vnode.
 *   EXDEV      - the vnodes do not all belong to the same mount.
 *   EINVAL     - the source has no remembered directory entry, or the
 *                target directory lies below the directory being moved.
 *   EPERM      - flags on the source, target or either directory forbid
 *                the operation.
 *   ENOTEMPTY  - a directory would replace a non-empty target.
 *   ENOSPC     - no memory for the new name.
 *   other      - whatever vn_lock() on the source directory returned.
 */
int
tmpfs_rename(struct vop_rename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct vnode *fvp = v->a_fvp;
	struct componentname *fcnp = v->a_fcnp;
	struct vnode *tdvp = v->a_tdvp;
	struct vnode *tvp = v->a_tvp;
	struct componentname *tcnp = v->a_tcnp;
	struct tmpfs_node *tnode = 0; /* pacify gcc */

	char *newname;
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tdnode;

	MPASS(VOP_ISLOCKED(tdvp, tcnp->cn_thread));
	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp, tcnp->cn_thread)));
	MPASS(fcnp->cn_flags & HASBUF);
	MPASS(tcnp->cn_flags & HASBUF);

	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	/* Directory entry of the source, as recorded in the node's
	 * tn_lookup_dirent. */
	de = fnode->tn_lookup_dirent;

	/* Disallow cross-device renames.
	 * XXX Why isn't this done by the caller? */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	/* Avoid manipulating '.' and '..' entries. */
	if (de == NULL) {
		MPASS(fvp->v_type == VDIR);
		error = EINVAL;
		goto out;
	}
	MPASS(de->td_node == fnode);

	/* If renaming a directory to another preexisting directory
	 * ensure that the target directory is empty so that its
	 * removal causes no side effects.
	 * kern_rename guarantees the destination to be a directory
	 * if the source is one. */
	if (tvp != NULL) {
		tnode = VP_TO_TMPFS_NODE(tvp);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out;
		}

	    	if ((de->td_node->tn_type == VDIR) && (tnode->tn_size > 0)) {
			error = ENOTEMPTY;
			goto out;
		}
	}

	/* If we need to move the entry between directories, lock the
	 * source directory so that we can safely operate on it.  A failure
	 * here jumps to "out" (not "out_locked") because the lock was not
	 * obtained. */
	if (fdnode != tdnode) {
		error = vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, tcnp->cn_thread);
		if (error != 0)
			goto out;
	}

	/* The source and its directory must allow the entry to be
	 * removed. */
	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/* Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.  newname stays NULL when the old and new
	 * names are identical. */
	if (fcnp->cn_namelen != tcnp->cn_namelen ||
	    memcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
		newname = tmpfs_str_zone_alloc(&tmp->tm_str_pool, M_WAITOK,
		    tcnp->cn_namelen);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_locked;
		}
	} else
		newname = NULL;

	/* If the node is being moved to another directory, we have to do
	 * the move. */
	if (fdnode != tdnode) {
		/* In case we are moving a directory, we have to adjust its
		 * parent to point to the new parent. */
		if (de->td_node->tn_type == VDIR) {
			struct tmpfs_node *n;

			/* Ensure the target directory is not a child of the
			 * directory being moved.  Otherwise, we'd end up
			 * with stale nodes.  The walk follows tn_parent
			 * links until it reaches a node that is its own
			 * parent. */
			n = tdnode;
			while (n != n->tn_dir.tn_parent) {
				if (n == fnode) {
					error = EINVAL;
					/* Give back the name buffer
					 * allocated above. */
					if (newname != NULL)
						tmpfs_str_zone_free(&tmp->tm_str_pool,
						    newname, tcnp->cn_namelen);
					goto out_locked;
				}
				n = n->tn_dir.tn_parent;
			}

			/* Adjust the parent pointer. */
			TMPFS_VALIDATE_DIR(fnode);
			de->td_node->tn_dir.tn_parent = tdnode;

			/* As a result of changing the target of the '..'
			 * entry, the link count of the source and target
			 * directories has to be adjusted. */
			fdnode->tn_links--;
			tdnode->tn_links++;
		}

		/* Do the move: just remove the entry from the source directory
		 * and insert it into the target one. */
		tmpfs_dir_detach(fdvp, de);
		tmpfs_dir_attach(tdvp, de);
	}

	/* If the name has changed, we need to make it effective by changing
	 * it in the directory entry. */
	if (newname != NULL) {
		MPASS(tcnp->cn_namelen <= MAXNAMLEN);

		/* Free the old name using its old length before the length
		 * field is overwritten. */
		tmpfs_str_zone_free(&tmp->tm_str_pool, de->td_name,
		    de->td_namelen);
		de->td_namelen = (uint16_t)tcnp->cn_namelen;
		memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
		de->td_name = newname;

		fnode->tn_status |= TMPFS_NODE_CHANGED;
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
	}

	/* If we are overwriting an entry, we have to remove the old one
	 * from the target directory. */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory.  From here
		 * on de refers to the replaced entry, not the source one. */
		de = tnode->tn_lookup_dirent;
		tmpfs_dir_detach(tdvp, de);

		/* Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed. */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
	}

	error = 0;

out_locked:
	/* Undo the lock taken on the source directory above. */
	if (fdnode != tdnode)
		VOP_UNLOCK(fdvp, 0, tcnp->cn_thread);

out:
	/* Release target nodes. */
	/* XXX: I don't understand when tdvp can be the same as tvp, but
	 * other code takes care of this... */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp != NULL)
		vput(tvp);

	/* Release source nodes. */
	vrele(fdvp);
	vrele(fvp);

	return error;
}
971
972/* --------------------------------------------------------------------- */
973
974int
975tmpfs_mkdir(struct vop_mkdir_args *v)
976{
977	struct vnode *dvp = v->a_dvp;
978	struct vnode **vpp = v->a_vpp;
979	struct componentname *cnp = v->a_cnp;
980	struct vattr *vap = v->a_vap;
981
982	MPASS(vap->va_type == VDIR);
983
984	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
985}
986
987/* --------------------------------------------------------------------- */
988
989int
990tmpfs_rmdir(struct vop_rmdir_args *v)
991{
992	struct vnode *dvp = v->a_dvp;
993	struct vnode *vp = v->a_vp;
994
995	int error;
996	struct tmpfs_dirent *de;
997	struct tmpfs_mount *tmp;
998	struct tmpfs_node *dnode;
999	struct tmpfs_node *node;
1000
1001	MPASS(VOP_ISLOCKED(dvp, v->a_cnp->cn_thread));
1002	MPASS(VOP_ISLOCKED(vp, v->a_cnp->cn_thread));
1003
1004	tmp = VFS_TO_TMPFS(dvp->v_mount);
1005	dnode = VP_TO_TMPFS_DIR(dvp);
1006	node = VP_TO_TMPFS_DIR(vp);
1007
1008
1009	/* Directories with more than two entries ('.' and '..') cannot be
1010	  * removed. */
1011	 if (node->tn_size > 0) {
1012		 error = ENOTEMPTY;
1013		 goto out;
1014	 }
1015
1016	if ((dnode->tn_flags & APPEND)
1017	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1018		error = EPERM;
1019		goto out;
1020	}
1021
1022	/* This invariant holds only if we are not trying to remove "..".
1023	  * We checked for that above so this is safe now. */
1024	MPASS(node->tn_dir.tn_parent == dnode);
1025
1026	/* Get the directory entry associated with node (vp).  This was
1027	 * filled by tmpfs_lookup while looking up the entry. */
1028	de = node->tn_lookup_dirent;
1029	MPASS(TMPFS_DIRENT_MATCHES(de,
1030	    v->a_cnp->cn_nameptr,
1031	    v->a_cnp->cn_namelen));
1032
1033	/* Check flags to see if we are allowed to remove the directory. */
1034	if (dnode->tn_flags & APPEND
1035		|| node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1036		error = EPERM;
1037		goto out;
1038	}
1039
1040	/* Detach the directory entry from the directory (dnode). */
1041	tmpfs_dir_detach(dvp, de);
1042
1043	node->tn_links--;
1044	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1045	    TMPFS_NODE_MODIFIED;
1046	node->tn_dir.tn_parent->tn_links--;
1047	node->tn_dir.tn_parent->tn_status |= TMPFS_NODE_ACCESSED | \
1048	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1049
1050	cache_purge(dvp);
1051	cache_purge(vp);
1052
1053	/* Free the directory entry we just deleted.  Note that the node
1054	 * referred by it will not be removed until the vnode is really
1055	 * reclaimed. */
1056	tmpfs_free_dirent(tmp, de, TRUE);
1057
1058	/* Release the deleted vnode (will destroy the node, notify
1059	 * interested parties and clean it from the cache). */
1060
1061	dnode->tn_status |= TMPFS_NODE_CHANGED;
1062	tmpfs_update(dvp);
1063
1064	error = 0;
1065
1066out:
1067	return error;
1068}
1069
1070/* --------------------------------------------------------------------- */
1071
1072int
1073tmpfs_symlink(struct vop_symlink_args *v)
1074{
1075	struct vnode *dvp = v->a_dvp;
1076	struct vnode **vpp = v->a_vpp;
1077	struct componentname *cnp = v->a_cnp;
1078	struct vattr *vap = v->a_vap;
1079	char *target = v->a_target;
1080
1081#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1082	MPASS(vap->va_type == VLNK);
1083#else
1084	vap->va_type = VLNK;
1085#endif
1086
1087	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1088}
1089
1090/* --------------------------------------------------------------------- */
1091
1092int
1093tmpfs_readdir(struct vop_readdir_args *v)
1094{
1095	struct vnode *vp = v->a_vp;
1096	struct uio *uio = v->a_uio;
1097	int *eofflag = v->a_eofflag;
1098	u_long **cookies = v->a_cookies;
1099	int *ncookies = v->a_ncookies;
1100
1101	int error;
1102	off_t startoff;
1103	off_t cnt;
1104	struct tmpfs_node *node;
1105
1106	/* This operation only makes sense on directory nodes. */
1107	if (vp->v_type != VDIR) {
1108		error = ENOTDIR;
1109		goto out;
1110	}
1111
1112	node = VP_TO_TMPFS_DIR(vp);
1113
1114	startoff = uio->uio_offset;
1115
1116	cnt = 0;
1117	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1118		error = tmpfs_dir_getdotdent(node, uio);
1119		if (error == -1) {
1120			error = 0;
1121			goto outok;
1122		} else if (error != 0)
1123			goto outok;
1124		cnt++;
1125	}
1126
1127	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
1128		error = tmpfs_dir_getdotdotdent(node, uio);
1129		if (error == -1) {
1130			error = 0;
1131			goto outok;
1132		} else if (error != 0)
1133			goto outok;
1134		cnt++;
1135	}
1136
1137	error = tmpfs_dir_getdents(node, uio, &cnt);
1138	if (error == -1)
1139		error = 0;
1140	MPASS(error >= 0);
1141
1142outok:
1143	/* This label assumes that startoff has been
1144	 * initialized.  If the compiler didn't spit out warnings, we'd
1145	 * simply make this one be 'out' and drop 'outok'. */
1146
1147	if (eofflag != NULL)
1148		*eofflag =
1149		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1150
1151	/* Update NFS-related variables. */
1152	if (error == 0 && cookies != NULL && ncookies != NULL) {
1153		off_t i;
1154		off_t off = startoff;
1155		struct tmpfs_dirent *de = NULL;
1156
1157		*ncookies = cnt;
1158		*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1159
1160		for (i = 0; i < cnt; i++) {
1161			MPASS(off != TMPFS_DIRCOOKIE_EOF);
1162			if (off == TMPFS_DIRCOOKIE_DOT) {
1163				off = TMPFS_DIRCOOKIE_DOTDOT;
1164			} else {
1165				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1166					de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
1167				} else if (de != NULL) {
1168					de = TAILQ_NEXT(de, td_entries);
1169				} else {
1170					de = tmpfs_dir_lookupbycookie(node,
1171					    off);
1172					MPASS(de != NULL);
1173					de = TAILQ_NEXT(de, td_entries);
1174				}
1175				if (de == NULL) {
1176					off = TMPFS_DIRCOOKIE_EOF;
1177				} else {
1178					off = TMPFS_DIRCOOKIE(de);
1179				}
1180			}
1181
1182			(*cookies)[i] = off;
1183		}
1184		MPASS(uio->uio_offset == off);
1185	}
1186
1187out:
1188	return error;
1189}
1190
1191/* --------------------------------------------------------------------- */
1192
1193int
1194tmpfs_readlink(struct vop_readlink_args *v)
1195{
1196	struct vnode *vp = v->a_vp;
1197	struct uio *uio = v->a_uio;
1198
1199	int error;
1200	struct tmpfs_node *node;
1201
1202	MPASS(uio->uio_offset == 0);
1203	MPASS(vp->v_type == VLNK);
1204
1205	node = VP_TO_TMPFS_NODE(vp);
1206
1207	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1208	    uio);
1209	node->tn_status |= TMPFS_NODE_ACCESSED;
1210
1211	return error;
1212}
1213
1214/* --------------------------------------------------------------------- */
1215
1216int
1217tmpfs_inactive(struct vop_inactive_args *v)
1218{
1219	struct vnode *vp = v->a_vp;
1220	struct thread *l = v->a_td;
1221
1222	struct tmpfs_node *node;
1223
1224	MPASS(VOP_ISLOCKED(vp, l));
1225
1226	node = VP_TO_TMPFS_NODE(vp);
1227
1228	if (node->tn_links == 0)
1229		vrecycle(vp, l);
1230
1231	return 0;
1232}
1233
1234/* --------------------------------------------------------------------- */
1235
1236int
1237tmpfs_reclaim(struct vop_reclaim_args *v)
1238{
1239	struct vnode *vp = v->a_vp;
1240
1241	struct tmpfs_mount *tmp;
1242	struct tmpfs_node *node;
1243
1244	node = VP_TO_TMPFS_NODE(vp);
1245	tmp = VFS_TO_TMPFS(vp->v_mount);
1246
1247	vnode_destroy_vobject(vp);
1248	cache_purge(vp);
1249	tmpfs_free_vp(vp);
1250
1251	/* If the node referenced by this vnode was deleted by the user,
1252	 * we must free its associated data structures (now that the vnode
1253	 * is being reclaimed). */
1254	if (node->tn_links == 0)
1255		tmpfs_free_node(tmp, node);
1256
1257	MPASS(vp->v_data == NULL);
1258	return 0;
1259}
1260
1261/* --------------------------------------------------------------------- */
1262
1263int
1264tmpfs_print(struct vop_print_args *v)
1265{
1266	struct vnode *vp = v->a_vp;
1267
1268	struct tmpfs_node *node;
1269
1270	node = VP_TO_TMPFS_NODE(vp);
1271
1272	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1273	    node, node->tn_flags, node->tn_links);
1274	printf("\tmode 0%o, owner %d, group %d, size %" PRIdMAX
1275	    ", status 0x%x\n",
1276	    node->tn_mode, node->tn_uid, node->tn_gid,
1277	    (uintmax_t)node->tn_size, node->tn_status);
1278
1279	if (vp->v_type == VFIFO)
1280		fifo_printinfo(vp);
1281
1282	printf("\n");
1283
1284	return 0;
1285}
1286
1287/* --------------------------------------------------------------------- */
1288
1289int
1290tmpfs_pathconf(struct vop_pathconf_args *v)
1291{
1292	int name = v->a_name;
1293	register_t *retval = v->a_retval;
1294
1295	int error;
1296
1297	error = 0;
1298
1299	switch (name) {
1300	case _PC_LINK_MAX:
1301		*retval = LINK_MAX;
1302		break;
1303
1304	case _PC_NAME_MAX:
1305		*retval = NAME_MAX;
1306		break;
1307
1308	case _PC_PATH_MAX:
1309		*retval = PATH_MAX;
1310		break;
1311
1312	case _PC_PIPE_BUF:
1313		*retval = PIPE_BUF;
1314		break;
1315
1316	case _PC_CHOWN_RESTRICTED:
1317		*retval = 1;
1318		break;
1319
1320	case _PC_NO_TRUNC:
1321		*retval = 1;
1322		break;
1323
1324	case _PC_SYNC_IO:
1325		*retval = 1;
1326		break;
1327
1328	case _PC_FILESIZEBITS:
1329		*retval = 0; /* XXX Don't know which value should I return. */
1330		break;
1331
1332	default:
1333		error = EINVAL;
1334	}
1335
1336	return error;
1337}
1338
1339/* --------------------------------------------------------------------- */
1340
1341int
1342tmpfs_advlock(struct vop_advlock_args *v)
1343{
1344	struct vnode *vp = v->a_vp;
1345
1346	struct tmpfs_node *node;
1347
1348	node = VP_TO_TMPFS_NODE(vp);
1349
1350	return lf_advlock(v, &node->tn_lockf, node->tn_size);
1351}
1352
1353/* --------------------------------------------------------------------- */
1354
1355int
1356tmpfs_vptofh(struct vop_vptofh_args *ap)
1357{
1358	struct tmpfs_fid *tfhp;
1359	struct tmpfs_node *node;
1360
1361	tfhp = (struct tmpfs_fid *)ap->a_fhp;
1362	node = VP_TO_TMPFS_NODE(ap->a_vp);
1363
1364	tfhp->tf_len = sizeof(struct tmpfs_fid);
1365	tfhp->tf_id = node->tn_id;
1366	tfhp->tf_gen = node->tn_gen;
1367
1368	return (0);
1369}
1370