tmpfs_vnops.c revision 232960
1/*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
2
3/*-
4 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vnops.c 232960 2012-03-14 09:15:50Z gleb $");
38
39#include <sys/param.h>
40#include <sys/fcntl.h>
41#include <sys/lockf.h>
42#include <sys/namei.h>
43#include <sys/priv.h>
44#include <sys/proc.h>
45#include <sys/sched.h>
46#include <sys/sf_buf.h>
47#include <sys/stat.h>
48#include <sys/systm.h>
49#include <sys/sysctl.h>
50#include <sys/unistd.h>
51#include <sys/vnode.h>
52
53#include <vm/vm.h>
54#include <vm/vm_object.h>
55#include <vm/vm_page.h>
56#include <vm/vm_pager.h>
57
58#include <fs/tmpfs/tmpfs_vnops.h>
59#include <fs/tmpfs/tmpfs.h>
60
61SYSCTL_DECL(_vfs_tmpfs);
62
63static volatile int tmpfs_rename_restarts;
64SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
65    __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
66    "Times rename had to restart due to lock contention");
67
68/* --------------------------------------------------------------------- */
69
70static int
71tmpfs_lookup(struct vop_cachedlookup_args *v)
72{
73	struct vnode *dvp = v->a_dvp;
74	struct vnode **vpp = v->a_vpp;
75	struct componentname *cnp = v->a_cnp;
76
77	int error;
78	struct tmpfs_dirent *de;
79	struct tmpfs_node *dnode;
80
81	dnode = VP_TO_TMPFS_DIR(dvp);
82	*vpp = NULLVP;
83
84	/* Check accessibility of requested node as a first step. */
85	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
86	if (error != 0)
87		goto out;
88
89	/* We cannot be requesting the parent directory of the root node. */
90	MPASS(IMPLIES(dnode->tn_type == VDIR &&
91	    dnode->tn_dir.tn_parent == dnode,
92	    !(cnp->cn_flags & ISDOTDOT)));
93
94	TMPFS_ASSERT_LOCKED(dnode);
95	if (dnode->tn_dir.tn_parent == NULL) {
96		error = ENOENT;
97		goto out;
98	}
99	if (cnp->cn_flags & ISDOTDOT) {
100		int ltype = 0;
101
102		ltype = VOP_ISLOCKED(dvp);
103		vhold(dvp);
104		VOP_UNLOCK(dvp, 0);
105		/* Allocate a new vnode on the matching entry. */
106		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
107		    cnp->cn_lkflags, vpp);
108
109		vn_lock(dvp, ltype | LK_RETRY);
110		vdrop(dvp);
111	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
112		VREF(dvp);
113		*vpp = dvp;
114		error = 0;
115	} else {
116		de = tmpfs_dir_lookup(dnode, NULL, cnp);
117		if (de != NULL && de->td_node == NULL)
118			cnp->cn_flags |= ISWHITEOUT;
119		if (de == NULL || de->td_node == NULL) {
120			/* The entry was not found in the directory.
121			 * This is OK if we are creating or renaming an
122			 * entry and are working on the last component of
123			 * the path name. */
124			if ((cnp->cn_flags & ISLASTCN) &&
125			    (cnp->cn_nameiop == CREATE || \
126			    cnp->cn_nameiop == RENAME ||
127			    (cnp->cn_nameiop == DELETE &&
128			    cnp->cn_flags & DOWHITEOUT &&
129			    cnp->cn_flags & ISWHITEOUT))) {
130				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
131				    cnp->cn_thread);
132				if (error != 0)
133					goto out;
134
135				/* Keep the component name in the buffer for
136				 * future uses. */
137				cnp->cn_flags |= SAVENAME;
138
139				error = EJUSTRETURN;
140			} else
141				error = ENOENT;
142		} else {
143			struct tmpfs_node *tnode;
144
145			/* The entry was found, so get its associated
146			 * tmpfs_node. */
147			tnode = de->td_node;
148
149			/* If we are not at the last path component and
150			 * found a non-directory or non-link entry (which
151			 * may itself be pointing to a directory), raise
152			 * an error. */
153			if ((tnode->tn_type != VDIR &&
154			    tnode->tn_type != VLNK) &&
155			    !(cnp->cn_flags & ISLASTCN)) {
156				error = ENOTDIR;
157				goto out;
158			}
159
160			/* If we are deleting or renaming the entry, keep
161			 * track of its tmpfs_dirent so that it can be
162			 * easily deleted later. */
163			if ((cnp->cn_flags & ISLASTCN) &&
164			    (cnp->cn_nameiop == DELETE ||
165			    cnp->cn_nameiop == RENAME)) {
166				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
167				    cnp->cn_thread);
168				if (error != 0)
169					goto out;
170
171				/* Allocate a new vnode on the matching entry. */
172				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
173						cnp->cn_lkflags, vpp);
174				if (error != 0)
175					goto out;
176
177				if ((dnode->tn_mode & S_ISTXT) &&
178				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) &&
179				  VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) {
180					error = EPERM;
181					vput(*vpp);
182					*vpp = NULL;
183					goto out;
184				}
185				cnp->cn_flags |= SAVENAME;
186			} else {
187				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
188						cnp->cn_lkflags, vpp);
189			}
190		}
191	}
192
193	/* Store the result of this lookup in the cache.  Avoid this if the
194	 * request was for creation, as it does not improve timings on
195	 * emprical tests. */
196	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE)
197		cache_enter(dvp, *vpp, cnp);
198
199out:
200	/* If there were no errors, *vpp cannot be null and it must be
201	 * locked. */
202	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
203
204	return error;
205}
206
207/* --------------------------------------------------------------------- */
208
209static int
210tmpfs_create(struct vop_create_args *v)
211{
212	struct vnode *dvp = v->a_dvp;
213	struct vnode **vpp = v->a_vpp;
214	struct componentname *cnp = v->a_cnp;
215	struct vattr *vap = v->a_vap;
216
217	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
218
219	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
220}
221/* --------------------------------------------------------------------- */
222
223static int
224tmpfs_mknod(struct vop_mknod_args *v)
225{
226	struct vnode *dvp = v->a_dvp;
227	struct vnode **vpp = v->a_vpp;
228	struct componentname *cnp = v->a_cnp;
229	struct vattr *vap = v->a_vap;
230
231	if (vap->va_type != VBLK && vap->va_type != VCHR &&
232	    vap->va_type != VFIFO)
233		return EINVAL;
234
235	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
236}
237
238/* --------------------------------------------------------------------- */
239
240static int
241tmpfs_open(struct vop_open_args *v)
242{
243	struct vnode *vp = v->a_vp;
244	int mode = v->a_mode;
245
246	int error;
247	struct tmpfs_node *node;
248
249	MPASS(VOP_ISLOCKED(vp));
250
251	node = VP_TO_TMPFS_NODE(vp);
252
253	/* The file is still active but all its names have been removed
254	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
255	 * it is about to die. */
256	if (node->tn_links < 1)
257		return (ENOENT);
258
259	/* If the file is marked append-only, deny write requests. */
260	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
261		error = EPERM;
262	else {
263		error = 0;
264		vnode_create_vobject(vp, node->tn_size, v->a_td);
265	}
266
267	MPASS(VOP_ISLOCKED(vp));
268	return error;
269}
270
271/* --------------------------------------------------------------------- */
272
273static int
274tmpfs_close(struct vop_close_args *v)
275{
276	struct vnode *vp = v->a_vp;
277
278	MPASS(VOP_ISLOCKED(vp));
279
280	/* Update node times. */
281	tmpfs_update(vp);
282
283	return (0);
284}
285
286/* --------------------------------------------------------------------- */
287
288int
289tmpfs_access(struct vop_access_args *v)
290{
291	struct vnode *vp = v->a_vp;
292	accmode_t accmode = v->a_accmode;
293	struct ucred *cred = v->a_cred;
294
295	int error;
296	struct tmpfs_node *node;
297
298	MPASS(VOP_ISLOCKED(vp));
299
300	node = VP_TO_TMPFS_NODE(vp);
301
302	switch (vp->v_type) {
303	case VDIR:
304		/* FALLTHROUGH */
305	case VLNK:
306		/* FALLTHROUGH */
307	case VREG:
308		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
309			error = EROFS;
310			goto out;
311		}
312		break;
313
314	case VBLK:
315		/* FALLTHROUGH */
316	case VCHR:
317		/* FALLTHROUGH */
318	case VSOCK:
319		/* FALLTHROUGH */
320	case VFIFO:
321		break;
322
323	default:
324		error = EINVAL;
325		goto out;
326	}
327
328	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
329		error = EPERM;
330		goto out;
331	}
332
333	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
334	    node->tn_gid, accmode, cred, NULL);
335
336out:
337	MPASS(VOP_ISLOCKED(vp));
338
339	return error;
340}
341
342/* --------------------------------------------------------------------- */
343
344int
345tmpfs_getattr(struct vop_getattr_args *v)
346{
347	struct vnode *vp = v->a_vp;
348	struct vattr *vap = v->a_vap;
349
350	struct tmpfs_node *node;
351
352	node = VP_TO_TMPFS_NODE(vp);
353
354	tmpfs_update(vp);
355
356	vap->va_type = vp->v_type;
357	vap->va_mode = node->tn_mode;
358	vap->va_nlink = node->tn_links;
359	vap->va_uid = node->tn_uid;
360	vap->va_gid = node->tn_gid;
361	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
362	vap->va_fileid = node->tn_id;
363	vap->va_size = node->tn_size;
364	vap->va_blocksize = PAGE_SIZE;
365	vap->va_atime = node->tn_atime;
366	vap->va_mtime = node->tn_mtime;
367	vap->va_ctime = node->tn_ctime;
368	vap->va_birthtime = node->tn_birthtime;
369	vap->va_gen = node->tn_gen;
370	vap->va_flags = node->tn_flags;
371	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
372		node->tn_rdev : NODEV;
373	vap->va_bytes = round_page(node->tn_size);
374	vap->va_filerev = 0;
375
376	return 0;
377}
378
379/* --------------------------------------------------------------------- */
380
381/* XXX Should this operation be atomic?  I think it should, but code in
382 * XXX other places (e.g., ufs) doesn't seem to be... */
383int
384tmpfs_setattr(struct vop_setattr_args *v)
385{
386	struct vnode *vp = v->a_vp;
387	struct vattr *vap = v->a_vap;
388	struct ucred *cred = v->a_cred;
389	struct thread *td = curthread;
390
391	int error;
392
393	MPASS(VOP_ISLOCKED(vp));
394
395	error = 0;
396
397	/* Abort if any unsettable attribute is given. */
398	if (vap->va_type != VNON ||
399	    vap->va_nlink != VNOVAL ||
400	    vap->va_fsid != VNOVAL ||
401	    vap->va_fileid != VNOVAL ||
402	    vap->va_blocksize != VNOVAL ||
403	    vap->va_gen != VNOVAL ||
404	    vap->va_rdev != VNOVAL ||
405	    vap->va_bytes != VNOVAL)
406		error = EINVAL;
407
408	if (error == 0 && (vap->va_flags != VNOVAL))
409		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
410
411	if (error == 0 && (vap->va_size != VNOVAL))
412		error = tmpfs_chsize(vp, vap->va_size, cred, td);
413
414	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
415		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
416
417	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
418		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
419
420	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
421	    vap->va_atime.tv_nsec != VNOVAL) ||
422	    (vap->va_mtime.tv_sec != VNOVAL &&
423	    vap->va_mtime.tv_nsec != VNOVAL) ||
424	    (vap->va_birthtime.tv_sec != VNOVAL &&
425	    vap->va_birthtime.tv_nsec != VNOVAL)))
426		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
427			&vap->va_birthtime, vap->va_vaflags, cred, td);
428
429	/* Update the node times.  We give preference to the error codes
430	 * generated by this function rather than the ones that may arise
431	 * from tmpfs_update. */
432	tmpfs_update(vp);
433
434	MPASS(VOP_ISLOCKED(vp));
435
436	return error;
437}
438
439/* --------------------------------------------------------------------- */
440static int
441tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
442    vm_offset_t offset, size_t tlen, struct uio *uio)
443{
444	vm_page_t	m;
445	int		error, rv;
446
447	VM_OBJECT_LOCK(tobj);
448	m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
449	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
450	if (m->valid != VM_PAGE_BITS_ALL) {
451		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
452			rv = vm_pager_get_pages(tobj, &m, 1, 0);
453			if (rv != VM_PAGER_OK) {
454				vm_page_lock(m);
455				vm_page_free(m);
456				vm_page_unlock(m);
457				VM_OBJECT_UNLOCK(tobj);
458				return (EIO);
459			}
460		} else
461			vm_page_zero_invalid(m, TRUE);
462	}
463	VM_OBJECT_UNLOCK(tobj);
464	error = uiomove_fromphys(&m, offset, tlen, uio);
465	VM_OBJECT_LOCK(tobj);
466	vm_page_lock(m);
467	vm_page_unwire(m, TRUE);
468	vm_page_unlock(m);
469	vm_page_wakeup(m);
470	VM_OBJECT_UNLOCK(tobj);
471
472	return (error);
473}
474
475static __inline int
476tmpfs_nocacheread_buf(vm_object_t tobj, vm_pindex_t idx,
477    vm_offset_t offset, size_t tlen, void *buf)
478{
479	struct uio uio;
480	struct iovec iov;
481
482	uio.uio_iovcnt = 1;
483	uio.uio_iov = &iov;
484	iov.iov_base = buf;
485	iov.iov_len = tlen;
486
487	uio.uio_offset = 0;
488	uio.uio_resid = tlen;
489	uio.uio_rw = UIO_READ;
490	uio.uio_segflg = UIO_SYSSPACE;
491	uio.uio_td = curthread;
492
493	return (tmpfs_nocacheread(tobj, idx, offset, tlen, &uio));
494}
495
496static int
497tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
498{
499	struct sf_buf	*sf;
500	vm_pindex_t	idx;
501	vm_page_t	m;
502	vm_offset_t	offset;
503	off_t		addr;
504	size_t		tlen;
505	char		*ma;
506	int		error;
507
508	addr = uio->uio_offset;
509	idx = OFF_TO_IDX(addr);
510	offset = addr & PAGE_MASK;
511	tlen = MIN(PAGE_SIZE - offset, len);
512
513	if ((vobj == NULL) ||
514	    (vobj->resident_page_count == 0 && vobj->cache == NULL))
515		goto nocache;
516
517	VM_OBJECT_LOCK(vobj);
518lookupvpg:
519	if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
520	    vm_page_is_valid(m, offset, tlen)) {
521		if ((m->oflags & VPO_BUSY) != 0) {
522			/*
523			 * Reference the page before unlocking and sleeping so
524			 * that the page daemon is less likely to reclaim it.
525			 */
526			vm_page_reference(m);
527			vm_page_sleep(m, "tmfsmr");
528			goto lookupvpg;
529		}
530		vm_page_busy(m);
531		VM_OBJECT_UNLOCK(vobj);
532		error = uiomove_fromphys(&m, offset, tlen, uio);
533		VM_OBJECT_LOCK(vobj);
534		vm_page_wakeup(m);
535		VM_OBJECT_UNLOCK(vobj);
536		return	(error);
537	} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
538		KASSERT(offset == 0,
539		    ("unexpected offset in tmpfs_mappedread for sendfile"));
540		if ((m->oflags & VPO_BUSY) != 0) {
541			/*
542			 * Reference the page before unlocking and sleeping so
543			 * that the page daemon is less likely to reclaim it.
544			 */
545			vm_page_reference(m);
546			vm_page_sleep(m, "tmfsmr");
547			goto lookupvpg;
548		}
549		vm_page_busy(m);
550		VM_OBJECT_UNLOCK(vobj);
551		sched_pin();
552		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
553		ma = (char *)sf_buf_kva(sf);
554		error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma);
555		if (error == 0) {
556			if (tlen != PAGE_SIZE)
557				bzero(ma + tlen, PAGE_SIZE - tlen);
558			uio->uio_offset += tlen;
559			uio->uio_resid -= tlen;
560		}
561		sf_buf_free(sf);
562		sched_unpin();
563		VM_OBJECT_LOCK(vobj);
564		if (error == 0)
565			m->valid = VM_PAGE_BITS_ALL;
566		vm_page_wakeup(m);
567		VM_OBJECT_UNLOCK(vobj);
568		return	(error);
569	}
570	VM_OBJECT_UNLOCK(vobj);
571nocache:
572	error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio);
573
574	return	(error);
575}
576
577static int
578tmpfs_read(struct vop_read_args *v)
579{
580	struct vnode *vp = v->a_vp;
581	struct uio *uio = v->a_uio;
582
583	struct tmpfs_node *node;
584	vm_object_t uobj;
585	size_t len;
586	int resid;
587
588	int error = 0;
589
590	node = VP_TO_TMPFS_NODE(vp);
591
592	if (vp->v_type != VREG) {
593		error = EISDIR;
594		goto out;
595	}
596
597	if (uio->uio_offset < 0) {
598		error = EINVAL;
599		goto out;
600	}
601
602	node->tn_status |= TMPFS_NODE_ACCESSED;
603
604	uobj = node->tn_reg.tn_aobj;
605	while ((resid = uio->uio_resid) > 0) {
606		error = 0;
607		if (node->tn_size <= uio->uio_offset)
608			break;
609		len = MIN(node->tn_size - uio->uio_offset, resid);
610		if (len == 0)
611			break;
612		error = tmpfs_mappedread(vp->v_object, uobj, len, uio);
613		if ((error != 0) || (resid == uio->uio_resid))
614			break;
615	}
616
617out:
618
619	return error;
620}
621
622/* --------------------------------------------------------------------- */
623
624static int
625tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
626{
627	vm_pindex_t	idx;
628	vm_page_t	vpg, tpg;
629	vm_offset_t	offset;
630	off_t		addr;
631	size_t		tlen;
632	int		error, rv;
633
634	error = 0;
635
636	addr = uio->uio_offset;
637	idx = OFF_TO_IDX(addr);
638	offset = addr & PAGE_MASK;
639	tlen = MIN(PAGE_SIZE - offset, len);
640
641	if ((vobj == NULL) ||
642	    (vobj->resident_page_count == 0 && vobj->cache == NULL)) {
643		vpg = NULL;
644		goto nocache;
645	}
646
647	VM_OBJECT_LOCK(vobj);
648lookupvpg:
649	if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
650	    vm_page_is_valid(vpg, offset, tlen)) {
651		if ((vpg->oflags & VPO_BUSY) != 0) {
652			/*
653			 * Reference the page before unlocking and sleeping so
654			 * that the page daemon is less likely to reclaim it.
655			 */
656			vm_page_reference(vpg);
657			vm_page_sleep(vpg, "tmfsmw");
658			goto lookupvpg;
659		}
660		vm_page_busy(vpg);
661		vm_page_undirty(vpg);
662		VM_OBJECT_UNLOCK(vobj);
663		error = uiomove_fromphys(&vpg, offset, tlen, uio);
664	} else {
665		if (__predict_false(vobj->cache != NULL))
666			vm_page_cache_free(vobj, idx, idx + 1);
667		VM_OBJECT_UNLOCK(vobj);
668		vpg = NULL;
669	}
670nocache:
671	VM_OBJECT_LOCK(tobj);
672	tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
673	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
674	if (tpg->valid != VM_PAGE_BITS_ALL) {
675		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
676			rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
677			if (rv != VM_PAGER_OK) {
678				vm_page_lock(tpg);
679				vm_page_free(tpg);
680				vm_page_unlock(tpg);
681				error = EIO;
682				goto out;
683			}
684		} else
685			vm_page_zero_invalid(tpg, TRUE);
686	}
687	VM_OBJECT_UNLOCK(tobj);
688	if (vpg == NULL)
689		error = uiomove_fromphys(&tpg, offset, tlen, uio);
690	else {
691		KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid"));
692		pmap_copy_page(vpg, tpg);
693	}
694	VM_OBJECT_LOCK(tobj);
695	if (error == 0) {
696		KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
697		    ("parts of tpg invalid"));
698		vm_page_dirty(tpg);
699	}
700	vm_page_lock(tpg);
701	vm_page_unwire(tpg, TRUE);
702	vm_page_unlock(tpg);
703	vm_page_wakeup(tpg);
704out:
705	VM_OBJECT_UNLOCK(tobj);
706	if (vpg != NULL) {
707		VM_OBJECT_LOCK(vobj);
708		vm_page_wakeup(vpg);
709		VM_OBJECT_UNLOCK(vobj);
710	}
711
712	return	(error);
713}
714
715static int
716tmpfs_write(struct vop_write_args *v)
717{
718	struct vnode *vp = v->a_vp;
719	struct uio *uio = v->a_uio;
720	int ioflag = v->a_ioflag;
721
722	boolean_t extended;
723	int error = 0;
724	off_t oldsize;
725	struct tmpfs_node *node;
726	vm_object_t uobj;
727	size_t len;
728	int resid;
729
730	node = VP_TO_TMPFS_NODE(vp);
731	oldsize = node->tn_size;
732
733	if (uio->uio_offset < 0 || vp->v_type != VREG) {
734		error = EINVAL;
735		goto out;
736	}
737
738	if (uio->uio_resid == 0) {
739		error = 0;
740		goto out;
741	}
742
743	if (ioflag & IO_APPEND)
744		uio->uio_offset = node->tn_size;
745
746	if (uio->uio_offset + uio->uio_resid >
747	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
748		return (EFBIG);
749
750	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
751		return (EFBIG);
752
753	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
754	if (extended) {
755		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
756		    FALSE);
757		if (error != 0)
758			goto out;
759	}
760
761	uobj = node->tn_reg.tn_aobj;
762	while ((resid = uio->uio_resid) > 0) {
763		if (node->tn_size <= uio->uio_offset)
764			break;
765		len = MIN(node->tn_size - uio->uio_offset, resid);
766		if (len == 0)
767			break;
768		error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio);
769		if ((error != 0) || (resid == uio->uio_resid))
770			break;
771	}
772
773	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
774	    (extended ? TMPFS_NODE_CHANGED : 0);
775
776	if (node->tn_mode & (S_ISUID | S_ISGID)) {
777		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0))
778			node->tn_mode &= ~(S_ISUID | S_ISGID);
779	}
780
781	if (error != 0)
782		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
783
784out:
785	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
786	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
787
788	return error;
789}
790
791/* --------------------------------------------------------------------- */
792
793static int
794tmpfs_fsync(struct vop_fsync_args *v)
795{
796	struct vnode *vp = v->a_vp;
797
798	MPASS(VOP_ISLOCKED(vp));
799
800	tmpfs_update(vp);
801
802	return 0;
803}
804
805/* --------------------------------------------------------------------- */
806
807static int
808tmpfs_remove(struct vop_remove_args *v)
809{
810	struct vnode *dvp = v->a_dvp;
811	struct vnode *vp = v->a_vp;
812
813	int error;
814	struct tmpfs_dirent *de;
815	struct tmpfs_mount *tmp;
816	struct tmpfs_node *dnode;
817	struct tmpfs_node *node;
818
819	MPASS(VOP_ISLOCKED(dvp));
820	MPASS(VOP_ISLOCKED(vp));
821
822	if (vp->v_type == VDIR) {
823		error = EISDIR;
824		goto out;
825	}
826
827	dnode = VP_TO_TMPFS_DIR(dvp);
828	node = VP_TO_TMPFS_NODE(vp);
829	tmp = VFS_TO_TMPFS(vp->v_mount);
830	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
831	MPASS(de != NULL);
832
833	/* Files marked as immutable or append-only cannot be deleted. */
834	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
835	    (dnode->tn_flags & APPEND)) {
836		error = EPERM;
837		goto out;
838	}
839
840	/* Remove the entry from the directory; as it is a file, we do not
841	 * have to change the number of hard links of the directory. */
842	tmpfs_dir_detach(dvp, de);
843	if (v->a_cnp->cn_flags & DOWHITEOUT)
844		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
845
846	/* Free the directory entry we just deleted.  Note that the node
847	 * referred by it will not be removed until the vnode is really
848	 * reclaimed. */
849	tmpfs_free_dirent(tmp, de, TRUE);
850
851	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
852	error = 0;
853
854out:
855
856	return error;
857}
858
859/* --------------------------------------------------------------------- */
860
861static int
862tmpfs_link(struct vop_link_args *v)
863{
864	struct vnode *dvp = v->a_tdvp;
865	struct vnode *vp = v->a_vp;
866	struct componentname *cnp = v->a_cnp;
867
868	int error;
869	struct tmpfs_dirent *de;
870	struct tmpfs_node *node;
871
872	MPASS(VOP_ISLOCKED(dvp));
873	MPASS(cnp->cn_flags & HASBUF);
874	MPASS(dvp != vp); /* XXX When can this be false? */
875
876	node = VP_TO_TMPFS_NODE(vp);
877
878	/* XXX: Why aren't the following two tests done by the caller? */
879
880	/* Hard links of directories are forbidden. */
881	if (vp->v_type == VDIR) {
882		error = EPERM;
883		goto out;
884	}
885
886	/* Cannot create cross-device links. */
887	if (dvp->v_mount != vp->v_mount) {
888		error = EXDEV;
889		goto out;
890	}
891
892	/* Ensure that we do not overflow the maximum number of links imposed
893	 * by the system. */
894	MPASS(node->tn_links <= LINK_MAX);
895	if (node->tn_links == LINK_MAX) {
896		error = EMLINK;
897		goto out;
898	}
899
900	/* We cannot create links of files marked immutable or append-only. */
901	if (node->tn_flags & (IMMUTABLE | APPEND)) {
902		error = EPERM;
903		goto out;
904	}
905
906	/* Allocate a new directory entry to represent the node. */
907	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
908	    cnp->cn_nameptr, cnp->cn_namelen, &de);
909	if (error != 0)
910		goto out;
911
912	/* Insert the new directory entry into the appropriate directory. */
913	if (cnp->cn_flags & ISWHITEOUT)
914		tmpfs_dir_whiteout_remove(dvp, cnp);
915	tmpfs_dir_attach(dvp, de);
916
917	/* vp link count has changed, so update node times. */
918	node->tn_status |= TMPFS_NODE_CHANGED;
919	tmpfs_update(vp);
920
921	error = 0;
922
923out:
924	return error;
925}
926
927/* --------------------------------------------------------------------- */
928
929/*
930 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
931 * fail to acquire any lock in the path we will drop all held locks,
932 * acquire the new lock in a blocking fashion, and then release it and
933 * restart the rename.  This acquire/release step ensures that we do not
934 * spin on a lock waiting for release.  On error release all vnode locks
935 * and decrement references the way tmpfs_rename() would do.
936 */
937static int
938tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
939    struct vnode *tdvp, struct vnode **tvpp,
940    struct componentname *fcnp, struct componentname *tcnp)
941{
942	struct vnode *nvp;
943	struct mount *mp;
944	struct tmpfs_dirent *de;
945	int error, restarts = 0;
946
947	VOP_UNLOCK(tdvp, 0);
948	if (*tvpp != NULL && *tvpp != tdvp)
949		VOP_UNLOCK(*tvpp, 0);
950	mp = fdvp->v_mount;
951
952relock:
953	restarts += 1;
954	error = vn_lock(fdvp, LK_EXCLUSIVE);
955	if (error)
956		goto releout;
957	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
958		VOP_UNLOCK(fdvp, 0);
959		error = vn_lock(tdvp, LK_EXCLUSIVE);
960		if (error)
961			goto releout;
962		VOP_UNLOCK(tdvp, 0);
963		goto relock;
964	}
965	/*
966	 * Re-resolve fvp to be certain it still exists and fetch the
967	 * correct vnode.
968	 */
969	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
970	if (de == NULL) {
971		VOP_UNLOCK(fdvp, 0);
972		VOP_UNLOCK(tdvp, 0);
973		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
974		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
975			error = EINVAL;
976		else
977			error = ENOENT;
978		goto releout;
979	}
980	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
981	if (error != 0) {
982		VOP_UNLOCK(fdvp, 0);
983		VOP_UNLOCK(tdvp, 0);
984		if (error != EBUSY)
985			goto releout;
986		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
987		if (error != 0)
988			goto releout;
989		VOP_UNLOCK(nvp, 0);
990		/*
991		 * Concurrent rename race.
992		 */
993		if (nvp == tdvp) {
994			vrele(nvp);
995			error = EINVAL;
996			goto releout;
997		}
998		vrele(*fvpp);
999		*fvpp = nvp;
1000		goto relock;
1001	}
1002	vrele(*fvpp);
1003	*fvpp = nvp;
1004	VOP_UNLOCK(*fvpp, 0);
1005	/*
1006	 * Re-resolve tvp and acquire the vnode lock if present.
1007	 */
1008	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
1009	/*
1010	 * If tvp disappeared we just carry on.
1011	 */
1012	if (de == NULL && *tvpp != NULL) {
1013		vrele(*tvpp);
1014		*tvpp = NULL;
1015	}
1016	/*
1017	 * Get the tvp ino if the lookup succeeded.  We may have to restart
1018	 * if the non-blocking acquire fails.
1019	 */
1020	if (de != NULL) {
1021		nvp = NULL;
1022		error = tmpfs_alloc_vp(mp, de->td_node,
1023		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
1024		if (*tvpp != NULL)
1025			vrele(*tvpp);
1026		*tvpp = nvp;
1027		if (error != 0) {
1028			VOP_UNLOCK(fdvp, 0);
1029			VOP_UNLOCK(tdvp, 0);
1030			if (error != EBUSY)
1031				goto releout;
1032			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
1033			    &nvp);
1034			if (error != 0)
1035				goto releout;
1036			VOP_UNLOCK(nvp, 0);
1037			/*
1038			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
1039			 */
1040			if (nvp == fdvp) {
1041				error = ENOTEMPTY;
1042				goto releout;
1043			}
1044			goto relock;
1045		}
1046	}
1047	tmpfs_rename_restarts += restarts;
1048
1049	return (0);
1050
1051releout:
1052	vrele(fdvp);
1053	vrele(*fvpp);
1054	vrele(tdvp);
1055	if (*tvpp != NULL)
1056		vrele(*tvpp);
1057	tmpfs_rename_restarts += restarts;
1058
1059	return (error);
1060}
1061
1062static int
1063tmpfs_rename(struct vop_rename_args *v)
1064{
1065	struct vnode *fdvp = v->a_fdvp;
1066	struct vnode *fvp = v->a_fvp;
1067	struct componentname *fcnp = v->a_fcnp;
1068	struct vnode *tdvp = v->a_tdvp;
1069	struct vnode *tvp = v->a_tvp;
1070	struct componentname *tcnp = v->a_tcnp;
1071	struct mount *mp = NULL;
1072
1073	char *newname;
1074	int error;
1075	struct tmpfs_dirent *de;
1076	struct tmpfs_mount *tmp;
1077	struct tmpfs_node *fdnode;
1078	struct tmpfs_node *fnode;
1079	struct tmpfs_node *tnode;
1080	struct tmpfs_node *tdnode;
1081
1082	MPASS(VOP_ISLOCKED(tdvp));
1083	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
1084	MPASS(fcnp->cn_flags & HASBUF);
1085	MPASS(tcnp->cn_flags & HASBUF);
1086
1087	/* Disallow cross-device renames.
1088	 * XXX Why isn't this done by the caller? */
1089	if (fvp->v_mount != tdvp->v_mount ||
1090	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1091		error = EXDEV;
1092		goto out;
1093	}
1094
1095	/* If source and target are the same file, there is nothing to do. */
1096	if (fvp == tvp) {
1097		error = 0;
1098		goto out;
1099	}
1100
1101	/* If we need to move the directory between entries, lock the
1102	 * source so that we can safely operate on it. */
1103	if (fdvp != tdvp && fdvp != tvp) {
1104		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1105			mp = tdvp->v_mount;
1106			error = vfs_busy(mp, 0);
1107			if (error != 0) {
1108				mp = NULL;
1109				goto out;
1110			}
1111			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
1112			    fcnp, tcnp);
1113			if (error != 0) {
1114				vfs_unbusy(mp);
1115				return (error);
1116			}
1117			ASSERT_VOP_ELOCKED(fdvp,
1118			    "tmpfs_rename: fdvp not locked");
1119			ASSERT_VOP_ELOCKED(tdvp,
1120			    "tmpfs_rename: tdvp not locked");
1121			if (tvp != NULL)
1122				ASSERT_VOP_ELOCKED(tvp,
1123				    "tmpfs_rename: tvp not locked");
1124			if (fvp == tvp) {
1125				error = 0;
1126				goto out_locked;
1127			}
1128		}
1129	}
1130
1131	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1132	tdnode = VP_TO_TMPFS_DIR(tdvp);
1133	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
1134	fdnode = VP_TO_TMPFS_DIR(fdvp);
1135	fnode = VP_TO_TMPFS_NODE(fvp);
1136	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
1137
1138	/* Entry can disappear before we lock fdvp,
1139	 * also avoid manipulating '.' and '..' entries. */
1140	if (de == NULL) {
1141		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
1142		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
1143			error = EINVAL;
1144		else
1145			error = ENOENT;
1146		goto out_locked;
1147	}
1148	MPASS(de->td_node == fnode);
1149
1150	/* If re-naming a directory to another preexisting directory
1151	 * ensure that the target directory is empty so that its
1152	 * removal causes no side effects.
1153	 * Kern_rename gurantees the destination to be a directory
1154	 * if the source is one. */
1155	if (tvp != NULL) {
1156		MPASS(tnode != NULL);
1157
1158		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1159		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1160			error = EPERM;
1161			goto out_locked;
1162		}
1163
1164		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1165			if (tnode->tn_size > 0) {
1166				error = ENOTEMPTY;
1167				goto out_locked;
1168			}
1169		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1170			error = ENOTDIR;
1171			goto out_locked;
1172		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1173			error = EISDIR;
1174			goto out_locked;
1175		} else {
1176			MPASS(fnode->tn_type != VDIR &&
1177				tnode->tn_type != VDIR);
1178		}
1179	}
1180
1181	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
1182	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1183		error = EPERM;
1184		goto out_locked;
1185	}
1186
1187	/* Ensure that we have enough memory to hold the new name, if it
1188	 * has to be changed. */
1189	if (fcnp->cn_namelen != tcnp->cn_namelen ||
1190	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
1191		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
1192	} else
1193		newname = NULL;
1194
1195	/* If the node is being moved to another directory, we have to do
1196	 * the move. */
1197	if (fdnode != tdnode) {
1198		/* In case we are moving a directory, we have to adjust its
1199		 * parent to point to the new parent. */
1200		if (de->td_node->tn_type == VDIR) {
1201			struct tmpfs_node *n;
1202
1203			/* Ensure the target directory is not a child of the
1204			 * directory being moved.  Otherwise, we'd end up
1205			 * with stale nodes. */
1206			n = tdnode;
1207			/* TMPFS_LOCK garanties that no nodes are freed while
1208			 * traversing the list. Nodes can only be marked as
1209			 * removed: tn_parent == NULL. */
1210			TMPFS_LOCK(tmp);
1211			TMPFS_NODE_LOCK(n);
1212			while (n != n->tn_dir.tn_parent) {
1213				struct tmpfs_node *parent;
1214
1215				if (n == fnode) {
1216					TMPFS_NODE_UNLOCK(n);
1217					TMPFS_UNLOCK(tmp);
1218					error = EINVAL;
1219					if (newname != NULL)
1220						    free(newname, M_TMPFSNAME);
1221					goto out_locked;
1222				}
1223				parent = n->tn_dir.tn_parent;
1224				TMPFS_NODE_UNLOCK(n);
1225				if (parent == NULL) {
1226					n = NULL;
1227					break;
1228				}
1229				TMPFS_NODE_LOCK(parent);
1230				if (parent->tn_dir.tn_parent == NULL) {
1231					TMPFS_NODE_UNLOCK(parent);
1232					n = NULL;
1233					break;
1234				}
1235				n = parent;
1236			}
1237			TMPFS_UNLOCK(tmp);
1238			if (n == NULL) {
1239				error = EINVAL;
1240				if (newname != NULL)
1241					    free(newname, M_TMPFSNAME);
1242				goto out_locked;
1243			}
1244			TMPFS_NODE_UNLOCK(n);
1245
1246			/* Adjust the parent pointer. */
1247			TMPFS_VALIDATE_DIR(fnode);
1248			TMPFS_NODE_LOCK(de->td_node);
1249			de->td_node->tn_dir.tn_parent = tdnode;
1250			TMPFS_NODE_UNLOCK(de->td_node);
1251
1252			/* As a result of changing the target of the '..'
1253			 * entry, the link count of the source and target
1254			 * directories has to be adjusted. */
1255			TMPFS_NODE_LOCK(tdnode);
1256			TMPFS_ASSERT_LOCKED(tdnode);
1257			tdnode->tn_links++;
1258			TMPFS_NODE_UNLOCK(tdnode);
1259
1260			TMPFS_NODE_LOCK(fdnode);
1261			TMPFS_ASSERT_LOCKED(fdnode);
1262			fdnode->tn_links--;
1263			TMPFS_NODE_UNLOCK(fdnode);
1264		}
1265
1266		/* Do the move: just remove the entry from the source directory
1267		 * and insert it into the target one. */
1268		tmpfs_dir_detach(fdvp, de);
1269		if (fcnp->cn_flags & DOWHITEOUT)
1270			tmpfs_dir_whiteout_add(fdvp, fcnp);
1271		if (tcnp->cn_flags & ISWHITEOUT)
1272			tmpfs_dir_whiteout_remove(tdvp, tcnp);
1273		tmpfs_dir_attach(tdvp, de);
1274	}
1275
1276	/* If the name has changed, we need to make it effective by changing
1277	 * it in the directory entry. */
1278	if (newname != NULL) {
1279		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
1280
1281		free(de->td_name, M_TMPFSNAME);
1282		de->td_namelen = (uint16_t)tcnp->cn_namelen;
1283		memcpy(newname, tcnp->cn_nameptr, tcnp->cn_namelen);
1284		de->td_name = newname;
1285
1286		fnode->tn_status |= TMPFS_NODE_CHANGED;
1287		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1288	}
1289
1290	/* If we are overwriting an entry, we have to remove the old one
1291	 * from the target directory. */
1292	if (tvp != NULL) {
1293		/* Remove the old entry from the target directory. */
1294		de = tmpfs_dir_lookup(tdnode, tnode, tcnp);
1295		tmpfs_dir_detach(tdvp, de);
1296
1297		/* Free the directory entry we just deleted.  Note that the
1298		 * node referred by it will not be removed until the vnode is
1299		 * really reclaimed. */
1300		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), de, TRUE);
1301	}
1302	cache_purge(fvp);
1303	if (tvp != NULL)
1304		cache_purge(tvp);
1305
1306	error = 0;
1307
1308out_locked:
1309	if (fdvp != tdvp && fdvp != tvp)
1310		VOP_UNLOCK(fdvp, 0);
1311
1312out:
1313	/* Release target nodes. */
1314	/* XXX: I don't understand when tdvp can be the same as tvp, but
1315	 * other code takes care of this... */
1316	if (tdvp == tvp)
1317		vrele(tdvp);
1318	else
1319		vput(tdvp);
1320	if (tvp != NULL)
1321		vput(tvp);
1322
1323	/* Release source nodes. */
1324	vrele(fdvp);
1325	vrele(fvp);
1326
1327	if (mp != NULL)
1328		vfs_unbusy(mp);
1329
1330	return error;
1331}
1332
1333/* --------------------------------------------------------------------- */
1334
1335static int
1336tmpfs_mkdir(struct vop_mkdir_args *v)
1337{
1338	struct vnode *dvp = v->a_dvp;
1339	struct vnode **vpp = v->a_vpp;
1340	struct componentname *cnp = v->a_cnp;
1341	struct vattr *vap = v->a_vap;
1342
1343	MPASS(vap->va_type == VDIR);
1344
1345	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
1346}
1347
1348/* --------------------------------------------------------------------- */
1349
1350static int
1351tmpfs_rmdir(struct vop_rmdir_args *v)
1352{
1353	struct vnode *dvp = v->a_dvp;
1354	struct vnode *vp = v->a_vp;
1355
1356	int error;
1357	struct tmpfs_dirent *de;
1358	struct tmpfs_mount *tmp;
1359	struct tmpfs_node *dnode;
1360	struct tmpfs_node *node;
1361
1362	MPASS(VOP_ISLOCKED(dvp));
1363	MPASS(VOP_ISLOCKED(vp));
1364
1365	tmp = VFS_TO_TMPFS(dvp->v_mount);
1366	dnode = VP_TO_TMPFS_DIR(dvp);
1367	node = VP_TO_TMPFS_DIR(vp);
1368
1369	/* Directories with more than two entries ('.' and '..') cannot be
1370	 * removed. */
1371	 if (node->tn_size > 0) {
1372		 error = ENOTEMPTY;
1373		 goto out;
1374	 }
1375
1376	if ((dnode->tn_flags & APPEND)
1377	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1378		error = EPERM;
1379		goto out;
1380	}
1381
1382	/* This invariant holds only if we are not trying to remove "..".
1383	  * We checked for that above so this is safe now. */
1384	MPASS(node->tn_dir.tn_parent == dnode);
1385
1386	/* Get the directory entry associated with node (vp).  This was
1387	 * filled by tmpfs_lookup while looking up the entry. */
1388	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1389	MPASS(TMPFS_DIRENT_MATCHES(de,
1390	    v->a_cnp->cn_nameptr,
1391	    v->a_cnp->cn_namelen));
1392
1393	/* Check flags to see if we are allowed to remove the directory. */
1394	if (dnode->tn_flags & APPEND
1395		|| node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1396		error = EPERM;
1397		goto out;
1398	}
1399
1400
1401	/* Detach the directory entry from the directory (dnode). */
1402	tmpfs_dir_detach(dvp, de);
1403	if (v->a_cnp->cn_flags & DOWHITEOUT)
1404		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1405
1406	/* No vnode should be allocated for this entry from this point */
1407	TMPFS_NODE_LOCK(node);
1408	TMPFS_ASSERT_ELOCKED(node);
1409	node->tn_links--;
1410	node->tn_dir.tn_parent = NULL;
1411	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1412	    TMPFS_NODE_MODIFIED;
1413
1414	TMPFS_NODE_UNLOCK(node);
1415
1416	TMPFS_NODE_LOCK(dnode);
1417	TMPFS_ASSERT_ELOCKED(dnode);
1418	dnode->tn_links--;
1419	dnode->tn_status |= TMPFS_NODE_ACCESSED | \
1420	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1421	TMPFS_NODE_UNLOCK(dnode);
1422
1423	cache_purge(dvp);
1424	cache_purge(vp);
1425
1426	/* Free the directory entry we just deleted.  Note that the node
1427	 * referred by it will not be removed until the vnode is really
1428	 * reclaimed. */
1429	tmpfs_free_dirent(tmp, de, TRUE);
1430
1431	/* Release the deleted vnode (will destroy the node, notify
1432	 * interested parties and clean it from the cache). */
1433
1434	dnode->tn_status |= TMPFS_NODE_CHANGED;
1435	tmpfs_update(dvp);
1436
1437	error = 0;
1438
1439out:
1440	return error;
1441}
1442
1443/* --------------------------------------------------------------------- */
1444
1445static int
1446tmpfs_symlink(struct vop_symlink_args *v)
1447{
1448	struct vnode *dvp = v->a_dvp;
1449	struct vnode **vpp = v->a_vpp;
1450	struct componentname *cnp = v->a_cnp;
1451	struct vattr *vap = v->a_vap;
1452	char *target = v->a_target;
1453
1454#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1455	MPASS(vap->va_type == VLNK);
1456#else
1457	vap->va_type = VLNK;
1458#endif
1459
1460	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1461}
1462
1463/* --------------------------------------------------------------------- */
1464
1465static int
1466tmpfs_readdir(struct vop_readdir_args *v)
1467{
1468	struct vnode *vp = v->a_vp;
1469	struct uio *uio = v->a_uio;
1470	int *eofflag = v->a_eofflag;
1471	u_long **cookies = v->a_cookies;
1472	int *ncookies = v->a_ncookies;
1473
1474	int error;
1475	off_t startoff;
1476	off_t cnt = 0;
1477	struct tmpfs_node *node;
1478
1479	/* This operation only makes sense on directory nodes. */
1480	if (vp->v_type != VDIR)
1481		return ENOTDIR;
1482
1483	node = VP_TO_TMPFS_DIR(vp);
1484
1485	startoff = uio->uio_offset;
1486
1487	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1488		error = tmpfs_dir_getdotdent(node, uio);
1489		if (error != 0)
1490			goto outok;
1491		cnt++;
1492	}
1493
1494	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
1495		error = tmpfs_dir_getdotdotdent(node, uio);
1496		if (error != 0)
1497			goto outok;
1498		cnt++;
1499	}
1500
1501	error = tmpfs_dir_getdents(node, uio, &cnt);
1502
1503outok:
1504	MPASS(error >= -1);
1505
1506	if (error == -1)
1507		error = (cnt != 0) ? 0 : EINVAL;
1508
1509	if (eofflag != NULL)
1510		*eofflag =
1511		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1512
1513	/* Update NFS-related variables. */
1514	if (error == 0 && cookies != NULL && ncookies != NULL) {
1515		off_t i;
1516		off_t off = startoff;
1517		struct tmpfs_dirent *de = NULL;
1518
1519		*ncookies = cnt;
1520		*cookies = malloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1521
1522		for (i = 0; i < cnt; i++) {
1523			MPASS(off != TMPFS_DIRCOOKIE_EOF);
1524			if (off == TMPFS_DIRCOOKIE_DOT) {
1525				off = TMPFS_DIRCOOKIE_DOTDOT;
1526			} else {
1527				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1528					de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
1529				} else if (de != NULL) {
1530					de = TAILQ_NEXT(de, td_entries);
1531				} else {
1532					de = tmpfs_dir_lookupbycookie(node,
1533					    off);
1534					MPASS(de != NULL);
1535					de = TAILQ_NEXT(de, td_entries);
1536				}
1537				if (de == NULL)
1538					off = TMPFS_DIRCOOKIE_EOF;
1539				else
1540					off = tmpfs_dircookie(de);
1541			}
1542
1543			(*cookies)[i] = off;
1544		}
1545		MPASS(uio->uio_offset == off);
1546	}
1547
1548	return error;
1549}
1550
1551/* --------------------------------------------------------------------- */
1552
1553static int
1554tmpfs_readlink(struct vop_readlink_args *v)
1555{
1556	struct vnode *vp = v->a_vp;
1557	struct uio *uio = v->a_uio;
1558
1559	int error;
1560	struct tmpfs_node *node;
1561
1562	MPASS(uio->uio_offset == 0);
1563	MPASS(vp->v_type == VLNK);
1564
1565	node = VP_TO_TMPFS_NODE(vp);
1566
1567	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1568	    uio);
1569	node->tn_status |= TMPFS_NODE_ACCESSED;
1570
1571	return error;
1572}
1573
1574/* --------------------------------------------------------------------- */
1575
1576static int
1577tmpfs_inactive(struct vop_inactive_args *v)
1578{
1579	struct vnode *vp = v->a_vp;
1580	struct thread *l = v->a_td;
1581
1582	struct tmpfs_node *node;
1583
1584	MPASS(VOP_ISLOCKED(vp));
1585
1586	node = VP_TO_TMPFS_NODE(vp);
1587
1588	if (node->tn_links == 0)
1589		vrecycle(vp, l);
1590
1591	return 0;
1592}
1593
1594/* --------------------------------------------------------------------- */
1595
1596int
1597tmpfs_reclaim(struct vop_reclaim_args *v)
1598{
1599	struct vnode *vp = v->a_vp;
1600
1601	struct tmpfs_mount *tmp;
1602	struct tmpfs_node *node;
1603
1604	node = VP_TO_TMPFS_NODE(vp);
1605	tmp = VFS_TO_TMPFS(vp->v_mount);
1606
1607	vnode_destroy_vobject(vp);
1608	cache_purge(vp);
1609
1610	TMPFS_NODE_LOCK(node);
1611	TMPFS_ASSERT_ELOCKED(node);
1612	tmpfs_free_vp(vp);
1613
1614	/* If the node referenced by this vnode was deleted by the user,
1615	 * we must free its associated data structures (now that the vnode
1616	 * is being reclaimed). */
1617	if (node->tn_links == 0 &&
1618	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1619		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1620		TMPFS_NODE_UNLOCK(node);
1621		tmpfs_free_node(tmp, node);
1622	} else
1623		TMPFS_NODE_UNLOCK(node);
1624
1625	MPASS(vp->v_data == NULL);
1626	return 0;
1627}
1628
1629/* --------------------------------------------------------------------- */
1630
1631static int
1632tmpfs_print(struct vop_print_args *v)
1633{
1634	struct vnode *vp = v->a_vp;
1635
1636	struct tmpfs_node *node;
1637
1638	node = VP_TO_TMPFS_NODE(vp);
1639
1640	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1641	    node, node->tn_flags, node->tn_links);
1642	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1643	    node->tn_mode, node->tn_uid, node->tn_gid,
1644	    (intmax_t)node->tn_size, node->tn_status);
1645
1646	if (vp->v_type == VFIFO)
1647		fifo_printinfo(vp);
1648
1649	printf("\n");
1650
1651	return 0;
1652}
1653
1654/* --------------------------------------------------------------------- */
1655
1656static int
1657tmpfs_pathconf(struct vop_pathconf_args *v)
1658{
1659	int name = v->a_name;
1660	register_t *retval = v->a_retval;
1661
1662	int error;
1663
1664	error = 0;
1665
1666	switch (name) {
1667	case _PC_LINK_MAX:
1668		*retval = LINK_MAX;
1669		break;
1670
1671	case _PC_NAME_MAX:
1672		*retval = NAME_MAX;
1673		break;
1674
1675	case _PC_PATH_MAX:
1676		*retval = PATH_MAX;
1677		break;
1678
1679	case _PC_PIPE_BUF:
1680		*retval = PIPE_BUF;
1681		break;
1682
1683	case _PC_CHOWN_RESTRICTED:
1684		*retval = 1;
1685		break;
1686
1687	case _PC_NO_TRUNC:
1688		*retval = 1;
1689		break;
1690
1691	case _PC_SYNC_IO:
1692		*retval = 1;
1693		break;
1694
1695	case _PC_FILESIZEBITS:
1696		*retval = 0; /* XXX Don't know which value should I return. */
1697		break;
1698
1699	default:
1700		error = EINVAL;
1701	}
1702
1703	return error;
1704}
1705
1706static int
1707tmpfs_vptofh(struct vop_vptofh_args *ap)
1708{
1709	struct tmpfs_fid *tfhp;
1710	struct tmpfs_node *node;
1711
1712	tfhp = (struct tmpfs_fid *)ap->a_fhp;
1713	node = VP_TO_TMPFS_NODE(ap->a_vp);
1714
1715	tfhp->tf_len = sizeof(struct tmpfs_fid);
1716	tfhp->tf_id = node->tn_id;
1717	tfhp->tf_gen = node->tn_gen;
1718
1719	return (0);
1720}
1721
1722static int
1723tmpfs_whiteout(struct vop_whiteout_args *ap)
1724{
1725	struct vnode *dvp = ap->a_dvp;
1726	struct componentname *cnp = ap->a_cnp;
1727	struct tmpfs_dirent *de;
1728
1729	switch (ap->a_flags) {
1730	case LOOKUP:
1731		return (0);
1732	case CREATE:
1733		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1734		if (de != NULL)
1735			return (de->td_node == NULL ? 0 : EEXIST);
1736		return (tmpfs_dir_whiteout_add(dvp, cnp));
1737	case DELETE:
1738		tmpfs_dir_whiteout_remove(dvp, cnp);
1739		return (0);
1740	default:
1741		panic("tmpfs_whiteout: unknown op");
1742	}
1743}
1744
1745/* --------------------------------------------------------------------- */
1746
1747/*
1748 * vnode operations vector used for files stored in a tmpfs file system.
1749 */
1750struct vop_vector tmpfs_vnodeop_entries = {
1751	.vop_default =			&default_vnodeops,
1752	.vop_lookup =			vfs_cache_lookup,
1753	.vop_cachedlookup =		tmpfs_lookup,
1754	.vop_create =			tmpfs_create,
1755	.vop_mknod =			tmpfs_mknod,
1756	.vop_open =			tmpfs_open,
1757	.vop_close =			tmpfs_close,
1758	.vop_access =			tmpfs_access,
1759	.vop_getattr =			tmpfs_getattr,
1760	.vop_setattr =			tmpfs_setattr,
1761	.vop_read =			tmpfs_read,
1762	.vop_write =			tmpfs_write,
1763	.vop_fsync =			tmpfs_fsync,
1764	.vop_remove =			tmpfs_remove,
1765	.vop_link =			tmpfs_link,
1766	.vop_rename =			tmpfs_rename,
1767	.vop_mkdir =			tmpfs_mkdir,
1768	.vop_rmdir =			tmpfs_rmdir,
1769	.vop_symlink =			tmpfs_symlink,
1770	.vop_readdir =			tmpfs_readdir,
1771	.vop_readlink =			tmpfs_readlink,
1772	.vop_inactive =			tmpfs_inactive,
1773	.vop_reclaim =			tmpfs_reclaim,
1774	.vop_print =			tmpfs_print,
1775	.vop_pathconf =			tmpfs_pathconf,
1776	.vop_vptofh =			tmpfs_vptofh,
1777	.vop_whiteout =			tmpfs_whiteout,
1778	.vop_bmap =			VOP_EOPNOTSUPP,
1779};
1780
1781