tmpfs_vnops.c revision 248610
1/*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
2
3/*-
4 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/*
34 * tmpfs vnode interface.
35 */
36#include <sys/cdefs.h>
37__FBSDID("$FreeBSD: head/sys/fs/tmpfs/tmpfs_vnops.c 248610 2013-03-22 07:40:34Z pjd $");
38
39#include <sys/param.h>
40#include <sys/fcntl.h>
41#include <sys/lockf.h>
42#include <sys/lock.h>
43#include <sys/namei.h>
44#include <sys/priv.h>
45#include <sys/proc.h>
46#include <sys/rwlock.h>
47#include <sys/sched.h>
48#include <sys/sf_buf.h>
49#include <sys/stat.h>
50#include <sys/systm.h>
51#include <sys/sysctl.h>
52#include <sys/unistd.h>
53#include <sys/vnode.h>
54
55#include <vm/vm.h>
56#include <vm/vm_param.h>
57#include <vm/vm_object.h>
58#include <vm/vm_page.h>
59#include <vm/vm_pager.h>
60
61#include <fs/tmpfs/tmpfs_vnops.h>
62#include <fs/tmpfs/tmpfs.h>
63
64SYSCTL_DECL(_vfs_tmpfs);
65
/*
 * Counter exported read-only through SYSCTL_INT under the _vfs_tmpfs
 * node with the name "rename_restarts".  tmpfs_rename_relock() adds to
 * it the number of passes it made through its relock loop.
 */
66static volatile int tmpfs_rename_restarts;
67SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
68    __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
69    "Times rename had to restart due to lock contention");
70
71/* --------------------------------------------------------------------- */
72
73static int
74tmpfs_lookup(struct vop_cachedlookup_args *v)
75{
76	struct vnode *dvp = v->a_dvp;
77	struct vnode **vpp = v->a_vpp;
78	struct componentname *cnp = v->a_cnp;
79
80	int error;
81	struct tmpfs_dirent *de;
82	struct tmpfs_node *dnode;
83
84	dnode = VP_TO_TMPFS_DIR(dvp);
85	*vpp = NULLVP;
86
87	/* Check accessibility of requested node as a first step. */
88	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
89	if (error != 0)
90		goto out;
91
92	/* We cannot be requesting the parent directory of the root node. */
93	MPASS(IMPLIES(dnode->tn_type == VDIR &&
94	    dnode->tn_dir.tn_parent == dnode,
95	    !(cnp->cn_flags & ISDOTDOT)));
96
97	TMPFS_ASSERT_LOCKED(dnode);
98	if (dnode->tn_dir.tn_parent == NULL) {
99		error = ENOENT;
100		goto out;
101	}
102	if (cnp->cn_flags & ISDOTDOT) {
103		int ltype = 0;
104
105		ltype = VOP_ISLOCKED(dvp);
106		vhold(dvp);
107		VOP_UNLOCK(dvp, 0);
108		/* Allocate a new vnode on the matching entry. */
109		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
110		    cnp->cn_lkflags, vpp);
111
112		vn_lock(dvp, ltype | LK_RETRY);
113		vdrop(dvp);
114	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
115		VREF(dvp);
116		*vpp = dvp;
117		error = 0;
118	} else {
119		de = tmpfs_dir_lookup(dnode, NULL, cnp);
120		if (de != NULL && de->td_node == NULL)
121			cnp->cn_flags |= ISWHITEOUT;
122		if (de == NULL || de->td_node == NULL) {
123			/* The entry was not found in the directory.
124			 * This is OK if we are creating or renaming an
125			 * entry and are working on the last component of
126			 * the path name. */
127			if ((cnp->cn_flags & ISLASTCN) &&
128			    (cnp->cn_nameiop == CREATE || \
129			    cnp->cn_nameiop == RENAME ||
130			    (cnp->cn_nameiop == DELETE &&
131			    cnp->cn_flags & DOWHITEOUT &&
132			    cnp->cn_flags & ISWHITEOUT))) {
133				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
134				    cnp->cn_thread);
135				if (error != 0)
136					goto out;
137
138				/* Keep the component name in the buffer for
139				 * future uses. */
140				cnp->cn_flags |= SAVENAME;
141
142				error = EJUSTRETURN;
143			} else
144				error = ENOENT;
145		} else {
146			struct tmpfs_node *tnode;
147
148			/* The entry was found, so get its associated
149			 * tmpfs_node. */
150			tnode = de->td_node;
151
152			/* If we are not at the last path component and
153			 * found a non-directory or non-link entry (which
154			 * may itself be pointing to a directory), raise
155			 * an error. */
156			if ((tnode->tn_type != VDIR &&
157			    tnode->tn_type != VLNK) &&
158			    !(cnp->cn_flags & ISLASTCN)) {
159				error = ENOTDIR;
160				goto out;
161			}
162
163			/* If we are deleting or renaming the entry, keep
164			 * track of its tmpfs_dirent so that it can be
165			 * easily deleted later. */
166			if ((cnp->cn_flags & ISLASTCN) &&
167			    (cnp->cn_nameiop == DELETE ||
168			    cnp->cn_nameiop == RENAME)) {
169				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
170				    cnp->cn_thread);
171				if (error != 0)
172					goto out;
173
174				/* Allocate a new vnode on the matching entry. */
175				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
176						cnp->cn_lkflags, vpp);
177				if (error != 0)
178					goto out;
179
180				if ((dnode->tn_mode & S_ISTXT) &&
181				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) &&
182				  VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) {
183					error = EPERM;
184					vput(*vpp);
185					*vpp = NULL;
186					goto out;
187				}
188				cnp->cn_flags |= SAVENAME;
189			} else {
190				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
191						cnp->cn_lkflags, vpp);
192			}
193		}
194	}
195
196	/* Store the result of this lookup in the cache.  Avoid this if the
197	 * request was for creation, as it does not improve timings on
198	 * emprical tests. */
199	if ((cnp->cn_flags & MAKEENTRY) && cnp->cn_nameiop != CREATE)
200		cache_enter(dvp, *vpp, cnp);
201
202out:
203	/* If there were no errors, *vpp cannot be null and it must be
204	 * locked. */
205	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
206
207	return error;
208}
209
210/* --------------------------------------------------------------------- */
211
212static int
213tmpfs_create(struct vop_create_args *v)
214{
215	struct vnode *dvp = v->a_dvp;
216	struct vnode **vpp = v->a_vpp;
217	struct componentname *cnp = v->a_cnp;
218	struct vattr *vap = v->a_vap;
219
220	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
221
222	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
223}
224/* --------------------------------------------------------------------- */
225
226static int
227tmpfs_mknod(struct vop_mknod_args *v)
228{
229	struct vnode *dvp = v->a_dvp;
230	struct vnode **vpp = v->a_vpp;
231	struct componentname *cnp = v->a_cnp;
232	struct vattr *vap = v->a_vap;
233
234	if (vap->va_type != VBLK && vap->va_type != VCHR &&
235	    vap->va_type != VFIFO)
236		return EINVAL;
237
238	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
239}
240
241/* --------------------------------------------------------------------- */
242
243static int
244tmpfs_open(struct vop_open_args *v)
245{
246	struct vnode *vp = v->a_vp;
247	int mode = v->a_mode;
248
249	int error;
250	struct tmpfs_node *node;
251
252	MPASS(VOP_ISLOCKED(vp));
253
254	node = VP_TO_TMPFS_NODE(vp);
255
256	/* The file is still active but all its names have been removed
257	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
258	 * it is about to die. */
259	if (node->tn_links < 1)
260		return (ENOENT);
261
262	/* If the file is marked append-only, deny write requests. */
263	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
264		error = EPERM;
265	else {
266		error = 0;
267		vnode_create_vobject(vp, node->tn_size, v->a_td);
268	}
269
270	MPASS(VOP_ISLOCKED(vp));
271	return error;
272}
273
274/* --------------------------------------------------------------------- */
275
276static int
277tmpfs_close(struct vop_close_args *v)
278{
279	struct vnode *vp = v->a_vp;
280
281	MPASS(VOP_ISLOCKED(vp));
282
283	/* Update node times. */
284	tmpfs_update(vp);
285
286	return (0);
287}
288
289/* --------------------------------------------------------------------- */
290
291int
292tmpfs_access(struct vop_access_args *v)
293{
294	struct vnode *vp = v->a_vp;
295	accmode_t accmode = v->a_accmode;
296	struct ucred *cred = v->a_cred;
297
298	int error;
299	struct tmpfs_node *node;
300
301	MPASS(VOP_ISLOCKED(vp));
302
303	node = VP_TO_TMPFS_NODE(vp);
304
305	switch (vp->v_type) {
306	case VDIR:
307		/* FALLTHROUGH */
308	case VLNK:
309		/* FALLTHROUGH */
310	case VREG:
311		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
312			error = EROFS;
313			goto out;
314		}
315		break;
316
317	case VBLK:
318		/* FALLTHROUGH */
319	case VCHR:
320		/* FALLTHROUGH */
321	case VSOCK:
322		/* FALLTHROUGH */
323	case VFIFO:
324		break;
325
326	default:
327		error = EINVAL;
328		goto out;
329	}
330
331	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
332		error = EPERM;
333		goto out;
334	}
335
336	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
337	    node->tn_gid, accmode, cred, NULL);
338
339out:
340	MPASS(VOP_ISLOCKED(vp));
341
342	return error;
343}
344
345/* --------------------------------------------------------------------- */
346
347int
348tmpfs_getattr(struct vop_getattr_args *v)
349{
350	struct vnode *vp = v->a_vp;
351	struct vattr *vap = v->a_vap;
352
353	struct tmpfs_node *node;
354
355	node = VP_TO_TMPFS_NODE(vp);
356
357	tmpfs_update(vp);
358
359	vap->va_type = vp->v_type;
360	vap->va_mode = node->tn_mode;
361	vap->va_nlink = node->tn_links;
362	vap->va_uid = node->tn_uid;
363	vap->va_gid = node->tn_gid;
364	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
365	vap->va_fileid = node->tn_id;
366	vap->va_size = node->tn_size;
367	vap->va_blocksize = PAGE_SIZE;
368	vap->va_atime = node->tn_atime;
369	vap->va_mtime = node->tn_mtime;
370	vap->va_ctime = node->tn_ctime;
371	vap->va_birthtime = node->tn_birthtime;
372	vap->va_gen = node->tn_gen;
373	vap->va_flags = node->tn_flags;
374	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
375		node->tn_rdev : NODEV;
376	vap->va_bytes = round_page(node->tn_size);
377	vap->va_filerev = 0;
378
379	return 0;
380}
381
382/* --------------------------------------------------------------------- */
383
384/* XXX Should this operation be atomic?  I think it should, but code in
385 * XXX other places (e.g., ufs) doesn't seem to be... */
386int
387tmpfs_setattr(struct vop_setattr_args *v)
388{
389	struct vnode *vp = v->a_vp;
390	struct vattr *vap = v->a_vap;
391	struct ucred *cred = v->a_cred;
392	struct thread *td = curthread;
393
394	int error;
395
396	MPASS(VOP_ISLOCKED(vp));
397
398	error = 0;
399
400	/* Abort if any unsettable attribute is given. */
401	if (vap->va_type != VNON ||
402	    vap->va_nlink != VNOVAL ||
403	    vap->va_fsid != VNOVAL ||
404	    vap->va_fileid != VNOVAL ||
405	    vap->va_blocksize != VNOVAL ||
406	    vap->va_gen != VNOVAL ||
407	    vap->va_rdev != VNOVAL ||
408	    vap->va_bytes != VNOVAL)
409		error = EINVAL;
410
411	if (error == 0 && (vap->va_flags != VNOVAL))
412		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
413
414	if (error == 0 && (vap->va_size != VNOVAL))
415		error = tmpfs_chsize(vp, vap->va_size, cred, td);
416
417	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
418		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
419
420	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
421		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
422
423	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
424	    vap->va_atime.tv_nsec != VNOVAL) ||
425	    (vap->va_mtime.tv_sec != VNOVAL &&
426	    vap->va_mtime.tv_nsec != VNOVAL) ||
427	    (vap->va_birthtime.tv_sec != VNOVAL &&
428	    vap->va_birthtime.tv_nsec != VNOVAL)))
429		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
430			&vap->va_birthtime, vap->va_vaflags, cred, td);
431
432	/* Update the node times.  We give preference to the error codes
433	 * generated by this function rather than the ones that may arise
434	 * from tmpfs_update. */
435	tmpfs_update(vp);
436
437	MPASS(VOP_ISLOCKED(vp));
438
439	return error;
440}
441
442/* --------------------------------------------------------------------- */
443static int
444tmpfs_nocacheread(vm_object_t tobj, vm_pindex_t idx,
445    vm_offset_t offset, size_t tlen, struct uio *uio)
446{
447	vm_page_t	m;
448	int		error, rv;
449
450	VM_OBJECT_WLOCK(tobj);
451	m = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
452	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
453	if (m->valid != VM_PAGE_BITS_ALL) {
454		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
455			rv = vm_pager_get_pages(tobj, &m, 1, 0);
456			if (rv != VM_PAGER_OK) {
457				vm_page_lock(m);
458				vm_page_free(m);
459				vm_page_unlock(m);
460				VM_OBJECT_WUNLOCK(tobj);
461				return (EIO);
462			}
463		} else
464			vm_page_zero_invalid(m, TRUE);
465	}
466	VM_OBJECT_WUNLOCK(tobj);
467	error = uiomove_fromphys(&m, offset, tlen, uio);
468	VM_OBJECT_WLOCK(tobj);
469	vm_page_lock(m);
470	vm_page_unwire(m, TRUE);
471	vm_page_unlock(m);
472	vm_page_wakeup(m);
473	VM_OBJECT_WUNLOCK(tobj);
474
475	return (error);
476}
477
478static __inline int
479tmpfs_nocacheread_buf(vm_object_t tobj, vm_pindex_t idx,
480    vm_offset_t offset, size_t tlen, void *buf)
481{
482	struct uio uio;
483	struct iovec iov;
484
485	uio.uio_iovcnt = 1;
486	uio.uio_iov = &iov;
487	iov.iov_base = buf;
488	iov.iov_len = tlen;
489
490	uio.uio_offset = 0;
491	uio.uio_resid = tlen;
492	uio.uio_rw = UIO_READ;
493	uio.uio_segflg = UIO_SYSSPACE;
494	uio.uio_td = curthread;
495
496	return (tmpfs_nocacheread(tobj, idx, offset, tlen, &uio));
497}
498
/*
 * Read at most one page worth of data at uio->uio_offset into uio.
 *
 * vobj is the VM object of the vnode (tmpfs_read passes vp->v_object) and
 * tobj is the node's backing object (tmpfs_read passes tn_reg.tn_aobj).
 * len is the number of bytes the caller still wants; the transfer is
 * clamped so that it does not cross the page containing the offset.
 *
 * Three cases are handled:
 *  - vobj has a page at that index which is valid for the requested
 *    range: the data is copied from that page.
 *  - vobj has a page that is not valid for the range and the request is
 *    UIO_NOCOPY: the page is filled from tobj, the tail beyond tlen is
 *    zeroed, and the page is marked fully valid; uio is advanced by hand.
 *  - otherwise: the data is read straight from tobj with
 *    tmpfs_nocacheread().
 *
 * Returns 0 or the error of the copy routine that was used.
 */
499static int
500tmpfs_mappedread(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
501{
502	struct sf_buf	*sf;
503	vm_pindex_t	idx;
504	vm_page_t	m;
505	vm_offset_t	offset;
506	off_t		addr;
507	size_t		tlen;
508	char		*ma;
509	int		error;
510
	/* Split the file offset into a page index and an in-page offset,
	 * and clamp the transfer to the remainder of that page. */
511	addr = uio->uio_offset;
512	idx = OFF_TO_IDX(addr);
513	offset = addr & PAGE_MASK;
514	tlen = MIN(PAGE_SIZE - offset, len);
515
516	VM_OBJECT_WLOCK(vobj);
517lookupvpg:
518	if (((m = vm_page_lookup(vobj, idx)) != NULL) &&
519	    vm_page_is_valid(m, offset, tlen)) {
520		if ((m->oflags & VPO_BUSY) != 0) {
521			/*
522			 * Reference the page before unlocking and sleeping so
523			 * that the page daemon is less likely to reclaim it.
524			 */
525			vm_page_reference(m);
526			vm_page_sleep(m, "tmfsmr");
527			goto lookupvpg;
528		}
		/* Busy the page so the copy can run with the object lock
		 * dropped; vm_page_wakeup() below undoes this. */
529		vm_page_busy(m);
530		VM_OBJECT_WUNLOCK(vobj);
531		error = uiomove_fromphys(&m, offset, tlen, uio);
532		VM_OBJECT_WLOCK(vobj);
533		vm_page_wakeup(m);
534		VM_OBJECT_WUNLOCK(vobj);
535		return	(error);
536	} else if (m != NULL && uio->uio_segflg == UIO_NOCOPY) {
537		KASSERT(offset == 0,
538		    ("unexpected offset in tmpfs_mappedread for sendfile"));
539		if ((m->oflags & VPO_BUSY) != 0) {
540			/*
541			 * Reference the page before unlocking and sleeping so
542			 * that the page daemon is less likely to reclaim it.
543			 */
544			vm_page_reference(m);
545			vm_page_sleep(m, "tmfsmr");
546			goto lookupvpg;
547		}
548		vm_page_busy(m);
549		VM_OBJECT_WUNLOCK(vobj);
		/* Map the vobj page through an sf_buf and fill it from tobj;
		 * the thread stays pinned while the CPU-private mapping is
		 * in use. */
550		sched_pin();
551		sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
552		ma = (char *)sf_buf_kva(sf);
553		error = tmpfs_nocacheread_buf(tobj, idx, 0, tlen, ma);
554		if (error == 0) {
			/* Zero whatever follows the copied data in the page. */
555			if (tlen != PAGE_SIZE)
556				bzero(ma + tlen, PAGE_SIZE - tlen);
			/* The helper used its own uio, so account for the
			 * transfer in the caller's uio here. */
557			uio->uio_offset += tlen;
558			uio->uio_resid -= tlen;
559		}
560		sf_buf_free(sf);
561		sched_unpin();
562		VM_OBJECT_WLOCK(vobj);
563		if (error == 0)
564			m->valid = VM_PAGE_BITS_ALL;
565		vm_page_wakeup(m);
566		VM_OBJECT_WUNLOCK(vobj);
567		return	(error);
568	}
	/* No usable page in vobj: read directly from the backing object. */
569	VM_OBJECT_WUNLOCK(vobj);
570	error = tmpfs_nocacheread(tobj, idx, offset, tlen, uio);
571
572	return	(error);
573}
574
575static int
576tmpfs_read(struct vop_read_args *v)
577{
578	struct vnode *vp = v->a_vp;
579	struct uio *uio = v->a_uio;
580
581	struct tmpfs_node *node;
582	vm_object_t uobj;
583	size_t len;
584	int resid;
585
586	int error = 0;
587
588	node = VP_TO_TMPFS_NODE(vp);
589
590	if (vp->v_type != VREG) {
591		error = EISDIR;
592		goto out;
593	}
594
595	if (uio->uio_offset < 0) {
596		error = EINVAL;
597		goto out;
598	}
599
600	node->tn_status |= TMPFS_NODE_ACCESSED;
601
602	uobj = node->tn_reg.tn_aobj;
603	while ((resid = uio->uio_resid) > 0) {
604		error = 0;
605		if (node->tn_size <= uio->uio_offset)
606			break;
607		len = MIN(node->tn_size - uio->uio_offset, resid);
608		if (len == 0)
609			break;
610		error = tmpfs_mappedread(vp->v_object, uobj, len, uio);
611		if ((error != 0) || (resid == uio->uio_resid))
612			break;
613	}
614
615out:
616
617	return error;
618}
619
620/* --------------------------------------------------------------------- */
621
/*
 * Write at most one page worth of data from uio at uio->uio_offset.
 *
 * vobj is the VM object of the vnode (tmpfs_write passes vp->v_object)
 * and tobj is the node's backing object (tmpfs_write passes
 * tn_reg.tn_aobj).  len is the number of bytes the caller still wants to
 * write; the transfer is clamped to the page containing the offset.
 *
 * If vobj holds a page that is valid for the range, the data is first
 * moved into that page (vpg) and the whole page is then copied into the
 * tobj page (tpg) with pmap_copy_page().  Otherwise the data is moved
 * straight into tpg.  tpg is brought in from the pager or has its invalid
 * parts zeroed when it is not fully valid, and is dirtied on success.
 *
 * Returns 0, EIO if the pager fails, or the error of uiomove_fromphys().
 */
622static int
623tmpfs_mappedwrite(vm_object_t vobj, vm_object_t tobj, size_t len, struct uio *uio)
624{
625	vm_pindex_t	idx;
626	vm_page_t	vpg, tpg;
627	vm_offset_t	offset;
628	off_t		addr;
629	size_t		tlen;
630	int		error, rv;
631
632	error = 0;
633
	/* Split the file offset into a page index and an in-page offset,
	 * and clamp the transfer to the remainder of that page. */
634	addr = uio->uio_offset;
635	idx = OFF_TO_IDX(addr);
636	offset = addr & PAGE_MASK;
637	tlen = MIN(PAGE_SIZE - offset, len);
638
639	VM_OBJECT_WLOCK(vobj);
640lookupvpg:
641	if (((vpg = vm_page_lookup(vobj, idx)) != NULL) &&
642	    vm_page_is_valid(vpg, offset, tlen)) {
643		if ((vpg->oflags & VPO_BUSY) != 0) {
644			/*
645			 * Reference the page before unlocking and sleeping so
646			 * that the page daemon is less likely to reclaim it.
647			 */
648			vm_page_reference(vpg);
649			vm_page_sleep(vpg, "tmfsmw");
650			goto lookupvpg;
651		}
		/* vpg stays busy until the wakeup at the end of the
		 * function. */
652		vm_page_busy(vpg);
653		vm_page_undirty(vpg);
654		VM_OBJECT_WUNLOCK(vobj);
		/* NOTE(review): a failure here is not checked before the
		 * page is copied into tpg below; only the dirtying of tpg
		 * is skipped -- confirm this is intended. */
655		error = uiomove_fromphys(&vpg, offset, tlen, uio);
656	} else {
		/* No usable resident page; drop any cached copy of it and
		 * remember that the data must go directly into tpg. */
657		if (vm_page_is_cached(vobj, idx))
658			vm_page_cache_free(vobj, idx, idx + 1);
659		VM_OBJECT_WUNLOCK(vobj);
660		vpg = NULL;
661	}
662	VM_OBJECT_WLOCK(tobj);
663	tpg = vm_page_grab(tobj, idx, VM_ALLOC_WIRED |
664	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
665	if (tpg->valid != VM_PAGE_BITS_ALL) {
666		if (vm_pager_has_page(tobj, idx, NULL, NULL)) {
667			rv = vm_pager_get_pages(tobj, &tpg, 1, 0);
668			if (rv != VM_PAGER_OK) {
				/* NOTE(review): tpg was grabbed with
				 * VM_ALLOC_WIRED and is freed without being
				 * unwired -- confirm vm_page_free() allows
				 * that. */
669				vm_page_lock(tpg);
670				vm_page_free(tpg);
671				vm_page_unlock(tpg);
672				error = EIO;
673				goto out;
674			}
675		} else
676			vm_page_zero_invalid(tpg, TRUE);
677	}
678	VM_OBJECT_WUNLOCK(tobj);
679	if (vpg == NULL)
680		error = uiomove_fromphys(&tpg, offset, tlen, uio);
681	else {
682		KASSERT(vpg->valid == VM_PAGE_BITS_ALL, ("parts of vpg invalid"));
683		pmap_copy_page(vpg, tpg);
684	}
685	VM_OBJECT_WLOCK(tobj);
686	if (error == 0) {
687		KASSERT(tpg->valid == VM_PAGE_BITS_ALL,
688		    ("parts of tpg invalid"));
689		vm_page_dirty(tpg);
690	}
691	vm_page_lock(tpg);
692	vm_page_unwire(tpg, TRUE);
693	vm_page_unlock(tpg);
694	vm_page_wakeup(tpg);
695out:
	/* Reached with the tobj lock held on both the normal and the
	 * pager-failure path. */
696	VM_OBJECT_WUNLOCK(tobj);
697	if (vpg != NULL) {
698		VM_OBJECT_WLOCK(vobj);
699		vm_page_wakeup(vpg);
700		VM_OBJECT_WUNLOCK(vobj);
701	}
702
703	return	(error);
704}
705
706static int
707tmpfs_write(struct vop_write_args *v)
708{
709	struct vnode *vp = v->a_vp;
710	struct uio *uio = v->a_uio;
711	int ioflag = v->a_ioflag;
712
713	boolean_t extended;
714	int error = 0;
715	off_t oldsize;
716	struct tmpfs_node *node;
717	vm_object_t uobj;
718	size_t len;
719	int resid;
720
721	node = VP_TO_TMPFS_NODE(vp);
722	oldsize = node->tn_size;
723
724	if (uio->uio_offset < 0 || vp->v_type != VREG) {
725		error = EINVAL;
726		goto out;
727	}
728
729	if (uio->uio_resid == 0) {
730		error = 0;
731		goto out;
732	}
733
734	if (ioflag & IO_APPEND)
735		uio->uio_offset = node->tn_size;
736
737	if (uio->uio_offset + uio->uio_resid >
738	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
739		return (EFBIG);
740
741	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
742		return (EFBIG);
743
744	extended = uio->uio_offset + uio->uio_resid > node->tn_size;
745	if (extended) {
746		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
747		    FALSE);
748		if (error != 0)
749			goto out;
750	}
751
752	uobj = node->tn_reg.tn_aobj;
753	while ((resid = uio->uio_resid) > 0) {
754		if (node->tn_size <= uio->uio_offset)
755			break;
756		len = MIN(node->tn_size - uio->uio_offset, resid);
757		if (len == 0)
758			break;
759		error = tmpfs_mappedwrite(vp->v_object, uobj, len, uio);
760		if ((error != 0) || (resid == uio->uio_resid))
761			break;
762	}
763
764	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
765	    (extended ? TMPFS_NODE_CHANGED : 0);
766
767	if (node->tn_mode & (S_ISUID | S_ISGID)) {
768		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0))
769			node->tn_mode &= ~(S_ISUID | S_ISGID);
770	}
771
772	if (error != 0)
773		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
774
775out:
776	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
777	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
778
779	return error;
780}
781
782/* --------------------------------------------------------------------- */
783
784static int
785tmpfs_fsync(struct vop_fsync_args *v)
786{
787	struct vnode *vp = v->a_vp;
788
789	MPASS(VOP_ISLOCKED(vp));
790
791	tmpfs_update(vp);
792
793	return 0;
794}
795
796/* --------------------------------------------------------------------- */
797
798static int
799tmpfs_remove(struct vop_remove_args *v)
800{
801	struct vnode *dvp = v->a_dvp;
802	struct vnode *vp = v->a_vp;
803
804	int error;
805	struct tmpfs_dirent *de;
806	struct tmpfs_mount *tmp;
807	struct tmpfs_node *dnode;
808	struct tmpfs_node *node;
809
810	MPASS(VOP_ISLOCKED(dvp));
811	MPASS(VOP_ISLOCKED(vp));
812
813	if (vp->v_type == VDIR) {
814		error = EISDIR;
815		goto out;
816	}
817
818	dnode = VP_TO_TMPFS_DIR(dvp);
819	node = VP_TO_TMPFS_NODE(vp);
820	tmp = VFS_TO_TMPFS(vp->v_mount);
821	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
822	MPASS(de != NULL);
823
824	/* Files marked as immutable or append-only cannot be deleted. */
825	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
826	    (dnode->tn_flags & APPEND)) {
827		error = EPERM;
828		goto out;
829	}
830
831	/* Remove the entry from the directory; as it is a file, we do not
832	 * have to change the number of hard links of the directory. */
833	tmpfs_dir_detach(dvp, de);
834	if (v->a_cnp->cn_flags & DOWHITEOUT)
835		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
836
837	/* Free the directory entry we just deleted.  Note that the node
838	 * referred by it will not be removed until the vnode is really
839	 * reclaimed. */
840	tmpfs_free_dirent(tmp, de);
841
842	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
843	error = 0;
844
845out:
846
847	return error;
848}
849
850/* --------------------------------------------------------------------- */
851
852static int
853tmpfs_link(struct vop_link_args *v)
854{
855	struct vnode *dvp = v->a_tdvp;
856	struct vnode *vp = v->a_vp;
857	struct componentname *cnp = v->a_cnp;
858
859	int error;
860	struct tmpfs_dirent *de;
861	struct tmpfs_node *node;
862
863	MPASS(VOP_ISLOCKED(dvp));
864	MPASS(cnp->cn_flags & HASBUF);
865	MPASS(dvp != vp); /* XXX When can this be false? */
866
867	node = VP_TO_TMPFS_NODE(vp);
868
869	/* XXX: Why aren't the following two tests done by the caller? */
870
871	/* Hard links of directories are forbidden. */
872	if (vp->v_type == VDIR) {
873		error = EPERM;
874		goto out;
875	}
876
877	/* Cannot create cross-device links. */
878	if (dvp->v_mount != vp->v_mount) {
879		error = EXDEV;
880		goto out;
881	}
882
883	/* Ensure that we do not overflow the maximum number of links imposed
884	 * by the system. */
885	MPASS(node->tn_links <= LINK_MAX);
886	if (node->tn_links == LINK_MAX) {
887		error = EMLINK;
888		goto out;
889	}
890
891	/* We cannot create links of files marked immutable or append-only. */
892	if (node->tn_flags & (IMMUTABLE | APPEND)) {
893		error = EPERM;
894		goto out;
895	}
896
897	/* Allocate a new directory entry to represent the node. */
898	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
899	    cnp->cn_nameptr, cnp->cn_namelen, &de);
900	if (error != 0)
901		goto out;
902
903	/* Insert the new directory entry into the appropriate directory. */
904	if (cnp->cn_flags & ISWHITEOUT)
905		tmpfs_dir_whiteout_remove(dvp, cnp);
906	tmpfs_dir_attach(dvp, de);
907
908	/* vp link count has changed, so update node times. */
909	node->tn_status |= TMPFS_NODE_CHANGED;
910	tmpfs_update(vp);
911
912	error = 0;
913
914out:
915	return error;
916}
917
918/* --------------------------------------------------------------------- */
919
920/*
921 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
922 * fail to acquire any lock in the path we will drop all held locks,
923 * acquire the new lock in a blocking fashion, and then release it and
924 * restart the rename.  This acquire/release step ensures that we do not
925 * spin on a lock waiting for release.  On error release all vnode locks
926 * and decrement references the way tmpfs_rename() would do.
927 */
/*
 * Parameters:
 *   fdvp, tdvp  source and target directory vnodes.
 *   fvpp, tvpp  in/out: source and target vnodes.  Both are re-resolved
 *               by name here and may be replaced; *tvpp becomes NULL if
 *               the target name no longer exists.
 *   fcnp, tcnp  component names used for the re-resolution.
 *
 * On entry tdvp and (if present and distinct from tdvp) *tvpp are
 * unlocked first thing, so the caller is expected to hold those locks.
 *
 * Returns 0 with fdvp and tdvp locked exclusively, *tvpp (if any) locked
 * exclusively, and *fvpp referenced but unlocked.  On failure returns an
 * errno value after dropping the references on fdvp, *fvpp, tdvp and
 * *tvpp; no vnode lock taken here is left held.
 *
 * The number of passes through the relock loop is added to
 * tmpfs_rename_restarts on both exits.
 */
928static int
929tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
930    struct vnode *tdvp, struct vnode **tvpp,
931    struct componentname *fcnp, struct componentname *tcnp)
932{
933	struct vnode *nvp;
934	struct mount *mp;
935	struct tmpfs_dirent *de;
936	int error, restarts = 0;
937
	/* Start from a state in which none of the four vnodes is locked. */
938	VOP_UNLOCK(tdvp, 0);
939	if (*tvpp != NULL && *tvpp != tdvp)
940		VOP_UNLOCK(*tvpp, 0);
941	mp = fdvp->v_mount;
942
943relock:
944	restarts += 1;
	/* fdvp is the only lock taken in a blocking fashion up front. */
945	error = vn_lock(fdvp, LK_EXCLUSIVE);
946	if (error)
947		goto releout;
948	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		/* Wait for tdvp without holding fdvp, then start over. */
949		VOP_UNLOCK(fdvp, 0);
950		error = vn_lock(tdvp, LK_EXCLUSIVE);
951		if (error)
952			goto releout;
953		VOP_UNLOCK(tdvp, 0);
954		goto relock;
955	}
956	/*
957	 * Re-resolve fvp to be certain it still exists and fetch the
958	 * correct vnode.
959	 */
960	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
961	if (de == NULL) {
962		VOP_UNLOCK(fdvp, 0);
963		VOP_UNLOCK(tdvp, 0);
964		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
965		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
966			error = EINVAL;
967		else
968			error = ENOENT;
969		goto releout;
970	}
971	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
972	if (error != 0) {
973		VOP_UNLOCK(fdvp, 0);
974		VOP_UNLOCK(tdvp, 0);
975		if (error != EBUSY)
976			goto releout;
		/* NOTE(review): de was looked up under the fdvp lock, which
		 * has just been dropped -- confirm the dirent cannot be
		 * freed before de->td_node is read here. */
977		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
978		if (error != 0)
979			goto releout;
980		VOP_UNLOCK(nvp, 0);
981		/*
982		 * Concurrent rename race.
983		 */
984		if (nvp == tdvp) {
985			vrele(nvp);
986			error = EINVAL;
987			goto releout;
988		}
		/* Adopt the freshly resolved vnode as the new fvp and retry. */
989		vrele(*fvpp);
990		*fvpp = nvp;
991		goto relock;
992	}
	/* fvp is handed back referenced but unlocked. */
993	vrele(*fvpp);
994	*fvpp = nvp;
995	VOP_UNLOCK(*fvpp, 0);
996	/*
997	 * Re-resolve tvp and acquire the vnode lock if present.
998	 */
999	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
1000	/*
1001	 * If tvp disappeared we just carry on.
1002	 */
1003	if (de == NULL && *tvpp != NULL) {
1004		vrele(*tvpp);
1005		*tvpp = NULL;
1006	}
1007	/*
1008	 * Get the tvp ino if the lookup succeeded.  We may have to restart
1009	 * if the non-blocking acquire fails.
1010	 */
1011	if (de != NULL) {
1012		nvp = NULL;
1013		error = tmpfs_alloc_vp(mp, de->td_node,
1014		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
1015		if (*tvpp != NULL)
1016			vrele(*tvpp);
1017		*tvpp = nvp;
1018		if (error != 0) {
1019			VOP_UNLOCK(fdvp, 0);
1020			VOP_UNLOCK(tdvp, 0);
1021			if (error != EBUSY)
1022				goto releout;
			/* NOTE(review): as above, de is dereferenced after
			 * the directory locks were dropped -- confirm this
			 * is safe. */
1023			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
1024			    &nvp);
1025			if (error != 0)
1026				goto releout;
1027			VOP_UNLOCK(nvp, 0);
			/* NOTE(review): unlike the fvp case above, nvp is
			 * neither vrele()d nor stored in *tvpp on the two
			 * paths below -- presumably tmpfs_alloc_vp() returns
			 * a referenced vnode; confirm whether that reference
			 * is leaked. */
1028			/*
1029			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
1030			 */
1031			if (nvp == fdvp) {
1032				error = ENOTEMPTY;
1033				goto releout;
1034			}
1035			goto relock;
1036		}
1037	}
1038	tmpfs_rename_restarts += restarts;
1039
1040	return (0);
1041
1042releout:
	/* Every path that jumps here has already dropped the vnode locks it
	 * took; only the references remain to be released. */
1043	vrele(fdvp);
1044	vrele(*fvpp);
1045	vrele(tdvp);
1046	if (*tvpp != NULL)
1047		vrele(*tvpp);
1048	tmpfs_rename_restarts += restarts;
1049
1050	return (error);
1051}
1052
1053static int
1054tmpfs_rename(struct vop_rename_args *v)
1055{
1056	struct vnode *fdvp = v->a_fdvp;
1057	struct vnode *fvp = v->a_fvp;
1058	struct componentname *fcnp = v->a_fcnp;
1059	struct vnode *tdvp = v->a_tdvp;
1060	struct vnode *tvp = v->a_tvp;
1061	struct componentname *tcnp = v->a_tcnp;
1062	struct mount *mp = NULL;
1063
1064	char *newname;
1065	int error;
1066	struct tmpfs_dirent *de;
1067	struct tmpfs_mount *tmp;
1068	struct tmpfs_node *fdnode;
1069	struct tmpfs_node *fnode;
1070	struct tmpfs_node *tnode;
1071	struct tmpfs_node *tdnode;
1072
1073	MPASS(VOP_ISLOCKED(tdvp));
1074	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
1075	MPASS(fcnp->cn_flags & HASBUF);
1076	MPASS(tcnp->cn_flags & HASBUF);
1077
1078	/* Disallow cross-device renames.
1079	 * XXX Why isn't this done by the caller? */
1080	if (fvp->v_mount != tdvp->v_mount ||
1081	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1082		error = EXDEV;
1083		goto out;
1084	}
1085
1086	/* If source and target are the same file, there is nothing to do. */
1087	if (fvp == tvp) {
1088		error = 0;
1089		goto out;
1090	}
1091
1092	/* If we need to move the directory between entries, lock the
1093	 * source so that we can safely operate on it. */
1094	if (fdvp != tdvp && fdvp != tvp) {
1095		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
1096			mp = tdvp->v_mount;
1097			error = vfs_busy(mp, 0);
1098			if (error != 0) {
1099				mp = NULL;
1100				goto out;
1101			}
1102			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
1103			    fcnp, tcnp);
1104			if (error != 0) {
1105				vfs_unbusy(mp);
1106				return (error);
1107			}
1108			ASSERT_VOP_ELOCKED(fdvp,
1109			    "tmpfs_rename: fdvp not locked");
1110			ASSERT_VOP_ELOCKED(tdvp,
1111			    "tmpfs_rename: tdvp not locked");
1112			if (tvp != NULL)
1113				ASSERT_VOP_ELOCKED(tvp,
1114				    "tmpfs_rename: tvp not locked");
1115			if (fvp == tvp) {
1116				error = 0;
1117				goto out_locked;
1118			}
1119		}
1120	}
1121
1122	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1123	tdnode = VP_TO_TMPFS_DIR(tdvp);
1124	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
1125	fdnode = VP_TO_TMPFS_DIR(fdvp);
1126	fnode = VP_TO_TMPFS_NODE(fvp);
1127	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
1128
1129	/* Entry can disappear before we lock fdvp,
1130	 * also avoid manipulating '.' and '..' entries. */
1131	if (de == NULL) {
1132		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
1133		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
1134			error = EINVAL;
1135		else
1136			error = ENOENT;
1137		goto out_locked;
1138	}
1139	MPASS(de->td_node == fnode);
1140
1141	/* If re-naming a directory to another preexisting directory
1142	 * ensure that the target directory is empty so that its
1143	 * removal causes no side effects.
1144	 * Kern_rename gurantees the destination to be a directory
1145	 * if the source is one. */
1146	if (tvp != NULL) {
1147		MPASS(tnode != NULL);
1148
1149		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1150		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1151			error = EPERM;
1152			goto out_locked;
1153		}
1154
1155		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1156			if (tnode->tn_size > 0) {
1157				error = ENOTEMPTY;
1158				goto out_locked;
1159			}
1160		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1161			error = ENOTDIR;
1162			goto out_locked;
1163		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1164			error = EISDIR;
1165			goto out_locked;
1166		} else {
1167			MPASS(fnode->tn_type != VDIR &&
1168				tnode->tn_type != VDIR);
1169		}
1170	}
1171
1172	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
1173	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1174		error = EPERM;
1175		goto out_locked;
1176	}
1177
1178	/* Ensure that we have enough memory to hold the new name, if it
1179	 * has to be changed. */
1180	if (fcnp->cn_namelen != tcnp->cn_namelen ||
1181	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
1182		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
1183	} else
1184		newname = NULL;
1185
1186	/* If the node is being moved to another directory, we have to do
1187	 * the move. */
1188	if (fdnode != tdnode) {
1189		/* In case we are moving a directory, we have to adjust its
1190		 * parent to point to the new parent. */
1191		if (de->td_node->tn_type == VDIR) {
1192			struct tmpfs_node *n;
1193
1194			/* Ensure the target directory is not a child of the
1195			 * directory being moved.  Otherwise, we'd end up
1196			 * with stale nodes. */
1197			n = tdnode;
1198			/* TMPFS_LOCK garanties that no nodes are freed while
1199			 * traversing the list. Nodes can only be marked as
1200			 * removed: tn_parent == NULL. */
1201			TMPFS_LOCK(tmp);
1202			TMPFS_NODE_LOCK(n);
1203			while (n != n->tn_dir.tn_parent) {
1204				struct tmpfs_node *parent;
1205
1206				if (n == fnode) {
1207					TMPFS_NODE_UNLOCK(n);
1208					TMPFS_UNLOCK(tmp);
1209					error = EINVAL;
1210					if (newname != NULL)
1211						    free(newname, M_TMPFSNAME);
1212					goto out_locked;
1213				}
1214				parent = n->tn_dir.tn_parent;
1215				TMPFS_NODE_UNLOCK(n);
1216				if (parent == NULL) {
1217					n = NULL;
1218					break;
1219				}
1220				TMPFS_NODE_LOCK(parent);
1221				if (parent->tn_dir.tn_parent == NULL) {
1222					TMPFS_NODE_UNLOCK(parent);
1223					n = NULL;
1224					break;
1225				}
1226				n = parent;
1227			}
1228			TMPFS_UNLOCK(tmp);
1229			if (n == NULL) {
1230				error = EINVAL;
1231				if (newname != NULL)
1232					    free(newname, M_TMPFSNAME);
1233				goto out_locked;
1234			}
1235			TMPFS_NODE_UNLOCK(n);
1236
1237			/* Adjust the parent pointer. */
1238			TMPFS_VALIDATE_DIR(fnode);
1239			TMPFS_NODE_LOCK(de->td_node);
1240			de->td_node->tn_dir.tn_parent = tdnode;
1241			TMPFS_NODE_UNLOCK(de->td_node);
1242
1243			/* As a result of changing the target of the '..'
1244			 * entry, the link count of the source and target
1245			 * directories has to be adjusted. */
1246			TMPFS_NODE_LOCK(tdnode);
1247			TMPFS_ASSERT_LOCKED(tdnode);
1248			tdnode->tn_links++;
1249			TMPFS_NODE_UNLOCK(tdnode);
1250
1251			TMPFS_NODE_LOCK(fdnode);
1252			TMPFS_ASSERT_LOCKED(fdnode);
1253			fdnode->tn_links--;
1254			TMPFS_NODE_UNLOCK(fdnode);
1255		}
1256	}
1257
1258	/* Do the move: just remove the entry from the source directory
1259	 * and insert it into the target one. */
1260	tmpfs_dir_detach(fdvp, de);
1261
1262	if (fcnp->cn_flags & DOWHITEOUT)
1263		tmpfs_dir_whiteout_add(fdvp, fcnp);
1264	if (tcnp->cn_flags & ISWHITEOUT)
1265		tmpfs_dir_whiteout_remove(tdvp, tcnp);
1266
1267	/* If the name has changed, we need to make it effective by changing
1268	 * it in the directory entry. */
1269	if (newname != NULL) {
1270		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
1271
1272		free(de->ud.td_name, M_TMPFSNAME);
1273		de->ud.td_name = newname;
1274		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
1275
1276		fnode->tn_status |= TMPFS_NODE_CHANGED;
1277		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1278	}
1279
1280	/* If we are overwriting an entry, we have to remove the old one
1281	 * from the target directory. */
1282	if (tvp != NULL) {
1283		struct tmpfs_dirent *tde;
1284
1285		/* Remove the old entry from the target directory. */
1286		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
1287		tmpfs_dir_detach(tdvp, tde);
1288
1289		/* Free the directory entry we just deleted.  Note that the
1290		 * node referred by it will not be removed until the vnode is
1291		 * really reclaimed. */
1292		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1293	}
1294
1295	tmpfs_dir_attach(tdvp, de);
1296
1297	cache_purge(fvp);
1298	if (tvp != NULL)
1299		cache_purge(tvp);
1300	cache_purge_negative(tdvp);
1301
1302	error = 0;
1303
1304out_locked:
1305	if (fdvp != tdvp && fdvp != tvp)
1306		VOP_UNLOCK(fdvp, 0);
1307
1308out:
1309	/* Release target nodes. */
1310	/* XXX: I don't understand when tdvp can be the same as tvp, but
1311	 * other code takes care of this... */
1312	if (tdvp == tvp)
1313		vrele(tdvp);
1314	else
1315		vput(tdvp);
1316	if (tvp != NULL)
1317		vput(tvp);
1318
1319	/* Release source nodes. */
1320	vrele(fdvp);
1321	vrele(fvp);
1322
1323	if (mp != NULL)
1324		vfs_unbusy(mp);
1325
1326	return error;
1327}
1328
1329/* --------------------------------------------------------------------- */
1330
1331static int
1332tmpfs_mkdir(struct vop_mkdir_args *v)
1333{
1334	struct vnode *dvp = v->a_dvp;
1335	struct vnode **vpp = v->a_vpp;
1336	struct componentname *cnp = v->a_cnp;
1337	struct vattr *vap = v->a_vap;
1338
1339	MPASS(vap->va_type == VDIR);
1340
1341	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
1342}
1343
1344/* --------------------------------------------------------------------- */
1345
1346static int
1347tmpfs_rmdir(struct vop_rmdir_args *v)
1348{
1349	struct vnode *dvp = v->a_dvp;
1350	struct vnode *vp = v->a_vp;
1351
1352	int error;
1353	struct tmpfs_dirent *de;
1354	struct tmpfs_mount *tmp;
1355	struct tmpfs_node *dnode;
1356	struct tmpfs_node *node;
1357
1358	MPASS(VOP_ISLOCKED(dvp));
1359	MPASS(VOP_ISLOCKED(vp));
1360
1361	tmp = VFS_TO_TMPFS(dvp->v_mount);
1362	dnode = VP_TO_TMPFS_DIR(dvp);
1363	node = VP_TO_TMPFS_DIR(vp);
1364
1365	/* Directories with more than two entries ('.' and '..') cannot be
1366	 * removed. */
1367	 if (node->tn_size > 0) {
1368		 error = ENOTEMPTY;
1369		 goto out;
1370	 }
1371
1372	if ((dnode->tn_flags & APPEND)
1373	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1374		error = EPERM;
1375		goto out;
1376	}
1377
1378	/* This invariant holds only if we are not trying to remove "..".
1379	  * We checked for that above so this is safe now. */
1380	MPASS(node->tn_dir.tn_parent == dnode);
1381
1382	/* Get the directory entry associated with node (vp).  This was
1383	 * filled by tmpfs_lookup while looking up the entry. */
1384	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
1385	MPASS(TMPFS_DIRENT_MATCHES(de,
1386	    v->a_cnp->cn_nameptr,
1387	    v->a_cnp->cn_namelen));
1388
1389	/* Check flags to see if we are allowed to remove the directory. */
1390	if (dnode->tn_flags & APPEND
1391		|| node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1392		error = EPERM;
1393		goto out;
1394	}
1395
1396
1397	/* Detach the directory entry from the directory (dnode). */
1398	tmpfs_dir_detach(dvp, de);
1399	if (v->a_cnp->cn_flags & DOWHITEOUT)
1400		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
1401
1402	/* No vnode should be allocated for this entry from this point */
1403	TMPFS_NODE_LOCK(node);
1404	TMPFS_ASSERT_ELOCKED(node);
1405	node->tn_links--;
1406	node->tn_dir.tn_parent = NULL;
1407	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1408	    TMPFS_NODE_MODIFIED;
1409
1410	TMPFS_NODE_UNLOCK(node);
1411
1412	TMPFS_NODE_LOCK(dnode);
1413	TMPFS_ASSERT_ELOCKED(dnode);
1414	dnode->tn_links--;
1415	dnode->tn_status |= TMPFS_NODE_ACCESSED | \
1416	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1417	TMPFS_NODE_UNLOCK(dnode);
1418
1419	cache_purge(dvp);
1420	cache_purge(vp);
1421
1422	/* Free the directory entry we just deleted.  Note that the node
1423	 * referred by it will not be removed until the vnode is really
1424	 * reclaimed. */
1425	tmpfs_free_dirent(tmp, de);
1426
1427	/* Release the deleted vnode (will destroy the node, notify
1428	 * interested parties and clean it from the cache). */
1429
1430	dnode->tn_status |= TMPFS_NODE_CHANGED;
1431	tmpfs_update(dvp);
1432
1433	error = 0;
1434
1435out:
1436	return error;
1437}
1438
1439/* --------------------------------------------------------------------- */
1440
1441static int
1442tmpfs_symlink(struct vop_symlink_args *v)
1443{
1444	struct vnode *dvp = v->a_dvp;
1445	struct vnode **vpp = v->a_vpp;
1446	struct componentname *cnp = v->a_cnp;
1447	struct vattr *vap = v->a_vap;
1448	char *target = v->a_target;
1449
1450#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
1451	MPASS(vap->va_type == VLNK);
1452#else
1453	vap->va_type = VLNK;
1454#endif
1455
1456	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
1457}
1458
1459/* --------------------------------------------------------------------- */
1460
1461static int
1462tmpfs_readdir(struct vop_readdir_args *v)
1463{
1464	struct vnode *vp = v->a_vp;
1465	struct uio *uio = v->a_uio;
1466	int *eofflag = v->a_eofflag;
1467	u_long **cookies = v->a_cookies;
1468	int *ncookies = v->a_ncookies;
1469
1470	int error;
1471	ssize_t startresid;
1472	int cnt = 0;
1473	struct tmpfs_node *node;
1474
1475	/* This operation only makes sense on directory nodes. */
1476	if (vp->v_type != VDIR)
1477		return ENOTDIR;
1478
1479	node = VP_TO_TMPFS_DIR(vp);
1480
1481	startresid = uio->uio_resid;
1482
1483	if (cookies != NULL && ncookies != NULL) {
1484		cnt = howmany(node->tn_size, sizeof(struct tmpfs_dirent)) + 2;
1485		*cookies = malloc(cnt * sizeof(**cookies), M_TEMP, M_WAITOK);
1486		*ncookies = 0;
1487	}
1488
1489	if (cnt == 0)
1490		error = tmpfs_dir_getdents(node, uio, 0, NULL, NULL);
1491	else
1492		error = tmpfs_dir_getdents(node, uio, cnt, *cookies, ncookies);
1493
1494	if (error == EJUSTRETURN)
1495		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
1496
1497	if (error != 0 && cnt != 0)
1498		free(*cookies, M_TEMP);
1499
1500	if (eofflag != NULL)
1501		*eofflag =
1502		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1503
1504	return error;
1505}
1506
1507/* --------------------------------------------------------------------- */
1508
1509static int
1510tmpfs_readlink(struct vop_readlink_args *v)
1511{
1512	struct vnode *vp = v->a_vp;
1513	struct uio *uio = v->a_uio;
1514
1515	int error;
1516	struct tmpfs_node *node;
1517
1518	MPASS(uio->uio_offset == 0);
1519	MPASS(vp->v_type == VLNK);
1520
1521	node = VP_TO_TMPFS_NODE(vp);
1522
1523	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1524	    uio);
1525	node->tn_status |= TMPFS_NODE_ACCESSED;
1526
1527	return error;
1528}
1529
1530/* --------------------------------------------------------------------- */
1531
1532static int
1533tmpfs_inactive(struct vop_inactive_args *v)
1534{
1535	struct vnode *vp = v->a_vp;
1536
1537	struct tmpfs_node *node;
1538
1539	MPASS(VOP_ISLOCKED(vp));
1540
1541	node = VP_TO_TMPFS_NODE(vp);
1542
1543	if (node->tn_links == 0)
1544		vrecycle(vp);
1545
1546	return 0;
1547}
1548
1549/* --------------------------------------------------------------------- */
1550
1551int
1552tmpfs_reclaim(struct vop_reclaim_args *v)
1553{
1554	struct vnode *vp = v->a_vp;
1555
1556	struct tmpfs_mount *tmp;
1557	struct tmpfs_node *node;
1558
1559	node = VP_TO_TMPFS_NODE(vp);
1560	tmp = VFS_TO_TMPFS(vp->v_mount);
1561
1562	vnode_destroy_vobject(vp);
1563	cache_purge(vp);
1564
1565	TMPFS_NODE_LOCK(node);
1566	TMPFS_ASSERT_ELOCKED(node);
1567	tmpfs_free_vp(vp);
1568
1569	/* If the node referenced by this vnode was deleted by the user,
1570	 * we must free its associated data structures (now that the vnode
1571	 * is being reclaimed). */
1572	if (node->tn_links == 0 &&
1573	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
1574		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1575		TMPFS_NODE_UNLOCK(node);
1576		tmpfs_free_node(tmp, node);
1577	} else
1578		TMPFS_NODE_UNLOCK(node);
1579
1580	MPASS(vp->v_data == NULL);
1581	return 0;
1582}
1583
1584/* --------------------------------------------------------------------- */
1585
1586static int
1587tmpfs_print(struct vop_print_args *v)
1588{
1589	struct vnode *vp = v->a_vp;
1590
1591	struct tmpfs_node *node;
1592
1593	node = VP_TO_TMPFS_NODE(vp);
1594
1595	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %d\n",
1596	    node, node->tn_flags, node->tn_links);
1597	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
1598	    node->tn_mode, node->tn_uid, node->tn_gid,
1599	    (intmax_t)node->tn_size, node->tn_status);
1600
1601	if (vp->v_type == VFIFO)
1602		fifo_printinfo(vp);
1603
1604	printf("\n");
1605
1606	return 0;
1607}
1608
1609/* --------------------------------------------------------------------- */
1610
1611static int
1612tmpfs_pathconf(struct vop_pathconf_args *v)
1613{
1614	int name = v->a_name;
1615	register_t *retval = v->a_retval;
1616
1617	int error;
1618
1619	error = 0;
1620
1621	switch (name) {
1622	case _PC_LINK_MAX:
1623		*retval = LINK_MAX;
1624		break;
1625
1626	case _PC_NAME_MAX:
1627		*retval = NAME_MAX;
1628		break;
1629
1630	case _PC_PATH_MAX:
1631		*retval = PATH_MAX;
1632		break;
1633
1634	case _PC_PIPE_BUF:
1635		*retval = PIPE_BUF;
1636		break;
1637
1638	case _PC_CHOWN_RESTRICTED:
1639		*retval = 1;
1640		break;
1641
1642	case _PC_NO_TRUNC:
1643		*retval = 1;
1644		break;
1645
1646	case _PC_SYNC_IO:
1647		*retval = 1;
1648		break;
1649
1650	case _PC_FILESIZEBITS:
1651		*retval = 0; /* XXX Don't know which value should I return. */
1652		break;
1653
1654	default:
1655		error = EINVAL;
1656	}
1657
1658	return error;
1659}
1660
1661static int
1662tmpfs_vptofh(struct vop_vptofh_args *ap)
1663{
1664	struct tmpfs_fid *tfhp;
1665	struct tmpfs_node *node;
1666
1667	tfhp = (struct tmpfs_fid *)ap->a_fhp;
1668	node = VP_TO_TMPFS_NODE(ap->a_vp);
1669
1670	tfhp->tf_len = sizeof(struct tmpfs_fid);
1671	tfhp->tf_id = node->tn_id;
1672	tfhp->tf_gen = node->tn_gen;
1673
1674	return (0);
1675}
1676
1677static int
1678tmpfs_whiteout(struct vop_whiteout_args *ap)
1679{
1680	struct vnode *dvp = ap->a_dvp;
1681	struct componentname *cnp = ap->a_cnp;
1682	struct tmpfs_dirent *de;
1683
1684	switch (ap->a_flags) {
1685	case LOOKUP:
1686		return (0);
1687	case CREATE:
1688		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
1689		if (de != NULL)
1690			return (de->td_node == NULL ? 0 : EEXIST);
1691		return (tmpfs_dir_whiteout_add(dvp, cnp));
1692	case DELETE:
1693		tmpfs_dir_whiteout_remove(dvp, cnp);
1694		return (0);
1695	default:
1696		panic("tmpfs_whiteout: unknown op");
1697	}
1698}
1699
1700/* --------------------------------------------------------------------- */
1701
1702/*
1703 * vnode operations vector used for files stored in a tmpfs file system.
1704 */
1705struct vop_vector tmpfs_vnodeop_entries = {
1706	.vop_default =			&default_vnodeops,
1707	.vop_lookup =			vfs_cache_lookup,
1708	.vop_cachedlookup =		tmpfs_lookup,
1709	.vop_create =			tmpfs_create,
1710	.vop_mknod =			tmpfs_mknod,
1711	.vop_open =			tmpfs_open,
1712	.vop_close =			tmpfs_close,
1713	.vop_access =			tmpfs_access,
1714	.vop_getattr =			tmpfs_getattr,
1715	.vop_setattr =			tmpfs_setattr,
1716	.vop_read =			tmpfs_read,
1717	.vop_write =			tmpfs_write,
1718	.vop_fsync =			tmpfs_fsync,
1719	.vop_remove =			tmpfs_remove,
1720	.vop_link =			tmpfs_link,
1721	.vop_rename =			tmpfs_rename,
1722	.vop_mkdir =			tmpfs_mkdir,
1723	.vop_rmdir =			tmpfs_rmdir,
1724	.vop_symlink =			tmpfs_symlink,
1725	.vop_readdir =			tmpfs_readdir,
1726	.vop_readlink =			tmpfs_readlink,
1727	.vop_inactive =			tmpfs_inactive,
1728	.vop_reclaim =			tmpfs_reclaim,
1729	.vop_print =			tmpfs_print,
1730	.vop_pathconf =			tmpfs_pathconf,
1731	.vop_vptofh =			tmpfs_vptofh,
1732	.vop_whiteout =			tmpfs_whiteout,
1733	.vop_bmap =			VOP_EOPNOTSUPP,
1734};
1735
1736