/*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $	*/

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
 * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
 * All rights reserved.
 * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Written by Paul Popelka (paulp@uts.amdahl.com)
 *
 * You can do anything you want with this software, just don't say you wrote
 * it, and don't remove this notice.
 *
 * This software is provided "as is".
 *
 * The author supplies this software to be publicly redistributed on the
 * understanding that the author is not responsible for the correct
 * functioning of this software in any circumstances and is not liable for
 * any damages caused by this software.
 *
 * October 1992
 */
51
52#include <sys/param.h>
53#include <sys/systm.h>
54#include <sys/bio.h>
55#include <sys/buf.h>
56#include <sys/clock.h>
57#include <sys/dirent.h>
58#include <sys/lock.h>
59#include <sys/lockf.h>
60#include <sys/malloc.h>
61#include <sys/mount.h>
62#include <sys/mutex.h>
63#include <sys/namei.h>
64#include <sys/priv.h>
65#include <sys/stat.h>
66#include <sys/sysctl.h>
67#include <sys/unistd.h>
68#include <sys/vmmeter.h>
69#include <sys/vnode.h>
70
71#include <vm/vm.h>
72#include <vm/vm_extern.h>
73#include <vm/vnode_pager.h>
74
75#include <fs/msdosfs/bpb.h>
76#include <fs/msdosfs/direntry.h>
77#include <fs/msdosfs/denode.h>
78#include <fs/msdosfs/fat.h>
79#include <fs/msdosfs/msdosfsmount.h>
80
81/*
82 * Prototypes for MSDOSFS vnode operations
83 */
84static vop_create_t	msdosfs_create;
85static vop_mknod_t	msdosfs_mknod;
86static vop_open_t	msdosfs_open;
87static vop_close_t	msdosfs_close;
88static vop_access_t	msdosfs_access;
89static vop_getattr_t	msdosfs_getattr;
90static vop_setattr_t	msdosfs_setattr;
91static vop_read_t	msdosfs_read;
92static vop_write_t	msdosfs_write;
93static vop_fsync_t	msdosfs_fsync;
94static vop_remove_t	msdosfs_remove;
95static vop_link_t	msdosfs_link;
96static vop_rename_t	msdosfs_rename;
97static vop_mkdir_t	msdosfs_mkdir;
98static vop_rmdir_t	msdosfs_rmdir;
99static vop_symlink_t	msdosfs_symlink;
100static vop_readdir_t	msdosfs_readdir;
101static vop_bmap_t	msdosfs_bmap;
102static vop_getpages_t	msdosfs_getpages;
103static vop_strategy_t	msdosfs_strategy;
104static vop_print_t	msdosfs_print;
105static vop_pathconf_t	msdosfs_pathconf;
106static vop_vptofh_t	msdosfs_vptofh;
107
/*
 * Some general notes:
 *
 * In the ufs filesystem the inodes, superblocks, and indirect blocks are
 * read/written using the vnode for the filesystem. Blocks that represent
 * the contents of a file are read/written using the vnode for the file
 * (including directories when they are read/written as files). This
 * presents problems for the dos filesystem because data that should be in
 * an inode (if dos had them) resides in the directory itself.  Since we
 * must update directory entries without the benefit of having the vnode
 * for the directory we must use the vnode for the filesystem.  This means
 * that when a directory is actually read/written (via read, write, or
 * readdir, or seek) we must use the vnode for the filesystem instead of
 * the vnode for the directory as would happen in ufs. This is to ensure we
 * retrieve the correct block from the buffer cache since the hash value is
 * based upon the vnode address and the desired block number.
 */
125
126/*
127 * Create a regular file. On entry the directory to contain the file being
128 * created is locked.  We must release before we return. We must also free
129 * the pathname buffer pointed at by cnp->cn_pnbuf, always on error.
130 */
131static int
132msdosfs_create(struct vop_create_args *ap)
133{
134	struct componentname *cnp = ap->a_cnp;
135	struct denode ndirent;
136	struct denode *dep;
137	struct denode *pdep = VTODE(ap->a_dvp);
138	struct timespec ts;
139	int error;
140
141#ifdef MSDOSFS_DEBUG
142	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
143#endif
144
145	/*
146	 * If this is the root directory and there is no space left we
147	 * can't do anything.  This is because the root directory can not
148	 * change size.
149	 */
150	if (pdep->de_StartCluster == MSDOSFSROOT
151	    && pdep->de_fndoffset >= pdep->de_FileSize) {
152		error = ENOSPC;
153		goto bad;
154	}
155
156	/*
157	 * Create a directory entry for the file, then call createde() to
158	 * have it installed. NOTE: DOS files are always executable.  We
159	 * use the absence of the owner write bit to make the file
160	 * readonly.
161	 */
162	memset(&ndirent, 0, sizeof(ndirent));
163	error = uniqdosname(pdep, cnp, ndirent.de_Name);
164	if (error)
165		goto bad;
166
167	ndirent.de_Attributes = ATTR_ARCHIVE;
168	ndirent.de_LowerCase = 0;
169	ndirent.de_StartCluster = 0;
170	ndirent.de_FileSize = 0;
171	ndirent.de_pmp = pdep->de_pmp;
172	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
173	vfs_timestamp(&ts);
174	DETIMES(&ndirent, &ts, &ts, &ts);
175	error = createde(&ndirent, pdep, &dep, cnp);
176	if (error)
177		goto bad;
178	*ap->a_vpp = DETOV(dep);
179	if ((cnp->cn_flags & MAKEENTRY) != 0)
180		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
181	return (0);
182
183bad:
184	return (error);
185}
186
/*
 * Creating device nodes is not possible here: the request is rejected
 * unconditionally with EINVAL and ap is never examined.
 */
static int
msdosfs_mknod(struct vop_mknod_args *ap)
{

	return (EINVAL);
}
193
194static int
195msdosfs_open(struct vop_open_args *ap)
196{
197	struct denode *dep = VTODE(ap->a_vp);
198	vnode_create_vobject(ap->a_vp, dep->de_FileSize, ap->a_td);
199	return 0;
200}
201
202static int
203msdosfs_close(struct vop_close_args *ap)
204{
205	struct vnode *vp = ap->a_vp;
206	struct denode *dep = VTODE(vp);
207	struct timespec ts;
208
209	VI_LOCK(vp);
210	if (vp->v_usecount > 1) {
211		vfs_timestamp(&ts);
212		DETIMES(dep, &ts, &ts, &ts);
213	}
214	VI_UNLOCK(vp);
215	return 0;
216}
217
218static int
219msdosfs_access(struct vop_access_args *ap)
220{
221	struct vnode *vp = ap->a_vp;
222	struct denode *dep = VTODE(ap->a_vp);
223	struct msdosfsmount *pmp = dep->de_pmp;
224	mode_t file_mode;
225	accmode_t accmode = ap->a_accmode;
226
227	file_mode = S_IRWXU|S_IRWXG|S_IRWXO;
228	file_mode &= (vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
229
230	/*
231	 * Disallow writing to directories and regular files if the
232	 * filesystem is read-only.
233	 */
234	if (accmode & VWRITE) {
235		switch (vp->v_type) {
236		case VREG:
237		case VDIR:
238			if (vp->v_mount->mnt_flag & MNT_RDONLY)
239				return (EROFS);
240			break;
241		default:
242			break;
243		}
244	}
245
246	return (vaccess(vp->v_type, file_mode, pmp->pm_uid, pmp->pm_gid,
247	    ap->a_accmode, ap->a_cred));
248}
249
250static int
251msdosfs_getattr(struct vop_getattr_args *ap)
252{
253	struct denode *dep = VTODE(ap->a_vp);
254	struct msdosfsmount *pmp = dep->de_pmp;
255	struct vattr *vap = ap->a_vap;
256	mode_t mode;
257	struct timespec ts;
258	u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
259	uint64_t fileid;
260
261	vfs_timestamp(&ts);
262	DETIMES(dep, &ts, &ts, &ts);
263	vap->va_fsid = dev2udev(pmp->pm_dev);
264	/*
265	 * The following computation of the fileid must be the same as that
266	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
267	 * doesn't work.
268	 */
269	if (dep->de_Attributes & ATTR_DIRECTORY) {
270		fileid = (uint64_t)cntobn(pmp, dep->de_StartCluster) *
271		    dirsperblk;
272		if (dep->de_StartCluster == MSDOSFSROOT)
273			fileid = 1;
274	} else {
275		fileid = (uint64_t)cntobn(pmp, dep->de_dirclust) *
276		    dirsperblk;
277		if (dep->de_dirclust == MSDOSFSROOT)
278			fileid = (uint64_t)roottobn(pmp, 0) * dirsperblk;
279		fileid += (uoff_t)dep->de_diroffset / sizeof(struct direntry);
280	}
281	vap->va_fileid = fileid;
282
283	mode = S_IRWXU|S_IRWXG|S_IRWXO;
284	if (dep->de_Attributes & ATTR_READONLY)
285		mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
286	vap->va_mode = mode &
287	    (ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
288	vap->va_uid = pmp->pm_uid;
289	vap->va_gid = pmp->pm_gid;
290	vap->va_nlink = 1;
291	vap->va_rdev = NODEV;
292	vap->va_size = dep->de_FileSize;
293	fattime2timespec(dep->de_MDate, dep->de_MTime, 0, 0, &vap->va_mtime);
294	vap->va_ctime = vap->va_mtime;
295	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
296		fattime2timespec(dep->de_ADate, 0, 0, 0, &vap->va_atime);
297		fattime2timespec(dep->de_CDate, dep->de_CTime, dep->de_CHun,
298		    0, &vap->va_birthtime);
299	} else {
300		vap->va_atime = vap->va_mtime;
301		vap->va_birthtime.tv_sec = -1;
302		vap->va_birthtime.tv_nsec = 0;
303	}
304	vap->va_flags = 0;
305	if (dep->de_Attributes & ATTR_ARCHIVE)
306		vap->va_flags |= UF_ARCHIVE;
307	if (dep->de_Attributes & ATTR_HIDDEN)
308		vap->va_flags |= UF_HIDDEN;
309	if (dep->de_Attributes & ATTR_READONLY)
310		vap->va_flags |= UF_READONLY;
311	if (dep->de_Attributes & ATTR_SYSTEM)
312		vap->va_flags |= UF_SYSTEM;
313	vap->va_gen = 0;
314	vap->va_blocksize = pmp->pm_bpcluster;
315	if (dep->de_StartCluster != MSDOSFSROOT)
316		vap->va_bytes =
317		    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
318	else
319		vap->va_bytes = 0; /* FAT12/FAT16 root dir in reserved area */
320	vap->va_type = ap->a_vp->v_type;
321	vap->va_filerev = dep->de_modrev;
322	return (0);
323}
324
325static int
326msdosfs_setattr(struct vop_setattr_args *ap)
327{
328	struct vnode *vp = ap->a_vp;
329	struct denode *dep = VTODE(ap->a_vp);
330	struct msdosfsmount *pmp = dep->de_pmp;
331	struct vattr *vap = ap->a_vap;
332	struct ucred *cred = ap->a_cred;
333	struct thread *td = curthread;
334	int error = 0;
335
336#ifdef MSDOSFS_DEBUG
337	printf("msdosfs_setattr(): vp %p, vap %p, cred %p\n",
338	    ap->a_vp, vap, cred);
339#endif
340
341	/*
342	 * Check for unsettable attributes.
343	 */
344	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
345	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
346	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
347	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
348#ifdef MSDOSFS_DEBUG
349		printf("msdosfs_setattr(): returning EINVAL\n");
350		printf("    va_type %d, va_nlink %llx, va_fsid %llx, va_fileid %llx\n",
351		    vap->va_type, (unsigned long long)vap->va_nlink,
352		    (unsigned long long)vap->va_fsid,
353		    (unsigned long long)vap->va_fileid);
354		printf("    va_blocksize %lx, va_rdev %llx, va_bytes %llx, va_gen %lx\n",
355		    vap->va_blocksize, (unsigned long long)vap->va_rdev,
356		    (unsigned long long)vap->va_bytes, vap->va_gen);
357		printf("    va_uid %x, va_gid %x\n",
358		    vap->va_uid, vap->va_gid);
359#endif
360		return (EINVAL);
361	}
362
363	/*
364	 * We don't allow setting attributes on the root directory.
365	 * The special case for the root directory is because before
366	 * FAT32, the root directory didn't have an entry for itself
367	 * (and was otherwise special).  With FAT32, the root
368	 * directory is not so special, but still doesn't have an
369	 * entry for itself.
370	 */
371	if (vp->v_vflag & VV_ROOT)
372		return (EINVAL);
373
374	if (vap->va_flags != VNOVAL) {
375		if (vp->v_mount->mnt_flag & MNT_RDONLY)
376			return (EROFS);
377		if (cred->cr_uid != pmp->pm_uid) {
378			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
379			if (error)
380				return (error);
381		}
382		/*
383		 * We are very inconsistent about handling unsupported
384		 * attributes.  We ignored the access time and the
385		 * read and execute bits.  We were strict for the other
386		 * attributes.
387		 */
388		if (vap->va_flags & ~(UF_ARCHIVE | UF_HIDDEN | UF_READONLY |
389		    UF_SYSTEM))
390			return EOPNOTSUPP;
391		if (vap->va_flags & UF_ARCHIVE)
392			dep->de_Attributes |= ATTR_ARCHIVE;
393		else
394			dep->de_Attributes &= ~ATTR_ARCHIVE;
395		if (vap->va_flags & UF_HIDDEN)
396			dep->de_Attributes |= ATTR_HIDDEN;
397		else
398			dep->de_Attributes &= ~ATTR_HIDDEN;
399		/* We don't allow changing the readonly bit on directories. */
400		if (vp->v_type != VDIR) {
401			if (vap->va_flags & UF_READONLY)
402				dep->de_Attributes |= ATTR_READONLY;
403			else
404				dep->de_Attributes &= ~ATTR_READONLY;
405		}
406		if (vap->va_flags & UF_SYSTEM)
407			dep->de_Attributes |= ATTR_SYSTEM;
408		else
409			dep->de_Attributes &= ~ATTR_SYSTEM;
410		dep->de_flag |= DE_MODIFIED;
411	}
412
413	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
414		uid_t uid;
415		gid_t gid;
416
417		if (vp->v_mount->mnt_flag & MNT_RDONLY)
418			return (EROFS);
419		uid = vap->va_uid;
420		if (uid == (uid_t)VNOVAL)
421			uid = pmp->pm_uid;
422		gid = vap->va_gid;
423		if (gid == (gid_t)VNOVAL)
424			gid = pmp->pm_gid;
425		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
426		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
427			error = priv_check_cred(cred, PRIV_VFS_CHOWN);
428			if (error)
429				return (error);
430		}
431		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
432			return EINVAL;
433	}
434
435	if (vap->va_size != VNOVAL) {
436		switch (vp->v_type) {
437		case VDIR:
438			return (EISDIR);
439		case VREG:
440			/*
441			 * Truncation is only supported for regular files,
442			 * Disallow it if the filesystem is read-only.
443			 */
444			if (vp->v_mount->mnt_flag & MNT_RDONLY)
445				return (EROFS);
446			break;
447		default:
448			/*
449			 * According to POSIX, the result is unspecified
450			 * for file types other than regular files,
451			 * directories and shared memory objects.  We
452			 * don't support any file types except regular
453			 * files and directories in this file system, so
454			 * this (default) case is unreachable and can do
455			 * anything.  Keep falling through to detrunc()
456			 * for now.
457			 */
458			break;
459		}
460		error = vn_rlimit_trunc(vap->va_size, td);
461		if (error != 0)
462			return (error);
463		error = detrunc(dep, vap->va_size, 0, cred);
464		if (error)
465			return error;
466	}
467	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
468		if (vp->v_mount->mnt_flag & MNT_RDONLY)
469			return (EROFS);
470		error = vn_utimes_perm(vp, vap, cred, td);
471		if (error != 0)
472			return (error);
473		if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
474		    vap->va_atime.tv_sec != VNOVAL) {
475			dep->de_flag &= ~DE_ACCESS;
476			timespec2fattime(&vap->va_atime, 0,
477			    &dep->de_ADate, NULL, NULL);
478		}
479		if (vap->va_mtime.tv_sec != VNOVAL) {
480			dep->de_flag &= ~DE_UPDATE;
481			timespec2fattime(&vap->va_mtime, 0,
482			    &dep->de_MDate, &dep->de_MTime, NULL);
483		}
484		/*
485		 * We don't set the archive bit when modifying the time of
486		 * a directory to emulate the Windows/DOS behavior.
487		 */
488		if (vp->v_type != VDIR)
489			dep->de_Attributes |= ATTR_ARCHIVE;
490		dep->de_flag |= DE_MODIFIED;
491	}
492	/*
493	 * DOS files only have the ability to have their writability
494	 * attribute set, so we use the owner write bit to set the readonly
495	 * attribute.
496	 */
497	if (vap->va_mode != (mode_t)VNOVAL) {
498		if (vp->v_mount->mnt_flag & MNT_RDONLY)
499			return (EROFS);
500		if (cred->cr_uid != pmp->pm_uid) {
501			error = priv_check_cred(cred, PRIV_VFS_ADMIN);
502			if (error)
503				return (error);
504		}
505		if (vp->v_type != VDIR) {
506			/* We ignore the read and execute bits. */
507			if (vap->va_mode & S_IWUSR)
508				dep->de_Attributes &= ~ATTR_READONLY;
509			else
510				dep->de_Attributes |= ATTR_READONLY;
511			dep->de_Attributes |= ATTR_ARCHIVE;
512			dep->de_flag |= DE_MODIFIED;
513		}
514	}
515	return (deupdat(dep, 0));
516}
517
518static int
519msdosfs_read(struct vop_read_args *ap)
520{
521	int error = 0;
522	int blsize;
523	int isadir;
524	ssize_t orig_resid;
525	u_int n;
526	u_long diff;
527	u_long on;
528	daddr_t lbn;
529	daddr_t rablock;
530	int rasize;
531	int seqcount;
532	struct buf *bp;
533	struct vnode *vp = ap->a_vp;
534	struct denode *dep = VTODE(vp);
535	struct msdosfsmount *pmp = dep->de_pmp;
536	struct uio *uio = ap->a_uio;
537
538	/*
539	 * If they didn't ask for any data, then we are done.
540	 */
541	orig_resid = uio->uio_resid;
542	if (orig_resid == 0)
543		return (0);
544
545	/*
546	 * The caller is supposed to ensure that
547	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
548	 * We don't need to check for large offsets as in ffs because
549	 * dep->de_FileSize <= MSDOSFS_FILESIZE_MAX < OFF_MAX, so large
550	 * offsets cannot cause overflow even in theory.
551	 */
552
553	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
554
555	isadir = dep->de_Attributes & ATTR_DIRECTORY;
556	do {
557		if (uio->uio_offset >= dep->de_FileSize)
558			break;
559		lbn = de_cluster(pmp, uio->uio_offset);
560		rablock = lbn + 1;
561		blsize = pmp->pm_bpcluster;
562		on = uio->uio_offset & pmp->pm_crbomask;
563		/*
564		 * If we are operating on a directory file then be sure to
565		 * do i/o with the vnode for the filesystem instead of the
566		 * vnode for the directory.
567		 */
568		if (isadir) {
569			/* convert cluster # to block # */
570			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
571			if (error == E2BIG) {
572				error = EINVAL;
573				break;
574			} else if (error)
575				break;
576			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
577		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
578			error = bread(vp, lbn, blsize, NOCRED, &bp);
579		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
580			error = cluster_read(vp, dep->de_FileSize, lbn, blsize,
581			    NOCRED, on + uio->uio_resid, seqcount, 0, &bp);
582		} else if (seqcount > 1) {
583			rasize = blsize;
584			error = breadn(vp, lbn,
585			    blsize, &rablock, &rasize, 1, NOCRED, &bp);
586		} else {
587			error = bread(vp, lbn, blsize, NOCRED, &bp);
588		}
589		if (error) {
590			brelse(bp);
591			break;
592		}
593		diff = pmp->pm_bpcluster - on;
594		n = diff > uio->uio_resid ? uio->uio_resid : diff;
595		diff = dep->de_FileSize - uio->uio_offset;
596		if (diff < n)
597			n = diff;
598		diff = blsize - bp->b_resid;
599		if (diff < n)
600			n = diff;
601		error = vn_io_fault_uiomove(bp->b_data + on, (int) n, uio);
602		brelse(bp);
603	} while (error == 0 && uio->uio_resid > 0 && n != 0);
604	if (!isadir && (error == 0 || uio->uio_resid != orig_resid) &&
605	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
606		dep->de_flag |= DE_ACCESS;
607	return (error);
608}
609
610/*
611 * Write data to a file or directory.
612 */
613static int
614msdosfs_write(struct vop_write_args *ap)
615{
616	int n;
617	int croffset;
618	ssize_t resid, r;
619	u_long osize;
620	int error = 0;
621	u_long count;
622	int seqcount;
623	daddr_t bn, lastcn;
624	struct buf *bp;
625	int ioflag = ap->a_ioflag;
626	struct uio *uio = ap->a_uio;
627	struct vnode *vp = ap->a_vp;
628	struct vnode *thisvp;
629	struct denode *dep = VTODE(vp);
630	struct msdosfsmount *pmp = dep->de_pmp;
631	struct ucred *cred = ap->a_cred;
632
633#ifdef MSDOSFS_DEBUG
634	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
635	    vp, uio, ioflag, cred);
636	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
637	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
638#endif
639
640	switch (vp->v_type) {
641	case VREG:
642		if (ioflag & IO_APPEND)
643			uio->uio_offset = dep->de_FileSize;
644		thisvp = vp;
645		break;
646	case VDIR:
647		return EISDIR;
648	default:
649		panic("msdosfs_write(): bad file type");
650	}
651
652	/*
653	 * This is needed (unlike in ffs_write()) because we extend the
654	 * file outside of the loop but we don't want to extend the file
655	 * for writes of 0 bytes.
656	 */
657	if (uio->uio_resid == 0)
658		return (0);
659
660	/*
661	 * The caller is supposed to ensure that
662	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
663	 *
664	 * If they've exceeded their filesize limit, tell them about it.
665	 */
666	error = vn_rlimit_fsizex(vp, uio, MSDOSFS_FILESIZE_MAX, &r,
667	    uio->uio_td);
668	if (error != 0) {
669		vn_rlimit_fsizex_res(uio, r);
670		return (error);
671	}
672
673	/*
674	 * If the offset we are starting the write at is beyond the end of
675	 * the file, then they've done a seek.  Unix filesystems allow
676	 * files with holes in them, DOS doesn't so we must fill the hole
677	 * with zeroed blocks.
678	 */
679	if (uio->uio_offset > dep->de_FileSize) {
680		error = deextend(dep, uio->uio_offset, cred);
681		if (error != 0) {
682			vn_rlimit_fsizex_res(uio, r);
683			return (error);
684		}
685	}
686
687	/*
688	 * Remember some values in case the write fails.
689	 */
690	resid = uio->uio_resid;
691	osize = dep->de_FileSize;
692
693	/*
694	 * If we write beyond the end of the file, extend it to its ultimate
695	 * size ahead of the time to hopefully get a contiguous area.
696	 */
697	if (uio->uio_offset + resid > osize) {
698		count = de_clcount(pmp, uio->uio_offset + resid) -
699			de_clcount(pmp, osize);
700		error = extendfile(dep, count, NULL, NULL, 0);
701		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
702			goto errexit;
703		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
704	} else
705		lastcn = de_clcount(pmp, osize) - 1;
706
707	seqcount = ioflag >> IO_SEQSHIFT;
708	do {
709		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
710			error = ENOSPC;
711			break;
712		}
713
714		croffset = uio->uio_offset & pmp->pm_crbomask;
715		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
716		if (uio->uio_offset + n > dep->de_FileSize) {
717			dep->de_FileSize = uio->uio_offset + n;
718			/* The object size needs to be set before buffer is allocated */
719			vnode_pager_setsize(vp, dep->de_FileSize);
720		}
721
722		bn = de_cluster(pmp, uio->uio_offset);
723		if ((uio->uio_offset & pmp->pm_crbomask) == 0
724		    && (de_cluster(pmp, uio->uio_offset + uio->uio_resid)
725			> de_cluster(pmp, uio->uio_offset)
726			|| uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
727			/*
728			 * If either the whole cluster gets written,
729			 * or we write the cluster from its start beyond EOF,
730			 * then no need to read data from disk.
731			 */
732			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
733			/*
734			 * This call to vfs_bio_clrbuf() ensures that
735			 * even if vn_io_fault_uiomove() below faults,
736			 * garbage from the newly instantiated buffer
737			 * is not exposed to the userspace via mmap().
738			 */
739			vfs_bio_clrbuf(bp);
740			/*
741			 * Do the bmap now, since pcbmap needs buffers
742			 * for the FAT table. (see msdosfs_strategy)
743			 */
744			if (bp->b_blkno == bp->b_lblkno) {
745				error = pcbmap(dep, bp->b_lblkno, &bn, 0, 0);
746				if (error)
747					bp->b_blkno = -1;
748				else
749					bp->b_blkno = bn;
750			}
751			if (bp->b_blkno == -1) {
752				brelse(bp);
753				if (!error)
754					error = EIO;		/* XXX */
755				break;
756			}
757		} else {
758			/*
759			 * The block we need to write into exists, so read it in.
760			 */
761			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
762			if (error) {
763				break;
764			}
765		}
766
767		/*
768		 * Should these vnode_pager_* functions be done on dir
769		 * files?
770		 */
771
772		/*
773		 * Copy the data from user space into the buf header.
774		 */
775		error = vn_io_fault_uiomove(bp->b_data + croffset, n, uio);
776		if (error) {
777			brelse(bp);
778			break;
779		}
780
781		/* Prepare for clustered writes in some else clauses. */
782		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
783			bp->b_flags |= B_CLUSTEROK;
784
785		/*
786		 * If IO_SYNC, then each buffer is written synchronously.
787		 * Otherwise, if we have a severe page deficiency then
788		 * write the buffer asynchronously.  Otherwise, if on a
789		 * cluster boundary then write the buffer asynchronously,
790		 * combining it with contiguous clusters if permitted and
791		 * possible, since we don't expect more writes into this
792		 * buffer soon.  Otherwise, do a delayed write because we
793		 * expect more writes into this buffer soon.
794		 */
795		if (ioflag & IO_SYNC)
796			(void)bwrite(bp);
797		else if (vm_page_count_severe() || buf_dirty_count_severe())
798			bawrite(bp);
799		else if (n + croffset == pmp->pm_bpcluster) {
800			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
801				cluster_write(vp, &dep->de_clusterw, bp,
802				    dep->de_FileSize, seqcount, 0);
803			else
804				bawrite(bp);
805		} else
806			bdwrite(bp);
807		dep->de_flag |= DE_UPDATE;
808	} while (error == 0 && uio->uio_resid > 0);
809
810	/*
811	 * If the write failed and they want us to, truncate the file back
812	 * to the size it was before the write was attempted.
813	 */
814errexit:
815	if (error) {
816		if (ioflag & IO_UNIT) {
817			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED);
818			uio->uio_offset -= resid - uio->uio_resid;
819			uio->uio_resid = resid;
820		} else {
821			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED);
822			if (uio->uio_resid != resid)
823				error = 0;
824		}
825	} else if (ioflag & IO_SYNC)
826		error = deupdat(dep, 1);
827	vn_rlimit_fsizex_res(uio, r);
828	return (error);
829}
830
831/*
832 * Flush the blocks of a file to disk.
833 */
834static int
835msdosfs_fsync(struct vop_fsync_args *ap)
836{
837	struct vnode *devvp;
838	int allerror, error;
839
840	vop_stdfsync(ap);
841
842	/*
843	* If the syncing request comes from fsync(2), sync the entire
844	* FAT and any other metadata that happens to be on devvp.  We
845	* need this mainly for the FAT.  We write the FAT sloppily, and
846	* syncing it all now is the best we can easily do to get all
847	* directory entries associated with the file (not just the file)
848	* fully synced.  The other metadata includes critical metadata
849	* for all directory entries, but only in the MNT_ASYNC case.  We
850	* will soon sync all metadata in the file's directory entry.
851	* Non-critical metadata for associated directory entries only
852	* gets synced accidentally, as in most file systems.
853	*/
854	if (ap->a_waitfor != MNT_NOWAIT) {
855		devvp = VTODE(ap->a_vp)->de_pmp->pm_devvp;
856		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
857		allerror = VOP_FSYNC(devvp, MNT_WAIT, ap->a_td);
858		VOP_UNLOCK(devvp);
859	} else
860		allerror = 0;
861
862	error = deupdat(VTODE(ap->a_vp), ap->a_waitfor != MNT_NOWAIT);
863	if (allerror == 0)
864		allerror = error;
865	return (allerror);
866}
867
868static int
869msdosfs_remove(struct vop_remove_args *ap)
870{
871	struct denode *dep = VTODE(ap->a_vp);
872	struct denode *ddep = VTODE(ap->a_dvp);
873	int error;
874
875	if (ap->a_vp->v_type == VDIR)
876		error = EPERM;
877	else
878		error = removede(ddep, dep);
879#ifdef MSDOSFS_DEBUG
880	printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount);
881#endif
882	return (error);
883}
884
/*
 * DOS filesystems don't know what links are, so every request is refused
 * with EOPNOTSUPP without looking at ap.
 */
static int
msdosfs_link(struct vop_link_args *ap)
{

	return (EOPNOTSUPP);
}
893
/*
 * Renames on files require moving the denode to a new hash queue since the
 * denode's location is used to compute which hash queue to put the file
 * in. Unless it is a rename in place.  For example "mv a b".
 *
 * What follows is the basic algorithm:
 *
 * if (file move) {
 *	if (dest file exists) {
 *		remove dest file
 *	}
 *	if (dest and src in same directory) {
 *		rewrite name in existing directory slot
 *	} else {
 *		write new entry in dest directory
 *		update offset and dirclust in denode
 *		move denode to new hash chain
 *		clear old directory entry
 *	}
 * } else {
 *	directory move
 *	if (dest directory exists) {
 *		if (dest is not empty) {
 *			return ENOTEMPTY
 *		}
 *		remove dest directory
 *	}
 *	if (dest and src in same directory) {
 *		rewrite name in existing entry
 *	} else {
 *		be sure dest is not a child of src directory
 *		write entry in dest directory
 *		update "." and ".." in moved directory
 *		clear old directory entry for moved directory
 *	}
 * }
 *
 * On entry:
 *	source's parent directory is unlocked
 *	source file or directory is unlocked
 *	destination's parent directory is locked
 *	destination file or directory is locked if it exists
 *
 * On exit:
 *	all denodes should be released
 */
940static int
941msdosfs_rename(struct vop_rename_args *ap)
942{
943	struct vnode *fdvp, *fvp, *tdvp, *tvp, *vp;
944	struct componentname *fcnp, *tcnp;
945	struct denode *fdip, *fip, *tdip, *tip, *nip;
946	u_char toname[12], oldname[11];
947	u_long to_diroffset;
948	bool checkpath_locked, doingdirectory, newparent;
949	int error;
950	u_long cn, pcl, blkoff;
951	daddr_t bn, wait_scn, scn;
952	struct msdosfsmount *pmp;
953	struct direntry *dotdotp;
954	struct buf *bp;
955
956	tdvp = ap->a_tdvp;
957	fvp = ap->a_fvp;
958	fdvp = ap->a_fdvp;
959	tvp = ap->a_tvp;
960	tcnp = ap->a_tcnp;
961	fcnp = ap->a_fcnp;
962	pmp = VFSTOMSDOSFS(fdvp->v_mount);
963
964	/*
965	 * Check for cross-device rename.
966	 */
967	if (fvp->v_mount != tdvp->v_mount ||
968	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
969		error = EXDEV;
970		goto abortit;
971	}
972
973	/*
974	 * If source and dest are the same, do nothing.
975	 */
976	if (tvp == fvp) {
977		error = 0;
978		goto abortit;
979	}
980
981	/*
982	 * When the target exists, both the directory
983	 * and target vnodes are passed locked.
984	 */
985	VOP_UNLOCK(tdvp);
986	if (tvp != NULL && tvp != tdvp)
987		VOP_UNLOCK(tvp);
988
989	checkpath_locked = false;
990
991relock:
992	doingdirectory = newparent = false;
993
994	error = vn_lock(fdvp, LK_EXCLUSIVE);
995	if (error != 0)
996		goto releout;
997	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
998		VOP_UNLOCK(fdvp);
999		error = vn_lock(tdvp, LK_EXCLUSIVE);
1000		if (error != 0)
1001			goto releout;
1002		VOP_UNLOCK(tdvp);
1003		goto relock;
1004	}
1005
1006	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
1007	if (error != 0) {
1008		VOP_UNLOCK(fdvp);
1009		VOP_UNLOCK(tdvp);
1010		goto releout;
1011	}
1012	error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT, &nip);
1013	if (error != 0) {
1014		VOP_UNLOCK(fdvp);
1015		VOP_UNLOCK(tdvp);
1016		if (error != EBUSY)
1017			goto releout;
1018		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE, &nip);
1019		if (error != 0)
1020			goto releout;
1021		vp = fvp;
1022		fvp = DETOV(nip);
1023		VOP_UNLOCK(fvp);
1024		vrele(vp);
1025		goto relock;
1026	}
1027	vrele(fvp);
1028	fvp = DETOV(nip);
1029
1030	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
1031	if (error != 0 && error != EJUSTRETURN) {
1032		VOP_UNLOCK(fdvp);
1033		VOP_UNLOCK(tdvp);
1034		VOP_UNLOCK(fvp);
1035		goto releout;
1036	}
1037	if (error == EJUSTRETURN && tvp != NULL) {
1038		vrele(tvp);
1039		tvp = NULL;
1040	}
1041	if (error == 0) {
1042		nip = NULL;
1043		error = deget(pmp, scn, blkoff, LK_EXCLUSIVE | LK_NOWAIT,
1044		    &nip);
1045		if (tvp != NULL) {
1046			vrele(tvp);
1047			tvp = NULL;
1048		}
1049		if (error != 0) {
1050			VOP_UNLOCK(fdvp);
1051			VOP_UNLOCK(tdvp);
1052			VOP_UNLOCK(fvp);
1053			if (error != EBUSY)
1054				goto releout;
1055			error = deget(pmp, scn, blkoff, LK_EXCLUSIVE,
1056			    &nip);
1057			if (error != 0)
1058				goto releout;
1059			vput(DETOV(nip));
1060			goto relock;
1061		}
1062		tvp = DETOV(nip);
1063	}
1064
1065	fdip = VTODE(fdvp);
1066	fip = VTODE(fvp);
1067	tdip = VTODE(tdvp);
1068	tip = tvp != NULL ? VTODE(tvp) : NULL;
1069
1070	/*
1071	 * Remember direntry place to use for destination
1072	 */
1073	to_diroffset = tdip->de_fndoffset;
1074
1075	/*
1076	 * Be sure we are not renaming ".", "..", or an alias of ".". This
1077	 * leads to a crippled directory tree.  It's pretty tough to do a
1078	 * "ls" or "pwd" with the "." directory entry missing, and "cd .."
1079	 * doesn't work if the ".." entry is missing.
1080	 */
1081	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0) {
1082		/*
1083		 * Avoid ".", "..", and aliases of "." for obvious reasons.
1084		 */
1085		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
1086		    fdip == fip ||
1087		    (fcnp->cn_flags & ISDOTDOT) != 0 ||
1088		    (tcnp->cn_flags & ISDOTDOT) != 0) {
1089			error = EINVAL;
1090			goto unlock;
1091		}
1092		doingdirectory = true;
1093	}
1094
1095	/*
1096	 * If ".." must be changed (ie the directory gets a new
1097	 * parent) then the source directory must not be in the
1098	 * directory hierarchy above the target, as this would
1099	 * orphan everything below the source directory. Also
1100	 * the user must have write permission in the source so
1101	 * as to be able to change "..". We must repeat the call
1102	 * to namei, as the parent directory is unlocked by the
1103	 * call to doscheckpath().
1104	 */
1105	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, curthread);
1106	if (fdip->de_StartCluster != tdip->de_StartCluster)
1107		newparent = true;
1108	if (doingdirectory && newparent) {
1109		if (error != 0)	/* write access check above */
1110			goto unlock;
1111		lockmgr(&pmp->pm_checkpath_lock, LK_EXCLUSIVE, NULL);
1112		checkpath_locked = true;
1113		error = doscheckpath(fip, tdip, &wait_scn);
1114		if (wait_scn != 0) {
1115			lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
1116			checkpath_locked = false;
1117			VOP_UNLOCK(fdvp);
1118			VOP_UNLOCK(tdvp);
1119			VOP_UNLOCK(fvp);
1120			if (tvp != NULL && tvp != tdvp)
1121				VOP_UNLOCK(tvp);
1122			error = deget(pmp, wait_scn, 0, LK_EXCLUSIVE,
1123			    &nip);
1124			if (error == 0) {
1125				vput(DETOV(nip));
1126				goto relock;
1127			}
1128		}
1129		if (error != 0)
1130			goto unlock;
1131	}
1132
1133	if (tip != NULL) {
1134		/*
1135		 * Target must be empty if a directory and have no links
1136		 * to it. Also, ensure source and target are compatible
1137		 * (both directories, or both not directories).
1138		 */
1139		if ((tip->de_Attributes & ATTR_DIRECTORY) != 0) {
1140			if (!dosdirempty(tip)) {
1141				error = ENOTEMPTY;
1142				goto unlock;
1143			}
1144			if (!doingdirectory) {
1145				error = ENOTDIR;
1146				goto unlock;
1147			}
1148			cache_purge(tdvp);
1149		} else if (doingdirectory) {
1150			error = EISDIR;
1151			goto unlock;
1152		}
1153		error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
1154		MPASS(error == 0);
1155		error = removede(tdip, tip);
1156		if (error != 0)
1157			goto unlock;
1158		vput(tvp);
1159		tvp = NULL;
1160		tip = NULL;
1161	}
1162
1163	/*
1164	 * Convert the filename in tcnp into a dos filename. We copy this
1165	 * into the denode and directory entry for the destination
1166	 * file/directory.
1167	 */
1168	error = uniqdosname(tdip, tcnp, toname);
1169	if (error != 0)
1170		goto unlock;
1171
1172	/*
1173	 * First write a new entry in the destination
1174	 * directory and mark the entry in the source directory
1175	 * as deleted.  Then move the denode to the correct hash
1176	 * chain for its new location in the filesystem.  And, if
1177	 * we moved a directory, then update its .. entry to point
1178	 * to the new parent directory.
1179	 */
1180	memcpy(oldname, fip->de_Name, 11);
1181	memcpy(fip->de_Name, toname, 11);	/* update denode */
1182	error = msdosfs_lookup_ino(tdvp, NULL, tcnp, &scn, &blkoff);
1183	if (error == EJUSTRETURN) {
1184		tdip->de_fndoffset = to_diroffset;
1185		error = createde(fip, tdip, NULL, tcnp);
1186	}
1187	if (error != 0) {
1188		memcpy(fip->de_Name, oldname, 11);
1189		goto unlock;
1190	}
1191
1192	/*
1193	 * If fip is for a directory, then its name should always
1194	 * be "." since it is for the directory entry in the
1195	 * directory itself (msdosfs_lookup() always translates
1196	 * to the "." entry so as to get a unique denode, except
1197	 * for the root directory there are different
1198	 * complications).  However, we just corrupted its name
1199	 * to pass the correct name to createde().  Undo this.
1200	 */
1201	if ((fip->de_Attributes & ATTR_DIRECTORY) != 0)
1202		memcpy(fip->de_Name, oldname, 11);
1203	fip->de_refcnt++;
1204	error = msdosfs_lookup_ino(fdvp, NULL, fcnp, &scn, &blkoff);
1205	MPASS(error == 0);
1206	error = removede(fdip, fip);
1207	if (error != 0) {
1208		printf("%s: removede %s %s err %d\n",
1209		    pmp->pm_mountp->mnt_stat.f_mntonname,
1210		    fdip->de_Name, fip->de_Name, error);
1211		msdosfs_integrity_error(pmp);
1212		goto unlock;
1213	}
1214	if (!doingdirectory) {
1215		error = pcbmap(tdip, de_cluster(pmp, to_diroffset), 0,
1216		    &fip->de_dirclust, 0);
1217		if (error != 0) {
1218			/*
1219			 * XXX should downgrade to ro here,
1220			 * fs is corrupt
1221			 */
1222			goto unlock;
1223		}
1224		if (fip->de_dirclust == MSDOSFSROOT)
1225			fip->de_diroffset = to_diroffset;
1226		else
1227			fip->de_diroffset = to_diroffset & pmp->pm_crbomask;
1228	}
1229	reinsert(fip);
1230
1231	/*
1232	 * If we moved a directory to a new parent directory, then we must
1233	 * fixup the ".." entry in the moved directory.
1234	 */
1235	if (doingdirectory && newparent) {
1236		cn = fip->de_StartCluster;
1237		if (cn == MSDOSFSROOT) {
1238			/* this should never happen */
1239			panic("msdosfs_rename(): updating .. in root directory?");
1240		} else
1241			bn = cntobn(pmp, cn);
1242		error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
1243		    NOCRED, &bp);
1244		if (error != 0) {
1245			printf("%s: block read error %d while renaming dir\n",
1246			    pmp->pm_mountp->mnt_stat.f_mntonname,
1247			    error);
1248			msdosfs_integrity_error(pmp);
1249			goto unlock;
1250		}
1251		dotdotp = (struct direntry *)bp->b_data + 1;
1252		pcl = tdip->de_StartCluster;
1253		if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
1254			pcl = MSDOSFSROOT;
1255		putushort(dotdotp->deStartCluster, pcl);
1256		if (FAT32(pmp))
1257			putushort(dotdotp->deHighClust, pcl >> 16);
1258		if (DOINGASYNC(fvp))
1259			bdwrite(bp);
1260		else if ((error = bwrite(bp)) != 0) {
1261			printf("%s: block write error %d while renaming dir\n",
1262			    pmp->pm_mountp->mnt_stat.f_mntonname,
1263			    error);
1264			msdosfs_integrity_error(pmp);
1265			goto unlock;
1266		}
1267	}
1268
1269	/*
1270	 * The msdosfs lookup is case insensitive. Several aliases may
	 * be inserted for a single directory entry. As a consequence,
1272	 * name cache purge done by lookup for fvp when DELETE op for
1273	 * namei is specified, might be not enough to expunge all
1274	 * namecache entries that were installed for this direntry.
1275	 */
1276	cache_purge(fvp);
1277
1278unlock:
1279	if (checkpath_locked)
1280		lockmgr(&pmp->pm_checkpath_lock, LK_RELEASE, NULL);
1281	vput(fdvp);
1282	vput(fvp);
1283	if (tvp != NULL) {
1284		if (tvp != tdvp)
1285			vput(tvp);
1286		else
1287			vrele(tvp);
1288	}
1289	vput(tdvp);
1290	return (error);
1291releout:
1292	MPASS(!checkpath_locked);
1293	vrele(tdvp);
1294	if (tvp != NULL)
1295		vrele(tvp);
1296	vrele(fdvp);
1297	vrele(fvp);
1298	return (error);
1299abortit:
1300	if (tdvp == tvp)
1301		vrele(tdvp);
1302	else
1303		vput(tdvp);
1304	if (tvp != NULL)
1305		vput(tvp);
1306	vrele(fdvp);
1307	vrele(fvp);
1308	return (error);
1309}
1310
/*
 * Template for the first two directory entries ("." and "..") of a
 * freshly created directory.  msdosfs_mkdir() copies this image to the
 * start of the new directory's cluster and then overwrites the start
 * cluster, creation, access and modification fields of both entries
 * (plus the high cluster words on FAT32), so the timestamp and cluster
 * values below only serve as placeholders.
 *
 * The initializer is positional: the values must stay in the order in
 * which struct direntry declares its members.
 */
static struct {
	struct direntry dot;
	struct direntry dotdot;
} dosdirtemplate = {
	{	".          ",				/* the . entry */
		ATTR_DIRECTORY,				/* file attribute */
		0,					/* reserved */
		0, { 0, 0 }, { 0, 0 },			/* create time & date */
		{ 0, 0 },				/* access date */
		{ 0, 0 },				/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },			/* modify time & date */
		{ 0, 0 },				/* startcluster */
		{ 0, 0, 0, 0 }				/* filesize */
	},
	{	"..         ",				/* the .. entry */
		ATTR_DIRECTORY,				/* file attribute */
		0,					/* reserved */
		0, { 0, 0 }, { 0, 0 },			/* create time & date */
		{ 0, 0 },				/* access date */
		{ 0, 0 },				/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },			/* modify time & date */
		{ 0, 0 },				/* startcluster */
		{ 0, 0, 0, 0 }				/* filesize */
	}
};
1336
1337static int
1338msdosfs_mkdir(struct vop_mkdir_args *ap)
1339{
1340	struct componentname *cnp = ap->a_cnp;
1341	struct denode *dep;
1342	struct denode *pdep = VTODE(ap->a_dvp);
1343	struct direntry *denp;
1344	struct msdosfsmount *pmp = pdep->de_pmp;
1345	struct buf *bp;
1346	u_long newcluster, pcl;
1347	int bn;
1348	int error;
1349	struct denode ndirent;
1350	struct timespec ts;
1351
1352	/*
1353	 * If this is the root directory and there is no space left we
1354	 * can't do anything.  This is because the root directory can not
1355	 * change size.
1356	 */
1357	if (pdep->de_StartCluster == MSDOSFSROOT
1358	    && pdep->de_fndoffset >= pdep->de_FileSize) {
1359		error = ENOSPC;
1360		goto bad2;
1361	}
1362
1363	/*
1364	 * Allocate a cluster to hold the about to be created directory.
1365	 */
1366	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
1367	if (error)
1368		goto bad2;
1369
1370	memset(&ndirent, 0, sizeof(ndirent));
1371	ndirent.de_pmp = pmp;
1372	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
1373	vfs_timestamp(&ts);
1374	DETIMES(&ndirent, &ts, &ts, &ts);
1375
1376	/*
1377	 * Now fill the cluster with the "." and ".." entries. And write
1378	 * the cluster to disk.  This way it is there for the parent
1379	 * directory to be pointing at if there were a crash.
1380	 */
1381	bn = cntobn(pmp, newcluster);
1382	/* always succeeds */
1383	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0);
1384	memset(bp->b_data, 0, pmp->pm_bpcluster);
1385	memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate);
1386	denp = (struct direntry *)bp->b_data;
1387	putushort(denp[0].deStartCluster, newcluster);
1388	putushort(denp[0].deCDate, ndirent.de_CDate);
1389	putushort(denp[0].deCTime, ndirent.de_CTime);
1390	denp[0].deCHundredth = ndirent.de_CHun;
1391	putushort(denp[0].deADate, ndirent.de_ADate);
1392	putushort(denp[0].deMDate, ndirent.de_MDate);
1393	putushort(denp[0].deMTime, ndirent.de_MTime);
1394	pcl = pdep->de_StartCluster;
1395	/*
1396	 * Although the root directory has a non-magic starting cluster
1397	 * number for FAT32, chkdsk and fsck_msdosfs still require
1398	 * references to it in dotdot entries to be magic.
1399	 */
1400	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
1401		pcl = MSDOSFSROOT;
1402	putushort(denp[1].deStartCluster, pcl);
1403	putushort(denp[1].deCDate, ndirent.de_CDate);
1404	putushort(denp[1].deCTime, ndirent.de_CTime);
1405	denp[1].deCHundredth = ndirent.de_CHun;
1406	putushort(denp[1].deADate, ndirent.de_ADate);
1407	putushort(denp[1].deMDate, ndirent.de_MDate);
1408	putushort(denp[1].deMTime, ndirent.de_MTime);
1409	if (FAT32(pmp)) {
1410		putushort(denp[0].deHighClust, newcluster >> 16);
1411		putushort(denp[1].deHighClust, pcl >> 16);
1412	}
1413
1414	if (DOINGASYNC(ap->a_dvp))
1415		bdwrite(bp);
1416	else if ((error = bwrite(bp)) != 0)
1417		goto bad;
1418
1419	/*
1420	 * Now build up a directory entry pointing to the newly allocated
1421	 * cluster.  This will be written to an empty slot in the parent
1422	 * directory.
1423	 */
1424	error = uniqdosname(pdep, cnp, ndirent.de_Name);
1425	if (error)
1426		goto bad;
1427
1428	ndirent.de_Attributes = ATTR_DIRECTORY;
1429	ndirent.de_LowerCase = 0;
1430	ndirent.de_StartCluster = newcluster;
1431	ndirent.de_FileSize = 0;
1432	error = createde(&ndirent, pdep, &dep, cnp);
1433	if (error)
1434		goto bad;
1435	*ap->a_vpp = DETOV(dep);
1436	return (0);
1437
1438bad:
1439	clusterfree(pmp, newcluster);
1440bad2:
1441	return (error);
1442}
1443
1444static int
1445msdosfs_rmdir(struct vop_rmdir_args *ap)
1446{
1447	struct vnode *vp = ap->a_vp;
1448	struct vnode *dvp = ap->a_dvp;
1449	struct componentname *cnp = ap->a_cnp;
1450	struct denode *ip, *dp;
1451	int error;
1452
1453	ip = VTODE(vp);
1454	dp = VTODE(dvp);
1455
1456	/*
1457	 * Verify the directory is empty (and valid).
1458	 * (Rmdir ".." won't be valid since
1459	 *  ".." will contain a reference to
1460	 *  the current directory and thus be
1461	 *  non-empty.)
1462	 */
1463	error = 0;
1464	if (!dosdirempty(ip)) {
1465		error = ENOTEMPTY;
1466		goto out;
1467	}
1468	/*
1469	 * Delete the entry from the directory.  For dos filesystems this
1470	 * gets rid of the directory entry on disk, the in memory copy
1471	 * still exists but the de_refcnt is <= 0.  This prevents it from
1472	 * being found by deget().  When the vput() on dep is done we give
1473	 * up access and eventually msdosfs_reclaim() will be called which
1474	 * will remove it from the denode cache.
1475	 */
1476	error = removede(dp, ip);
1477	if (error)
1478		goto out;
1479	/*
1480	 * This is where we decrement the link count in the parent
1481	 * directory.  Since dos filesystems don't do this we just purge
1482	 * the name cache.
1483	 */
1484	cache_purge(dvp);
1485	/*
1486	 * Truncate the directory that is being deleted.
1487	 */
1488	error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred);
1489	cache_purge(vp);
1490
1491out:
1492	return (error);
1493}
1494
1495/*
1496 * DOS filesystems don't know what symlinks are.
1497 */
1498static int
1499msdosfs_symlink(struct vop_symlink_args *ap)
1500{
1501	return (EOPNOTSUPP);
1502}
1503
1504static int
1505msdosfs_readdir(struct vop_readdir_args *ap)
1506{
1507	struct mbnambuf nb;
1508	int error = 0;
1509	int diff;
1510	long n;
1511	int blsize;
1512	long on;
1513	u_long cn;
1514	u_long dirsperblk;
1515	long bias = 0;
1516	daddr_t bn, lbn;
1517	struct buf *bp;
1518	struct denode *dep = VTODE(ap->a_vp);
1519	struct msdosfsmount *pmp = dep->de_pmp;
1520	struct direntry *dentp;
1521	struct dirent dirbuf;
1522	struct uio *uio = ap->a_uio;
1523	uint64_t *cookies = NULL;
1524	int ncookies = 0;
1525	off_t offset, off;
1526	int chksum = -1;
1527
1528#ifdef MSDOSFS_DEBUG
1529	printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n",
1530	    ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
1531#endif
1532
1533	/*
1534	 * msdosfs_readdir() won't operate properly on regular files since
1535	 * it does i/o only with the filesystem vnode, and hence can
1536	 * retrieve the wrong block from the buffer cache for a plain file.
1537	 * So, fail attempts to readdir() on a plain file.
1538	 */
1539	if ((dep->de_Attributes & ATTR_DIRECTORY) == 0)
1540		return (ENOTDIR);
1541
1542	/*
1543	 * To be safe, initialize dirbuf
1544	 */
1545	memset(dirbuf.d_name, 0, sizeof(dirbuf.d_name));
1546
1547	/*
1548	 * If the user buffer is smaller than the size of one dos directory
1549	 * entry or the file offset is not a multiple of the size of a
1550	 * directory entry, then we fail the read.
1551	 */
1552	off = offset = uio->uio_offset;
1553	if (uio->uio_resid < sizeof(struct direntry) ||
1554	    (offset & (sizeof(struct direntry) - 1)))
1555		return (EINVAL);
1556
1557	if (ap->a_ncookies) {
1558		ncookies = uio->uio_resid / 16;
1559		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP,
1560		       M_WAITOK);
1561		*ap->a_cookies = cookies;
1562		*ap->a_ncookies = ncookies;
1563	}
1564
1565	dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
1566
1567	/*
1568	 * If they are reading from the root directory then, we simulate
1569	 * the . and .. entries since these don't exist in the root
1570	 * directory.  We also set the offset bias to make up for having to
1571	 * simulate these entries. By this I mean that at file offset 64 we
1572	 * read the first entry in the root directory that lives on disk.
1573	 */
1574	if (dep->de_StartCluster == MSDOSFSROOT
1575	    || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) {
1576#if 0
1577		printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n",
1578		    offset);
1579#endif
1580		bias = 2 * sizeof(struct direntry);
1581		if (offset < bias) {
1582			for (n = (int)offset / sizeof(struct direntry);
1583			     n < 2; n++) {
1584				dirbuf.d_fileno = FAT32(pmp) ?
1585				    (uint64_t)cntobn(pmp, pmp->pm_rootdirblk) *
1586				    dirsperblk : 1;
1587				dirbuf.d_type = DT_DIR;
1588				switch (n) {
1589				case 0:
1590					dirbuf.d_namlen = 1;
1591					dirbuf.d_name[0] = '.';
1592					break;
1593				case 1:
1594					dirbuf.d_namlen = 2;
1595					dirbuf.d_name[0] = '.';
1596					dirbuf.d_name[1] = '.';
1597					break;
1598				}
1599				dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
1600				/* NOTE: d_off is the offset of the *next* entry. */
1601				dirbuf.d_off = offset + sizeof(struct direntry);
1602				dirent_terminate(&dirbuf);
1603				if (uio->uio_resid < dirbuf.d_reclen)
1604					goto out;
1605				error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
1606				if (error)
1607					goto out;
1608				offset += sizeof(struct direntry);
1609				off = offset;
1610				if (cookies) {
1611					*cookies++ = offset;
1612					if (--ncookies <= 0)
1613						goto out;
1614				}
1615			}
1616		}
1617	}
1618
1619	mbnambuf_init(&nb);
1620	off = offset;
1621	while (uio->uio_resid > 0) {
1622		lbn = de_cluster(pmp, offset - bias);
1623		on = (offset - bias) & pmp->pm_crbomask;
1624		n = min(pmp->pm_bpcluster - on, uio->uio_resid);
1625		diff = dep->de_FileSize - (offset - bias);
1626		if (diff <= 0)
1627			break;
1628		n = min(n, diff);
1629		error = pcbmap(dep, lbn, &bn, &cn, &blsize);
1630		if (error)
1631			break;
1632		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
1633		if (error) {
1634			return (error);
1635		}
1636		n = min(n, blsize - bp->b_resid);
1637		if (n == 0) {
1638			brelse(bp);
1639			return (EIO);
1640		}
1641
1642		/*
1643		 * Convert from dos directory entries to fs-independent
1644		 * directory entries.
1645		 */
1646		for (dentp = (struct direntry *)(bp->b_data + on);
1647		     (char *)dentp < bp->b_data + on + n;
1648		     dentp++, offset += sizeof(struct direntry)) {
1649#if 0
1650			printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n",
1651			    dentp, prev, crnt, dentp->deName[0], dentp->deAttributes);
1652#endif
1653			/*
1654			 * If this is an unused entry, we can stop.
1655			 */
1656			if (dentp->deName[0] == SLOT_EMPTY) {
1657				brelse(bp);
1658				goto out;
1659			}
1660			/*
1661			 * Skip deleted entries.
1662			 */
1663			if (dentp->deName[0] == SLOT_DELETED) {
1664				chksum = -1;
1665				mbnambuf_init(&nb);
1666				continue;
1667			}
1668
1669			/*
1670			 * Handle Win95 long directory entries
1671			 */
1672			if (dentp->deAttributes == ATTR_WIN95) {
1673				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
1674					continue;
1675				chksum = win2unixfn(&nb,
1676				    (struct winentry *)dentp, chksum, pmp);
1677				continue;
1678			}
1679
1680			/*
1681			 * Skip volume labels
1682			 */
1683			if (dentp->deAttributes & ATTR_VOLUME) {
1684				chksum = -1;
1685				mbnambuf_init(&nb);
1686				continue;
1687			}
1688			/*
1689			 * This computation of d_fileno must match
1690			 * the computation of va_fileid in
1691			 * msdosfs_getattr.
1692			 */
1693			if (dentp->deAttributes & ATTR_DIRECTORY) {
1694				cn = getushort(dentp->deStartCluster);
1695				if (FAT32(pmp)) {
1696					cn |= getushort(dentp->deHighClust) <<
1697					    16;
1698					if (cn == MSDOSFSROOT)
1699						cn = pmp->pm_rootdirblk;
1700				}
1701				if (cn == MSDOSFSROOT && !FAT32(pmp))
1702					dirbuf.d_fileno = 1;
1703				else
1704					dirbuf.d_fileno = cntobn(pmp, cn) *
1705					    dirsperblk;
1706				dirbuf.d_type = DT_DIR;
1707			} else {
1708				dirbuf.d_fileno = (uoff_t)offset /
1709				    sizeof(struct direntry);
1710				dirbuf.d_type = DT_REG;
1711			}
1712
1713			if (chksum != winChksum(dentp->deName)) {
1714				dirbuf.d_namlen = dos2unixfn(dentp->deName,
1715				    (u_char *)dirbuf.d_name,
1716				    dentp->deLowerCase |
1717					((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
1718					(LCASE_BASE | LCASE_EXT) : 0),
1719				    pmp);
1720				mbnambuf_init(&nb);
1721			} else
1722				mbnambuf_flush(&nb, &dirbuf);
1723			chksum = -1;
1724			dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
1725			/* NOTE: d_off is the offset of the *next* entry. */
1726			dirbuf.d_off = offset + sizeof(struct direntry);
1727			dirent_terminate(&dirbuf);
1728			if (uio->uio_resid < dirbuf.d_reclen) {
1729				brelse(bp);
1730				goto out;
1731			}
1732			error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
1733			if (error) {
1734				brelse(bp);
1735				goto out;
1736			}
1737			if (cookies) {
1738				*cookies++ = offset + sizeof(struct direntry);
1739				if (--ncookies <= 0) {
1740					brelse(bp);
1741					goto out;
1742				}
1743			}
1744			off = offset + sizeof(struct direntry);
1745		}
1746		brelse(bp);
1747	}
1748out:
1749	/* Subtract unused cookies */
1750	if (ap->a_ncookies)
1751		*ap->a_ncookies -= ncookies;
1752
1753	uio->uio_offset = off;
1754
1755	/*
1756	 * Set the eofflag (NFS uses it)
1757	 */
1758	if (ap->a_eofflag) {
1759		if (dep->de_FileSize - (offset - bias) <= 0)
1760			*ap->a_eofflag = 1;
1761		else
1762			*ap->a_eofflag = 0;
1763	}
1764	return (error);
1765}
1766
1767/*-
1768 * a_vp   - pointer to the file's vnode
1769 * a_bn   - logical block number within the file (cluster number for us)
1770 * a_bop  - where to return the bufobj of the special file containing the fs
1771 * a_bnp  - where to return the "physical" block number corresponding to a_bn
1772 *          (relative to the special file; units are blocks of size DEV_BSIZE)
1773 * a_runp - where to return the "run past" a_bn.  This is the count of logical
1774 *          blocks whose physical blocks (together with a_bn's physical block)
1775 *          are contiguous.
1776 * a_runb - where to return the "run before" a_bn.
1777 */
1778static int
1779msdosfs_bmap(struct vop_bmap_args *ap)
1780{
1781	struct fatcache savefc;
1782	struct denode *dep;
1783	struct mount *mp;
1784	struct msdosfsmount *pmp;
1785	struct vnode *vp;
1786	daddr_t runbn;
1787	u_long cn;
1788	int bnpercn, error, maxio, maxrun, run;
1789
1790	vp = ap->a_vp;
1791	dep = VTODE(vp);
1792	pmp = dep->de_pmp;
1793	if (ap->a_bop != NULL)
1794		*ap->a_bop = &pmp->pm_devvp->v_bufobj;
1795	if (ap->a_bnp == NULL)
1796		return (0);
1797	if (ap->a_runp != NULL)
1798		*ap->a_runp = 0;
1799	if (ap->a_runb != NULL)
1800		*ap->a_runb = 0;
1801	cn = ap->a_bn;
1802	if (cn != ap->a_bn)
1803		return (EFBIG);
1804	error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL);
1805	if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL))
1806		return (error);
1807
1808	/*
1809	 * Prepare to back out updates of the fatchain cache after the one
1810	 * for the first block done by pcbmap() above.  Without the backout,
1811	 * then whenever the caller doesn't do i/o to all of the blocks that
1812	 * we find, the single useful cache entry would be too far in advance
1813	 * of the actual i/o to work for the next sequential i/o.  Then the
1814	 * FAT would be searched from the beginning.  With the backout, the
1815	 * FAT is searched starting at most a few blocks early.  This wastes
1816	 * much less time.  Time is also wasted finding more blocks than the
1817	 * caller will do i/o to.  This is necessary because the runlength
1818	 * parameters are output-only.
1819	 */
1820	savefc = dep->de_fc[FC_LASTMAP];
1821
1822	mp = vp->v_mount;
1823	maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize;
1824	bnpercn = de_cn2bn(pmp, 1);
1825	if (ap->a_runp != NULL) {
1826		maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn);
1827		for (run = 1; run <= maxrun; run++) {
1828			if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 ||
1829			    runbn != *ap->a_bnp + run * bnpercn)
1830				break;
1831		}
1832		*ap->a_runp = run - 1;
1833	}
1834	if (ap->a_runb != NULL) {
1835		maxrun = ulmin(maxio - 1, cn);
1836		for (run = 1; run < maxrun; run++) {
1837			if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 ||
1838			    runbn != *ap->a_bnp - run * bnpercn)
1839				break;
1840		}
1841		*ap->a_runb = run - 1;
1842	}
1843	dep->de_fc[FC_LASTMAP] = savefc;
1844	return (0);
1845}
1846
/*
 * The vfs.msdosfs sysctl node and its use_buf_pager knob.
 *
 * use_buf_pager is consulted by msdosfs_getpages(): a non-zero value
 * (the default) selects vfs_bio_getpages(), zero selects
 * vnode_pager_generic_getpages().
 */
SYSCTL_NODE(_vfs, OID_AUTO, msdosfs, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "msdos filesystem");
static int use_buf_pager = 1;
SYSCTL_INT(_vfs_msdosfs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN,
    &use_buf_pager, 0,
    "Use buffer pager instead of bmap");
1853
1854static daddr_t
1855msdosfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
1856{
1857
1858	return (de_cluster(VTODE(vp)->de_pmp, off));
1859}
1860
1861static int
1862msdosfs_gbp_getblksz(struct vnode *vp, daddr_t lbn, long *sz)
1863{
1864
1865	*sz = VTODE(vp)->de_pmp->pm_bpcluster;
1866	return (0);
1867}
1868
1869static int
1870msdosfs_getpages(struct vop_getpages_args *ap)
1871{
1872
1873	if (use_buf_pager)
1874		return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count,
1875		    ap->a_rbehind, ap->a_rahead, msdosfs_gbp_getblkno,
1876		    msdosfs_gbp_getblksz));
1877	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
1878	    ap->a_rbehind, ap->a_rahead, NULL, NULL));
1879}
1880
1881static int
1882msdosfs_strategy(struct vop_strategy_args *ap)
1883{
1884	struct buf *bp = ap->a_bp;
1885	struct denode *dep = VTODE(ap->a_vp);
1886	struct bufobj *bo;
1887	int error = 0;
1888	daddr_t blkno;
1889
1890	/*
1891	 * If we don't already know the filesystem relative block number
1892	 * then get it using pcbmap().  If pcbmap() returns the block
1893	 * number as -1 then we've got a hole in the file.  DOS filesystems
1894	 * don't allow files with holes, so we shouldn't ever see this.
1895	 */
1896	if (bp->b_blkno == bp->b_lblkno) {
1897		error = pcbmap(dep, bp->b_lblkno, &blkno, 0, 0);
1898		bp->b_blkno = blkno;
1899		if (error) {
1900			bp->b_error = error;
1901			bp->b_ioflags |= BIO_ERROR;
1902			bufdone(bp);
1903			return (0);
1904		}
1905		if ((long)bp->b_blkno == -1)
1906			vfs_bio_clrbuf(bp);
1907	}
1908	if (bp->b_blkno == -1) {
1909		bufdone(bp);
1910		return (0);
1911	}
1912	/*
1913	 * Read/write the block from/to the disk that contains the desired
1914	 * file block.
1915	 */
1916	bp->b_iooffset = dbtob(bp->b_blkno);
1917	bo = dep->de_pmp->pm_bo;
1918	BO_STRATEGY(bo, bp);
1919	return (0);
1920}
1921
1922static int
1923msdosfs_print(struct vop_print_args *ap)
1924{
1925	struct denode *dep = VTODE(ap->a_vp);
1926
1927	printf("\tstartcluster %lu, dircluster %lu, diroffset %lu, ",
1928	       dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
1929	printf("on dev %s\n", devtoname(dep->de_pmp->pm_dev));
1930	return (0);
1931}
1932
1933static int
1934msdosfs_pathconf(struct vop_pathconf_args *ap)
1935{
1936	struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp;
1937
1938	switch (ap->a_name) {
1939	case _PC_FILESIZEBITS:
1940		*ap->a_retval = 32;
1941		return (0);
1942	case _PC_LINK_MAX:
1943		*ap->a_retval = 1;
1944		return (0);
1945	case _PC_NAME_MAX:
1946		*ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12;
1947		return (0);
1948	case _PC_CHOWN_RESTRICTED:
1949		*ap->a_retval = 1;
1950		return (0);
1951	case _PC_NO_TRUNC:
1952		*ap->a_retval = 0;
1953		return (0);
1954	default:
1955		return (vop_stdpathconf(ap));
1956	}
1957	/* NOTREACHED */
1958}
1959
1960static int
1961msdosfs_vptofh(struct vop_vptofh_args *ap)
1962{
1963	struct denode *dep;
1964	struct defid *defhp;
1965
1966	dep = VTODE(ap->a_vp);
1967	defhp = (struct defid *)ap->a_fhp;
1968	defhp->defid_len = sizeof(struct defid);
1969	defhp->defid_dirclust = dep->de_dirclust;
1970	defhp->defid_dirofs = dep->de_diroffset;
1971	/* defhp->defid_gen = dep->de_gen; */
1972	return (0);
1973}
1974
/*
 * Global vfs data structures for msdosfs.
 *
 * msdosfs_vnodeops is the vnode operations vector for msdosfs vnodes.
 * default_vnodeops is named as the vector's default.  Name lookups are
 * entered through vfs_cache_lookup(), with msdosfs_lookup() registered
 * as the cached-lookup handler.
 */
struct vop_vector msdosfs_vnodeops = {
	.vop_default =		&default_vnodeops,

	.vop_access =		msdosfs_access,
	.vop_bmap =		msdosfs_bmap,
	.vop_getpages =		msdosfs_getpages,
	.vop_cachedlookup =	msdosfs_lookup,
	.vop_open =		msdosfs_open,
	.vop_close =		msdosfs_close,
	.vop_create =		msdosfs_create,
	.vop_fsync =		msdosfs_fsync,
	.vop_fdatasync =	vop_stdfdatasync_buf,
	.vop_getattr =		msdosfs_getattr,
	.vop_inactive =		msdosfs_inactive,
	.vop_link =		msdosfs_link,
	.vop_lookup =		vfs_cache_lookup,
	.vop_mkdir =		msdosfs_mkdir,
	.vop_mknod =		msdosfs_mknod,
	.vop_pathconf =		msdosfs_pathconf,
	.vop_print =		msdosfs_print,
	.vop_read =		msdosfs_read,
	.vop_readdir =		msdosfs_readdir,
	.vop_reclaim =		msdosfs_reclaim,
	.vop_remove =		msdosfs_remove,
	.vop_rename =		msdosfs_rename,
	.vop_rmdir =		msdosfs_rmdir,
	.vop_setattr =		msdosfs_setattr,
	.vop_strategy =		msdosfs_strategy,
	.vop_symlink =		msdosfs_symlink,
	.vop_write =		msdosfs_write,
	.vop_vptofh =		msdosfs_vptofh,
};
VFS_VOP_VECTOR_REGISTER(msdosfs_vnodeops);
2009