1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/param.h>
28#include <sys/systm.h>
29#include <sys/kmem.h>
30#include <sys/user.h>
31#include <sys/proc.h>
32#include <sys/cred.h>
33#include <sys/disp.h>
34#include <sys/buf.h>
35#include <sys/vfs.h>
36#include <sys/vfs_opreg.h>
37#include <sys/vnode.h>
38#include <sys/fdio.h>
39#include <sys/file.h>
40#include <sys/uio.h>
41#include <sys/conf.h>
42#include <sys/statvfs.h>
43#include <sys/mount.h>
44#include <sys/pathname.h>
45#include <sys/cmn_err.h>
46#include <sys/debug.h>
47#include <sys/sysmacros.h>
48#include <sys/conf.h>
49#include <sys/mkdev.h>
50#include <sys/swap.h>
51#include <sys/sunddi.h>
52#include <sys/sunldi.h>
53#include <sys/dktp/fdisk.h>
54#include <sys/fs/pc_label.h>
55#include <sys/fs/pc_fs.h>
56#include <sys/fs/pc_dir.h>
57#include <sys/fs/pc_node.h>
58#include <fs/fs_subr.h>
59#include <sys/modctl.h>
60#include <sys/dkio.h>
61#include <sys/open.h>
62#include <sys/mntent.h>
63#include <sys/policy.h>
64#include <sys/atomic.h>
65#include <sys/sdt.h>
66
/*
 * I/O size to use while the sector size of the media is not yet known.
 *
 * Nearly all PC media have 512 byte sectors, but 1k sectors do turn up
 * now and then. For such media fd_strategy() insists on an I/O size
 * that is a multiple of 1k, so two standard sectors are always read
 * until the real sector size has been determined.
 */
#define	PC_SAFESECSIZE	(PC_SECSIZE * 2)
75
76static int pcfs_pseudo_floppy(dev_t);
77
78static int pcfsinit(int, char *);
79static int pcfs_mount(struct vfs *, struct vnode *, struct mounta *,
80	struct cred *);
81static int pcfs_unmount(struct vfs *, int, struct cred *);
82static int pcfs_root(struct vfs *, struct vnode **);
83static int pcfs_statvfs(struct vfs *, struct statvfs64 *);
84static int pc_syncfsnodes(struct pcfs *);
85static int pcfs_sync(struct vfs *, short, struct cred *);
86static int pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp);
87static void pcfs_freevfs(vfs_t *vfsp);
88
89static int pc_readfat(struct pcfs *fsp, uchar_t *fatp);
90static int pc_writefat(struct pcfs *fsp, daddr_t start);
91
92static int pc_getfattype(struct pcfs *fsp);
93static void pcfs_parse_mntopts(struct pcfs *fsp);
94
95
96/*
97 * pcfs mount options table
98 */
99
100static char *nohidden_cancel[] = { MNTOPT_PCFS_HIDDEN, NULL };
101static char *hidden_cancel[] = { MNTOPT_PCFS_NOHIDDEN, NULL };
102static char *nofoldcase_cancel[] = { MNTOPT_PCFS_FOLDCASE, NULL };
103static char *foldcase_cancel[] = { MNTOPT_PCFS_NOFOLDCASE, NULL };
104static char *clamptime_cancel[] = { MNTOPT_PCFS_NOCLAMPTIME, NULL };
105static char *noclamptime_cancel[] = { MNTOPT_PCFS_CLAMPTIME, NULL };
106static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
107static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
108
109static mntopt_t mntopts[] = {
110/*
111 *	option name	cancel option	default arg	flags	opt data
112 */
113	{ MNTOPT_PCFS_NOHIDDEN, nohidden_cancel, NULL, 0, NULL },
114	{ MNTOPT_PCFS_HIDDEN, hidden_cancel, NULL, MO_DEFAULT, NULL },
115	{ MNTOPT_PCFS_NOFOLDCASE, nofoldcase_cancel, NULL, MO_DEFAULT, NULL },
116	{ MNTOPT_PCFS_FOLDCASE, foldcase_cancel, NULL, 0, NULL },
117	{ MNTOPT_PCFS_CLAMPTIME, clamptime_cancel, NULL, MO_DEFAULT, NULL },
118	{ MNTOPT_PCFS_NOCLAMPTIME, noclamptime_cancel, NULL, NULL, NULL },
119	{ MNTOPT_NOATIME, noatime_cancel, NULL, NULL, NULL },
120	{ MNTOPT_ATIME, atime_cancel, NULL, NULL, NULL },
121	{ MNTOPT_PCFS_TIMEZONE, NULL, "+0", MO_DEFAULT | MO_HASVALUE, NULL },
122	{ MNTOPT_PCFS_SECSIZE, NULL, NULL, MO_HASVALUE, NULL }
123};
124
125static mntopts_t pcfs_mntopts = {
126	sizeof (mntopts) / sizeof (mntopt_t),
127	mntopts
128};
129
130int pcfsdebuglevel = 0;
131
132/*
133 * pcfslock:	protects the list of mounted pc filesystems "pc_mounttab.
134 * pcfs_lock:	(inside per filesystem structure "pcfs")
135 *		per filesystem lock. Most of the vfsops and vnodeops are
136 *		protected by this lock.
137 * pcnodes_lock: protects the pcnode hash table "pcdhead", "pcfhead".
138 *
139 * Lock hierarchy: pcfslock > pcfs_lock > pcnodes_lock
140 *
141 * pcfs_mountcount:	used to prevent module unloads while there is still
142 *			pcfs state from a former mount hanging around. With
143 *			forced umount support, the filesystem module must not
144 *			be allowed to go away before the last VFS_FREEVFS()
145 *			call has been made.
146 *			Since this is just an atomic counter, there's no need
147 *			for locking.
148 */
149kmutex_t	pcfslock;
150krwlock_t	pcnodes_lock;
151uint32_t	pcfs_mountcount;
152
153static int pcfstype;
154
155static vfsdef_t vfw = {
156	VFSDEF_VERSION,
157	"pcfs",
158	pcfsinit,
159	VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
160	&pcfs_mntopts
161};
162
163extern struct mod_ops mod_fsops;
164
165static struct modlfs modlfs = {
166	&mod_fsops,
167	"PC filesystem",
168	&vfw
169};
170
171static struct modlinkage modlinkage = {
172	MODREV_1,
173	&modlfs,
174	NULL
175};
176
177int
178_init(void)
179{
180	int	error;
181
182#if !defined(lint)
183	/* make sure the on-disk structures are sane */
184	ASSERT(sizeof (struct pcdir) == 32);
185	ASSERT(sizeof (struct pcdir_lfn) == 32);
186#endif
187	mutex_init(&pcfslock, NULL, MUTEX_DEFAULT, NULL);
188	rw_init(&pcnodes_lock, NULL, RW_DEFAULT, NULL);
189	error = mod_install(&modlinkage);
190	if (error) {
191		mutex_destroy(&pcfslock);
192		rw_destroy(&pcnodes_lock);
193	}
194	return (error);
195}
196
197int
198_fini(void)
199{
200	int	error;
201
202	/*
203	 * If a forcedly unmounted instance is still hanging around,
204	 * we cannot allow the module to be unloaded because that would
205	 * cause panics once the VFS framework decides it's time to call
206	 * into VFS_FREEVFS().
207	 */
208	if (pcfs_mountcount)
209		return (EBUSY);
210
211	error = mod_remove(&modlinkage);
212	if (error)
213		return (error);
214	mutex_destroy(&pcfslock);
215	rw_destroy(&pcnodes_lock);
216	/*
217	 * Tear down the operations vectors
218	 */
219	(void) vfs_freevfsops_by_type(pcfstype);
220	vn_freevnodeops(pcfs_fvnodeops);
221	vn_freevnodeops(pcfs_dvnodeops);
222	return (0);
223}
224
225int
226_info(struct modinfo *modinfop)
227{
228	return (mod_info(&modlinkage, modinfop));
229}
230
231/* ARGSUSED1 */
232static int
233pcfsinit(int fstype, char *name)
234{
235	static const fs_operation_def_t pcfs_vfsops_template[] = {
236		VFSNAME_MOUNT,		{ .vfs_mount = pcfs_mount },
237		VFSNAME_UNMOUNT,	{ .vfs_unmount = pcfs_unmount },
238		VFSNAME_ROOT,		{ .vfs_root = pcfs_root },
239		VFSNAME_STATVFS,	{ .vfs_statvfs = pcfs_statvfs },
240		VFSNAME_SYNC,		{ .vfs_sync = pcfs_sync },
241		VFSNAME_VGET,		{ .vfs_vget = pcfs_vget },
242		VFSNAME_FREEVFS,	{ .vfs_freevfs = pcfs_freevfs },
243		NULL,			NULL
244	};
245	int error;
246
247	error = vfs_setfsops(fstype, pcfs_vfsops_template, NULL);
248	if (error != 0) {
249		cmn_err(CE_WARN, "pcfsinit: bad vfs ops template");
250		return (error);
251	}
252
253	error = vn_make_ops("pcfs", pcfs_fvnodeops_template, &pcfs_fvnodeops);
254	if (error != 0) {
255		(void) vfs_freevfsops_by_type(fstype);
256		cmn_err(CE_WARN, "pcfsinit: bad file vnode ops template");
257		return (error);
258	}
259
260	error = vn_make_ops("pcfsd", pcfs_dvnodeops_template, &pcfs_dvnodeops);
261	if (error != 0) {
262		(void) vfs_freevfsops_by_type(fstype);
263		vn_freevnodeops(pcfs_fvnodeops);
264		cmn_err(CE_WARN, "pcfsinit: bad dir vnode ops template");
265		return (error);
266	}
267
268	pcfstype = fstype;
269	(void) pc_init();
270	pcfs_mountcount = 0;
271	return (0);
272}
273
274static struct pcfs *pc_mounttab = NULL;
275
276extern struct pcfs_args pc_tz;
277
278/*
279 *  Define some special logical drives we use internal to this file.
280 */
281#define	BOOT_PARTITION_DRIVE	99
282#define	PRIMARY_DOS_DRIVE	1
283#define	UNPARTITIONED_DRIVE	0
284
285static int
286pcfs_device_identify(
287	struct vfs *vfsp,
288	struct mounta *uap,
289	struct cred *cr,
290	int *dos_ldrive,
291	dev_t *xdev)
292{
293	struct pathname special;
294	char *c;
295	struct vnode *svp = NULL;
296	struct vnode *lvp = NULL;
297	int oflag, aflag;
298	int error;
299
300	/*
301	 * Resolve path name of special file being mounted.
302	 */
303	if (error = pn_get(uap->spec, UIO_USERSPACE, &special)) {
304		return (error);
305	}
306
307	*dos_ldrive = -1;
308
309	if (error =
310	    lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW, NULLVPP, &svp)) {
311		/*
312		 * If there's no device node, the name specified most likely
313		 * maps to a PCFS-style "partition specifier" to select a
314		 * harddisk primary/logical partition. Disable floppy-specific
315		 * checks in such cases unless an explicit :A or :B is
316		 * requested.
317		 */
318
319		/*
320		 * Split the pathname string at the last ':' separator.
321		 * If there's no ':' in the device name, or the ':' is the
322		 * last character in the string, the name is invalid and
323		 * the error from the previous lookup will be returned.
324		 */
325		c = strrchr(special.pn_path, ':');
326		if (c == NULL || strlen(c) == 0)
327			goto devlookup_done;
328
329		*c++ = '\0';
330
331		/*
332		 * PCFS partition name suffixes can be:
333		 *	- "boot" to indicate the X86BOOT partition
334		 *	- a drive letter [c-z] for the "DOS logical drive"
335		 *	- a drive number 1..24 for the "DOS logical drive"
336		 *	- a "floppy name letter", 'a' or 'b' (just strip this)
337		 */
338		if (strcasecmp(c, "boot") == 0) {
339			/*
340			 * The Solaris boot partition is requested.
341			 */
342			*dos_ldrive = BOOT_PARTITION_DRIVE;
343		} else if (strspn(c, "0123456789") == strlen(c)) {
344			/*
345			 * All digits - parse the partition number.
346			 */
347			long drvnum = 0;
348
349			if ((error = ddi_strtol(c, NULL, 10, &drvnum)) == 0) {
350				/*
351				 * A number alright - in the allowed range ?
352				 */
353				if (drvnum > 24 || drvnum == 0)
354					error = ENXIO;
355			}
356			if (error)
357				goto devlookup_done;
358			*dos_ldrive = (int)drvnum;
359		} else if (strlen(c) == 1) {
360			/*
361			 * A single trailing character was specified.
362			 *	- [c-zC-Z] means a harddisk partition, and
363			 *	  we retrieve the partition number.
364			 *	- [abAB] means a floppy drive, so we swallow
365			 *	  the "drive specifier" and test later
366			 *	  whether the physical device is a floppy.
367			 */
368			*c = tolower(*c);
369			if (*c == 'a' || *c == 'b') {
370				*dos_ldrive = UNPARTITIONED_DRIVE;
371			} else if (*c < 'c' || *c > 'z') {
372				error = ENXIO;
373				goto devlookup_done;
374			} else {
375				*dos_ldrive = 1 + *c - 'c';
376			}
377		} else {
378			/*
379			 * Can't parse this - pass through previous error.
380			 */
381			goto devlookup_done;
382		}
383
384
385		error = lookupname(special.pn_path, UIO_SYSSPACE, FOLLOW,
386		    NULLVPP, &svp);
387	} else {
388		*dos_ldrive = UNPARTITIONED_DRIVE;
389	}
390devlookup_done:
391	pn_free(&special);
392	if (error)
393		return (error);
394
395	ASSERT(*dos_ldrive >= UNPARTITIONED_DRIVE);
396
397	/*
398	 * Verify caller's permission to open the device special file.
399	 */
400	if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
401	    ((uap->flags & MS_RDONLY) != 0)) {
402		oflag = FREAD;
403		aflag = VREAD;
404	} else {
405		oflag = FREAD | FWRITE;
406		aflag = VREAD | VWRITE;
407	}
408
409	error = vfs_get_lofi(vfsp, &lvp);
410
411	if (error > 0) {
412		if (error == ENOENT)
413			error = ENODEV;
414		goto out;
415	} else if (error == 0) {
416		*xdev = lvp->v_rdev;
417	} else {
418		*xdev = svp->v_rdev;
419
420		if (svp->v_type != VBLK) {
421			error = ENOTBLK;
422			goto out;
423		}
424
425		if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0)
426			goto out;
427	}
428
429	if (getmajor(*xdev) >= devcnt) {
430		error = ENXIO;
431		goto out;
432	}
433
434	if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
435		goto out;
436
437out:
438	if (svp != NULL)
439		VN_RELE(svp);
440	if (lvp != NULL)
441		VN_RELE(lvp);
442	return (error);
443}
444
445static int
446pcfs_device_ismounted(
447	struct vfs *vfsp,
448	int dos_ldrive,
449	dev_t xdev,
450	int *remounting,
451	dev_t *pseudodev)
452{
453	struct pcfs *fsp;
454	int remount = *remounting;
455
456	/*
457	 * Ensure that this logical drive isn't already mounted, unless
458	 * this is a REMOUNT request.
459	 * Note: The framework will perform this check if the "...:c"
460	 * PCFS-style "logical drive" syntax has not been used and an
461	 * actually existing physical device is backing this filesystem.
462	 * Once all block device drivers support PC-style partitioning,
463	 * this codeblock can be dropped.
464	 */
465	*pseudodev = xdev;
466
467	if (dos_ldrive) {
468		mutex_enter(&pcfslock);
469		for (fsp = pc_mounttab; fsp; fsp = fsp->pcfs_nxt)
470			if (fsp->pcfs_xdev == xdev &&
471			    fsp->pcfs_ldrive == dos_ldrive) {
472				mutex_exit(&pcfslock);
473				if (remount) {
474					return (0);
475				} else {
476					return (EBUSY);
477				}
478			}
479		/*
480		 * Assign a unique device number for the vfs
481		 * The old way (getudev() + a constantly incrementing
482		 * major number) was wrong because it changes vfs_dev
483		 * across mounts and reboots, which breaks nfs file handles.
484		 * UFS just uses the real dev_t. We can't do that because
485		 * of the way pcfs opens fdisk partitons (the :c and :d
486		 * partitions are on the same dev_t). Though that _might_
487		 * actually be ok, since the file handle contains an
488		 * absolute block number, it's probably better to make them
489		 * different. So I think we should retain the original
490		 * dev_t, but come up with a different minor number based
491		 * on the logical drive that will _always_ come up the same.
492		 * For now, we steal the upper 6 bits.
493		 */
494#ifdef notdef
495		/* what should we do here? */
496		if (((getminor(xdev) >> 12) & 0x3F) != 0)
497			printf("whoops - upper bits used!\n");
498#endif
499		*pseudodev = makedevice(getmajor(xdev),
500		    ((dos_ldrive << 12) | getminor(xdev)) & MAXMIN32);
501		if (vfs_devmounting(*pseudodev, vfsp)) {
502			mutex_exit(&pcfslock);
503			return (EBUSY);
504		}
505		if (vfs_devismounted(*pseudodev)) {
506			mutex_exit(&pcfslock);
507			if (remount) {
508				return (0);
509			} else {
510				return (EBUSY);
511			}
512		}
513		mutex_exit(&pcfslock);
514	} else {
515		*pseudodev = xdev;
516		if (vfs_devmounting(*pseudodev, vfsp)) {
517			return (EBUSY);
518		}
519		if (vfs_devismounted(*pseudodev))
520			if (remount) {
521				return (0);
522			} else {
523				return (EBUSY);
524			}
525	}
526
527	/*
528	 * This is not a remount. Even if MS_REMOUNT was requested,
529	 * the caller needs to proceed as it would on an ordinary
530	 * mount.
531	 */
532	*remounting = 0;
533
534	ASSERT(*pseudodev);
535	return (0);
536}
537
538/*
539 * Get the PCFS-specific mount options from the VFS framework.
540 * For "timezone" and "secsize", we need to parse the number
541 * ourselves and ensure its validity.
542 * Note: "secsize" is deliberately undocumented at this time,
543 * it's a workaround for devices (particularly: lofi image files)
544 * that don't support the DKIOCGMEDIAINFO ioctl for autodetection.
545 */
546static void
547pcfs_parse_mntopts(struct pcfs *fsp)
548{
549	char *c;
550	char *endptr;
551	long l;
552	struct vfs *vfsp = fsp->pcfs_vfs;
553
554	ASSERT(fsp->pcfs_secondswest == 0);
555	ASSERT(fsp->pcfs_secsize == 0);
556
557	if (vfs_optionisset(vfsp, MNTOPT_PCFS_HIDDEN, NULL))
558		fsp->pcfs_flags |= PCFS_HIDDEN;
559	if (vfs_optionisset(vfsp, MNTOPT_PCFS_FOLDCASE, NULL))
560		fsp->pcfs_flags |= PCFS_FOLDCASE;
561	if (vfs_optionisset(vfsp, MNTOPT_PCFS_NOCLAMPTIME, NULL))
562		fsp->pcfs_flags |= PCFS_NOCLAMPTIME;
563	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
564		fsp->pcfs_flags |= PCFS_NOATIME;
565
566	if (vfs_optionisset(vfsp, MNTOPT_PCFS_TIMEZONE, &c)) {
567		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
568		    endptr == c + strlen(c)) {
569			/*
570			 * A number alright - in the allowed range ?
571			 */
572			if (l <= -12*3600 || l >= 12*3600) {
573				cmn_err(CE_WARN, "!pcfs: invalid use of "
574				    "'timezone' mount option - %ld "
575				    "is out of range. Assuming 0.", l);
576				l = 0;
577			}
578		} else {
579			cmn_err(CE_WARN, "!pcfs: invalid use of "
580			    "'timezone' mount option - argument %s "
581			    "is not a valid number. Assuming 0.", c);
582			l = 0;
583		}
584		fsp->pcfs_secondswest = l;
585	}
586
587	/*
588	 * The "secsize=..." mount option is a workaround for the lack of
589	 * lofi(7d) support for DKIOCGMEDIAINFO. If PCFS wants to parse the
590	 * partition table of a disk image and it has been partitioned with
591	 * sector sizes other than 512 bytes, we'd fail on loopback'ed disk
592	 * images.
593	 * That should really be fixed in lofi ... this is a workaround.
594	 */
595	if (vfs_optionisset(vfsp, MNTOPT_PCFS_SECSIZE, &c)) {
596		if (ddi_strtol(c, &endptr, 10, &l) == 0 &&
597		    endptr == c + strlen(c)) {
598			/*
599			 * A number alright - a valid sector size as well ?
600			 */
601			if (!VALID_SECSIZE(l)) {
602				cmn_err(CE_WARN, "!pcfs: invalid use of "
603				    "'secsize' mount option - %ld is "
604				    "unsupported. Autodetecting.", l);
605				l = 0;
606			}
607		} else {
608			cmn_err(CE_WARN, "!pcfs: invalid use of "
609			    "'secsize' mount option - argument %s "
610			    "is not a valid number. Autodetecting.", c);
611			l = 0;
612		}
613		fsp->pcfs_secsize = l;
614		fsp->pcfs_sdshift = ddi_ffs(l / DEV_BSIZE) - 1;
615	}
616}
617
618/*
619 * vfs operations
620 */
621
622/*
623 * pcfs_mount - backend for VFS_MOUNT() on PCFS.
624 */
625static int
626pcfs_mount(
627	struct vfs *vfsp,
628	struct vnode *mvp,
629	struct mounta *uap,
630	struct cred *cr)
631{
632	struct pcfs *fsp;
633	struct vnode *devvp;
634	dev_t pseudodev;
635	dev_t xdev;
636	int dos_ldrive = 0;
637	int error;
638	int remounting;
639
640	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
641		return (error);
642
643	if (mvp->v_type != VDIR)
644		return (ENOTDIR);
645
646	mutex_enter(&mvp->v_lock);
647	if ((uap->flags & MS_REMOUNT) == 0 &&
648	    (uap->flags & MS_OVERLAY) == 0 &&
649	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
650		mutex_exit(&mvp->v_lock);
651		return (EBUSY);
652	}
653	mutex_exit(&mvp->v_lock);
654
655	/*
656	 * PCFS doesn't do mount arguments anymore - everything's a mount
657	 * option these days. In order not to break existing callers, we
658	 * don't reject it yet, just warn that the data (if any) is ignored.
659	 */
660	if (uap->datalen != 0)
661		cmn_err(CE_WARN, "!pcfs: deprecated use of mount(2) with "
662		    "mount argument structures instead of mount options. "
663		    "Ignoring mount(2) 'dataptr' argument.");
664
665	/*
666	 * This is needed early, to make sure the access / open calls
667	 * are done using the correct mode. Processing this mount option
668	 * only when calling pcfs_parse_mntopts() would lead us to attempt
669	 * a read/write access to a possibly writeprotected device, and
670	 * a readonly mount attempt might fail because of that.
671	 */
672	if (uap->flags & MS_RDONLY) {
673		vfsp->vfs_flag |= VFS_RDONLY;
674		vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
675	}
676
677	/*
678	 * For most filesystems, this is just a lookupname() on the
679	 * mount pathname string. PCFS historically has to do its own
680	 * partition table parsing because not all Solaris architectures
681	 * support all styles of partitioning that PC media can have, and
682	 * hence PCFS understands "device names" that don't map to actual
683	 * physical device nodes. Parsing the "PCFS syntax" for device
684	 * names is done in pcfs_device_identify() - see there.
685	 *
686	 * Once all block device drivers that can host FAT filesystems have
687	 * been enhanced to create device nodes for all PC-style partitions,
688	 * this code can go away.
689	 */
690	if (error = pcfs_device_identify(vfsp, uap, cr, &dos_ldrive, &xdev))
691		return (error);
692
693	/*
694	 * As with looking up the actual device to mount, PCFS cannot rely
695	 * on just the checks done by vfs_ismounted() whether a given device
696	 * is mounted already. The additional check against the "PCFS syntax"
697	 * is done in  pcfs_device_ismounted().
698	 */
699	remounting = (uap->flags & MS_REMOUNT);
700
701	if (error = pcfs_device_ismounted(vfsp, dos_ldrive, xdev, &remounting,
702	    &pseudodev))
703		return (error);
704
705	if (remounting)
706		return (0);
707
708	/*
709	 * Mount the filesystem.
710	 * An instance structure is required before the attempt to locate
711	 * and parse the FAT BPB. This is because mount options may change
712	 * the behaviour of the filesystem type matching code. Precreate
713	 * it and fill it in to a degree that allows parsing the mount
714	 * options.
715	 */
716	devvp = makespecvp(xdev, VBLK);
717	if (IS_SWAPVP(devvp)) {
718		VN_RELE(devvp);
719		return (EBUSY);
720	}
721	error = VOP_OPEN(&devvp,
722	    (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD | FWRITE, cr, NULL);
723	if (error) {
724		VN_RELE(devvp);
725		return (error);
726	}
727
728	fsp = kmem_zalloc(sizeof (*fsp), KM_SLEEP);
729	fsp->pcfs_vfs = vfsp;
730	fsp->pcfs_xdev = xdev;
731	fsp->pcfs_devvp = devvp;
732	fsp->pcfs_ldrive = dos_ldrive;
733	mutex_init(&fsp->pcfs_lock, NULL, MUTEX_DEFAULT, NULL);
734
735	pcfs_parse_mntopts(fsp);
736
737	/*
738	 * This is the actual "mount" - the PCFS superblock check.
739	 *
740	 * Find the requested logical drive and the FAT BPB therein.
741	 * Check device type and flag the instance if media is removeable.
742	 *
743	 * Initializes most members of the filesystem instance structure.
744	 * Returns EINVAL if no valid BPB can be found. Other errors may
745	 * occur after I/O failures, or when invalid / unparseable partition
746	 * tables are encountered.
747	 */
748	if (error = pc_getfattype(fsp))
749		goto errout;
750
751	/*
752	 * Now that the BPB has been parsed, this structural information
753	 * is available and known to be valid. Initialize the VFS.
754	 */
755	vfsp->vfs_data = fsp;
756	vfsp->vfs_dev = pseudodev;
757	vfsp->vfs_fstype = pcfstype;
758	vfs_make_fsid(&vfsp->vfs_fsid, pseudodev, pcfstype);
759	vfsp->vfs_bcount = 0;
760	vfsp->vfs_bsize = fsp->pcfs_clsize;
761
762	/*
763	 * Validate that we can access the FAT and that it is, to the
764	 * degree we can verify here, self-consistent.
765	 */
766	if (error = pc_verify(fsp))
767		goto errout;
768
769	/*
770	 * Record the time of the mount, to return as an "approximate"
771	 * timestamp for the FAT root directory. Since FAT roots don't
772	 * have timestamps, this is less confusing to the user than
773	 * claiming "zero" / Jan/01/1970.
774	 */
775	gethrestime(&fsp->pcfs_mounttime);
776
777	/*
778	 * Fix up the mount options. Because "noatime" is made default on
779	 * removeable media only, a fixed disk will have neither "atime"
780	 * nor "noatime" set. We set the options explicitly depending on
781	 * the PCFS_NOATIME flag, to inform the user of what applies.
782	 * Mount option cancellation will take care that the mutually
783	 * exclusive 'other' is cleared.
784	 */
785	vfs_setmntopt(vfsp,
786	    fsp->pcfs_flags & PCFS_NOATIME ? MNTOPT_NOATIME : MNTOPT_ATIME,
787	    NULL, 0);
788
789	/*
790	 * All clear - insert the FS instance into PCFS' list.
791	 */
792	mutex_enter(&pcfslock);
793	fsp->pcfs_nxt = pc_mounttab;
794	pc_mounttab = fsp;
795	mutex_exit(&pcfslock);
796	atomic_inc_32(&pcfs_mountcount);
797	return (0);
798
799errout:
800	(void) VOP_CLOSE(devvp,
801	    vfsp->vfs_flag & VFS_RDONLY ? FREAD : FREAD | FWRITE,
802	    1, (offset_t)0, cr, NULL);
803	VN_RELE(devvp);
804	mutex_destroy(&fsp->pcfs_lock);
805	kmem_free(fsp, sizeof (*fsp));
806	return (error);
807
808}
809
810static int
811pcfs_unmount(
812	struct vfs *vfsp,
813	int flag,
814	struct cred *cr)
815{
816	struct pcfs *fsp, *fsp1;
817
818	if (secpolicy_fs_unmount(cr, vfsp) != 0)
819		return (EPERM);
820
821	fsp = VFSTOPCFS(vfsp);
822
823	/*
824	 * We don't have to lock fsp because the VVFSLOCK in vfs layer will
825	 * prevent lookuppn from crossing the mount point.
826	 * If this is not a forced umount request and there's ongoing I/O,
827	 * don't allow the mount to proceed.
828	 */
829	if (flag & MS_FORCE)
830		vfsp->vfs_flag |= VFS_UNMOUNTED;
831	else if (fsp->pcfs_nrefs)
832		return (EBUSY);
833
834	mutex_enter(&pcfslock);
835
836	/*
837	 * If this is a forced umount request or if the fs instance has
838	 * been marked as beyond recovery, allow the umount to proceed
839	 * regardless of state. pc_diskchanged() forcibly releases all
840	 * inactive vnodes/pcnodes.
841	 */
842	if (flag & MS_FORCE || fsp->pcfs_flags & PCFS_IRRECOV) {
843		rw_enter(&pcnodes_lock, RW_WRITER);
844		pc_diskchanged(fsp);
845		rw_exit(&pcnodes_lock);
846	}
847
848	/* now there should be no pcp node on pcfhead or pcdhead. */
849
850	if (fsp == pc_mounttab) {
851		pc_mounttab = fsp->pcfs_nxt;
852	} else {
853		for (fsp1 = pc_mounttab; fsp1 != NULL; fsp1 = fsp1->pcfs_nxt)
854			if (fsp1->pcfs_nxt == fsp)
855				fsp1->pcfs_nxt = fsp->pcfs_nxt;
856	}
857
858	mutex_exit(&pcfslock);
859
860	/*
861	 * Since we support VFS_FREEVFS(), there's no need to
862	 * free the fsp right now. The framework will tell us
863	 * when the right time to do so has arrived by calling
864	 * into pcfs_freevfs.
865	 */
866	return (0);
867}
868
869/*
870 * find root of pcfs
871 */
872static int
873pcfs_root(
874	struct vfs *vfsp,
875	struct vnode **vpp)
876{
877	struct pcfs *fsp;
878	struct pcnode *pcp;
879	int error;
880
881	fsp = VFSTOPCFS(vfsp);
882	if (error = pc_lockfs(fsp, 0, 0))
883		return (error);
884
885	pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
886	pc_unlockfs(fsp);
887	*vpp = PCTOV(pcp);
888	pcp->pc_flags |= PC_EXTERNAL;
889	return (0);
890}
891
892/*
893 * Get file system statistics.
894 */
895static int
896pcfs_statvfs(
897	struct vfs *vfsp,
898	struct statvfs64 *sp)
899{
900	struct pcfs *fsp;
901	int error;
902	dev32_t d32;
903
904	fsp = VFSTOPCFS(vfsp);
905	error = pc_getfat(fsp);
906	if (error)
907		return (error);
908	bzero(sp, sizeof (*sp));
909	sp->f_bsize = sp->f_frsize = fsp->pcfs_clsize;
910	sp->f_blocks = (fsblkcnt64_t)fsp->pcfs_ncluster;
911	sp->f_bavail = sp->f_bfree = (fsblkcnt64_t)pc_freeclusters(fsp);
912	sp->f_files = (fsfilcnt64_t)-1;
913	sp->f_ffree = (fsfilcnt64_t)-1;
914	sp->f_favail = (fsfilcnt64_t)-1;
915#ifdef notdef
916	(void) cmpldev(&d32, fsp->pcfs_devvp->v_rdev);
917#endif /* notdef */
918	(void) cmpldev(&d32, vfsp->vfs_dev);
919	sp->f_fsid = d32;
920	(void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
921	sp->f_flag = vf_to_stf(vfsp->vfs_flag);
922	sp->f_namemax = PCMAXNAMLEN;
923	return (0);
924}
925
926static int
927pc_syncfsnodes(struct pcfs *fsp)
928{
929	struct pchead *hp;
930	struct pcnode *pcp;
931	int error;
932
933	if (error = pc_lockfs(fsp, 0, 0))
934		return (error);
935
936	if (!(error = pc_syncfat(fsp))) {
937		hp = pcfhead;
938		while (hp < & pcfhead [ NPCHASH ]) {
939			rw_enter(&pcnodes_lock, RW_READER);
940			pcp = hp->pch_forw;
941			while (pcp != (struct pcnode *)hp) {
942				if (VFSTOPCFS(PCTOV(pcp) -> v_vfsp) == fsp)
943					if (error = pc_nodesync(pcp))
944						break;
945				pcp = pcp -> pc_forw;
946			}
947			rw_exit(&pcnodes_lock);
948			if (error)
949				break;
950			hp++;
951		}
952	}
953	pc_unlockfs(fsp);
954	return (error);
955}
956
957/*
958 * Flush any pending I/O.
959 */
960/*ARGSUSED*/
961static int
962pcfs_sync(
963	struct vfs *vfsp,
964	short flag,
965	struct cred *cr)
966{
967	struct pcfs *fsp;
968	int error = 0;
969
970	/* this prevents the filesystem from being umounted. */
971	mutex_enter(&pcfslock);
972	if (vfsp != NULL) {
973		fsp = VFSTOPCFS(vfsp);
974		if (!(fsp->pcfs_flags & PCFS_IRRECOV)) {
975			error = pc_syncfsnodes(fsp);
976		} else {
977			rw_enter(&pcnodes_lock, RW_WRITER);
978			pc_diskchanged(fsp);
979			rw_exit(&pcnodes_lock);
980			error = EIO;
981		}
982	} else {
983		fsp = pc_mounttab;
984		while (fsp != NULL) {
985			if (fsp->pcfs_flags & PCFS_IRRECOV) {
986				rw_enter(&pcnodes_lock, RW_WRITER);
987				pc_diskchanged(fsp);
988				rw_exit(&pcnodes_lock);
989				error = EIO;
990				break;
991			}
992			error = pc_syncfsnodes(fsp);
993			if (error) break;
994			fsp = fsp->pcfs_nxt;
995		}
996	}
997	mutex_exit(&pcfslock);
998	return (error);
999}
1000
1001int
1002pc_lockfs(struct pcfs *fsp, int diskchanged, int releasing)
1003{
1004	int err;
1005
1006	if ((fsp->pcfs_flags & PCFS_IRRECOV) && !releasing)
1007		return (EIO);
1008
1009	if ((fsp->pcfs_flags & PCFS_LOCKED) && (fsp->pcfs_owner == curthread)) {
1010		fsp->pcfs_count++;
1011	} else {
1012		mutex_enter(&fsp->pcfs_lock);
1013		if (fsp->pcfs_flags & PCFS_LOCKED)
1014			panic("pc_lockfs");
1015		/*
1016		 * We check the IRRECOV bit again just in case somebody
1017		 * snuck past the initial check but then got held up before
1018		 * they could grab the lock.  (And in the meantime someone
1019		 * had grabbed the lock and set the bit)
1020		 */
1021		if (!diskchanged && !(fsp->pcfs_flags & PCFS_IRRECOV)) {
1022			if ((err = pc_getfat(fsp))) {
1023				mutex_exit(&fsp->pcfs_lock);
1024				return (err);
1025			}
1026		}
1027		fsp->pcfs_flags |= PCFS_LOCKED;
1028		fsp->pcfs_owner = curthread;
1029		fsp->pcfs_count++;
1030	}
1031	return (0);
1032}
1033
1034void
1035pc_unlockfs(struct pcfs *fsp)
1036{
1037
1038	if ((fsp->pcfs_flags & PCFS_LOCKED) == 0)
1039		panic("pc_unlockfs");
1040	if (--fsp->pcfs_count < 0)
1041		panic("pc_unlockfs: count");
1042	if (fsp->pcfs_count == 0) {
1043		fsp->pcfs_flags &= ~PCFS_LOCKED;
1044		fsp->pcfs_owner = 0;
1045		mutex_exit(&fsp->pcfs_lock);
1046	}
1047}
1048
1049int
1050pc_syncfat(struct pcfs *fsp)
1051{
1052	struct buf *bp;
1053	int nfat;
1054	int	error = 0;
1055	struct fat_od_fsi *fsinfo_disk;
1056
1057	if ((fsp->pcfs_fatp == (uchar_t *)0) ||
1058	    !(fsp->pcfs_flags & PCFS_FATMOD))
1059		return (0);
1060	/*
1061	 * write out all copies of FATs
1062	 */
1063	fsp->pcfs_flags &= ~PCFS_FATMOD;
1064	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
1065	for (nfat = 0; nfat < fsp->pcfs_numfat; nfat++) {
1066		error = pc_writefat(fsp, pc_dbdaddr(fsp,
1067		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec));
1068		if (error) {
1069			pc_mark_irrecov(fsp);
1070			return (EIO);
1071		}
1072	}
1073	pc_clear_fatchanges(fsp);
1074
1075	/*
1076	 * Write out fsinfo sector.
1077	 */
1078	if (IS_FAT32(fsp)) {
1079		bp = bread(fsp->pcfs_xdev,
1080		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
1081		if (bp->b_flags & (B_ERROR | B_STALE)) {
1082			error = geterror(bp);
1083		}
1084		fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
1085		if (!error && FSISIG_OK(fsinfo_disk)) {
1086			fsinfo_disk->fsi_incore.fs_free_clusters =
1087			    LE_32(fsp->pcfs_fsinfo.fs_free_clusters);
1088			fsinfo_disk->fsi_incore.fs_next_free =
1089			    LE_32(FSINFO_UNKNOWN);
1090			bwrite2(bp);
1091			error = geterror(bp);
1092		}
1093		brelse(bp);
1094		if (error) {
1095			pc_mark_irrecov(fsp);
1096			return (EIO);
1097		}
1098	}
1099	return (0);
1100}
1101
1102void
1103pc_invalfat(struct pcfs *fsp)
1104{
1105	struct pcfs *xfsp;
1106	int mount_cnt = 0;
1107
1108	if (fsp->pcfs_fatp == (uchar_t *)0)
1109		panic("pc_invalfat");
1110	/*
1111	 * Release FAT
1112	 */
1113	kmem_free(fsp->pcfs_fatp, fsp->pcfs_fatsec * fsp->pcfs_secsize);
1114	fsp->pcfs_fatp = NULL;
1115	kmem_free(fsp->pcfs_fat_changemap, fsp->pcfs_fat_changemapsize);
1116	fsp->pcfs_fat_changemap = NULL;
1117	/*
1118	 * Invalidate all the blocks associated with the device.
1119	 * Not needed if stateless.
1120	 */
1121	for (xfsp = pc_mounttab; xfsp; xfsp = xfsp->pcfs_nxt)
1122		if (xfsp != fsp && xfsp->pcfs_xdev == fsp->pcfs_xdev)
1123			mount_cnt++;
1124
1125	if (!mount_cnt)
1126		binval(fsp->pcfs_xdev);
1127	/*
1128	 * close mounted device
1129	 */
1130	(void) VOP_CLOSE(fsp->pcfs_devvp,
1131	    (PCFSTOVFS(fsp)->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
1132	    1, (offset_t)0, CRED(), NULL);
1133}
1134
1135void
1136pc_badfs(struct pcfs *fsp)
1137{
1138	cmn_err(CE_WARN, "corrupted PC file system on dev (%x.%x):%d\n",
1139	    getmajor(fsp->pcfs_devvp->v_rdev),
1140	    getminor(fsp->pcfs_devvp->v_rdev), fsp->pcfs_ldrive);
1141}
1142
1143/*
1144 * The problem with supporting NFS on the PCFS filesystem is that there
1145 * is no good place to keep the generation number. The only possible
1146 * place is inside a directory entry. There are a few words that we
1147 * don't use - they store NT & OS/2 attributes, and the creation/last access
1148 * time of the file - but it seems wrong to use them. In addition, directory
1149 * entries come and go. If a directory is removed completely, its directory
1150 * blocks are freed and the generation numbers are lost. Whereas in ufs,
1151 * inode blocks are dedicated for inodes, so the generation numbers are
1152 * permanently kept on the disk.
1153 */
1154static int
1155pcfs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
1156{
1157	struct pcnode *pcp;
1158	struct pc_fid *pcfid;
1159	struct pcfs *fsp;
1160	struct pcdir *ep;
1161	daddr_t eblkno;
1162	int eoffset;
1163	struct buf *bp;
1164	int error;
1165	pc_cluster32_t	cn;
1166
1167	pcfid = (struct pc_fid *)fidp;
1168	fsp = VFSTOPCFS(vfsp);
1169
1170	error = pc_lockfs(fsp, 0, 0);
1171	if (error) {
1172		*vpp = NULL;
1173		return (error);
1174	}
1175
1176	if (pcfid->pcfid_block == 0) {
1177		pcp = pc_getnode(fsp, (daddr_t)0, 0, (struct pcdir *)0);
1178		pcp->pc_flags |= PC_EXTERNAL;
1179		*vpp = PCTOV(pcp);
1180		pc_unlockfs(fsp);
1181		return (0);
1182	}
1183	eblkno = pcfid->pcfid_block;
1184	eoffset = pcfid->pcfid_offset;
1185
1186	if ((pc_dbtocl(fsp,
1187	    eblkno - fsp->pcfs_dosstart) >= fsp->pcfs_ncluster) ||
1188	    (eoffset > fsp->pcfs_clsize)) {
1189		pc_unlockfs(fsp);
1190		*vpp = NULL;
1191		return (EINVAL);
1192	}
1193
1194	if (eblkno >= fsp->pcfs_datastart || (eblkno - fsp->pcfs_rdirstart)
1195	    < (fsp->pcfs_rdirsec & ~(fsp->pcfs_spcl - 1))) {
1196		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1197		    fsp->pcfs_clsize);
1198	} else {
1199		/*
1200		 * This is an access "backwards" into the FAT12/FAT16
1201		 * root directory. A better code structure would
1202		 * significantly improve maintainability here ...
1203		 */
1204		bp = bread(fsp->pcfs_xdev, pc_dbdaddr(fsp, eblkno),
1205		    (int)(fsp->pcfs_datastart - eblkno) * fsp->pcfs_secsize);
1206	}
1207	if (bp->b_flags & (B_ERROR | B_STALE)) {
1208		error = geterror(bp);
1209		brelse(bp);
1210		if (error)
1211			pc_mark_irrecov(fsp);
1212		*vpp = NULL;
1213		pc_unlockfs(fsp);
1214		return (error);
1215	}
1216	ep = (struct pcdir *)(bp->b_un.b_addr + eoffset);
1217	/*
1218	 * Ok, if this is a valid file handle that we gave out,
1219	 * then simply ensuring that the creation time matches,
1220	 * the entry has not been deleted, and it has a valid first
1221	 * character should be enough.
1222	 *
1223	 * Unfortunately, verifying that the <blkno, offset> _still_
1224	 * refers to a directory entry is not easy, since we'd have
1225	 * to search _all_ directories starting from root to find it.
1226	 * That's a high price to pay just in case somebody is forging
1227	 * file handles. So instead we verify that as much of the
1228	 * entry is valid as we can:
1229	 *
1230	 * 1. The starting cluster is 0 (unallocated) or valid
1231	 * 2. It is not an LFN entry
1232	 * 3. It is not hidden (unless mounted as such)
1233	 * 4. It is not the label
1234	 */
1235	cn = pc_getstartcluster(fsp, ep);
1236	/*
1237	 * if the starting cluster is valid, but not valid according
1238	 * to pc_validcl(), force it to be to simplify the following if.
1239	 */
1240	if (cn == 0)
1241		cn = PCF_FIRSTCLUSTER;
1242	if (IS_FAT32(fsp)) {
1243		if (cn >= PCF_LASTCLUSTER32)
1244			cn = PCF_FIRSTCLUSTER;
1245	} else {
1246		if (cn >= PCF_LASTCLUSTER)
1247			cn = PCF_FIRSTCLUSTER;
1248	}
1249	if ((!pc_validcl(fsp, cn)) ||
1250	    (PCDL_IS_LFN(ep)) ||
1251	    (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) ||
1252	    ((ep->pcd_attr & PCA_LABEL) == PCA_LABEL)) {
1253		bp->b_flags |= B_STALE | B_AGE;
1254		brelse(bp);
1255		pc_unlockfs(fsp);
1256		return (EINVAL);
1257	}
1258	if ((ep->pcd_crtime.pct_time == pcfid->pcfid_ctime) &&
1259	    (ep->pcd_filename[0] != PCD_ERASED) &&
1260	    (pc_validchar(ep->pcd_filename[0]) ||
1261	    (ep->pcd_filename[0] == '.' && ep->pcd_filename[1] == '.'))) {
1262		pcp = pc_getnode(fsp, eblkno, eoffset, ep);
1263		pcp->pc_flags |= PC_EXTERNAL;
1264		*vpp = PCTOV(pcp);
1265	} else {
1266		*vpp = NULL;
1267	}
1268	bp->b_flags |= B_STALE | B_AGE;
1269	brelse(bp);
1270	pc_unlockfs(fsp);
1271	return (0);
1272}
1273
1274/*
1275 * Unfortunately, FAT32 fat's can be pretty big (On a 1 gig jaz drive, about
1276 * a meg), so we can't bread() it all in at once. This routine reads a
1277 * fat a chunk at a time.
1278 */
1279static int
1280pc_readfat(struct pcfs *fsp, uchar_t *fatp)
1281{
1282	struct buf *bp;
1283	size_t off;
1284	size_t readsize;
1285	daddr_t diskblk;
1286	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1287	daddr_t start = fsp->pcfs_fatstart;
1288
1289	readsize = fsp->pcfs_clsize;
1290	for (off = 0; off < fatsize; off += readsize, fatp += readsize) {
1291		if (readsize > (fatsize - off))
1292			readsize = fatsize - off;
1293		diskblk = pc_dbdaddr(fsp, start +
1294		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1295		bp = bread(fsp->pcfs_xdev, diskblk, readsize);
1296		if (bp->b_flags & (B_ERROR | B_STALE)) {
1297			brelse(bp);
1298			return (EIO);
1299		}
1300		bp->b_flags |= B_STALE | B_AGE;
1301		bcopy(bp->b_un.b_addr, fatp, readsize);
1302		brelse(bp);
1303	}
1304	return (0);
1305}
1306
1307/*
1308 * We write the FAT out a _lot_, in order to make sure that it
1309 * is up-to-date. But on a FAT32 system (large drive, small clusters)
1310 * the FAT might be a couple of megabytes, and writing it all out just
1311 * because we created or deleted a small file is painful (especially
1312 * since we do it for each alternate FAT too). So instead, for FAT16 and
1313 * FAT32 we only write out the bit that has changed. We don't clear
1314 * the 'updated' fields here because the caller might be writing out
1315 * several FATs, so the caller must use pc_clear_fatchanges() after
1316 * all FATs have been updated.
1317 * This function doesn't take "start" from fsp->pcfs_dosstart because
1318 * callers can use it to write either the primary or any of the alternate
1319 * FAT tables.
1320 */
1321static int
1322pc_writefat(struct pcfs *fsp, daddr_t start)
1323{
1324	struct buf *bp;
1325	size_t off;
1326	size_t writesize;
1327	int	error;
1328	uchar_t *fatp = fsp->pcfs_fatp;
1329	size_t fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
1330
1331	writesize = fsp->pcfs_clsize;
1332	for (off = 0; off < fatsize; off += writesize, fatp += writesize) {
1333		if (writesize > (fatsize - off))
1334			writesize = fatsize - off;
1335		if (!pc_fat_is_changed(fsp, pc_lblkno(fsp, off))) {
1336			continue;
1337		}
1338		bp = ngeteblk(writesize);
1339		bp->b_edev = fsp->pcfs_xdev;
1340		bp->b_dev = cmpdev(bp->b_edev);
1341		bp->b_blkno = pc_dbdaddr(fsp, start +
1342		    pc_cltodb(fsp, pc_lblkno(fsp, off)));
1343		bcopy(fatp, bp->b_un.b_addr, writesize);
1344		bwrite2(bp);
1345		error = geterror(bp);
1346		brelse(bp);
1347		if (error) {
1348			return (error);
1349		}
1350	}
1351	return (0);
1352}
1353
1354/*
1355 * Mark the FAT cluster that 'cn' is stored in as modified.
1356 */
1357void
1358pc_mark_fat_updated(struct pcfs *fsp, pc_cluster32_t cn)
1359{
1360	pc_cluster32_t	bn;
1361	size_t		size;
1362
1363	/* which fat block is the cluster number stored in? */
1364	if (IS_FAT32(fsp)) {
1365		size = sizeof (pc_cluster32_t);
1366		bn = pc_lblkno(fsp, cn * size);
1367		fsp->pcfs_fat_changemap[bn] = 1;
1368	} else if (IS_FAT16(fsp)) {
1369		size = sizeof (pc_cluster16_t);
1370		bn = pc_lblkno(fsp, cn * size);
1371		fsp->pcfs_fat_changemap[bn] = 1;
1372	} else {
1373		offset_t off;
1374		pc_cluster32_t nbn;
1375
1376		ASSERT(IS_FAT12(fsp));
1377		off = cn + (cn >> 1);
1378		bn = pc_lblkno(fsp, off);
1379		fsp->pcfs_fat_changemap[bn] = 1;
1380		/* does this field wrap into the next fat cluster? */
1381		nbn = pc_lblkno(fsp, off + 1);
1382		if (nbn != bn) {
1383			fsp->pcfs_fat_changemap[nbn] = 1;
1384		}
1385	}
1386}
1387
1388/*
1389 * return whether the FAT cluster 'bn' is updated and needs to
1390 * be written out.
1391 */
1392int
1393pc_fat_is_changed(struct pcfs *fsp, pc_cluster32_t bn)
1394{
1395	return (fsp->pcfs_fat_changemap[bn] == 1);
1396}
1397
1398/*
1399 * Implementation of VFS_FREEVFS() to support forced umounts.
1400 * This is called by the vfs framework after umount, to trigger
1401 * the release of any resources still associated with the given
1402 * vfs_t once the need to keep them has gone away.
1403 */
1404void
1405pcfs_freevfs(vfs_t *vfsp)
1406{
1407	struct pcfs *fsp = VFSTOPCFS(vfsp);
1408
1409	mutex_enter(&pcfslock);
1410	/*
1411	 * Purging the FAT closes the device - can't do any more
1412	 * I/O after this.
1413	 */
1414	if (fsp->pcfs_fatp != (uchar_t *)0)
1415		pc_invalfat(fsp);
1416	mutex_exit(&pcfslock);
1417
1418	VN_RELE(fsp->pcfs_devvp);
1419	mutex_destroy(&fsp->pcfs_lock);
1420	kmem_free(fsp, sizeof (*fsp));
1421
1422	/*
1423	 * Allow _fini() to succeed now, if so desired.
1424	 */
1425	atomic_dec_32(&pcfs_mountcount);
1426}
1427
1428
1429/*
1430 * PC-style partition parsing and FAT BPB identification/validation code.
1431 * The partition parsers here assume:
1432 *	- a FAT filesystem will be in a partition that has one of a set of
1433 *	  recognized partition IDs
1434 *	- the user wants the 'numbering' (C:, D:, ...) that one would get
1435 *	  on MSDOS 6.x.
1436 *	  That means any non-FAT partition type (NTFS, HPFS, or any Linux fs)
1437 *	  will not factor in the enumeration.
1438 * These days, such assumptions should be revisited. FAT is no longer the
1439 * only game in 'PC town'.
1440 */
1441/*
1442 * isDosDrive()
1443 *	Boolean function.  Give it the systid field for an fdisk partition
1444 *	and it decides if that's a systid that describes a DOS drive.  We
1445 *	use systid values defined in sys/dktp/fdisk.h.
1446 */
1447static int
1448isDosDrive(uchar_t checkMe)
1449{
1450	return ((checkMe == DOSOS12) || (checkMe == DOSOS16) ||
1451	    (checkMe == DOSHUGE) || (checkMe == FDISK_WINDOWS) ||
1452	    (checkMe == FDISK_EXT_WIN) || (checkMe == FDISK_FAT95) ||
1453	    (checkMe == DIAGPART));
1454}
1455
1456
1457/*
1458 * isDosExtended()
1459 *	Boolean function.  Give it the systid field for an fdisk partition
1460 *	and it decides if that's a systid that describes an extended DOS
1461 *	partition.
1462 */
1463static int
1464isDosExtended(uchar_t checkMe)
1465{
1466	return ((checkMe == EXTDOS) || (checkMe == FDISK_EXTLBA));
1467}
1468
1469
1470/*
1471 * isBootPart()
1472 *	Boolean function.  Give it the systid field for an fdisk partition
1473 *	and it decides if that's a systid that describes a Solaris boot
1474 *	partition.
1475 */
1476static int
1477isBootPart(uchar_t checkMe)
1478{
1479	return (checkMe == X86BOOT);
1480}
1481
1482
1483/*
1484 * noLogicalDrive()
1485 *	Display error message about not being able to find a logical
1486 *	drive.
1487 */
1488static void
1489noLogicalDrive(int ldrive)
1490{
1491	if (ldrive == BOOT_PARTITION_DRIVE) {
1492		cmn_err(CE_NOTE, "!pcfs: no boot partition");
1493	} else {
1494		cmn_err(CE_NOTE, "!pcfs: %d: no such logical drive", ldrive);
1495	}
1496}
1497
1498
1499/*
1500 * findTheDrive()
1501 *	Discover offset of the requested logical drive, and return
1502 *	that offset (startSector), the systid of that drive (sysid),
1503 *	and a buffer pointer (bp), with the buffer contents being
1504 *	the first sector of the logical drive (i.e., the sector that
1505 *	contains the BPB for that drive).
1506 *
1507 * Note: this code is not capable of addressing >2TB disks, as it uses
1508 *       daddr_t not diskaddr_t, some of the calculations would overflow
1509 */
1510#define	COPY_PTBL(mbr, ptblp)					\
1511	bcopy(&(((struct mboot *)(mbr))->parts), (ptblp),	\
1512	    FD_NUMPART * sizeof (struct ipart))
1513
1514static int
1515findTheDrive(struct pcfs *fsp, buf_t **bp)
1516{
1517	int ldrive = fsp->pcfs_ldrive;
1518	dev_t dev = fsp->pcfs_devvp->v_rdev;
1519
1520	struct ipart dosp[FD_NUMPART];	/* incore fdisk partition structure */
1521	daddr_t lastseek = 0;		/* Disk block we sought previously */
1522	daddr_t diskblk = 0;		/* Disk block to get */
1523	daddr_t xstartsect;		/* base of Extended DOS partition */
1524	int logicalDriveCount = 0;	/* Count of logical drives seen */
1525	int extendedPart = -1;		/* index of extended dos partition */
1526	int primaryPart = -1;		/* index of primary dos partition */
1527	int bootPart = -1;		/* index of a Solaris boot partition */
1528	uint32_t xnumsect = 0;		/* length of extended DOS partition */
1529	int driveIndex;			/* computed FDISK table index */
1530	daddr_t startsec;
1531	len_t mediasize;
1532	int i;
1533	/*
1534	 * Count of drives in the current extended partition's
1535	 * FDISK table, and indexes of the drives themselves.
1536	 */
1537	int extndDrives[FD_NUMPART];
1538	int numDrives = 0;
1539
1540	/*
1541	 * Count of drives (beyond primary) in master boot record's
1542	 * FDISK table, and indexes of the drives themselves.
1543	 */
1544	int extraDrives[FD_NUMPART];
1545	int numExtraDrives = 0;
1546
1547	/*
1548	 * "ldrive == 0" should never happen, as this is a request to
1549	 * mount the physical device (and ignore partitioning). The code
1550	 * in pcfs_mount() should have made sure that a logical drive number
1551	 * is at least 1, meaning we're looking for drive "C:". It is not
1552	 * safe (and a bug in the callers of this function) to request logical
1553	 * drive number 0; we could ASSERT() but a graceful EIO is a more
1554	 * polite way.
1555	 */
1556	if (ldrive == 0) {
1557		cmn_err(CE_NOTE, "!pcfs: request for logical partition zero");
1558		noLogicalDrive(ldrive);
1559		return (EIO);
1560	}
1561
1562	/*
1563	 *  Copy from disk block into memory aligned structure for fdisk usage.
1564	 */
1565	COPY_PTBL((*bp)->b_un.b_addr, dosp);
1566
1567	/*
1568	 * This check is ok because a FAT BPB and a master boot record (MBB)
1569	 * have the same signature, in the same position within the block.
1570	 */
1571	if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1572		cmn_err(CE_NOTE, "!pcfs: MBR partition table signature err, "
1573		    "device (%x.%x):%d\n",
1574		    getmajor(dev), getminor(dev), ldrive);
1575		return (EINVAL);
1576	}
1577
1578	/*
1579	 * Get a summary of what is in the Master FDISK table.
1580	 * Normally we expect to find one partition marked as a DOS drive.
1581	 * This partition is the one Windows calls the primary dos partition.
1582	 * If the machine has any logical drives then we also expect
1583	 * to find a partition marked as an extended DOS partition.
1584	 *
1585	 * Sometimes we'll find multiple partitions marked as DOS drives.
1586	 * The Solaris fdisk program allows these partitions
1587	 * to be created, but Windows fdisk no longer does.  We still need
1588	 * to support these, though, since Windows does.  We also need to fix
1589	 * our fdisk to behave like the Windows version.
1590	 *
1591	 * It turns out that some off-the-shelf media have *only* an
1592	 * Extended partition, so we need to deal with that case as well.
1593	 *
1594	 * Only a single (the first) Extended or Boot Partition will
1595	 * be recognized.  Any others will be ignored.
1596	 */
1597	for (i = 0; i < FD_NUMPART; i++) {
1598		DTRACE_PROBE4(primarypart, struct pcfs *, fsp,
1599		    uint_t, (uint_t)dosp[i].systid,
1600		    uint_t, LE_32(dosp[i].relsect),
1601		    uint_t, LE_32(dosp[i].numsect));
1602
1603		if (isDosDrive(dosp[i].systid)) {
1604			if (primaryPart < 0) {
1605				logicalDriveCount++;
1606				primaryPart = i;
1607			} else {
1608				extraDrives[numExtraDrives++] = i;
1609			}
1610			continue;
1611		}
1612		if ((extendedPart < 0) && isDosExtended(dosp[i].systid)) {
1613			extendedPart = i;
1614			continue;
1615		}
1616		if ((bootPart < 0) && isBootPart(dosp[i].systid)) {
1617			bootPart = i;
1618			continue;
1619		}
1620	}
1621
1622	if (ldrive == BOOT_PARTITION_DRIVE) {
1623		if (bootPart < 0) {
1624			noLogicalDrive(ldrive);
1625			return (EINVAL);
1626		}
1627		startsec = LE_32(dosp[bootPart].relsect);
1628		mediasize = LE_32(dosp[bootPart].numsect);
1629		goto found;
1630	}
1631
1632	if (ldrive == PRIMARY_DOS_DRIVE && primaryPart >= 0) {
1633		startsec = LE_32(dosp[primaryPart].relsect);
1634		mediasize = LE_32(dosp[primaryPart].numsect);
1635		goto found;
1636	}
1637
1638	/*
1639	 * We are not looking for the C: drive (or the primary drive
1640	 * was not found), so we had better have an extended partition
1641	 * or extra drives in the Master FDISK table.
1642	 */
1643	if ((extendedPart < 0) && (numExtraDrives == 0)) {
1644		cmn_err(CE_NOTE, "!pcfs: no extended dos partition");
1645		noLogicalDrive(ldrive);
1646		return (EINVAL);
1647	}
1648
1649	if (extendedPart >= 0) {
1650		diskblk = xstartsect = LE_32(dosp[extendedPart].relsect);
1651		xnumsect = LE_32(dosp[extendedPart].numsect);
1652		do {
1653			/*
1654			 *  If the seek would not cause us to change
1655			 *  position on the drive, then we're out of
1656			 *  extended partitions to examine.
1657			 */
1658			if (diskblk == lastseek)
1659				break;
1660			logicalDriveCount += numDrives;
1661			/*
1662			 *  Seek the next extended partition, and find
1663			 *  logical drives within it.
1664			 */
1665			brelse(*bp);
1666			/*
1667			 * bread() block numbers are multiples of DEV_BSIZE
1668			 * but the device sector size (the unit of partitioning)
1669			 * might be larger than that; pcfs_get_device_info()
1670			 * has calculated the multiplicator for us.
1671			 */
1672			*bp = bread(dev,
1673			    pc_dbdaddr(fsp, diskblk), fsp->pcfs_secsize);
1674			if ((*bp)->b_flags & B_ERROR) {
1675				return (EIO);
1676			}
1677
1678			lastseek = diskblk;
1679			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1680			if (bpb_get_BPBSig((*bp)->b_un.b_addr) != MBB_MAGIC) {
1681				cmn_err(CE_NOTE, "!pcfs: "
1682				    "extended partition table signature err, "
1683				    "device (%x.%x):%d, LBA %u",
1684				    getmajor(dev), getminor(dev), ldrive,
1685				    (uint_t)pc_dbdaddr(fsp, diskblk));
1686				return (EINVAL);
1687			}
1688			/*
1689			 *  Count up drives, and track where the next
1690			 *  extended partition is in case we need it.  We
1691			 *  are expecting only one extended partition.  If
1692			 *  there is more than one we'll only go to the
1693			 *  first one we see, but warn about ignoring.
1694			 */
1695			numDrives = 0;
1696			for (i = 0; i < FD_NUMPART; i++) {
1697				DTRACE_PROBE4(extendedpart,
1698				    struct pcfs *, fsp,
1699				    uint_t, (uint_t)dosp[i].systid,
1700				    uint_t, LE_32(dosp[i].relsect),
1701				    uint_t, LE_32(dosp[i].numsect));
1702				if (isDosDrive(dosp[i].systid)) {
1703					extndDrives[numDrives++] = i;
1704				} else if (isDosExtended(dosp[i].systid)) {
1705					if (diskblk != lastseek) {
1706						/*
1707						 * Already found an extended
1708						 * partition in this table.
1709						 */
1710						cmn_err(CE_NOTE,
1711						    "!pcfs: ignoring unexpected"
1712						    " additional extended"
1713						    " partition");
1714					} else {
1715						diskblk = xstartsect +
1716						    LE_32(dosp[i].relsect);
1717					}
1718				}
1719			}
1720		} while (ldrive > logicalDriveCount + numDrives);
1721
1722		ASSERT(numDrives <= FD_NUMPART);
1723
1724		if (ldrive <= logicalDriveCount + numDrives) {
1725			/*
1726			 * The number of logical drives we've found thus
1727			 * far is enough to get us to the one we were
1728			 * searching for.
1729			 */
1730			driveIndex = logicalDriveCount + numDrives - ldrive;
1731			mediasize =
1732			    LE_32(dosp[extndDrives[driveIndex]].numsect);
1733			startsec =
1734			    LE_32(dosp[extndDrives[driveIndex]].relsect) +
1735			    lastseek;
1736			if (startsec > (xstartsect + xnumsect)) {
1737				cmn_err(CE_NOTE, "!pcfs: extended partition "
1738				    "values bad");
1739				return (EINVAL);
1740			}
1741			goto found;
1742		} else {
1743			/*
1744			 * We ran out of extended dos partition
1745			 * drives.  The only hope now is to go
1746			 * back to extra drives defined in the master
1747			 * fdisk table.  But we overwrote that table
1748			 * already, so we must load it in again.
1749			 */
1750			logicalDriveCount += numDrives;
1751			brelse(*bp);
1752			ASSERT(fsp->pcfs_dosstart == 0);
1753			*bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
1754			    fsp->pcfs_secsize);
1755			if ((*bp)->b_flags & B_ERROR) {
1756				return (EIO);
1757			}
1758			COPY_PTBL((*bp)->b_un.b_addr, dosp);
1759		}
1760	}
1761	/*
1762	 *  Still haven't found the drive, is it an extra
1763	 *  drive defined in the main FDISK table?
1764	 */
1765	if (ldrive <= logicalDriveCount + numExtraDrives) {
1766		driveIndex = logicalDriveCount + numExtraDrives - ldrive;
1767		ASSERT(driveIndex < MIN(numExtraDrives, FD_NUMPART));
1768		mediasize = LE_32(dosp[extraDrives[driveIndex]].numsect);
1769		startsec = LE_32(dosp[extraDrives[driveIndex]].relsect);
1770		goto found;
1771	}
1772	/*
1773	 *  Still haven't found the drive, and there is
1774	 *  nowhere else to look.
1775	 */
1776	noLogicalDrive(ldrive);
1777	return (EINVAL);
1778
1779found:
1780	/*
1781	 * We need this value in units of sectorsize, because PCFS' internal
1782	 * offset calculations go haywire for > 512Byte sectors unless all
1783	 * pcfs_.*start values are in units of sectors.
1784	 * So, assign before the capacity check (that's done in DEV_BSIZE)
1785	 */
1786	fsp->pcfs_dosstart = startsec;
1787
1788	/*
1789	 * convert from device sectors to proper units:
1790	 *	- starting sector: DEV_BSIZE (as argument to bread())
1791	 *	- media size: Bytes
1792	 */
1793	startsec = pc_dbdaddr(fsp, startsec);
1794	mediasize *= fsp->pcfs_secsize;
1795
1796	/*
1797	 * some additional validation / warnings in case the partition table
1798	 * and the actual media capacity are not in accordance ...
1799	 */
1800	if (fsp->pcfs_mediasize != 0) {
1801		diskaddr_t startoff =
1802		    (diskaddr_t)startsec * (diskaddr_t)DEV_BSIZE;
1803
1804		if (startoff >= fsp->pcfs_mediasize ||
1805		    startoff + mediasize > fsp->pcfs_mediasize) {
1806			cmn_err(CE_WARN,
1807			    "!pcfs: partition size (LBA start %u, %lld bytes, "
1808			    "device (%x.%x):%d) smaller than "
1809			    "mediasize (%lld bytes).\n"
1810			    "filesystem may be truncated, access errors "
1811			    "may result.\n",
1812			    (uint_t)startsec, (long long)mediasize,
1813			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1814			    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1815		}
1816	} else {
1817		fsp->pcfs_mediasize = mediasize;
1818	}
1819
1820	return (0);
1821}
1822
1823
1824static fattype_t
1825secondaryBPBChecks(struct pcfs *fsp, uchar_t *bpb, size_t secsize)
1826{
1827	uint32_t ncl = fsp->pcfs_ncluster;
1828
1829	if (ncl <= 4096) {
1830		if (bpb_get_FatSz16(bpb) == 0)
1831			return (FAT_UNKNOWN);
1832
1833		if (bpb_get_FatSz16(bpb) * secsize < ncl * 2 &&
1834		    bpb_get_FatSz16(bpb) * secsize >= (3 * ncl / 2))
1835			return (FAT12);
1836		if (bcmp(bpb_FilSysType16(bpb), "FAT12", 5) == 0)
1837			return (FAT12);
1838		if (bcmp(bpb_FilSysType16(bpb), "FAT16", 5) == 0)
1839			return (FAT16);
1840
1841		switch (bpb_get_Media(bpb)) {
1842			case SS8SPT:
1843			case DS8SPT:
1844			case SS9SPT:
1845			case DS9SPT:
1846			case DS18SPT:
1847			case DS9_15SPT:
1848				/*
1849				 * Is this reliable - all floppies are FAT12 ?
1850				 */
1851				return (FAT12);
1852			case MD_FIXED:
1853				/*
1854				 * Is this reliable - disks are always FAT16 ?
1855				 */
1856				return (FAT16);
1857			default:
1858				break;
1859		}
1860	} else if (ncl <= 65536) {
1861		if (bpb_get_FatSz16(bpb) == 0 && bpb_get_FatSz32(bpb) > 0)
1862			return (FAT32);
1863		if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
1864			return (FAT32);
1865		if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
1866			return (FAT32);
1867
1868		if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
1869			return (FAT16);
1870		if (bpb_get_FatSz16(bpb) * secsize < ncl * 4)
1871			return (FAT16);
1872	}
1873
1874	/*
1875	 * We don't know
1876	 */
1877	return (FAT_UNKNOWN);
1878}
1879
1880/*
1881 * Check to see if the BPB we found is correct.
1882 *
 * This looks far more complicated than it needs to be for pure structural
 * validation. The reason for this is that parseBPB() is also used for
 * debugging purposes (mdb dcmd) and we therefore want a bitmap of which
 * BPB fields do (or do not) have 'known good' values, whether or not we
 * reject the BPB when attempting to mount the filesystem.
1888 *
1889 * Real-world usage of FAT shows there are a lot of corner-case situations
1890 * and, following the specification strictly, invalid filesystems out there.
1891 * Known are situations such as:
1892 *	- FAT12/FAT16 filesystems with garbage in either totsec16/32
1893 *	  instead of the zero in one of the fields mandated by the spec
1894 *	- filesystems that claim to be larger than the partition they're in
1895 *	- filesystems without valid media descriptor
1896 *	- FAT32 filesystems with RootEntCnt != 0
1897 *	- FAT32 filesystems with less than 65526 clusters
1898 *	- FAT32 filesystems without valid FSI sector
1899 *	- FAT32 filesystems with FAT size in fatsec16 instead of fatsec32
1900 *
 * Such filesystems would be accessible by PCFS - if only it knew from the
 * start that the filesystem should be treated as a specific FAT type.
 * Before S10, it relied on the PC/fdisk partition type for the purpose and
 * almost completely ignored the BPB; now it ignores the partition type for
 * anything but logical drive enumeration, which can result in rejection of
 * an (invalid) FAT32 - if the partition ID says FAT32, but the filesystem,
 * for example, has less than 65526 clusters.
1908 *
1909 * Without a "force this fs as FAT{12,16,32}" tunable or mount option, it's
1910 * not possible to allow all such mostly-compliant filesystems in unless one
1911 * accepts false positives (definitely invalid filesystems that cause problems
1912 * later). This at least allows to pinpoint why the mount failed.
1913 *
 * Due to the use of FAT on removable media, all relaxations of the rules
 * here need to be carefully evaluated with respect to potential effects on
 * PCFS resilience. A faulty/"mis-crafted" filesystem must not cause a
 * panic, so beware.
1918 */
1919static int
1920parseBPB(struct pcfs *fsp, uchar_t *bpb, int *valid)
1921{
1922	fattype_t type;
1923
1924	uint32_t	ncl;	/* number of clusters in file area */
1925	uint32_t	rec;
1926	uint32_t	reserved;
1927	uint32_t	fsisec, bkbootsec;
1928	blkcnt_t	totsec, totsec16, totsec32, datasec;
1929	size_t		fatsec, fatsec16, fatsec32, rdirsec;
1930	size_t		secsize;
1931	len_t		mediasize;
1932	uint64_t	validflags = 0;
1933
1934	if (VALID_BPBSIG(bpb_get_BPBSig(bpb)))
1935		validflags |= BPB_BPBSIG_OK;
1936
1937	rec = bpb_get_RootEntCnt(bpb);
1938	reserved = bpb_get_RsvdSecCnt(bpb);
1939	fsisec = bpb_get_FSInfo32(bpb);
1940	bkbootsec = bpb_get_BkBootSec32(bpb);
1941	totsec16 = (blkcnt_t)bpb_get_TotSec16(bpb);
1942	totsec32 = (blkcnt_t)bpb_get_TotSec32(bpb);
1943	fatsec16 = bpb_get_FatSz16(bpb);
1944	fatsec32 = bpb_get_FatSz32(bpb);
1945
1946	totsec = totsec16 ? totsec16 : totsec32;
1947	fatsec = fatsec16 ? fatsec16 : fatsec32;
1948
1949	secsize = bpb_get_BytesPerSec(bpb);
1950	if (!VALID_SECSIZE(secsize))
1951		secsize = fsp->pcfs_secsize;
1952	if (secsize != fsp->pcfs_secsize) {
1953		PC_DPRINTF3(3, "!pcfs: parseBPB, device (%x.%x):%d:\n",
1954		    getmajor(fsp->pcfs_xdev),
1955		    getminor(fsp->pcfs_xdev), fsp->pcfs_ldrive);
1956		PC_DPRINTF2(3, "!BPB secsize %d != "
1957		    "autodetected media block size %d\n",
1958		    (int)secsize, (int)fsp->pcfs_secsize);
1959		if (fsp->pcfs_ldrive) {
1960			/*
1961			 * We've already attempted to parse the partition
1962			 * table. If the block size used for that don't match
1963			 * the PCFS sector size, we're hosed one way or the
1964			 * other. Just try what happens.
1965			 */
1966			secsize = fsp->pcfs_secsize;
1967			PC_DPRINTF1(3,
1968			    "!pcfs: Using autodetected secsize %d\n",
1969			    (int)secsize);
1970		} else {
1971			/*
1972			 * This allows mounting lofi images of PCFS partitions
1973			 * with sectorsize != DEV_BSIZE. We can't parse the
1974			 * partition table on whole-disk images unless the
1975			 * (undocumented) "secsize=..." mount option is used,
1976			 * but at least this allows us to mount if we have
1977			 * an image of a partition.
1978			 */
1979			PC_DPRINTF1(3,
1980			    "!pcfs: Using BPB secsize %d\n", (int)secsize);
1981		}
1982	}
1983
1984	if (fsp->pcfs_mediasize == 0) {
1985		mediasize = (len_t)totsec * (len_t)secsize;
1986		/*
1987		 * This is not an error because not all devices support the
1988		 * dkio(7i) mediasize queries, and/or not all devices are
1989		 * partitioned. If we have not been able to figure out the
1990		 * size of the underlaying medium, we have to trust the BPB.
1991		 */
1992		PC_DPRINTF4(3, "!pcfs: parseBPB: mediasize autodetect failed "
1993		    "on device (%x.%x):%d, trusting BPB totsec (%lld Bytes)\n",
1994		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
1995		    fsp->pcfs_ldrive, (long long)fsp->pcfs_mediasize);
1996	} else if ((len_t)totsec * (len_t)secsize > fsp->pcfs_mediasize) {
1997		cmn_err(CE_WARN,
1998		    "!pcfs: autodetected mediasize (%lld Bytes) smaller than "
1999		    "FAT BPB mediasize (%lld Bytes).\n"
2000		    "truncated filesystem on device (%x.%x):%d, access errors "
2001		    "possible.\n",
2002		    (long long)fsp->pcfs_mediasize,
2003		    (long long)(totsec * (blkcnt_t)secsize),
2004		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2005		    fsp->pcfs_ldrive);
2006		mediasize = fsp->pcfs_mediasize;
2007	} else {
2008		/*
2009		 * This is actually ok. A FAT needs not occupy the maximum
2010		 * space available in its partition, it can be shorter.
2011		 */
2012		mediasize = (len_t)totsec * (len_t)secsize;
2013	}
2014
2015	/*
2016	 * Since we let just about anything pass through this function,
2017	 * fence against divide-by-zero here.
2018	 */
2019	if (secsize)
2020		rdirsec = roundup(rec * 32, secsize) / secsize;
2021	else
2022		rdirsec = 0;
2023
2024	/*
2025	 * This assignment is necessary before pc_dbdaddr() can first be
2026	 * used. Must initialize the value here.
2027	 */
2028	fsp->pcfs_secsize = secsize;
2029	fsp->pcfs_sdshift = ddi_ffs(secsize / DEV_BSIZE) - 1;
2030
2031	fsp->pcfs_mediasize = mediasize;
2032
2033	fsp->pcfs_spcl = bpb_get_SecPerClus(bpb);
2034	fsp->pcfs_numfat = bpb_get_NumFATs(bpb);
2035	fsp->pcfs_mediadesc = bpb_get_Media(bpb);
2036	fsp->pcfs_clsize = secsize * fsp->pcfs_spcl;
2037	fsp->pcfs_rdirsec = rdirsec;
2038
2039	/*
2040	 * Remember: All PCFS offset calculations in sectors. Before I/O
2041	 * is done, convert to DEV_BSIZE units via pc_dbdaddr(). This is
2042	 * necessary so that media with > 512Byte sector sizes work correctly.
2043	 */
2044	fsp->pcfs_fatstart = fsp->pcfs_dosstart + reserved;
2045	fsp->pcfs_rdirstart = fsp->pcfs_fatstart + fsp->pcfs_numfat * fatsec;
2046	fsp->pcfs_datastart = fsp->pcfs_rdirstart + rdirsec;
2047	datasec = totsec -
2048	    (blkcnt_t)fatsec * fsp->pcfs_numfat -
2049	    (blkcnt_t)rdirsec -
2050	    (blkcnt_t)reserved;
2051
2052	DTRACE_PROBE4(fatgeometry,
2053	    blkcnt_t, totsec, size_t, fatsec,
2054	    size_t, rdirsec, blkcnt_t, datasec);
2055
2056	/*
2057	 * 'totsec' is taken directly from the BPB and guaranteed to fit
2058	 * into a 32bit unsigned integer. The calculation of 'datasec',
2059	 * on the other hand, could underflow for incorrect values in
2060	 * rdirsec/reserved/fatsec. Check for that.
2061	 * We also check that the BPB conforms to the FAT specification's
2062	 * requirement that either of the 16/32bit total sector counts
2063	 * must be zero.
2064	 */
2065	if (totsec != 0 &&
2066	    (totsec16 == totsec32 || totsec16 == 0 || totsec32 == 0) &&
2067	    datasec < totsec && datasec <= UINT32_MAX)
2068		validflags |= BPB_TOTSEC_OK;
2069
2070	if ((len_t)totsec * (len_t)secsize <= mediasize)
2071		validflags |= BPB_MEDIASZ_OK;
2072
2073	if (VALID_SECSIZE(secsize))
2074		validflags |= BPB_SECSIZE_OK;
2075	if (VALID_SPCL(fsp->pcfs_spcl))
2076		validflags |= BPB_SECPERCLUS_OK;
2077	if (VALID_CLSIZE(fsp->pcfs_clsize))
2078		validflags |= BPB_CLSIZE_OK;
2079	if (VALID_NUMFATS(fsp->pcfs_numfat))
2080		validflags |= BPB_NUMFAT_OK;
2081	if (VALID_RSVDSEC(reserved) && reserved < totsec)
2082		validflags |= BPB_RSVDSECCNT_OK;
2083	if (VALID_MEDIA(fsp->pcfs_mediadesc))
2084		validflags |= BPB_MEDIADESC_OK;
2085	if (VALID_BOOTSIG(bpb_get_BootSig16(bpb)))
2086		validflags |= BPB_BOOTSIG16_OK;
2087	if (VALID_BOOTSIG(bpb_get_BootSig32(bpb)))
2088		validflags |= BPB_BOOTSIG32_OK;
2089	if (VALID_FSTYPSTR16(bpb_FilSysType16(bpb)))
2090		validflags |= BPB_FSTYPSTR16_OK;
2091	if (VALID_FSTYPSTR32(bpb_FilSysType32(bpb)))
2092		validflags |= BPB_FSTYPSTR32_OK;
2093	if (VALID_OEMNAME(bpb_OEMName(bpb)))
2094		validflags |= BPB_OEMNAME_OK;
2095	if (bkbootsec > 0 && bkbootsec <= reserved && fsisec != bkbootsec)
2096		validflags |= BPB_BKBOOTSEC_OK;
2097	if (fsisec > 0 && fsisec <= reserved)
2098		validflags |= BPB_FSISEC_OK;
2099	if (VALID_JMPBOOT(bpb_jmpBoot(bpb)))
2100		validflags |= BPB_JMPBOOT_OK;
2101	if (VALID_FSVER32(bpb_get_FSVer32(bpb)))
2102		validflags |= BPB_FSVER_OK;
2103	if (VALID_VOLLAB(bpb_VolLab16(bpb)))
2104		validflags |= BPB_VOLLAB16_OK;
2105	if (VALID_VOLLAB(bpb_VolLab32(bpb)))
2106		validflags |= BPB_VOLLAB32_OK;
2107	if (VALID_EXTFLAGS(bpb_get_ExtFlags32(bpb)))
2108		validflags |= BPB_EXTFLAGS_OK;
2109
2110	/*
2111	 * Try to determine which FAT format to use.
2112	 *
2113	 * Calculate the number of clusters in order to determine
2114	 * the type of FAT we are looking at.  This is the only
2115	 * recommended way of determining FAT type, though there
2116	 * are other hints in the data, this is the best way.
2117	 *
2118	 * Since we let just about "anything" pass through this function
2119	 * without early exits, fence against divide-by-zero here.
2120	 *
2121	 * datasec was already validated against UINT32_MAX so we know
2122	 * the result will not overflow the 32bit calculation.
2123	 */
2124	if (fsp->pcfs_spcl)
2125		ncl = (uint32_t)datasec / fsp->pcfs_spcl;
2126	else
2127		ncl = 0;
2128
2129	fsp->pcfs_ncluster = ncl;
2130
2131	/*
2132	 * From the Microsoft FAT specification:
2133	 * In the following example, when it says <, it does not mean <=.
2134	 * Note also that the numbers are correct.  The first number for
2135	 * FAT12 is 4085; the second number for FAT16 is 65525. These numbers
2136	 * and the '<' signs are not wrong.
2137	 *
2138	 * We "specialdetect" the corner cases, and use at least one "extra"
2139	 * criterion to decide whether it's FAT16 or FAT32 if the cluster
2140	 * count is dangerously close to the boundaries.
2141	 */
2142
2143	if (ncl <= PCF_FIRSTCLUSTER) {
2144		type = FAT_UNKNOWN;
2145	} else if (ncl < 4085) {
2146		type = FAT12;
2147	} else if (ncl <= 4096) {
2148		type = FAT_QUESTIONABLE;
2149	} else if (ncl < 65525) {
2150		type = FAT16;
2151	} else if (ncl <= 65536) {
2152		type = FAT_QUESTIONABLE;
2153	} else if (ncl < PCF_LASTCLUSTER32) {
2154		type = FAT32;
2155	} else {
2156		type = FAT_UNKNOWN;
2157	}
2158
2159	DTRACE_PROBE4(parseBPB__initial,
2160	    struct pcfs *, fsp, unsigned char *, bpb,
2161	    int, validflags, fattype_t, type);
2162
2163recheck:
2164	fsp->pcfs_fatsec = fatsec;
2165
2166	/* Do some final sanity checks for each specific type of FAT */
2167	switch (type) {
2168		case FAT12:
2169			if (rec != 0)
2170				validflags |= BPB_ROOTENTCNT_OK;
2171			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2172			    bpb_get_TotSec16(bpb) == 0)
2173				validflags |= BPB_TOTSEC16_OK;
2174			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2175			    bpb_get_TotSec32(bpb) == 0)
2176				validflags |= BPB_TOTSEC32_OK;
2177			if (bpb_get_FatSz16(bpb) == fatsec)
2178				validflags |= BPB_FATSZ16_OK;
2179			if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER)
2180			    * 3 / 2)
2181				validflags |= BPB_FATSZ_OK;
2182			if (ncl < 4085)
2183				validflags |= BPB_NCLUSTERS_OK;
2184
2185			fsp->pcfs_lastclmark = (PCF_LASTCLUSTER & 0xfff);
2186			fsp->pcfs_rootblksize =
2187			    fsp->pcfs_rdirsec * secsize;
2188			fsp->pcfs_fsistart = 0;
2189
2190			if ((validflags & FAT12_VALIDMSK) != FAT12_VALIDMSK)
2191				type = FAT_UNKNOWN;
2192			break;
2193		case FAT16:
2194			if (rec != 0)
2195				validflags |= BPB_ROOTENTCNT_OK;
2196			if ((blkcnt_t)bpb_get_TotSec16(bpb) == totsec ||
2197			    bpb_get_TotSec16(bpb) == 0)
2198				validflags |= BPB_TOTSEC16_OK;
2199			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec ||
2200			    bpb_get_TotSec32(bpb) == 0)
2201				validflags |= BPB_TOTSEC32_OK;
2202			if (bpb_get_FatSz16(bpb) == fatsec)
2203				validflags |= BPB_FATSZ16_OK;
2204			if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 2)
2205				validflags |= BPB_FATSZ_OK;
2206			if (ncl >= 4085 && ncl < 65525)
2207				validflags |= BPB_NCLUSTERS_OK;
2208
2209			fsp->pcfs_lastclmark = PCF_LASTCLUSTER;
2210			fsp->pcfs_rootblksize =
2211			    fsp->pcfs_rdirsec * secsize;
2212			fsp->pcfs_fsistart = 0;
2213
2214			if ((validflags & FAT16_VALIDMSK) != FAT16_VALIDMSK)
2215				type = FAT_UNKNOWN;
2216			break;
2217		case FAT32:
2218			if (rec == 0)
2219				validflags |= BPB_ROOTENTCNT_OK;
2220			if (bpb_get_TotSec16(bpb) == 0)
2221				validflags |= BPB_TOTSEC16_OK;
2222			if ((blkcnt_t)bpb_get_TotSec32(bpb) == totsec)
2223				validflags |= BPB_TOTSEC32_OK;
2224			if (bpb_get_FatSz16(bpb) == 0)
2225				validflags |= BPB_FATSZ16_OK;
2226			if (bpb_get_FatSz32(bpb) == fatsec)
2227				validflags |= BPB_FATSZ32_OK;
2228			if (fatsec * secsize >= (ncl + PCF_FIRSTCLUSTER) * 4)
2229				validflags |= BPB_FATSZ_OK;
2230			if (ncl >= 65525 && ncl < PCF_LASTCLUSTER32)
2231				validflags |= BPB_NCLUSTERS_OK;
2232
2233			fsp->pcfs_lastclmark = PCF_LASTCLUSTER32;
2234			fsp->pcfs_rootblksize = fsp->pcfs_clsize;
2235			fsp->pcfs_fsistart = fsp->pcfs_dosstart + fsisec;
2236			if (validflags & BPB_FSISEC_OK)
2237				fsp->pcfs_flags |= PCFS_FSINFO_OK;
2238			fsp->pcfs_rootclnum = bpb_get_RootClus32(bpb);
2239			if (pc_validcl(fsp, fsp->pcfs_rootclnum))
2240				validflags |= BPB_ROOTCLUSTER_OK;
2241
2242			/*
2243			 * Current PCFS code only works if 'pcfs_rdirstart'
2244			 * contains the root cluster number on FAT32.
2245			 * That's a mis-use and would better be changed.
2246			 */
2247			fsp->pcfs_rdirstart = (daddr_t)fsp->pcfs_rootclnum;
2248
2249			if ((validflags & FAT32_VALIDMSK) != FAT32_VALIDMSK)
2250				type = FAT_UNKNOWN;
2251			break;
2252		case FAT_QUESTIONABLE:
2253			type = secondaryBPBChecks(fsp, bpb, secsize);
2254			goto recheck;
2255		default:
2256			ASSERT(type == FAT_UNKNOWN);
2257			break;
2258	}
2259
2260	ASSERT(type != FAT_QUESTIONABLE);
2261
2262	fsp->pcfs_fattype = type;
2263
2264	if (valid)
2265		*valid = validflags;
2266
2267	DTRACE_PROBE4(parseBPB__final,
2268	    struct pcfs *, fsp, unsigned char *, bpb,
2269	    int, validflags, fattype_t, type);
2270
2271	if (type != FAT_UNKNOWN) {
2272		ASSERT((secsize & (DEV_BSIZE - 1)) == 0);
2273		ASSERT(ISP2(secsize / DEV_BSIZE));
2274		return (1);
2275	}
2276
2277	return (0);
2278}
2279
2280
2281/*
2282 * Detect the device's native block size (sector size).
2283 *
2284 * Test whether the device is:
2285 *	- a floppy device from a known controller type via DKIOCINFO
2286 *	- a real floppy using the fd(7d) driver and capable of fdio(7I) ioctls
2287 *	- a USB floppy drive (identified by drive geometry)
2288 *
2289 * Detecting a floppy will make PCFS metadata updates on such media synchronous,
2290 * to minimize risks due to slow I/O and user hotplugging / device ejection.
2291 *
2292 * This might be a bit wasteful on kernel stack space; if anyone's
2293 * bothered by this, kmem_alloc/kmem_free the ioctl arguments...
2294 */
2295static void
2296pcfs_device_getinfo(struct pcfs *fsp)
2297{
2298	dev_t			rdev = fsp->pcfs_xdev;
2299	int			error;
2300	union {
2301		struct dk_minfo		mi;
2302		struct dk_cinfo		ci;
2303		struct dk_geom		gi;
2304		struct fd_char		fc;
2305	} arg;				/* save stackspace ... */
2306	intptr_t argp = (intptr_t)&arg;
2307	ldi_handle_t		lh;
2308	ldi_ident_t		li;
2309	int isfloppy, isremoveable, ishotpluggable;
2310	cred_t			*cr = CRED();
2311
2312	if (ldi_ident_from_dev(rdev, &li))
2313		goto out;
2314
2315	error = ldi_open_by_dev(&rdev, OTYP_CHR, FREAD, cr, &lh, li);
2316	ldi_ident_release(li);
2317	if (error)
2318		goto out;
2319
2320	/*
2321	 * Not sure if this could possibly happen. It'd be a bit like
2322	 * VOP_OPEN() changing the passed-in vnode ptr. We're just not
2323	 * expecting it, needs some thought if triggered ...
2324	 */
2325	ASSERT(fsp->pcfs_xdev == rdev);
2326
2327	/*
2328	 * Check for removeable/hotpluggable media.
2329	 */
2330	if (ldi_ioctl(lh, DKIOCREMOVABLE,
2331	    (intptr_t)&isremoveable, FKIOCTL, cr, NULL)) {
2332		isremoveable = 0;
2333	}
2334	if (ldi_ioctl(lh, DKIOCHOTPLUGGABLE,
2335	    (intptr_t)&ishotpluggable, FKIOCTL, cr, NULL)) {
2336		ishotpluggable = 0;
2337	}
2338
2339	/*
2340	 * Make sure we don't use "half-initialized" values if the ioctls fail.
2341	 */
2342	if (ldi_ioctl(lh, DKIOCGMEDIAINFO, argp, FKIOCTL, cr, NULL)) {
2343		bzero(&arg, sizeof (arg));
2344		fsp->pcfs_mediasize = 0;
2345	} else {
2346		fsp->pcfs_mediasize =
2347		    (len_t)arg.mi.dki_lbsize *
2348		    (len_t)arg.mi.dki_capacity;
2349	}
2350
2351	if (VALID_SECSIZE(arg.mi.dki_lbsize)) {
2352		if (fsp->pcfs_secsize == 0) {
2353			fsp->pcfs_secsize = arg.mi.dki_lbsize;
2354			fsp->pcfs_sdshift =
2355			    ddi_ffs(arg.mi.dki_lbsize / DEV_BSIZE) - 1;
2356		} else {
2357			PC_DPRINTF4(1, "!pcfs: autodetected media block size "
2358			    "%d, device (%x.%x), different from user-provided "
2359			    "%d. User override - ignoring autodetect result.\n",
2360			    arg.mi.dki_lbsize,
2361			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2362			    fsp->pcfs_secsize);
2363		}
2364	} else if (arg.mi.dki_lbsize) {
2365		PC_DPRINTF3(1, "!pcfs: autodetected media block size "
2366		    "%d, device (%x.%x), invalid (not 512, 1024, 2048, 4096). "
2367		    "Ignoring autodetect result.\n",
2368		    arg.mi.dki_lbsize,
2369		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev));
2370	}
2371
2372	/*
2373	 * We treat the following media types as a floppy by default.
2374	 */
2375	isfloppy =
2376	    (arg.mi.dki_media_type == DK_FLOPPY ||
2377	    arg.mi.dki_media_type == DK_ZIP ||
2378	    arg.mi.dki_media_type == DK_JAZ);
2379
2380	/*
2381	 * if this device understands fdio(7I) requests it's
2382	 * obviously a floppy drive.
2383	 */
2384	if (!isfloppy &&
2385	    !ldi_ioctl(lh, FDIOGCHAR, argp, FKIOCTL, cr, NULL))
2386		isfloppy = 1;
2387
2388	/*
2389	 * some devices we like to treat as floppies, but they don't
2390	 * understand fdio(7I) requests.
2391	 */
2392	if (!isfloppy &&
2393	    !ldi_ioctl(lh, DKIOCINFO, argp, FKIOCTL, cr, NULL) &&
2394	    (arg.ci.dki_ctype == DKC_WDC2880 ||
2395	    arg.ci.dki_ctype == DKC_NCRFLOPPY ||
2396	    arg.ci.dki_ctype == DKC_SMSFLOPPY ||
2397	    arg.ci.dki_ctype == DKC_INTEL82077))
2398		isfloppy = 1;
2399
2400	/*
2401	 * This is the "final fallback" test - media with
2402	 * 2 heads and 80 cylinders are assumed to be floppies.
2403	 * This is normally true for USB floppy drives ...
2404	 */
2405	if (!isfloppy &&
2406	    !ldi_ioctl(lh, DKIOCGGEOM, argp, FKIOCTL, cr, NULL) &&
2407	    (arg.gi.dkg_ncyl == 80 && arg.gi.dkg_nhead == 2))
2408		isfloppy = 1;
2409
2410	/*
2411	 * This is similar to the "old" PCFS code that sets this flag
2412	 * just based on the media descriptor being 0xf8 (MD_FIXED).
2413	 * Should be re-worked. We really need some specialcasing for
2414	 * removeable media.
2415	 */
2416	if (!isfloppy) {
2417		fsp->pcfs_flags |= PCFS_NOCHK;
2418	}
2419
2420	/*
2421	 * We automatically disable access time updates if the medium is
2422	 * removeable and/or hotpluggable, and the admin did not explicitly
2423	 * request access time updates (via the "atime" mount option).
2424	 * The majority of flash-based media should fit this category.
2425	 * Minimizing write access extends the lifetime of your memory stick !
2426	 */
2427	if (!vfs_optionisset(fsp->pcfs_vfs, MNTOPT_ATIME, NULL) &&
2428	    (isremoveable || ishotpluggable | isfloppy)) {
2429		fsp->pcfs_flags |= PCFS_NOATIME;
2430	}
2431
2432	(void) ldi_close(lh, FREAD, cr);
2433out:
2434	if (fsp->pcfs_secsize == 0) {
2435		PC_DPRINTF3(1, "!pcfs: media block size autodetection "
2436		    "device (%x.%x) failed, no user-provided fallback. "
2437		    "Using %d bytes.\n",
2438		    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2439		    DEV_BSIZE);
2440		fsp->pcfs_secsize = DEV_BSIZE;
2441		fsp->pcfs_sdshift = 0;
2442	}
2443	ASSERT(fsp->pcfs_secsize % DEV_BSIZE == 0);
2444	ASSERT(VALID_SECSIZE(fsp->pcfs_secsize));
2445}
2446
2447/*
2448 * Get the FAT type for the DOS medium.
2449 *
2450 * -------------------------
2451 * According to Microsoft:
2452 *   The FAT type one of FAT12, FAT16, or FAT32 is determined by the
2453 * count of clusters on the volume and nothing else.
2454 * -------------------------
2455 *
2456 */
2457static int
2458pc_getfattype(struct pcfs *fsp)
2459{
2460	int error = 0;
2461	buf_t *bp = NULL;
2462	struct vnode *devvp = fsp->pcfs_devvp;
2463	dev_t	dev = devvp->v_rdev;
2464
2465	/*
2466	 * Detect the native block size of the medium, and attempt to
2467	 * detect whether the medium is removeable.
2468	 * We do treat removable media (floppies, USB and FireWire disks)
2469	 * differently wrt. to the frequency and synchronicity of FAT updates.
2470	 * We need to know the media block size in order to be able to
2471	 * parse the partition table.
2472	 */
2473	pcfs_device_getinfo(fsp);
2474
2475	/*
2476	 * Unpartitioned media (floppies and some removeable devices)
2477	 * don't have a partition table, the FAT BPB is at disk block 0.
2478	 * Start out by reading block 0.
2479	 */
2480	fsp->pcfs_dosstart = 0;
2481	bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart), fsp->pcfs_secsize);
2482
2483	if (error = geterror(bp))
2484		goto out;
2485
2486	/*
2487	 * If a logical drive number is requested, parse the partition table
2488	 * and attempt to locate it. Otherwise, proceed immediately to the
2489	 * BPB check. findTheDrive(), if successful, returns the disk block
2490	 * number where the requested partition starts in "startsec".
2491	 */
2492	if (fsp->pcfs_ldrive != 0) {
2493		PC_DPRINTF3(5, "!pcfs: pc_getfattype: using FDISK table on "
2494		    "device (%x,%x):%d to find BPB\n",
2495		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive);
2496
2497		if (error = findTheDrive(fsp, &bp))
2498			goto out;
2499
2500		ASSERT(fsp->pcfs_dosstart != 0);
2501
2502		brelse(bp);
2503		bp = bread(dev, pc_dbdaddr(fsp, fsp->pcfs_dosstart),
2504		    fsp->pcfs_secsize);
2505		if (error = geterror(bp))
2506			goto out;
2507	}
2508
2509	/*
2510	 * Validate the BPB and fill in the instance structure.
2511	 */
2512	if (!parseBPB(fsp, (uchar_t *)bp->b_un.b_addr, NULL)) {
2513		PC_DPRINTF4(1, "!pcfs: pc_getfattype: No FAT BPB on "
2514		    "device (%x.%x):%d, disk LBA %u\n",
2515		    getmajor(dev), getminor(dev), fsp->pcfs_ldrive,
2516		    (uint_t)pc_dbdaddr(fsp, fsp->pcfs_dosstart));
2517		error = EINVAL;
2518		goto out;
2519	}
2520
2521	ASSERT(fsp->pcfs_fattype != FAT_UNKNOWN);
2522
2523out:
2524	/*
2525	 * Release the buffer used
2526	 */
2527	if (bp != NULL)
2528		brelse(bp);
2529	return (error);
2530}
2531
2532
2533/*
2534 * Get the file allocation table.
2535 * If there is an old FAT, invalidate it.
2536 */
2537int
2538pc_getfat(struct pcfs *fsp)
2539{
2540	struct buf *bp = NULL;
2541	uchar_t *fatp = NULL;
2542	uchar_t *fat_changemap = NULL;
2543	int error;
2544	int fat_changemapsize;
2545	int flags = 0;
2546	int nfat;
2547	int altfat_mustmatch = 0;
2548	int fatsize = fsp->pcfs_fatsec * fsp->pcfs_secsize;
2549
2550	if (fsp->pcfs_fatp) {
2551		/*
2552		 * There is a FAT in core.
2553		 * If there are open file pcnodes or we have modified it or
2554		 * it hasn't timed out yet use the in core FAT.
2555		 * Otherwise invalidate it and get a new one
2556		 */
2557#ifdef notdef
2558		if (fsp->pcfs_frefs ||
2559		    (fsp->pcfs_flags & PCFS_FATMOD) ||
2560		    (gethrestime_sec() < fsp->pcfs_fattime)) {
2561			return (0);
2562		} else {
2563			mutex_enter(&pcfslock);
2564			pc_invalfat(fsp);
2565			mutex_exit(&pcfslock);
2566		}
2567#endif /* notdef */
2568		return (0);
2569	}
2570
2571	/*
2572	 * Get FAT and check it for validity
2573	 */
2574	fatp = kmem_alloc(fatsize, KM_SLEEP);
2575	error = pc_readfat(fsp, fatp);
2576	if (error) {
2577		flags = B_ERROR;
2578		goto out;
2579	}
2580	fat_changemapsize = (fatsize / fsp->pcfs_clsize) + 1;
2581	fat_changemap = kmem_zalloc(fat_changemapsize, KM_SLEEP);
2582	fsp->pcfs_fatp = fatp;
2583	fsp->pcfs_fat_changemapsize = fat_changemapsize;
2584	fsp->pcfs_fat_changemap = fat_changemap;
2585
2586	/*
2587	 * The only definite signature check is that the
2588	 * media descriptor byte should match the first byte
2589	 * of the FAT block.
2590	 */
2591	if (fatp[0] != fsp->pcfs_mediadesc) {
2592		cmn_err(CE_NOTE, "!pcfs: FAT signature mismatch, "
2593		    "media descriptor %x, FAT[0] lowbyte %x\n",
2594		    (uint32_t)fsp->pcfs_mediadesc, (uint32_t)fatp[0]);
2595		cmn_err(CE_NOTE, "!pcfs: Enforcing alternate FAT validation\n");
2596		altfat_mustmatch = 1;
2597	}
2598
2599	/*
2600	 * Get alternate FATs and check for consistency
2601	 * This is an inlined version of pc_readfat().
2602	 * Since we're only comparing FAT and alternate FAT,
2603	 * there's no reason to let pc_readfat() copy data out
2604	 * of the buf. Instead, compare in-situ, one cluster
2605	 * at a time.
2606	 */
2607	for (nfat = 1; nfat < fsp->pcfs_numfat; nfat++) {
2608		size_t startsec;
2609		size_t off;
2610
2611		startsec = pc_dbdaddr(fsp,
2612		    fsp->pcfs_fatstart + nfat * fsp->pcfs_fatsec);
2613
2614		for (off = 0; off < fatsize; off += fsp->pcfs_clsize) {
2615			daddr_t fatblk = startsec + pc_dbdaddr(fsp,
2616			    pc_cltodb(fsp, pc_lblkno(fsp, off)));
2617
2618			bp = bread(fsp->pcfs_xdev, fatblk,
2619			    MIN(fsp->pcfs_clsize, fatsize - off));
2620			if (bp->b_flags & (B_ERROR | B_STALE)) {
2621				cmn_err(CE_NOTE,
2622				    "!pcfs: alternate FAT #%d (start LBA %p)"
2623				    " read error at offset %ld on device"
2624				    " (%x.%x):%d",
2625				    nfat, (void *)(uintptr_t)startsec, off,
2626				    getmajor(fsp->pcfs_xdev),
2627				    getminor(fsp->pcfs_xdev),
2628				    fsp->pcfs_ldrive);
2629				flags = B_ERROR;
2630				error = EIO;
2631				goto out;
2632			}
2633			bp->b_flags |= B_STALE | B_AGE;
2634			if (bcmp(bp->b_un.b_addr, fatp + off,
2635			    MIN(fsp->pcfs_clsize, fatsize - off))) {
2636				cmn_err(CE_NOTE,
2637				    "!pcfs: alternate FAT #%d (start LBA %p)"
2638				    " corrupted at offset %ld on device"
2639				    " (%x.%x):%d",
2640				    nfat, (void *)(uintptr_t)startsec, off,
2641				    getmajor(fsp->pcfs_xdev),
2642				    getminor(fsp->pcfs_xdev),
2643				    fsp->pcfs_ldrive);
2644				if (altfat_mustmatch) {
2645					flags = B_ERROR;
2646					error = EIO;
2647					goto out;
2648				}
2649			}
2650			brelse(bp);
2651			bp = NULL;	/* prevent double release */
2652		}
2653	}
2654
2655	fsp->pcfs_fattime = gethrestime_sec() + PCFS_DISKTIMEOUT;
2656	fsp->pcfs_fatjustread = 1;
2657
2658	/*
2659	 * Retrieve FAT32 fsinfo sector.
2660	 * A failure to read this is not fatal to accessing the volume.
2661	 * It simply means operations that count or search free blocks
2662	 * will have to do a full FAT walk, vs. a possibly quicker lookup
2663	 * of the summary information.
2664	 * Hence, we log a message but return success overall after this point.
2665	 */
2666	if (IS_FAT32(fsp) && (fsp->pcfs_flags & PCFS_FSINFO_OK)) {
2667		struct fat_od_fsi *fsinfo_disk;
2668
2669		bp = bread(fsp->pcfs_xdev,
2670		    pc_dbdaddr(fsp, fsp->pcfs_fsistart), fsp->pcfs_secsize);
2671		fsinfo_disk = (struct fat_od_fsi *)bp->b_un.b_addr;
2672		if (bp->b_flags & (B_ERROR | B_STALE) ||
2673		    !FSISIG_OK(fsinfo_disk)) {
2674			cmn_err(CE_NOTE,
2675			    "!pcfs: error reading fat32 fsinfo from "
2676			    "device (%x.%x):%d, block %lld",
2677			    getmajor(fsp->pcfs_xdev), getminor(fsp->pcfs_xdev),
2678			    fsp->pcfs_ldrive,
2679			    (long long)pc_dbdaddr(fsp, fsp->pcfs_fsistart));
2680			fsp->pcfs_flags &= ~PCFS_FSINFO_OK;
2681			fsp->pcfs_fsinfo.fs_free_clusters = FSINFO_UNKNOWN;
2682			fsp->pcfs_fsinfo.fs_next_free = FSINFO_UNKNOWN;
2683		} else {
2684			bp->b_flags |= B_STALE | B_AGE;
2685			fsinfo_disk = (fat_od_fsi_t *)(bp->b_un.b_addr);
2686			fsp->pcfs_fsinfo.fs_free_clusters =
2687			    LE_32(fsinfo_disk->fsi_incore.fs_free_clusters);
2688			fsp->pcfs_fsinfo.fs_next_free =
2689			    LE_32(fsinfo_disk->fsi_incore.fs_next_free);
2690		}
2691		brelse(bp);
2692		bp = NULL;
2693	}
2694
2695	if (pc_validcl(fsp, (pc_cluster32_t)fsp->pcfs_fsinfo.fs_next_free))
2696		fsp->pcfs_nxfrecls = fsp->pcfs_fsinfo.fs_next_free;
2697	else
2698		fsp->pcfs_nxfrecls = PCF_FIRSTCLUSTER;
2699
2700	return (0);
2701
2702out:
2703	cmn_err(CE_NOTE, "!pcfs: illegal disk format");
2704	if (bp)
2705		brelse(bp);
2706	if (fatp)
2707		kmem_free(fatp, fatsize);
2708	if (fat_changemap)
2709		kmem_free(fat_changemap, fat_changemapsize);
2710
2711	if (flags) {
2712		pc_mark_irrecov(fsp);
2713	}
2714	return (error);
2715}
2716