1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2019 Google LLC
5 * Copyright (C) 1995, 1996, 1997 Wolfgang Solfrank
6 * Copyright (c) 1995 Martin Husemann
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29
30#include <sys/cdefs.h>
31#ifndef lint
32__RCSID("$NetBSD: fat.c,v 1.18 2006/06/05 16:51:18 christos Exp $");
33static const char rcsid[] =
34  "$FreeBSD: stable/11/sbin/fsck_msdosfs/fat.c 360490 2020-04-30 06:34:34Z delphij $";
35#endif /* not lint */
36
37#include <sys/endian.h>
38#include <sys/queue.h>
39#include <sys/limits.h>
40#include <sys/mman.h>
41#include <sys/param.h>
42
43#include <assert.h>
44#include <stdbool.h>
45#include <stdlib.h>
46#include <string.h>
47#include <ctype.h>
48#include <stdio.h>
49#include <unistd.h>
50
51#include "ext.h"
52#include "fsutil.h"
53
54static int _readfat(struct fat_descriptor *);
55static inline struct bootblock* boot_of_(struct fat_descriptor *);
56static inline int fd_of_(struct fat_descriptor *);
57static inline bool valid_cl(struct fat_descriptor *, cl_t);
58
59
60/*
61 * Head bitmap for FAT scanning.
62 *
 * A FAT32 can have up to 2^28 = 256M entries, while FAT16/12 have
 * far fewer.
64 * For each cluster, we use 1 bit to represent if it's a head cluster
65 * (the first cluster of a cluster chain).
66 *
67 * Head bitmap
68 * ===========
69 * Initially, we set all bits to 1.  In readfat(), we traverse the
70 * whole FAT and mark each cluster identified as "next" cluster as
71 * 0.  After the scan, we have a bitmap with 1's to indicate the
72 * corresponding cluster was a "head" cluster.
73 *
74 * We use head bitmap to identify lost chains: a head cluster that was
75 * not being claimed by any file or directories is the head cluster of
76 * a lost chain.
77 *
 * Handling of lost chains
79 * =====================
80 * At the end of scanning, we can easily find all lost chain's heads
81 * by finding out the 1's in the head bitmap.
82 */
83
/*
 * A bitmap of cluster numbers with a cached population count.
 * Used as the head bitmap described above; one bit per cluster.
 */
typedef struct long_bitmap {
	unsigned long	*map;
	size_t		 count;		/* Total set bits in the map */
} long_bitmap_t;
88
89static inline void
90bitmap_clear(long_bitmap_t *lbp, cl_t cl)
91{
92	cl_t i = cl / LONG_BIT;
93	unsigned long clearmask = ~(1UL << (cl % LONG_BIT));
94
95	assert((lbp->map[i] & ~clearmask) != 0);
96	lbp->map[i] &= clearmask;
97	lbp->count--;
98}
99
100static inline bool
101bitmap_get(long_bitmap_t *lbp, cl_t cl)
102{
103	cl_t i = cl / LONG_BIT;
104	unsigned long usedbit = 1UL << (cl % LONG_BIT);
105
106	return ((lbp->map[i] & usedbit) == usedbit);
107}
108
109static inline bool
110bitmap_none_in_range(long_bitmap_t *lbp, cl_t cl)
111{
112	cl_t i = cl / LONG_BIT;
113
114	return (lbp->map[i] == 0);
115}
116
/* Return the number of set bits; O(1) thanks to the cached count. */
static inline size_t
bitmap_count(long_bitmap_t *lbp)
{
	return (lbp->count);
}
122
123static int
124bitmap_ctor(long_bitmap_t *lbp, size_t bits, bool allone)
125{
126	size_t bitmap_size = roundup2(bits, LONG_BIT) / (LONG_BIT / 8);
127
128	free(lbp->map);
129	lbp->map = calloc(1, bitmap_size);
130	if (lbp->map == NULL)
131		return FSFATAL;
132
133	if (allone) {
134		memset(lbp->map, 0xff, bitmap_size);
135		lbp->count = bits;
136	} else {
137		lbp->count = 0;
138	}
139	return FSOK;
140}
141
/* Release the map storage; safe to call on an unconstructed bitmap. */
static void
bitmap_dtor(long_bitmap_t *lbp)
{
	free(lbp->map);
	/* allow a later bitmap_ctor()/bitmap_dtor() without double free */
	lbp->map = NULL;
}
148
149/*
150 * FAT32 can be as big as 256MiB (2^26 entries * 4 bytes), when we
151 * can not ask the kernel to manage the access, use a simple LRU
152 * cache with chunk size of 128 KiB to manage it.
153 */
/*
 * One LRU cache slot: a fat32_cache_chunk_size-byte window of the
 * FAT, identified by its byte offset within the FAT.
 */
struct fat32_cache_entry {
	TAILQ_ENTRY(fat32_cache_entry)	entries;
	uint8_t			*chunk;		/* pointer to chunk */
	off_t			 addr;		/* offset */
	bool			 dirty;		/* dirty bit */
};
160
static const size_t	fat32_cache_chunk_size = 131072; /* MAXPHYS */
static const size_t	fat32_cache_size = 4194304;	/* 4 MiB total cache */
static const size_t	fat32_cache_entries = 32; /* XXXgcc: cache_size / cache_chunk_size */
164
165/*
166 * FAT table descriptor, represents a FAT table that is already loaded
167 * into memory.
168 */
struct fat_descriptor {
	struct bootblock	*boot;
	/* FAT image: mmap()ed region, full in-RAM copy, or cache buffer */
	uint8_t			*fatbuf;
	/* FAT-width specific "next cluster" get/set accessors */
	cl_t			(*get)(struct fat_descriptor *, cl_t);
	int			(*set)(struct fat_descriptor *, cl_t, cl_t);
	/* one bit per cluster: 1 = possible head of a cluster chain */
	long_bitmap_t		 headbitmap;
	int			 fd;		/* device file descriptor */
	bool			 is_mmapped;	/* fatbuf is an mmap() region */
	bool			 use_cache;	/* FAT32 LRU cache in use */
	size_t		  	 fatsize;	/* FAT size in bytes */

	/* FAT32 LRU cache state; only meaningful when use_cache is true */
	size_t			 fat32_cached_chunks;	/* NOTE(review): not referenced in this file */
	TAILQ_HEAD(cachehead, fat32_cache_entry)	fat32_cache_head;
	struct fat32_cache_entry	*fat32_cache_allentries;
	off_t			 fat32_offset;	/* byte offset of FAT0 on disk */
	off_t			 fat32_lastaddr; /* FAT offset of the last chunk */
};
186
187void
188fat_clear_cl_head(struct fat_descriptor *fat, cl_t cl)
189{
190	bitmap_clear(&fat->headbitmap, cl);
191}
192
193bool
194fat_is_cl_head(struct fat_descriptor *fat, cl_t cl)
195{
196	return (bitmap_get(&fat->headbitmap, cl));
197}
198
199static inline bool
200fat_is_cl_head_in_range(struct fat_descriptor *fat, cl_t cl)
201{
202	return (!(bitmap_none_in_range(&fat->headbitmap, cl)));
203}
204
205static size_t
206fat_get_head_count(struct fat_descriptor *fat)
207{
208	return (bitmap_count(&fat->headbitmap));
209}
210
211/*
212 * FAT12 accessors.
213 *
 * FAT12s are sufficiently small that we expect them to always fit
 * in RAM.
215 */
216static inline uint8_t *
217fat_get_fat12_ptr(struct fat_descriptor *fat, cl_t cl)
218{
219	return (fat->fatbuf + ((cl + (cl >> 1))));
220}
221
222static cl_t
223fat_get_fat12_next(struct fat_descriptor *fat, cl_t cl)
224{
225	const uint8_t	*p;
226	cl_t	retval;
227
228	p = fat_get_fat12_ptr(fat, cl);
229	retval = le16dec(p);
230	/* Odd cluster: lower 4 bits belongs to the subsequent cluster */
231	if ((cl & 1) == 1)
232		retval >>= 4;
233	retval &= CLUST12_MASK;
234
235	if (retval >= (CLUST_BAD & CLUST12_MASK))
236		retval |= ~CLUST12_MASK;
237
238	return (retval);
239}
240
241static int
242fat_set_fat12_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
243{
244	uint8_t	*p;
245
246	/* Truncate 'nextcl' value, if needed */
247	nextcl &= CLUST12_MASK;
248
249	p = fat_get_fat12_ptr(fat, cl);
250
251	/*
252	 * Read in the 4 bits from the subsequent (for even clusters)
253	 * or the preceding (for odd clusters) cluster and combine
254	 * it to the nextcl value for encoding
255	 */
256	if ((cl & 1) == 0) {
257		nextcl |= ((p[1] & 0xf0) << 8);
258	} else {
259		nextcl <<= 4;
260		nextcl |= (p[0] & 0x0f);
261	}
262
263	le16enc(p, (uint16_t)nextcl);
264
265	return (0);
266}
267
268/*
269 * FAT16 accessors.
270 *
 * FAT16s are sufficiently small that we expect them to always fit
 * in RAM.
272 */
273static inline uint8_t *
274fat_get_fat16_ptr(struct fat_descriptor *fat, cl_t cl)
275{
276	return (fat->fatbuf + (cl << 1));
277}
278
279static cl_t
280fat_get_fat16_next(struct fat_descriptor *fat, cl_t cl)
281{
282	const uint8_t	*p;
283	cl_t	retval;
284
285	p = fat_get_fat16_ptr(fat, cl);
286	retval = le16dec(p) & CLUST16_MASK;
287
288	if (retval >= (CLUST_BAD & CLUST16_MASK))
289		retval |= ~CLUST16_MASK;
290
291	return (retval);
292}
293
294static int
295fat_set_fat16_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
296{
297	uint8_t	*p;
298
299	/* Truncate 'nextcl' value, if needed */
300	nextcl &= CLUST16_MASK;
301
302	p = fat_get_fat16_ptr(fat, cl);
303
304	le16enc(p, (uint16_t)nextcl);
305
306	return (0);
307}
308
309/*
310 * FAT32 accessors.
311 */
312static inline uint8_t *
313fat_get_fat32_ptr(struct fat_descriptor *fat, cl_t cl)
314{
315	return (fat->fatbuf + (cl << 2));
316}
317
318static cl_t
319fat_get_fat32_next(struct fat_descriptor *fat, cl_t cl)
320{
321	const uint8_t	*p;
322	cl_t	retval;
323
324	p = fat_get_fat32_ptr(fat, cl);
325	retval = le32dec(p) & CLUST32_MASK;
326
327	if (retval >= (CLUST_BAD & CLUST32_MASK))
328		retval |= ~CLUST32_MASK;
329
330	return (retval);
331}
332
333static int
334fat_set_fat32_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
335{
336	uint8_t	*p;
337
338	/* Truncate 'nextcl' value, if needed */
339	nextcl &= CLUST32_MASK;
340
341	p = fat_get_fat32_ptr(fat, cl);
342
343	le32enc(p, (uint32_t)nextcl);
344
345	return (0);
346}
347
/*
 * Return the transfer size for the FAT chunk at byte offset
 * 'address': a full cache chunk, except for the final chunk of the
 * FAT which may be partial.
 *
 * NOTE(review): for a FAT whose size is an exact multiple of the
 * chunk size this masking yields 0 for the last chunk — confirm the
 * fat32_lastaddr invariant guarantees that case never reaches here.
 */
static inline size_t
fat_get_iosize(struct fat_descriptor *fat, off_t address)
{

	if (address == fat->fat32_lastaddr) {
		return (fat->fatsize & ((off_t)fat32_cache_chunk_size - 1));
	} else {
		return (fat32_cache_chunk_size);
	}
}
358
359static int
360fat_flush_fat32_cache_entry(struct fat_descriptor *fat,
361    struct fat32_cache_entry *entry)
362{
363	int fd;
364	off_t fat_addr;
365	size_t writesize;
366
367	fd = fd_of_(fat);
368
369	if (!entry->dirty)
370		return (FSOK);
371
372	writesize = fat_get_iosize(fat, entry->addr);
373
374	fat_addr = fat->fat32_offset + entry->addr;
375	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
376	    (size_t)write(fd, entry->chunk, writesize) != writesize) {
377			pfatal("Unable to write FAT");
378			return (FSFATAL);
379	}
380
381	entry->dirty = false;
382	return (FSOK);
383}
384
/*
 * Find (or fault in) the cache chunk that covers FAT byte offset
 * 'addr' and return its cache entry, promoted to the head of the
 * LRU list.  When 'writing', the entry is marked dirty.  Returns
 * NULL if the evicted chunk could not be flushed or the new chunk
 * could not be read.
 */
static struct fat32_cache_entry *
fat_get_fat32_cache_entry(struct fat_descriptor *fat, off_t addr,
    bool writing)
{
	int fd;
	struct fat32_cache_entry *entry, *first;
	off_t	fat_addr;
	size_t	rwsize;

	/* Round down to the chunk boundary containing 'addr' */
	addr &= ~(fat32_cache_chunk_size - 1);

	first = TAILQ_FIRST(&fat->fat32_cache_head);

	/*
	 * Cache hit: if we already have the chunk, move it to list head
	 */
	TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
		if (entry->addr == addr) {
			if (writing) {
				entry->dirty = true;
			}
			if (entry != first) {

				TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
				TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);
			}
			return (entry);
		}
	}

	/*
	 * Cache miss: detach the chunk at tail of list, overwrite with
	 * the located chunk, and populate with data from disk.
	 */
	entry = TAILQ_LAST(&fat->fat32_cache_head, cachehead);
	TAILQ_REMOVE(&fat->fat32_cache_head, entry, entries);
	/* flush the evicted chunk first if it carries unwritten changes */
	if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
		return (NULL);
	}

	rwsize = fat_get_iosize(fat, addr);
	fat_addr = fat->fat32_offset + addr;
	entry->addr = addr;
	fd = fd_of_(fat);
	if (lseek(fd, fat_addr, SEEK_SET) != fat_addr ||
		(size_t)read(fd, entry->chunk, rwsize) != rwsize) {
		pfatal("Unable to read FAT");
		return (NULL);
	}
	if (writing) {
		entry->dirty = true;
	}
	TAILQ_INSERT_HEAD(&fat->fat32_cache_head, entry, entries);

	return (entry);
}
441
442static inline uint8_t *
443fat_get_fat32_cached_ptr(struct fat_descriptor *fat, cl_t cl, bool writing)
444{
445	off_t addr, off;
446	struct fat32_cache_entry *entry;
447
448	addr = cl << 2;
449	entry = fat_get_fat32_cache_entry(fat, addr, writing);
450
451	if (entry != NULL) {
452		off = addr & (fat32_cache_chunk_size - 1);
453		return (entry->chunk + off);
454	} else {
455		return (NULL);
456	}
457}
458
459
460static cl_t
461fat_get_fat32_cached_next(struct fat_descriptor *fat, cl_t cl)
462{
463	const uint8_t	*p;
464	cl_t	retval;
465
466	p = fat_get_fat32_cached_ptr(fat, cl, false);
467	if (p != NULL) {
468		retval = le32dec(p) & CLUST32_MASK;
469		if (retval >= (CLUST_BAD & CLUST32_MASK))
470			retval |= ~CLUST32_MASK;
471	} else {
472		retval = CLUST_DEAD;
473	}
474
475	return (retval);
476}
477
478static int
479fat_set_fat32_cached_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
480{
481	uint8_t	*p;
482
483	/* Truncate 'nextcl' value, if needed */
484	nextcl &= CLUST32_MASK;
485
486	p = fat_get_fat32_cached_ptr(fat, cl, true);
487	if (p != NULL) {
488		le32enc(p, (uint32_t)nextcl);
489		return FSOK;
490	} else {
491		return FSFATAL;
492	}
493}
494
495cl_t fat_get_cl_next(struct fat_descriptor *fat, cl_t cl)
496{
497
498	if (!valid_cl(fat, cl)) {
499		pfatal("Invalid cluster: %ud", cl);
500		return CLUST_DEAD;
501	}
502
503	return (fat->get(fat, cl));
504}
505
506int fat_set_cl_next(struct fat_descriptor *fat, cl_t cl, cl_t nextcl)
507{
508
509	if (rdonly) {
510		pwarn(" (NO WRITE)\n");
511		return FSFATAL;
512	}
513
514	if (!valid_cl(fat, cl)) {
515		pfatal("Invalid cluster: %ud", cl);
516		return FSFATAL;
517	}
518
519	return (fat->set(fat, cl, nextcl));
520}
521
522static inline struct bootblock*
523boot_of_(struct fat_descriptor *fat) {
524
525	return (fat->boot);
526}
527
/* Public accessor for the boot block bound to this FAT descriptor. */
struct bootblock*
fat_get_boot(struct fat_descriptor *fat) {

	return boot_of_(fat);
}
533
534static inline int
535fd_of_(struct fat_descriptor *fat)
536{
537	return (fat->fd);
538}
539
/* Public accessor for the device file descriptor of this FAT. */
int
fat_get_fd(struct fat_descriptor * fat)
{
	return fd_of_(fat);
}
545
546/*
547 * Whether a cl is in valid data range.
548 */
549bool
550fat_is_valid_cl(struct fat_descriptor *fat, cl_t cl)
551{
552
553	return (valid_cl(fat, cl));
554}
555
556static inline bool
557valid_cl(struct fat_descriptor *fat, cl_t cl)
558{
559	const struct bootblock *boot = boot_of_(fat);
560
561	return (cl >= CLUST_FIRST && cl < boot->NumClusters);
562}
563
564/*
565 * The first 2 FAT entries contain pseudo-cluster numbers with the following
566 * layout:
567 *
568 * 31...... ........ ........ .......0
569 * rrrr1111 11111111 11111111 mmmmmmmm         FAT32 entry 0
570 * rrrrsh11 11111111 11111111 11111xxx         FAT32 entry 1
571 *
572 *                   11111111 mmmmmmmm         FAT16 entry 0
573 *                   sh111111 11111xxx         FAT16 entry 1
574 *
575 * r = reserved
576 * m = BPB media ID byte
577 * s = clean flag (1 = dismounted; 0 = still mounted)
578 * h = hard error flag (1 = ok; 0 = I/O error)
579 * x = any value ok
580 */
/*
 * Return 1 if the file system carries the "dismounted cleanly" and
 * "no I/O error" flags in FAT[1] (see the layout comment above), or
 * if its first FAT sector cannot be read/understood; return 0 for
 * FAT12 (which has no such flags) or when the flags indicate a
 * clean shutdown was NOT recorded as missing.
 */
int
checkdirty(int fs, struct bootblock *boot)
{
	off_t off;
	u_char *buffer;
	int ret = 0;
	size_t len;

	/* Only FAT16 and FAT32 maintain clean/error flags */
	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
		return 0;

	/* Byte offset of the first FAT; two steps keep the math in off_t */
	off = boot->bpbResSectors;
	off *= boot->bpbBytesPerSec;

	buffer = malloc(len = boot->bpbBytesPerSec);
	if (buffer == NULL) {
		perr("No space for FAT sectors (%zu)", len);
		return 1;
	}

	if (lseek(fs, off, SEEK_SET) != off) {
		perr("Unable to read FAT");
		goto err;
	}

	if ((size_t)read(fs, buffer, boot->bpbBytesPerSec) !=
	    boot->bpbBytesPerSec) {
		perr("Unable to read FAT");
		goto err;
	}

	/*
	 * If we don't understand the FAT, then the file system must be
	 * assumed to be unclean.
	 */
	if (buffer[0] != boot->bpbMedia || buffer[1] != 0xff)
		goto err;
	if (boot->ClustMask == CLUST16_MASK) {
		if ((buffer[2] & 0xf8) != 0xf8 || (buffer[3] & 0x3f) != 0x3f)
			goto err;
	} else {
		if (buffer[2] != 0xff || (buffer[3] & 0x0f) != 0x0f
		    || (buffer[4] & 0xf8) != 0xf8 || buffer[5] != 0xff
		    || buffer[6] != 0xff || (buffer[7] & 0x03) != 0x03)
			goto err;
	}

	/*
	 * Now check the actual clean flag (and the no-error flag).
	 */
	if (boot->ClustMask == CLUST16_MASK) {
		if ((buffer[3] & 0xc0) == 0xc0)
			ret = 1;
	} else {
		if ((buffer[7] & 0x0c) == 0x0c)
			ret = 1;
	}

err:
	free(buffer);
	return ret;
}
643
644int
645cleardirty(struct fat_descriptor *fat)
646{
647	int fd, ret = FSERROR;
648	struct bootblock *boot;
649	u_char *buffer;
650	size_t len;
651	off_t off;
652
653	boot = boot_of_(fat);
654	fd = fd_of_(fat);
655
656	if (boot->ClustMask != CLUST16_MASK && boot->ClustMask != CLUST32_MASK)
657		return 0;
658
659	off = boot->bpbResSectors;
660	off *= boot->bpbBytesPerSec;
661
662	buffer = malloc(len = boot->bpbBytesPerSec);
663	if (buffer == NULL) {
664		perr("No memory for FAT sectors (%zu)", len);
665		return 1;
666	}
667
668	if ((size_t)pread(fd, buffer, len, off) != len) {
669		perr("Unable to read FAT");
670		goto err;
671	}
672
673	if (boot->ClustMask == CLUST16_MASK) {
674		buffer[3] |= 0x80;
675	} else {
676		buffer[7] |= 0x08;
677	}
678
679	if ((size_t)pwrite(fd, buffer, len, off) != len) {
680		perr("Unable to write FAT");
681		goto err;
682	}
683
684	ret = FSOK;
685
686err:
687	free(buffer);
688	return ret;
689}
690
691/*
692 * Read a FAT from disk. Returns 1 if successful, 0 otherwise.
693 */
694static int
695_readfat(struct fat_descriptor *fat)
696{
697	int fd;
698	size_t i;
699	off_t off;
700	size_t readsize;
701	struct bootblock *boot;
702	struct fat32_cache_entry *entry;
703
704	boot = boot_of_(fat);
705	fd = fd_of_(fat);
706	fat->fatsize = boot->FATsecs * boot->bpbBytesPerSec;
707
708	off = boot->bpbResSectors;
709	off *= boot->bpbBytesPerSec;
710
711	fat->is_mmapped = false;
712	fat->use_cache = false;
713
714	/* Attempt to mmap() first */
715	if (allow_mmap) {
716		fat->fatbuf = mmap(NULL, fat->fatsize,
717				PROT_READ | (rdonly ? 0 : PROT_WRITE),
718				MAP_SHARED, fd_of_(fat), off);
719		if (fat->fatbuf != MAP_FAILED) {
720			fat->is_mmapped = true;
721			return 1;
722		}
723	}
724
725	/*
726	 * Unfortunately, we were unable to mmap().
727	 *
728	 * Only use the cache manager when it's necessary, that is,
729	 * when the FAT is sufficiently large; in that case, only
730	 * read in the first 4 MiB of FAT into memory, and split the
731	 * buffer into chunks and insert to the LRU queue to populate
732	 * the cache with data.
733	 */
734	if (boot->ClustMask == CLUST32_MASK &&
735	    fat->fatsize >= fat32_cache_size) {
736		readsize = fat32_cache_size;
737		fat->use_cache = true;
738
739		fat->fat32_offset = boot->bpbResSectors * boot->bpbBytesPerSec;
740		fat->fat32_lastaddr = fat->fatsize & ~(fat32_cache_chunk_size);
741	} else {
742		readsize = fat->fatsize;
743	}
744	fat->fatbuf = malloc(readsize);
745	if (fat->fatbuf == NULL) {
746		perr("No space for FAT (%zu)", readsize);
747		return 0;
748	}
749
750	if (lseek(fd, off, SEEK_SET) != off) {
751		perr("Unable to read FAT");
752		goto err;
753	}
754	if ((size_t)read(fd, fat->fatbuf, readsize) != readsize) {
755		perr("Unable to read FAT");
756		goto err;
757	}
758
759	/*
760	 * When cache is used, split the buffer into chunks, and
761	 * connect the buffer into the cache.
762	 */
763	if (fat->use_cache) {
764		TAILQ_INIT(&fat->fat32_cache_head);
765		entry = calloc(fat32_cache_entries, sizeof(*entry));
766		if (entry == NULL) {
767			perr("No space for FAT cache (%zu of %zu)",
768			    fat32_cache_entries, sizeof(entry));
769			goto err;
770		}
771		for (i = 0; i < fat32_cache_entries; i++) {
772			entry[i].addr = fat32_cache_chunk_size * i;
773			entry[i].chunk = &fat->fatbuf[entry[i].addr];
774			TAILQ_INSERT_TAIL(&fat->fat32_cache_head,
775			    &entry[i], entries);
776		}
777		fat->fat32_cache_allentries = entry;
778	}
779
780	return 1;
781
782err:
783	free(fat->fatbuf);
784	fat->fatbuf = NULL;
785	return 0;
786}
787
788static void
789releasefat(struct fat_descriptor *fat)
790{
791	if (fat->is_mmapped) {
792		munmap(fat->fatbuf, fat->fatsize);
793	} else {
794		if (fat->use_cache) {
795			free(fat->fat32_cache_allentries);
796			fat->fat32_cache_allentries = NULL;
797		}
798		free(fat->fatbuf);
799	}
800	fat->fatbuf = NULL;
801	bitmap_dtor(&fat->headbitmap);
802}
803
804/*
805 * Read or map a FAT and populate head bitmap
806 */
int
readfat(int fs, struct bootblock *boot, struct fat_descriptor **fp)
{
	struct fat_descriptor *fat;
	u_char *buffer, *p;
	cl_t cl, nextcl;
	int ret = FSOK;

	/* Free/bad counters are recomputed by the scan below */
	boot->NumFree = boot->NumBad = 0;

	fat = calloc(1, sizeof(struct fat_descriptor));
	if (fat == NULL) {
		perr("No space for FAT descriptor");
		return FSFATAL;
	}

	fat->fd = fs;
	fat->boot = boot;

	if (!_readfat(fat)) {
		free(fat);
		return FSFATAL;
	}
	buffer = fat->fatbuf;

	/* Populate accessors */
	switch(boot->ClustMask) {
	case CLUST12_MASK:
		fat->get = fat_get_fat12_next;
		fat->set = fat_set_fat12_next;
		break;
	case CLUST16_MASK:
		fat->get = fat_get_fat16_next;
		fat->set = fat_set_fat16_next;
		break;
	case CLUST32_MASK:
		/* cached accessors only when _readfat() chose the LRU cache */
		if (fat->is_mmapped || !fat->use_cache) {
			fat->get = fat_get_fat32_next;
			fat->set = fat_set_fat32_next;
		} else {
			fat->get = fat_get_fat32_cached_next;
			fat->set = fat_set_fat32_cached_next;
		}
		break;
	default:
		pfatal("Invalid ClustMask: %d", boot->ClustMask);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	/*
	 * Start with every cluster marked as a potential chain head;
	 * the scan below clears the bit of every cluster that is
	 * claimed as "next" by some other cluster.
	 */
	if (bitmap_ctor(&fat->headbitmap, boot->NumClusters,
	    true) != FSOK) {
		perr("No space for head bitmap for FAT clusters (%zu)",
		    (size_t)boot->NumClusters);
		releasefat(fat);
		free(fat);
		return FSFATAL;
	}

	/*
	 * Verify the signature stored in the two reserved FAT entries
	 * (see the layout comment above checkdirty()).
	 */
	if (buffer[0] != boot->bpbMedia
	    || buffer[1] != 0xff || buffer[2] != 0xff
	    || (boot->ClustMask == CLUST16_MASK && buffer[3] != 0xff)
	    || (boot->ClustMask == CLUST32_MASK
		&& ((buffer[3]&0x0f) != 0x0f
		    || buffer[4] != 0xff || buffer[5] != 0xff
		    || buffer[6] != 0xff || (buffer[7]&0x0f) != 0x0f))) {

		/* Windows 95 OSR2 (and possibly any later) changes
		 * the FAT signature to 0xXXffff7f for FAT16 and to
		 * 0xXXffff0fffffff07 for FAT32 upon boot, to know that the
		 * file system is dirty if it doesn't reboot cleanly.
		 * Check this special condition before errorring out.
		 */
		if (buffer[0] == boot->bpbMedia && buffer[1] == 0xff
		    && buffer[2] == 0xff
		    && ((boot->ClustMask == CLUST16_MASK && buffer[3] == 0x7f)
			|| (boot->ClustMask == CLUST32_MASK
			    && buffer[3] == 0x0f && buffer[4] == 0xff
			    && buffer[5] == 0xff && buffer[6] == 0xff
			    && buffer[7] == 0x07)))
			ret |= FSDIRTY;
		else {
			/* just some odd byte sequence in FAT */

			switch (boot->ClustMask) {
			case CLUST32_MASK:
				pwarn("%s (%02x%02x%02x%02x%02x%02x%02x%02x)\n",
				      "FAT starts with odd byte sequence",
				      buffer[0], buffer[1], buffer[2], buffer[3],
				      buffer[4], buffer[5], buffer[6], buffer[7]);
				break;
			case CLUST16_MASK:
				pwarn("%s (%02x%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2], buffer[3]);
				break;
			default:
				pwarn("%s (%02x%02x%02x)\n",
				    "FAT starts with odd byte sequence",
				    buffer[0], buffer[1], buffer[2]);
				break;
			}

			/* Rewrite the reserved entries with the expected signature */
			if (ask(1, "Correct")) {
				ret |= FSFATMOD;
				p = buffer;

				*p++ = (u_char)boot->bpbMedia;
				*p++ = 0xff;
				*p++ = 0xff;
				switch (boot->ClustMask) {
				case CLUST16_MASK:
					*p++ = 0xff;
					break;
				case CLUST32_MASK:
					*p++ = 0x0f;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0xff;
					*p++ = 0x0f;
					break;
				default:
					break;
				}
			}
		}
	}

	/*
	 * Traverse the FAT table and populate head map.  Initially, we
	 * consider all clusters as possible head cluster (beginning of
	 * a file or directory), and traverse the whole allocation table
	 * by marking every non-head nodes as such (detailed below) and
	 * fix obvious issues while we walk.
	 *
	 * For each "next" cluster, the possible values are:
	 *
	 * a) CLUST_FREE or CLUST_BAD.  The *current* cluster can't be a
	 *    head node.
	 * b) An out-of-range value. The only fix would be to truncate at
	 *    the cluster.
	 * c) A valid cluster.  It means that cluster (nextcl) is not a
	 *    head cluster.  Note that during the scan, every cluster is
	 *    expected to be seen for at most once, and when we saw them
	 *    twice, it means a cross-linked chain which should be
	 *    truncated at the current cluster.
	 *
	 * After scan, the remaining set bits indicates all possible
	 * head nodes, because they were never claimed by any other
	 * node as the next node, but we do not know if these chains
	 * would end with a valid EOF marker.  We will check that in
	 * checkchain() at a later time when checking directories,
	 * where these head nodes would be marked as non-head.
	 *
	 * In the final pass, all head nodes should be cleared, and if
	 * there is still head nodes, these would be leaders of lost
	 * chain.
	 */
	for (cl = CLUST_FIRST; cl < boot->NumClusters; cl++) {
		nextcl = fat_get_cl_next(fat, cl);

		/* Check if the next cluster number is valid */
		if (nextcl == CLUST_FREE) {
			/* Save a hint for next free cluster */
			if (boot->FSNext == 0) {
				boot->FSNext = cl;
			}
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumFree++;
		} else if (nextcl == CLUST_BAD) {
			if (fat_is_cl_head(fat, cl)) {
				fat_clear_cl_head(fat, cl);
			}
			boot->NumBad++;
		} else if (!valid_cl(fat, nextcl) && nextcl < CLUST_RSRVD) {
			pwarn("Cluster %u continues with out of range "
			    "cluster number %u\n",
			    cl,
			    nextcl & boot->ClustMask);
			if (ask(0, "Truncate")) {
				ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
				ret |= FSFATMOD;
			}
		} else if (valid_cl(fat, nextcl)) {
			if (fat_is_cl_head(fat, nextcl)) {
				fat_clear_cl_head(fat, nextcl);
			} else {
				/* nextcl was already claimed: cross-linked chain */
				pwarn("Cluster %u crossed another chain at %u\n",
				    cl, nextcl);
				if (ask(0, "Truncate")) {
					ret |= fat_set_cl_next(fat, cl, CLUST_EOF);
					ret |= FSFATMOD;
				}
			}
		}

	}

	if (ret & FSFATAL) {
		releasefat(fat);
		free(fat);
		*fp = NULL;
	} else
		*fp = fat;
	return ret;
}
1016
1017/*
1018 * Get type of reserved cluster
1019 */
1020const char *
1021rsrvdcltype(cl_t cl)
1022{
1023	if (cl == CLUST_FREE)
1024		return "free";
1025	if (cl < CLUST_BAD)
1026		return "reserved";
1027	if (cl > CLUST_BAD)
1028		return "as EOF";
1029	return "bad";
1030}
1031
1032/*
1033 * Examine a cluster chain for errors and count its size.
1034 */
/*
 * Walk the chain beginning at 'head', storing its length (in
 * clusters) into *chainsize.  Returns FSOK for a well-terminated
 * chain; otherwise offers to truncate/clear the chain and returns
 * the setter's status | FSFATMOD, or FSERROR if the user declines.
 */
int
checkchain(struct fat_descriptor *fat, cl_t head, size_t *chainsize)
{
	cl_t prev_cl, current_cl, next_cl;
	const char *op;

	/*
	 * We expect the caller to give us a real, unvisited 'head'
	 * cluster, and it must be a valid cluster.  While scanning the
	 * FAT table, we already excluded all clusters that was claimed
	 * as a "next" cluster.  Assert all the three conditions.
	 */
	assert(valid_cl(fat, head));
	assert(fat_is_cl_head(fat, head));

	/*
	 * Immediately mark the 'head' cluster that we are about to visit.
	 */
	fat_clear_cl_head(fat, head);

	/*
	 * The allocation of a non-zero sized file or directory is
	 * represented as a singly linked list, and the tail node
	 * would be the EOF marker (>=CLUST_EOFS).
	 *
	 * With a valid head node at hand, we expect all subsequent
	 * cluster to be either a not yet seen and valid cluster (we
	 * would continue counting), or the EOF marker (we conclude
	 * the scan of this chain).
	 *
	 * For all other cases, the chain is invalid, and the only
	 * viable fix would be to truncate at the current node (mark
	 * it as EOF) when the next node violates that.
	 */
	*chainsize = 0;
	prev_cl = current_cl = head;
	for (next_cl = fat_get_cl_next(fat, current_cl);
	    valid_cl(fat, next_cl);
	    prev_cl = current_cl, current_cl = next_cl, next_cl = fat_get_cl_next(fat, current_cl))
		(*chainsize)++;

	/* A natural end */
	if (next_cl >= CLUST_EOFS) {
		(*chainsize)++;
		return FSOK;
	}

	/*
	 * The chain ended with an out-of-range cluster number.
	 *
	 * If the current node is e.g. CLUST_FREE, CLUST_BAD, etc.,
	 * it should not be present in a chain and we have to truncate
	 * at the previous node.
	 *
	 * If the current cluster points to an invalid cluster, the
	 * current cluster might have useful data and we truncate at
	 * the current cluster instead.
	 */
	if (next_cl == CLUST_FREE || next_cl >= CLUST_RSRVD) {
		pwarn("Cluster chain starting at %u ends with cluster marked %s\n",
		    head, rsrvdcltype(next_cl));
		current_cl = prev_cl;
	} else {
		pwarn("Cluster chain starting at %u ends with cluster out of range (%u)\n",
		    head,
		    next_cl & boot_of_(fat)->ClustMask);
		(*chainsize)++;
	}

	/* An empty chain has nothing worth keeping: clear instead of truncate */
	if (*chainsize > 0) {
		op = "Truncate";
		next_cl = CLUST_EOF;
	} else {
		op = "Clear";
		next_cl = CLUST_FREE;
	}
	if (ask(0, "%s", op)) {
		return (fat_set_cl_next(fat, current_cl, next_cl) | FSFATMOD);
	} else {
		return (FSERROR);
	}
}
1117
1118/*
1119 * Clear cluster chain from head.
1120 */
1121void
1122clearchain(struct fat_descriptor *fat, cl_t head)
1123{
1124	cl_t current_cl, next_cl;
1125	struct bootblock *boot = boot_of_(fat);
1126
1127	current_cl = head;
1128
1129	while (valid_cl(fat, current_cl)) {
1130		next_cl = fat_get_cl_next(fat, current_cl);
1131		(void)fat_set_cl_next(fat, current_cl, CLUST_FREE);
1132		boot->NumFree++;
1133		current_cl = next_cl;
1134	}
1135
1136}
1137
1138/*
1139 * Overwrite the n-th FAT with FAT0
1140 */
static int
copyfat(struct fat_descriptor *fat, int n)
{
	size_t	rwsize, tailsize, blobs, i;
	off_t	dst_off, src_off;
	struct bootblock *boot;
	int ret, fd;

	/*
	 * NOTE: fat->fatbuf is reused as the copy buffer here, so its
	 * previous contents are clobbered.  In this file, copyfat() is
	 * only reached from writefat() on the use_cache path, after
	 * every dirty cache chunk has been flushed.
	 */
	ret = FSOK;
	fd = fd_of_(fat);
	boot = boot_of_(fat);

	/* Number of cache-sized blobs; the final blob may be partial */
	blobs = howmany(fat->fatsize, fat32_cache_size);
	tailsize = fat->fatsize % fat32_cache_size;
	if (tailsize == 0) {
		tailsize = fat32_cache_size;
	}
	rwsize = fat32_cache_size;

	src_off = fat->fat32_offset;
	dst_off = boot->bpbResSectors + n * boot->FATsecs;
	dst_off *= boot->bpbBytesPerSec;

	for (i = 0; i < blobs;
	    i++, src_off += fat32_cache_size, dst_off += fat32_cache_size) {
		if (i == blobs - 1) {
			rwsize = tailsize;
		}
		/* Errors are latched: only the first failure is reported */
		if ((lseek(fd, src_off, SEEK_SET) != src_off ||
		    (size_t)read(fd, fat->fatbuf, rwsize) != rwsize) &&
		    ret == FSOK) {
			perr("Unable to read FAT0");
			ret = FSFATAL;
			continue;
		}
		if ((lseek(fd, dst_off, SEEK_SET) != dst_off ||
			(size_t)write(fd, fat->fatbuf, rwsize) != rwsize) &&
			ret == FSOK) {
			perr("Unable to write FAT %d", n);
			ret = FSERROR;
		}
	}
	return (ret);
}
1185
1186/*
1187 * Write out FAT
1188 */
/*
 * Flush the in-memory FAT back to every FAT copy on disk.  Returns
 * FSOK, FSERROR if a backup FAT copy could not be written, or
 * FSFATAL if the primary FAT itself could not be written.
 */
int
writefat(struct fat_descriptor *fat)
{
	u_int i;
	size_t writesz;
	off_t dst_base;
	int ret = FSOK, fd;
	struct bootblock *boot;
	struct fat32_cache_entry *entry;

	boot = boot_of_(fat);
	fd = fd_of_(fat);

	if (fat->use_cache) {
		/*
		 * Attempt to flush all in-flight cache, and bail out
		 * if we encountered an error (but only emit error
		 * message once).  Stop proceeding with copyfat()
		 * if any flush failed.
		 */
		TAILQ_FOREACH(entry, &fat->fat32_cache_head, entries) {
			if (fat_flush_fat32_cache_entry(fat, entry) != FSOK) {
				if (ret == FSOK) {
					perr("Unable to write FAT");
					ret = FSFATAL;
				}
			}
		}
		if (ret != FSOK)
			return (ret);

		/* Update backup copies of FAT, error is not fatal */
		for (i = 1; i < boot->bpbFATs; i++) {
			if (copyfat(fat, i) != FSOK)
				ret = FSERROR;
		}
	} else {
		writesz = fat->fatsize;

		/*
		 * An mmap()ed FAT0 is already backed by the device, so
		 * start with copy 1 in that case; otherwise write FAT0
		 * (copy 0) from the buffer as well.
		 */
		for (i = fat->is_mmapped ? 1 : 0; i < boot->bpbFATs; i++) {
			dst_base = boot->bpbResSectors + i * boot->FATsecs;
			dst_base *= boot->bpbBytesPerSec;
			if ((lseek(fd, dst_base, SEEK_SET) != dst_base ||
			    (size_t)write(fd, fat->fatbuf, writesz) != writesz) &&
			    ret == FSOK) {
				perr("Unable to write FAT %d", i);
				/* failure on the primary copy is fatal */
				ret = ((i == 0) ? FSFATAL : FSERROR);
			}
		}
	}

	return ret;
}
1242
1243/*
1244 * Check a complete in-memory FAT for lost cluster chains
1245 */
int
checklost(struct fat_descriptor *fat)
{
	cl_t head;
	int mod = FSOK;
	int dosfs, ret;
	size_t chains, chainlength;
	struct bootblock *boot;

	dosfs = fd_of_(fat);
	boot = boot_of_(fat);

	/*
	 * At this point, we have already traversed all directories.
	 * All remaining chain heads in the bitmap are heads of lost
	 * chains.
	 */
	chains = fat_get_head_count(fat);
	for (head = CLUST_FIRST;
	    chains > 0 && head < boot->NumClusters;
	    ) {
		/*
		 * We expect the bitmap to be very sparse, so skip if
		 * the range is full of 0's
		 */
		if (head % LONG_BIT == 0 &&
		    !fat_is_cl_head_in_range(fat, head)) {
			head += LONG_BIT;
			continue;
		}
		if (fat_is_cl_head(fat, head)) {
			/* offer to reconnect the lost chain, or clear it */
			ret = checkchain(fat, head, &chainlength);
			if (ret != FSERROR && chainlength > 0) {
				pwarn("Lost cluster chain at cluster %u\n"
				    "%zd Cluster(s) lost\n",
				    head, chainlength);
				mod |= ret = reconnect(fat, head,
				    chainlength);
			}
			if (mod & FSFATAL)
				break;
			if (ret == FSERROR && ask(0, "Clear")) {
				clearchain(fat, head);
				mod |= FSFATMOD;
			}
			chains--;
		}
		head++;
	}

	/* flush any pending LOST.DIR bookkeeping */
	finishlf();

	/* Reconcile the FSInfo block (FAT32) with the recomputed counts */
	if (boot->bpbFSInfo) {
		ret = 0;
		if (boot->FSFree != 0xffffffffU &&
		    boot->FSFree != boot->NumFree) {
			pwarn("Free space in FSInfo block (%u) not correct (%u)\n",
			      boot->FSFree, boot->NumFree);
			if (ask(1, "Fix")) {
				boot->FSFree = boot->NumFree;
				ret = 1;
			}
		}
		if (boot->FSNext != 0xffffffffU &&
		    (boot->FSNext >= boot->NumClusters ||
		    (boot->NumFree && fat_get_cl_next(fat, boot->FSNext) != CLUST_FREE))) {
			pwarn("Next free cluster in FSInfo block (%u) %s\n",
			      boot->FSNext,
			      (boot->FSNext >= boot->NumClusters) ? "invalid" : "not free")
;
			if (ask(1, "Fix"))
				for (head = CLUST_FIRST; head < boot->NumClusters; head++)
					if (fat_get_cl_next(fat, head) == CLUST_FREE) {
						boot->FSNext = head;
						ret = 1;
						break;
					}
		}
		if (ret)
			mod |= writefsinfo(dosfs, boot);
	}

	return mod;
}
1329