1/*
2 * ntfs_vfsops.c - NTFS kernel vfs operations.
3 *
4 * Copyright (c) 2006-2011 Anton Altaparmakov.  All Rights Reserved.
5 * Portions Copyright (c) 2006-2011 Apple Inc.  All Rights Reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 *    this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 *    this list of conditions and the following disclaimer in the documentation
14 *    and/or other materials provided with the distribution.
15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its
16 *    contributors may be used to endorse or promote products derived from this
17 *    software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in
31 * full, this file may be redistributed and/or modified under the terms of the
32 * GNU General Public License (GPL) Version 2, in which case the provisions of
33 * that version of the GPL will apply to you instead of the license terms
34 * above.  You can obtain a copy of the GPL Version 2 at
35 * http://developer.apple.com/opensource/licenses/gpl-2.txt.
36 */
37
38#include <sys/cdefs.h>
39#include <sys/attr.h>
40#include <sys/buf.h>
41#include <sys/disk.h>
42#include <sys/errno.h>
43#include <sys/fcntl.h>
44#include <sys/kauth.h>
45#include <sys/kernel_types.h>
46#include <sys/mount.h>
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/stat.h>
50#include <sys/types.h>
51#include <sys/ubc.h>
52#include <sys/ucred.h>
53#include <sys/vnode.h>
54
55#include <mach/kern_return.h>
56#include <mach/kmod.h>
57#include <mach/machine/vm_param.h>
58
59#include <string.h>
60
61#include <libkern/libkern.h>
62#include <libkern/OSMalloc.h>
63#include <libkern/OSKextLib.h>
64
65#include <kern/debug.h>
66#include <kern/locks.h>
67
68#include <miscfs/specfs/specdev.h>
69
70#include "ntfs.h"
71#include "ntfs_attr.h"
72#include "ntfs_attr_list.h"
73#include "ntfs_debug.h"
74#include "ntfs_dir.h"
75#include "ntfs_hash.h"
76#include "ntfs_inode.h"
77#include "ntfs_layout.h"
78#include "ntfs_logfile.h"
79#include "ntfs_mft.h"
80#include "ntfs_mst.h"
81#include "ntfs_page.h"
82#include "ntfs_quota.h"
83#include "ntfs_secure.h"
84#include "ntfs_time.h"
85#include "ntfs_unistr.h"
86#include "ntfs_usnjrnl.h"
87#include "ntfs_vnops.h"
88#include "ntfs_volume.h"
89
90// FIXME: Change email address but to what?
91const char ntfs_dev_email[] = "linux-ntfs-dev@lists.sourceforge.net";
92const char ntfs_please_email[] = "Please email "
93		"linux-ntfs-dev@lists.sourceforge.net and say that you saw "
94		"this message.  Thank you.";
95
96/* A driver wide lock protecting the below global data structures. */
97static lck_mtx_t ntfs_lock;
98
99/* Number of mounted file systems which have compression enabled. */
100static unsigned long ntfs_compression_users;
101static u8 *ntfs_compression_buffer;
102#define ntfs_compression_buffer_size (16 * 4096)
103
104/* The global default upcase table and corresponding reference count. */
105static unsigned long ntfs_default_upcase_users;
106static ntfschar *ntfs_default_upcase;
107#define ntfs_default_upcase_size (64 * 1024 * sizeof(ntfschar))
108
109static errno_t ntfs_blocksize_set(mount_t mp, vnode_t dev_vn, u32 blocksize,
110		vfs_context_t context)
111{
112	errno_t err;
113	struct vfsioattr ia;
114
115	err = VNOP_IOCTL(dev_vn, DKIOCSETBLOCKSIZE, (caddr_t)&blocksize,
116			FWRITE, context);
117	if (err)
118		return ENXIO;
119	/*
120	 * Update the cached block size in the mount point, i.e. the value
121	 * returned by vfs_devblocksize().
122	 */
123	ntfs_debug("Updating io attributes with new block size.");
124	vfs_ioattr(mp, &ia);
125	ia.io_devblocksize = blocksize;
126	vfs_setioattr(mp, &ia);
127	/*
128	 * Update the block size in the block device, i.e. the
129	 * v_specsize of the device vnode.
130	 */
131	ntfs_debug("Updating device vnode with new block size.");
132	set_fsblocksize(dev_vn);
133	return 0;
134}
135
136/**
137 * ntfs_boot_sector_is_valid - check if @b contains a valid ntfs boot sector
138 * @mp:		Mount of the device to which @b belongs.
139 * @b:		Boot sector of device @mp to check.
140 *
141 * Check whether the boot sector @b is a valid ntfs boot sector.
142 *
143 * Return TRUE if it is valid and FALSE if not.
144 *
145 * @mp is only needed for warning/error output, i.e. it can be NULL.
146 */
147static BOOL ntfs_boot_sector_is_valid(const mount_t mp,
148		const NTFS_BOOT_SECTOR *b)
149{
150	ntfs_debug("Entering.");
151	/*
152	 * Check that checksum == sum of u32 values from b to the checksum
153	 * field.  If checksum is zero, no checking is done.  We will work when
154	 * the checksum test fails, since some utilities update the boot sector
155	 * ignoring the checksum which leaves the checksum out-of-date.  We
156	 * report a warning if this is the case.
157	 */
158	if ((void*)b < (void*)&b->checksum && b->checksum) {
159		le32 *u;
160		u32 i;
161
162		for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u)
163			i += le32_to_cpup(u);
164		if (le32_to_cpu(b->checksum) != i)
165			ntfs_warning(mp, "Invalid boot sector checksum.");
166	}
167	/* Check OEMidentifier is "NTFS    " */
168	if (b->oem_id != magicNTFS)
169		goto not_ntfs;
170	/*
171	 * Check bytes per sector value is between 256 and
172	 * NTFS_MAX_SECTOR_SIZE.
173	 */
174	if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 ||
175			le16_to_cpu(b->bpb.bytes_per_sector) >
176			NTFS_MAX_SECTOR_SIZE)
177		goto not_ntfs;
178	/* Check sectors per cluster value is valid. */
179	switch (b->bpb.sectors_per_cluster) {
180	case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128:
181		break;
182	default:
183		goto not_ntfs;
184	}
185	/* Check the cluster size is not above the maximum (64kiB). */
186	if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) *
187			b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE)
188		goto not_ntfs;
189	/* Check reserved/unused fields are really zero. */
190	if (le16_to_cpu(b->bpb.reserved_sectors) ||
191			le16_to_cpu(b->bpb.root_entries) ||
192			le16_to_cpu(b->bpb.sectors) ||
193			le16_to_cpu(b->bpb.sectors_per_fat) ||
194			le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats)
195		goto not_ntfs;
196	/*
197	 * Check clusters per file mft record value is valid.  It can be either
198	 * between -31 and -9 (in which case the actual mft record size is
199	 * -log2() of the absolute value) or a positive power of two.
200	 */
201	if ((u8)b->clusters_per_mft_record < 0xe1 ||
202			(u8)b->clusters_per_mft_record > 0xf7)
203		switch (b->clusters_per_mft_record) {
204		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
205			break;
206		default:
207			goto not_ntfs;
208		}
209	/* Check clusters per index block value is valid. */
210	if ((u8)b->clusters_per_index_block < 0xe1 ||
211			(u8)b->clusters_per_index_block > 0xf7)
212		switch (b->clusters_per_index_block) {
213		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
214			break;
215		default:
216			goto not_ntfs;
217		}
218	/*
219	 * Check for valid end of sector marker.  We will work without it, but
220	 * many BIOSes will refuse to boot from a bootsector if the magic is
221	 * incorrect, so we emit a warning.
222	 */
223	if (b->end_of_sector_marker != const_cpu_to_le16(0xaa55))
224		ntfs_warning(mp, "Invalid end of sector marker.");
225	ntfs_debug("Done.");
226	return TRUE;
227not_ntfs:
228	ntfs_debug("Not an NTFS boot sector.");
229	return FALSE;
230}
231
232/**
233 * ntfs_boot_sector_read - read the ntfs boot sector of a device
234 * @vol:	ntfs_volume of device to read the boot sector from
235 * @cred:	credentials of running process
236 * @buf:	destination pointer for buffer containing boot sector
237 * @bs:		destination pointer for boot sector data
238 *
239 * Read the boot sector from the device and validate it.  If that fails, try to
240 * read the backup boot sector, first from the end of the device a-la NT4 and
241 * later and then from the middle of the device a-la NT3.51 and earlier.
242 *
243 * If a valid boot sector is found but it is not the primary boot sector, we
244 * repair the primary boot sector silently (unless the device is read-only or
245 * the primary boot sector is not accessible).
246 *
247 * On success return 0 and set *@buf to the buffer containing the boot sector
248 * and *@bs to the boot sector data.  The caller then has to buf_unmap() and
249 * buf_brelse() the buffer.
250 *
251 * On error return the error code.
252 *
253 * Note: We set the B_NOCACHE flag on the buffer(s), thus effectively
254 * invalidating them when we release them.  This is needed because the
255 * buffer(s) may get read later using a different vnode ($Boot for example).
256 */
257static errno_t ntfs_boot_sector_read(ntfs_volume *vol, kauth_cred_t cred,
258		buf_t *buf, NTFS_BOOT_SECTOR **bs)
259{
260	daddr64_t nr_blocks = vol->nr_blocks;
261	static const char read_err_str[] =
262			"Unable to read %s boot sector (error %d).";
263	mount_t mp = vol->mp;
264	vnode_t dev_vn = vol->dev_vn;
265	buf_t primary, backup;
266	NTFS_BOOT_SECTOR *bs1, *bs2;
267	errno_t err, err2;
268	u32 blocksize = vfs_devblocksize(mp);
269
270	ntfs_debug("Entering.");
271	/* Try to read primary boot sector. */
272	err = buf_meta_bread(dev_vn, 0, blocksize, cred, &primary);
273	buf_setflags(primary, B_NOCACHE);
274	if (!err) {
275		err = buf_map(primary, (caddr_t*)&bs1);
276		if (err) {
277			ntfs_error(mp, "Failed to map buffer of primary boot "
278					"sector (error %d).", err);
279			bs1 = NULL;
280		} else {
281			if (ntfs_boot_sector_is_valid(mp, bs1)) {
282				*buf = primary;
283				*bs = bs1;
284				ntfs_debug("Done.");
285				return 0;
286			}
287			ntfs_error(mp, "Primary boot sector is invalid.");
288			err = EIO;
289		}
290	} else {
291		ntfs_error(mp, read_err_str, "primary", err);
292		bs1 = NULL;
293	}
294	if (!(vol->on_errors & ON_ERRORS_RECOVER)) {
295		ntfs_error(mp, "Mount option errors=recover not used.  "
296				"Aborting without trying to recover.");
297		if (bs1) {
298			err2 = buf_unmap(primary);
299			if (err2)
300				ntfs_error(mp, "Failed to unmap buffer of "
301						"primary boot sector (error "
302						"%d).", err2);
303		}
304		buf_brelse(primary);
305		return err;
306	}
307	/* Try to read NT4+ backup boot sector. */
308	err = buf_meta_bread(dev_vn, nr_blocks - 1, blocksize, cred, &backup);
309	buf_setflags(backup, B_NOCACHE);
310	if (!err) {
311		err = buf_map(backup, (caddr_t*)&bs2);
312		if (err)
313			ntfs_error(mp, "Failed to map buffer of backup boot "
314					"sector (error %d).", err);
315		else {
316			if (ntfs_boot_sector_is_valid(mp, bs2))
317				goto hotfix_primary_boot_sector;
318			err = buf_unmap(backup);
319			if (err)
320				ntfs_error(mp, "Failed to unmap buffer of "
321						"backup boot sector (error "
322						"%d).", err);
323		}
324	} else
325		ntfs_error(mp, read_err_str, "backup", err);
326	buf_brelse(backup);
327	/* Try to read NT3.51- backup boot sector. */
328	err = buf_meta_bread(dev_vn, nr_blocks >> 1, blocksize, cred, &backup);
329	buf_setflags(backup, B_NOCACHE);
330	if (!err) {
331		err = buf_map(backup, (caddr_t*)&bs2);
332		if (err)
333			ntfs_error(mp, "Failed to map buffer of old backup "
334					"boot sector (error %d).", err);
335		else {
336			if (ntfs_boot_sector_is_valid(mp, bs2))
337				goto hotfix_primary_boot_sector;
338			err = buf_unmap(backup);
339			if (err)
340				ntfs_error(mp, "Failed to unmap buffer of old "
341						"backup boot sector (error "
342						"%d).", err);
343			err = EIO;
344		}
345		ntfs_error(mp, "Could not find a valid backup boot sector.");
346	} else
347		ntfs_error(mp, read_err_str, "backup", err);
348	buf_brelse(backup);
349	/* We failed.  Clean up and return. */
350	if (bs1) {
351		err2 = buf_unmap(primary);
352		if (err2)
353			ntfs_error(mp, "Failed to unmap buffer of primary "
354					"boot sector (error %d).", err2);
355	}
356	buf_brelse(primary);
357	return err;
358hotfix_primary_boot_sector:
359	ntfs_warning(mp, "Using backup boot sector.");
360	/*
361	 * If we managed to read sector zero and the volume is not read-only,
362	 * copy the found, valid, backup boot sector to the primary boot
363	 * sector.  Note we copy the complete sector, not just the boot sector
364	 * structure as the sector size may be bigger and in this case it
365	 * contains the correct boot loader code in the backup boot sector.
366	 */
367	if (bs1 && !NVolReadOnly(vol)) {
368		ntfs_warning(mp, "Hot-fix: Recovering invalid primary boot "
369				"sector from backup copy.");
370		memcpy(bs1, bs2, blocksize);
371		err = buf_bwrite(primary);
372		if (err)
373			ntfs_error(mp, "Hot-fix: Device write error while "
374					"recovering primary boot sector "
375					"(error %d).", err);
376	} else {
377		if (bs1) {
378			ntfs_warning(mp, "Hot-fix: Recovery of primary boot "
379					"sector failed: Read-only mount.");
380			err = buf_unmap(primary);
381			if (err)
382				ntfs_error(mp, "Failed to unmap buffer of "
383						"primary boot sector (error "
384						"%d).", err);
385		} else
386			ntfs_warning(mp, "Hot-fix: Recovery of primary boot "
387					"sector failed as it could not be "
388					"mapped.");
389		buf_brelse(primary);
390	}
391	*buf = backup;
392	*bs = bs2;
393	return 0;
394}
395
396/**
397 * ntfs_boot_sector_parse - parse the boot sector and store the data in @vol
398 * @vol:	volume structure to initialise with data from boot sector
399 * @b:		boot sector to parse
400 *
401 * Parse the ntfs boot sector @b and store all imporant information therein in
402 * the ntfs_volume @vol.
403 *
404 * Return 0 on success and errno on error.  The following error codes are
405 * defined:
406 *	EINVAL	- Boot sector is invalid.
407 *	ENOTSUP - Volume is not supported by this ntfs driver.
408 */
409static errno_t ntfs_boot_sector_parse(ntfs_volume *vol,
410		const NTFS_BOOT_SECTOR *b)
411{
412	s64 ll;
413	mount_t mp = vol->mp;
414	unsigned sectors_per_cluster_shift, nr_hidden_sects;
415	int clusters_per_mft_record, clusters_per_index_block;
416
417	ntfs_debug("Entering.");
418	vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector);
419	vol->sector_size_mask = vol->sector_size - 1;
420	vol->sector_size_shift = ffs(vol->sector_size) - 1;
421	ntfs_debug("vol->sector_size = %u (0x%x)", vol->sector_size,
422			vol->sector_size);
423	ntfs_debug("vol->sector_size_shift = %u", vol->sector_size_shift);
424	if (vol->sector_size < (u32)vfs_devblocksize(mp)) {
425		ntfs_error(mp, "Sector size (%u) is smaller than the device "
426				"block size (%d).  This is not supported.  "
427				"Sorry.", vol->sector_size,
428				vfs_devblocksize(mp));
429		return ENOTSUP;
430	}
431	ntfs_debug("sectors_per_cluster = %u", b->bpb.sectors_per_cluster);
432	sectors_per_cluster_shift = ffs(b->bpb.sectors_per_cluster) - 1;
433	ntfs_debug("sectors_per_cluster_shift = %u", sectors_per_cluster_shift);
434	nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors);
435	ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects);
436	vol->cluster_size = vol->sector_size << sectors_per_cluster_shift;
437	vol->cluster_size_mask = vol->cluster_size - 1;
438	vol->cluster_size_shift = ffs(vol->cluster_size) - 1;
439	ntfs_debug("vol->cluster_size = %u (0x%x)", vol->cluster_size,
440			vol->cluster_size);
441	ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
442	ntfs_debug("vol->cluster_size_shift = %u", vol->cluster_size_shift);
443	if (vol->cluster_size < vol->sector_size) {
444		ntfs_error(mp, "Cluster size (%u) is smaller than the sector "
445				"size (%u).  This is not supported.  Sorry.",
446				vol->cluster_size, vol->sector_size);
447		return ENOTSUP;
448	}
449	clusters_per_mft_record = b->clusters_per_mft_record;
450	ntfs_debug("clusters_per_mft_record = %u (0x%x)",
451			clusters_per_mft_record, clusters_per_mft_record);
452	if (clusters_per_mft_record > 0)
453		vol->mft_record_size = vol->cluster_size *
454				clusters_per_mft_record;
455	else
456		/*
457		 * When mft_record_size < cluster_size, clusters_per_mft_record
458		 * = -log2(mft_record_size) bytes.  mft_record_size normaly is
459		 * 1024 bytes, which is encoded as 0xF6 (-10 in decimal).
460		 */
461		vol->mft_record_size = 1 << -clusters_per_mft_record;
462	vol->mft_record_size_mask = vol->mft_record_size - 1;
463	vol->mft_record_size_shift = ffs(vol->mft_record_size) - 1;
464	ntfs_debug("vol->mft_record_size = %u (0x%x)", vol->mft_record_size,
465			vol->mft_record_size);
466	ntfs_debug("vol->mft_record_size_mask = 0x%x",
467			vol->mft_record_size_mask);
468	ntfs_debug("vol->mft_record_size_shift = %u)",
469			vol->mft_record_size_shift);
470	/*
471	 * We cannot support mft record sizes above the PAGE_SIZE since we
472	 * store $MFT/$DATA, i.e. the table of mft records, in the unified
473	 * buffer cache and thus in pages.
474	 */
475	if (vol->mft_record_size > PAGE_SIZE) {
476		ntfs_error(mp, "Mft record size (%u) exceeds the PAGE_SIZE on "
477				"your system (%u).  This is not supported.  "
478				"Sorry.", vol->mft_record_size, PAGE_SIZE);
479		return ENOTSUP;
480	}
481	/* We cannot support mft record sizes below the sector size. */
482	if (vol->mft_record_size < vol->sector_size) {
483		ntfs_error(mp, "Mft record size (%u) is smaller than the "
484				"sector size (%u).  This is not supported.  "
485				"Sorry.", vol->mft_record_size,
486				vol->sector_size);
487		return ENOTSUP;
488	}
489	clusters_per_index_block = b->clusters_per_index_block;
490	ntfs_debug("clusters_per_index_block = %d (0x%x)",
491			clusters_per_index_block, clusters_per_index_block);
492	if (clusters_per_index_block > 0) {
493		vol->index_block_size = vol->cluster_size *
494				clusters_per_index_block;
495		vol->blocks_per_index_block = clusters_per_index_block;
496	} else {
497		/*
498		 * When index_block_size < cluster_size,
499		 * clusters_per_index_block = -log2(index_block_size) bytes.
500		 * index_block_size normaly equals 4096 bytes, which is encoded
501		 * as 0xF4 (-12 in decimal).
502		 */
503		vol->index_block_size = 1 << -clusters_per_index_block;
504		vol->blocks_per_index_block = vol->index_block_size /
505				vol->sector_size;
506	}
507	vol->index_block_size_mask = vol->index_block_size - 1;
508	vol->index_block_size_shift = ffs(vol->index_block_size) - 1;
509	ntfs_debug("vol->index_block_size = %u (0x%x)",
510			vol->index_block_size, vol->index_block_size);
511	ntfs_debug("vol->index_block_size_mask = 0x%x",
512			vol->index_block_size_mask);
513	ntfs_debug("vol->index_block_size_shift = %u",
514			vol->index_block_size_shift);
515	ntfs_debug("vol->blocks_per_index_block = %u",
516			vol->blocks_per_index_block);
517	/* We cannot support index block sizes below the sector size. */
518	if (vol->index_block_size < vol->sector_size) {
519		ntfs_error(mp, "Index block size (%u) is smaller than the "
520				"sector size (%u).  This is not supported.  "
521				"Sorry.", vol->index_block_size,
522				vol->sector_size);
523		return ENOTSUP;
524	}
525	/*
526	 * Get the size of the volume in clusters and check for 64-bit-ness.
527	 * Windows currently only uses 32 bits to save the clusters so we do
528	 * the same as we do not really want to break compatibility.  We could
529	 * perhaps add a mount option to allow this one day but it would render
530	 * such volumes incompatible with Windows.
531	 */
532	ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_shift;
533	if ((u64)ll >= (u64)1 << 32) {
534		ntfs_error(mp, "Volume specifies 64-bit clusters but only "
535				"32-bit clusters are allowed by Microsoft "
536				"Windows.  Weird.");
537		return EINVAL;
538	}
539	vol->nr_clusters = ll;
540	ntfs_debug("vol->nr_clusters = 0x%llx",
541			(unsigned long long)vol->nr_clusters);
542	ll = sle64_to_cpu(b->mft_lcn);
543	if (ll >= vol->nr_clusters) {
544		ntfs_error(mp, "MFT LCN (%lld, 0x%llx) is beyond end of "
545				"volume.  Weird.", (unsigned long long)ll,
546				(unsigned long long)ll);
547		return EINVAL;
548	}
549	vol->mft_lcn = ll;
550	ntfs_debug("vol->mft_lcn = 0x%llx", (unsigned long long)vol->mft_lcn);
551	ll = sle64_to_cpu(b->mftmirr_lcn);
552	if (ll >= vol->nr_clusters) {
553		ntfs_error(mp, "MFTMirr LCN (%lld, 0x%llx) is beyond end of "
554				"volume.  Weird.", (unsigned long long)ll,
555				(unsigned long long)ll);
556		return EINVAL;
557	}
558	vol->mftmirr_lcn = ll;
559	ntfs_debug("vol->mftmirr_lcn = 0x%llx",
560			(unsigned long long)vol->mftmirr_lcn);
561	/*
562	 * Work out the size of the mft mirror in number of mft records.  If
563	 * the cluster size is less than or equal to the size taken by four mft
564	 * records, the mft mirror stores the first four mft records.  If the
565	 * cluster size is bigger than the size taken by four mft records, the
566	 * mft mirror contains as many mft records as will fit into one
567	 * cluster.
568	 *
569	 * Having said that Windows only keeps in sync and cares about the
570	 * consistency of the first four mft records so we do the same.
571	 */
572#if 0
573	if (vol->cluster_size <= ((u32)4 << vol->mft_record_size_shift))
574		vol->mftmirr_size = 4;
575	else
576		vol->mftmirr_size = vol->cluster_size >>
577				vol->mft_record_size_shift;
578#else
579	vol->mftmirr_size = 4;
580#endif
581	ntfs_debug("vol->mftmirr_size = 0x%x", vol->mftmirr_size);
582	vol->serial_no = le64_to_cpu(b->volume_serial_number);
583	ntfs_debug("vol->serial_no = 0x%llx",
584			(unsigned long long)vol->serial_no);
585	ntfs_debug("Done.");
586	return 0;
587}
588
589/**
590 * ntfs_setup_allocators - initialize the cluster and mft allocators
591 * @vol:	volume structure for which to setup the allocators
592 *
593 * Setup the cluster (lcn) and mft allocators to the starting values.
594 */
595static void ntfs_setup_allocators(ntfs_volume *vol)
596{
597	LCN mft_zone_size, mft_lcn;
598
599	ntfs_debug("Entering.");
600	ntfs_debug("vol->mft_zone_multiplier = 0x%x",
601			vol->mft_zone_multiplier);
602	/* Determine the size of the MFT zone. */
603	mft_zone_size = vol->nr_clusters;
604	switch (vol->mft_zone_multiplier) {  /* % of volume size in clusters */
605	case 4:
606		mft_zone_size >>= 1;			/* 50%   */
607		break;
608	case 3:
609		mft_zone_size = (mft_zone_size +
610				(mft_zone_size >> 1)) >> 2;	/* 37.5% */
611		break;
612	case 2:
613		mft_zone_size >>= 2;			/* 25%   */
614		break;
615	/* case 1: */
616	default:
617		mft_zone_size >>= 3;			/* 12.5% */
618		break;
619	}
620	/* Setup the mft zone. */
621	vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn;
622	ntfs_debug("vol->mft_zone_pos = 0x%llx",
623			(unsigned long long)vol->mft_zone_pos);
624	/*
625	 * Calculate the mft_lcn for an unmodified ntfs volume (see mkntfs
626	 * source) and if the actual mft_lcn is in the expected place or even
627	 * further to the front of the volume, extend the mft_zone to cover the
628	 * beginning of the volume as well.  This is in order to protect the
629	 * area reserved for the mft bitmap as well within the mft_zone itself.
630	 * On non-standard volumes we do not protect it as the overhead would
631	 * be higher than the speed increase we would get by doing it.
632	 */
633	mft_lcn = (8192 + 2 * vol->cluster_size - 1) / vol->cluster_size;
634	if (mft_lcn * vol->cluster_size < 16 * 1024)
635		mft_lcn = (16 * 1024 + vol->cluster_size - 1) /
636				vol->cluster_size;
637	if (vol->mft_zone_start <= mft_lcn)
638		vol->mft_zone_start = 0;
639	ntfs_debug("vol->mft_zone_start = 0x%llx",
640			(unsigned long long)vol->mft_zone_start);
641	/*
642	 * Need to cap the mft zone on non-standard volumes so that it does
643	 * not point outside the boundaries of the volume.  We do this by
644	 * halving the zone size until we are inside the volume.
645	 */
646	vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
647	while (vol->mft_zone_end >= vol->nr_clusters) {
648		mft_zone_size >>= 1;
649		vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
650	}
651	ntfs_debug("vol->mft_zone_end = 0x%llx",
652			(unsigned long long)vol->mft_zone_end);
653	/*
654	 * Set the current position within each data zone to the start of the
655	 * respective zone.
656	 */
657	vol->data1_zone_pos = vol->mft_zone_end;
658	ntfs_debug("vol->data1_zone_pos = 0x%llx",
659			(unsigned long long)vol->data1_zone_pos);
660	vol->data2_zone_pos = 0;
661	ntfs_debug("vol->data2_zone_pos = 0x%llx",
662			(unsigned long long)vol->data2_zone_pos);
663
664	/* Set the mft data allocation position to mft record 24. */
665	vol->mft_data_pos = 24;
666	ntfs_debug("vol->mft_data_pos = 0x%llx",
667			(unsigned long long)vol->mft_data_pos);
668	ntfs_debug("Done.");
669}
670
671/**
672 * ntfs_mft_inode_get - obtain the ntfs inode for $MFT at mount time
673 * @vol:	ntfs volume being mounted
674 *
675 * Obtain the ntfs inode corresponding to the system file $MFT (unnamed $DATA
676 * attribute) in the process bootstrapping the volume so that further inodes
677 * can be obtained and (extent) mft records can be mapped.
678 *
679 * A new ntfs inode is allocated and initialized, the base mft record of $MFT
680 * is read by hand from the device and this is then used to bootstrap the
681 * volume so that mft record mapping/unmapping is working and therefore inodes
682 * can be read in general.  To do so a new vnode is created and attached to the
683 * new ntfs inode and the runlist for the $DATA attribute is fully mapped.
684 *
685 * Return 0 on success and errno on error.
686 */
687static errno_t ntfs_mft_inode_get(ntfs_volume *vol)
688{
689	daddr64_t block;
690	VCN next_vcn, last_vcn, highest_vcn;
691	ntfs_inode *ni;
692	MFT_RECORD *m = NULL;
693	vnode_t dev_vn = vol->dev_vn;
694	buf_t buf;
695	ntfs_attr_search_ctx *ctx = NULL;
696	ATTR_RECORD *a;
697	STANDARD_INFORMATION *si;
698	errno_t err;
699	const int block_size = vol->sector_size;
700	unsigned nr_blocks, u;
701	ntfs_attr na;
702	char *es = "  $MFT is corrupt.  Run chkdsk.";
703	const u8 block_size_shift = vol->sector_size_shift;
704
705	ntfs_debug("Entering.");
706	na = (ntfs_attr) {
707		.mft_no = FILE_MFT,
708		.type = AT_UNUSED,
709		.raw = FALSE,
710	};
711	ni = ntfs_inode_hash_get(vol, &na);
712	if (!ni) {
713		ntfs_error(vol->mp, "Failed to allocate new inode.");
714		return ENOMEM;
715	}
716	if (!NInoAlloc(ni)) {
717		ntfs_error(vol->mp, "Failed (found stale inode in cache).");
718		err = ESTALE;
719		goto err;
720	}
721	/*
722	 * We allocated a new inode, now set it up as the unnamed data
723	 * attribute.  It is special as it is mst protected.
724	 */
725	NInoSetNonResident(ni);
726	NInoSetMstProtected(ni);
727	NInoSetSparseDisabled(ni);
728	ni->type = AT_DATA;
729	ni->block_size = vol->mft_record_size;
730	ni->block_size_shift = vol->mft_record_size_shift;
731	/* No-one is allowed to access $MFT directly. */
732	ni->uid = 0;
733	ni->gid = 0;
734	ni->mode = S_IFREG;
735	/* Allocate enough memory to read the first mft record. */
736	m = OSMalloc(vol->mft_record_size, ntfs_malloc_tag);
737	if (!m) {
738		ntfs_error(vol->mp, "Failed to allocate buffer for $MFT "
739				"record 0.");
740		err = ENOMEM;
741		goto err;
742	}
743	/* Determine the first physical block of the $MFT/$DATA attribute. */
744	block = vol->mft_lcn << (vol->cluster_size_shift - block_size_shift);
745	nr_blocks = vol->mft_record_size >> block_size_shift;
746	if (!nr_blocks)
747		nr_blocks = 1;
748	/* Load $MFT/$DATA's first mft record, one block at a time. */
749	for (u = 0; u < nr_blocks; u++, block++) {
750		u8 *src;
751
752		err = buf_meta_bread(dev_vn, block, block_size, NOCRED, &buf);
753		/*
754		 * We set the B_NOCACHE flag on the buffer(s), thus effectively
755		 * invalidating them when we release them.  This is needed
756		 * because the buffer(s) will get read later using the $MFT
757		 * base vnode.
758		 */
759		buf_setflags(buf, B_NOCACHE);
760		if (err) {
761			ntfs_error(vol->mp, "Failed to read $MFT record 0 "
762					"(block %u, physical block 0x%llx, "
763					"physical block size %d).", u,
764					(unsigned long long)block, block_size);
765			buf_brelse(buf);
766			goto err;
767		}
768		err = buf_map(buf, (caddr_t*)&src);
769		if (err) {
770			ntfs_error(vol->mp, "Failed to map buffer of mft "
771					"record 0 (block %u, physical block "
772					"0x%llx, physical block size %d).", u,
773					(unsigned long long)block, block_size);
774			buf_brelse(buf);
775			goto err;
776		}
777		memcpy((u8*)m + (u << block_size_shift), src, block_size);
778		err = buf_unmap(buf);
779		if (err)
780			ntfs_error(vol->mp, "Failed to unmap buffer of mft "
781					"record 0 (error %d).", err);
782		buf_brelse(buf);
783	}
784	/* Apply the mst fixups. */
785	err = ntfs_mst_fixup_post_read((NTFS_RECORD*)m, vol->mft_record_size);
786	if (err) {
787		/* TODO: Try to use the $MFTMirr now. */
788		ntfs_error(vol->mp, "MST fixup failed.%s", es);
789		goto io_err;
790	}
791	/*
792	 * Need this to be able to sanity check attribute list references to
793	 * $MFT.
794	 */
795	ni->seq_no = le16_to_cpu(m->sequence_number);
796	/* Get the number of hard links, too. */
797	ni->link_count = le16_to_cpu(m->link_count);
798	ctx = ntfs_attr_search_ctx_get(ni, m);
799	if (!ctx) {
800		err = ENOMEM;
801		goto err;
802	}
803	/*
804	 * Find the standard information attribute in the mft record.  At this
805	 * stage we have not setup the attribute list stuff yet, so this could
806	 * in fact fail if the standard information is in an extent record, but
807	 * this is not allowed hence not a problem.
808	 */
809	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, AT_UNNAMED, 0, 0, NULL,
810			0, ctx);
811	a = ctx->a;
812	if (err || a->non_resident || a->flags) {
813		if (err) {
814			if (err == ENOENT) {
815				/*
816				 * TODO: We should be performing a hot fix here
817				 * (if the recover mount option is set) by
818				 * creating a new attribute.
819				 */
820				ntfs_error(vol->mp, "Standard information "
821						"attribute is missing.");
822			} else
823				ntfs_error(vol->mp, "Failed to lookup "
824						"standard information "
825						"attribute.");
826		} else {
827info_err:
828			ntfs_error(vol->mp, "Standard information attribute "
829					"is corrupt.");
830			err = EIO;
831		}
832		goto err;
833	}
834	si = (STANDARD_INFORMATION*)((u8*)a +
835			le16_to_cpu(a->value_offset));
836	/* Some bounds checks. */
837	if ((u8*)si < (u8*)a || (u8*)si + le32_to_cpu(a->value_length) >
838			(u8*)a + le32_to_cpu(a->length) ||
839			(u8*)a + le32_to_cpu(a->length) > (u8*)ctx->m +
840			vol->mft_record_size)
841		goto info_err;
842	/*
843	 * Cache the create, the last data and mft modified, and the last
844	 * access times in the ntfs inode.
845	 */
846	ni->creation_time = ntfs2utc(si->creation_time);
847	ni->last_data_change_time = ntfs2utc(si->last_data_change_time);
848	ni->last_mft_change_time = ntfs2utc(si->last_mft_change_time);
849	ni->last_access_time = ntfs2utc(si->last_access_time);
850	/* Find the attribute list attribute if present. */
851	ntfs_attr_search_ctx_reinit(ctx);
852	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, AT_UNNAMED, 0, 0, NULL, 0,
853			ctx);
854	if (err) {
855		if (err != ENOENT) {
856			ntfs_error(vol->mp, "Failed to lookup attribute list "
857					"attribute.%s", es);
858			goto err;
859		}
860		ntfs_debug("$MFT does not have an attribute list attribute.");
861	} else /* if (!err) */ {
862		ATTR_LIST_ENTRY *al_entry, *next_al_entry;
863		u8 *al_end;
864
865		ntfs_debug("Attribute list attribute found in $MFT.");
866		NInoSetAttrList(ni);
867		a = ctx->a;
868		if (a->flags & ATTR_COMPRESSION_MASK) {
869			ntfs_error(vol->mp, "Attribute list attribute is "
870					"compressed.  Not allowed.%s", es);
871			goto io_err;
872		}
873		if (a->flags & (ATTR_IS_ENCRYPTED | ATTR_IS_SPARSE)) {
874			if (a->non_resident) {
875				ntfs_error(vol->mp, "Non-resident attribute "
876						"list attribute is encrypted/"
877						"sparse.  Not allowed.%s", es);
878				goto io_err;
879			}
880			ntfs_warning(vol->mp, "Resident attribute list "
881					"attribute is marked encrypted/sparse "
882					"which is not true.  However, Windows "
883					"allows this and chkdsk does not "
884					"detect or correct it so we will just "
885					"ignore the invalid flags and pretend "
886					"they are not set.");
887		}
888		/* Now allocate memory for the attribute list. */
889		ni->attr_list_size = (u32)ntfs_attr_size(a);
890		ni->attr_list_alloc = (ni->attr_list_size + NTFS_ALLOC_BLOCK -
891				1) & ~(NTFS_ALLOC_BLOCK - 1);
892		ni->attr_list = OSMalloc(ni->attr_list_alloc, ntfs_malloc_tag);
893		if (!ni->attr_list) {
894			ni->attr_list_alloc = 0;
895			ntfs_error(vol->mp, "Not enough memory to allocate "
896					"buffer for attribute list.");
897			err = ENOMEM;
898			goto err;
899		}
900		if (a->non_resident) {
901			NInoSetAttrListNonResident(ni);
902			if (a->lowest_vcn) {
903				ntfs_error(vol->mp, "Attribute list has non-"
904						"zero lowest_vcn.%s", es);
905				goto io_err;
906			}
907			/* Setup the runlist. */
908			err = ntfs_mapping_pairs_decompress(vol, a,
909					&ni->attr_list_rl);
910			if (err) {
911				ntfs_error(vol->mp, "Mapping pairs "
912						"decompression failed with "
913						"error code %d.%s", err, es);
914				goto err;
915			}
916			/* Now read in the attribute list. */
917			err = ntfs_rl_read(vol, &ni->attr_list_rl,
918					ni->attr_list, (s64)ni->attr_list_size,
919					sle64_to_cpu(a->initialized_size));
920			if (err) {
921				ntfs_error(vol->mp, "Failed to load attribute "
922						"list attribute with error "
923						"code %d.", err);
924				goto err;
925			}
926		} else /* if (!a->non_resident) */ {
927			u8 *al = (u8*)a + le16_to_cpu(a->value_offset);
928			u8 *a_end = (u8*)a + le32_to_cpu(a->length);
929			if (al < (u8*)a || al + le32_to_cpu(a->value_length) >
930					a_end || (u8*)a_end > (u8*)ctx->m +
931					vol->mft_record_size) {
932				ntfs_error(vol->mp, "Corrupt attribute list "
933						"attribute.%s", es);
934				goto io_err;
935			}
936			/* Now copy the attribute list. */
937			memcpy(ni->attr_list, (u8*)a +
938					le16_to_cpu(a->value_offset),
939					ni->attr_list_size);
940		}
941		/* The attribute list is now setup in memory. */
942		/*
943		 * FIXME: I do not know if this case is actually possible.
944		 * According to logic it is not possible but I have seen too
945		 * many weird things in MS software to rely on logic.  Thus we
946		 * perform a manual search and make sure the first $MFT/$DATA
947		 * extent is in the base inode.  If it is not we abort with an
948		 * error and if we ever see a report of this error we will need
949		 * to do some magic in order to have the necessary mft record
950		 * loaded and in the right place.  But hopefully logic will
951		 * prevail and this never happens...
952		 */
953		al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
954		al_end = (u8*)al_entry + ni->attr_list_size;
955		for (;; al_entry = next_al_entry) {
956			/* Out of bounds check. */
957			if ((u8*)al_entry < ni->attr_list ||
958					(u8*)al_entry > al_end)
959				goto em_err;
960			/* Catch the end of the attribute list. */
961			if ((u8*)al_entry == al_end)
962				goto em_err;
963			if (!al_entry->length)
964				goto em_err;
965			if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
966					le16_to_cpu(al_entry->length) > al_end)
967				goto em_err;
968			next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
969					le16_to_cpu(al_entry->length));
970			if (le32_to_cpu(al_entry->type) >
971					const_le32_to_cpu(AT_DATA))
972				goto em_err;
973			if (al_entry->type != AT_DATA)
974				continue;
975			/* We want an unnamed attribute. */
976			if (al_entry->name_length)
977				goto em_err;
978			/* Want the first entry, i.e. lowest_vcn == 0. */
979			if (al_entry->lowest_vcn)
980				goto em_err;
981			/* First entry has to be in the base mft record. */
982			if (MREF_LE(al_entry->mft_reference) != ni->mft_no) {
983				/* MFT references do not match, logic fails. */
984				ntfs_error(vol->mp, "BUG: The first $DATA "
985						"extent of $MFT is not in the "
986						"base mft record.  Please "
987						"report you saw this message "
988						"to %s.", ntfs_dev_email);
989				goto io_err;
990			}
991			/* Sequence numbers must match. */
992			if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no)
993				goto em_err;
994			/* Done: Found first extent of $DATA as expected. */
995			break;
996		}
997	}
998	ntfs_attr_search_ctx_reinit(ctx);
999	/* Now load all attribute extents. */
1000	a = NULL;
1001	next_vcn = last_vcn = highest_vcn = 0;
1002	while (!(err = ntfs_attr_lookup(AT_DATA, AT_UNNAMED, 0, next_vcn, NULL,
1003			0, ctx))) {
1004		/* Cache the current attribute. */
1005		a = ctx->a;
1006		/* $MFT must be non-resident. */
1007		if (!a->non_resident) {
1008			ntfs_error(vol->mp, "$MFT must be non-resident but a "
1009					"resident extent was found.%s", es);
1010			goto io_err;
1011		}
1012		/* $MFT must be uncompressed and unencrypted. */
1013		if (a->flags & ATTR_COMPRESSION_MASK ||
1014				a->flags & ATTR_IS_ENCRYPTED ||
1015				a->flags & ATTR_IS_SPARSE) {
1016			ntfs_error(vol->mp, "$MFT must be uncompressed, "
1017					"non-sparse, and unencrypted but a "
1018					"compressed/sparse/encrypted extent "
1019					"was found.%s", es);
1020			goto io_err;
1021		}
1022		/*
1023		 * Decompress the mapping pairs array of this extent and merge
1024		 * the result into the existing runlist.  No need for locking
1025		 * as we have exclusive access to the inode at this time and we
1026		 * are a mount in progress task, too.
1027		 */
1028		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
1029		if (err) {
1030			ntfs_error(vol->mp, "Mapping pairs decompression "
1031					"failed with error code %d.%s", err,
1032					es);
1033			goto err;
1034		}
1035		/* Get the lowest vcn for the next extent. */
1036		highest_vcn = sle64_to_cpu(a->highest_vcn);
1037		/*
1038		 * If we are in the first extent, bootstrap the volume so we
1039		 * can load other inodes and map (extent) mft records.
1040		 */
1041		if (!next_vcn) {
1042			if (a->lowest_vcn) {
1043				ntfs_error(vol->mp, "First extent of $DATA "
1044						"attribute has non zero "
1045						"lowest_vcn.%s", es);
1046				goto io_err;
1047			}
1048			/* Get the last vcn in the $DATA attribute. */
1049			last_vcn = sle64_to_cpu(a->allocated_size)
1050					>> vol->cluster_size_shift;
1051			/* Fill in the sizes. */
1052			ni->allocated_size = sle64_to_cpu(a->allocated_size);
1053			ni->data_size = sle64_to_cpu(a->data_size);
1054			ni->initialized_size = sle64_to_cpu(
1055					a->initialized_size);
1056			/*
1057			 * Verify the sizes are sane.  In particular both the
1058			 * data size and the initialized size must be multiples
1059			 * of the mft record size or we will panic() when
1060			 * reading the boundary in ntfs_cluster_iodone().
1061			 *
1062			 * Also the allocated size must be a multiple of the
1063			 * volume cluster size.
1064			 */
1065			if (ni->allocated_size & vol->cluster_size_mask ||
1066					ni->data_size &
1067					vol->mft_record_size_mask ||
1068					ni->initialized_size &
1069					vol->mft_record_size_mask) {
1070				ntfs_error(vol->mp, "$DATA attribute contains "
1071						"invalid size.%s", es);
1072				goto io_err;
1073			}
1074			/*
1075			 * Verify the number of mft records does not exceed
1076			 * 2^32 - 1.
1077			 */
1078			if (ni->data_size >> vol->mft_record_size_shift >=
1079					1LL << 32) {
1080				ntfs_error(vol->mp, "$MFT is too big.  "
1081						"Aborting.");
1082				goto io_err;
1083			}
1084			/* We have the size now so we can add the vnode. */
1085			err = ntfs_inode_add_vnode(ni, TRUE, NULL, NULL);
1086			if (err) {
1087				ntfs_error(vol->mp, "Failed to create a "
1088						"system vnode for $MFT (error "
1089						"%d).", err);
1090				goto err;
1091			}
1092			/*
1093			 * We will hold on to the $MFT inode for the duration
1094			 * of the mount thus we need to take a reference on the
1095			 * vnode.  Note we need to attach the inode to the
1096			 * volume here so that ntfs_read_inode() can call
1097			 * ntfs_attr_lookup() which needs to be able to map
1098			 * extent mft records which requires vol->mft_ni to be
1099			 * setup.
1100			 */
1101			err = vnode_ref(ni->vn);
1102			if (err)
1103				ntfs_error(vol->mp, "vnode_ref() failed!");
1104			OSIncrementAtomic(&ni->nr_refs);
1105			vol->mft_ni = ni;
1106			/* The $MFT inode is fully setup now, so unlock it. */
1107			ntfs_inode_unlock_alloc(ni);
1108			/*
1109			 * We can release the iocount reference now.  It will
1110			 * be taken as and when required in the low level code.
1111			 * We can ignore the return value as it always is zero.
1112			 */
1113			(void)vnode_put(ni->vn);
1114			/* If $MFT/$DATA has only one extent, we are done. */
1115			if (highest_vcn == last_vcn - 1)
1116				break;
1117		}
1118		next_vcn = highest_vcn + 1;
1119		if (next_vcn <= 0) {
1120			ntfs_error(vol->mp, "Invalid highest vcn in attribute "
1121					"extent.%s", es);
1122			goto io_err;
1123		}
1124		/* Avoid endless loops due to corruption. */
1125		if (next_vcn < sle64_to_cpu(a->lowest_vcn)) {
1126			ntfs_error(vol->mp, "Corrupt attribute extent would "
1127					"cause endless loop, aborting.%s", es);
1128			goto io_err;
1129		}
1130	}
1131	if (err && err != ENOENT) {
1132		ntfs_error(vol->mp, "Failed to lookup $MFT/$DATA attribute "
1133				"extent.%s", es);
1134		goto err;
1135	}
1136	if (!a) {
1137		ntfs_error(vol->mp, "$MFT/$DATA attribute not found.%s", es);
1138		err = ENOENT;
1139		goto err;
1140	}
1141	if (highest_vcn != last_vcn - 1) {
1142		ntfs_error(vol->mp, "Failed to load the complete runlist for "
1143				"$MFT/$DATA.  Driver bug or corrupt $MFT.  "
1144				"Run chkdsk.");
1145		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
1146				(unsigned long long)highest_vcn,
1147				(unsigned long long)(last_vcn - 1));
1148		goto io_err;
1149	}
1150	ntfs_attr_search_ctx_put(ctx);
1151	OSFree(m, vol->mft_record_size, ntfs_malloc_tag);
1152	ntfs_debug("Done.");
1153	return 0;
1154em_err:
1155	ntfs_error(vol->mp, "Could not find first extent of $DATA attribute "
1156			"in attribute list.%s", es);
1157io_err:
1158	err = EIO;
1159err:
1160	if (ctx)
1161		ntfs_attr_search_ctx_put(ctx);
1162	if (m)
1163		OSFree(m, vol->mft_record_size, ntfs_malloc_tag);
1164	/* vol->mft_ni will be cleaned up by the caller. */
1165	if (!vol->mft_ni)
1166		ntfs_inode_reclaim(ni);
1167	return err;
1168}
1169
1170/**
1171 * ntfs_inode_attach - load and attach an inode to an ntfs structure
1172 * @vol:	ntfs volume to which the inode to load belongs
1173 * @mft_no:	mft record number / inode number to obtain
1174 * @ni:		pointer in which to return the obtained ntfs inode
1175 * @parent_vn:	vnode of directory containing the inode to return or NULL
1176 *
1177 * Load the ntfs inode @mft_no from the mounted ntfs volume @vol, attach it by
1178 * getting a reference on it and return the ntfs inode in @ni.
1179 *
1180 * The created vnode is marked as a system vnoded so that the volume can be
1181 * unmounted.  (VSYSTEM vnodes are skipped during vflush()).)
1182 *
1183 * If @parent_vn is not NULL, it is set up as the parent directory vnode of the
1184 * vnode of the obtained inode.
1185 *
1186 * Return 0 on success and errno on error.  On error *@ni is set to NULL.
1187 */
1188static errno_t ntfs_inode_attach(ntfs_volume *vol, const ino64_t mft_no,
1189		ntfs_inode **ni, vnode_t parent_vn)
1190{
1191	vnode_t vn;
1192	errno_t err;
1193
1194	ntfs_debug("Entering.");
1195	err = ntfs_inode_get(vol, mft_no, TRUE, LCK_RW_TYPE_SHARED, ni,
1196			parent_vn, NULL);
1197	if (err) {
1198		ntfs_error(vol->mp, "Failed to load inode 0x%llx.",
1199				(unsigned long long)mft_no);
1200		*ni = NULL;
1201		return err;
1202	}
1203	/*
1204	 * Take an internal reference on the parent inode to balance the
1205	 * reference taken on the parent vnode in vnode_create().
1206	 */
1207	if (parent_vn)
1208		OSIncrementAtomic(&NTFS_I(parent_vn)->nr_refs);
1209	vn = (*ni)->vn;
1210	err = vnode_ref(vn);
1211	if (err)
1212		ntfs_error(vol->mp, "vnode_ref() failed!");
1213	OSIncrementAtomic(&(*ni)->nr_refs);
1214	lck_rw_unlock_shared(&(*ni)->lock);
1215	(void)vnode_put(vn);
1216	ntfs_debug("Done.");
1217	return 0;
1218}
1219
1220/**
1221 * ntfs_attr_inode_attach - load and attach an attribute inode to a structure
1222 * @base_ni:	ntfs base inode containing the attribute
1223 * @type:	attribute type
1224 * @name:	Unicode name of the attribute (NULL if unnamed)
1225 * @name_len:	length of @name in Unicode characters (0 if unnamed)
1226 * @ni:		pointer in which to return the obtained ntfs inode
1227 *
1228 * Load the attribute inode described by @type, @name, and @name_len belonging
1229 * to the base inode @base_ni, attach it by getting a reference on it and
1230 * return the ntfs inode in @ni.
1231 *
1232 * The created vnode is marked as a system vnode so that the volume can be
1233 * unmounted.  (VSYSTEM vnodes are skipped during vflush()).)
1234 *
1235 * The vnode of the base inode @base_ni is set up as the parent vnode of the
1236 * vnode of the obtained inode.
1237 *
1238 * Return 0 on success and errno on error.  On error *@ni is set to NULL.
1239 */
1240static errno_t ntfs_attr_inode_attach(ntfs_inode *base_ni,
1241		const ATTR_TYPE type, ntfschar *name, const u32 name_len,
1242		ntfs_inode **ni)
1243{
1244	vnode_t vn;
1245	errno_t err;
1246
1247	ntfs_debug("Entering.");
1248	err = ntfs_attr_inode_get(base_ni, type, name, name_len, TRUE,
1249			LCK_RW_TYPE_SHARED, ni);
1250	if (err) {
1251		ntfs_error(base_ni->vol->mp, "Failed to load attribute inode "
1252				"0x%llx, attribute type 0x%x, name length "
1253				"0x%x.", (unsigned long long)base_ni->mft_no,
1254				(unsigned)le32_to_cpu(type),
1255				(unsigned)name_len);
1256		*ni = NULL;
1257		return err;
1258	}
1259	/*
1260	 * Take an internal reference on the base inode @base_ni (which is also
1261	 * the parent inode) to balance the reference taken on the parent vnode
1262	 * in vnode_create().
1263	 */
1264	OSIncrementAtomic(&base_ni->nr_refs);
1265	vn = (*ni)->vn;
1266	err = vnode_ref(vn);
1267	if (err)
1268		ntfs_error(base_ni->vol->mp, "vnode_ref() failed!");
1269	OSIncrementAtomic(&(*ni)->nr_refs);
1270	lck_rw_unlock_shared(&(*ni)->lock);
1271	(void)vnode_put(vn);
1272	ntfs_debug("Done.");
1273	return 0;
1274}
1275
1276/**
1277 * ntfs_index_inode_attach - load and attach an attribute inode to a structure
1278 * @base_ni:	ntfs base inode containing the index
1279 * @name:	Unicode name of the index
1280 * @name_len:	length of @name in Unicode characters
1281 * @ni:		pointer in which to return the obtained ntfs inode
1282 *
1283 * Load the index inode described by @name and @name_len belonging to the base
1284 * inode @base_ni, attach it by getting a reference on it and return the ntfs
1285 * inode in @ni.
1286 *
1287 * The created vnode is marked as a system vnode so that the volume can be
1288 * unmounted.  (VSYSTEM vnodes are skipped during vflush()).)
1289 *
1290 * The vnode of the base inode @base_ni is set up as the parent vnode of the
1291 * vnode of the obtained inode.
1292 *
1293 * Return 0 on success and errno on error.  On error *@ni is set to NULL.
1294 */
1295static errno_t ntfs_index_inode_attach(ntfs_inode *base_ni, ntfschar *name,
1296		const u32 name_len, ntfs_inode **ni)
1297{
1298	vnode_t vn;
1299	errno_t err;
1300
1301	ntfs_debug("Entering.");
1302	err = ntfs_index_inode_get(base_ni, name, name_len, TRUE, ni);
1303	if (err) {
1304		ntfs_error(base_ni->vol->mp, "Failed to load index inode "
1305				"0x%llx, name length 0x%x.",
1306				(unsigned long long)base_ni->mft_no,
1307				(unsigned)name_len);
1308		*ni = NULL;
1309		return err;
1310	}
1311	/*
1312	 * Take an internal reference on the base inode @base_ni (which is also
1313	 * the parent inode) to balance the reference taken on the parent vnode
1314	 * in vnode_create().
1315	 */
1316	OSIncrementAtomic(&base_ni->nr_refs);
1317	vn = (*ni)->vn;
1318	err = vnode_ref(vn);
1319	if (err)
1320		ntfs_error(base_ni->vol->mp, "vnode_ref() failed!");
1321	OSIncrementAtomic(&(*ni)->nr_refs);
1322	(void)vnode_put(vn);
1323	ntfs_debug("Done.");
1324	return 0;
1325}
1326
1327/**
1328 * ntfs_mft_mirror_load - load and setup the mft mirror inode
1329 * @vol:	ntfs volume describing device whose mft mirror to load
1330 *
1331 * Return 0 on success and errno on error.
1332 */
1333static errno_t ntfs_mft_mirror_load(ntfs_volume *vol)
1334{
1335	ntfs_inode *ni;
1336	vnode_t vn;
1337	errno_t err;
1338
1339	ntfs_debug("Entering.");
1340	err = ntfs_inode_get(vol, FILE_MFTMirr, TRUE, LCK_RW_TYPE_SHARED, &ni,
1341			vol->root_ni->vn, NULL);
1342	if (err) {
1343		ntfs_error(vol->mp, "Failed to load inode 0x%llx.",
1344				(unsigned long long)FILE_MFTMirr);
1345		return err;
1346	}
1347	vn = ni->vn;
1348	/*
1349	 * Re-initialize some specifics about the inode of $MFTMirr as
1350	 * ntfs_inode_get() will have set up the default ones.
1351	 */
1352	/* Set uid and gid to root. */
1353	ni->uid = 0;
1354	ni->gid = 0;
1355	/* Regular file.  No access for anyone. */
1356	ni->mode = S_IFREG;
1357	/*
1358	 * The $MFTMirr, like the $MFT is multi sector transfer protected but
1359	 * we do not mark it as such as we want to have the buffers directly
1360	 * copied from the mft thus we do not want to mess about with MST
1361	 * fixups on the mft mirror.
1362	 */
1363	NInoSetSparseDisabled(ni);
1364	ni->block_size = vol->mft_record_size;
1365	ni->block_size_shift = vol->mft_record_size_shift;
1366	/*
1367	 * Verify the sizes are sane.  In particular both the data size and the
1368	 * initialized size must be multiples of the mft record size or we will
1369	 * panic() when reading the boundary in ntfs_cluster_iodone().
1370	 *
1371	 * Also the allocated size must be a multiple of the volume cluster
1372	 * size.
1373	 */
1374	if (ni->allocated_size & vol->cluster_size_mask ||
1375			ni->data_size & vol->mft_record_size_mask ||
1376			ni->initialized_size & vol->mft_record_size_mask) {
1377		ntfs_error(vol->mp, "$DATA attribute contains invalid size.  "
1378				"$MFTMirr is corrupt.  Run chkdsk.");
1379		(void)vnode_recycle(vn);
1380		(void)vnode_put(vn);
1381		return EIO;
1382	}
1383	OSIncrementAtomic(&vol->root_ni->nr_refs);
1384	err = vnode_ref(vn);
1385	if (err)
1386		ntfs_error(vol->mp, "vnode_ref() failed!");
1387	OSIncrementAtomic(&ni->nr_refs);
1388	lck_rw_unlock_shared(&ni->lock);
1389	(void)vnode_put(vn);
1390	vol->mftmirr_ni = ni;
1391	ntfs_debug("Done.");
1392	return 0;
1393}
1394
1395/**
1396 * ntfs_mft_mirror_check - compare contents of the mft mirror with the mft
1397 * @vol:	ntfs volume describing device whose mft mirror to check
1398 *
1399 * Return 0 on success and errno on error.
1400 *
1401 * Note, this function also results in the mft mirror runlist being completely
1402 * mapped into memory.  The mft mirror write code requires this and will
1403 * panic() should it find an unmapped runlist element.
1404 */
1405static errno_t ntfs_mft_mirror_check(ntfs_volume *vol)
1406{
1407	ntfs_inode *ni;
1408	buf_t buf;
1409	u8 *mirr_start;
1410	MFT_RECORD *mirr, *m;
1411	unsigned nr_mirr_recs, alloc_size, rec_size, i;
1412	errno_t err, err2;
1413
1414	ntfs_debug("Entering.");
1415	if (!vol->mftmirr_size)
1416		panic("%s(): !vol->mftmirr_size\n", __FUNCTION__);
1417	nr_mirr_recs = vol->mftmirr_size;
1418	if (!nr_mirr_recs)
1419		panic("%s(): !nr_mirr_recs\n", __FUNCTION__);
1420	rec_size = vol->mft_record_size;
1421	/* Allocate a buffer and read all mft mirror records into it. */
1422	alloc_size = nr_mirr_recs << vol->mft_record_size_shift;
1423	mirr_start = OSMalloc(alloc_size, ntfs_malloc_tag);
1424	if (!mirr_start) {
1425		ntfs_error(vol->mp, "Failed to allocate temporary mft mirror "
1426				"buffer.");
1427		return ENOMEM;
1428	}
1429	mirr = (MFT_RECORD*)mirr_start;
1430	ni = vol->mftmirr_ni;
1431	err = vnode_get(ni->vn);
1432	if (err) {
1433		ntfs_error(vol->mp, "Failed to get vnode for $MFTMirr.");
1434		goto err;
1435	}
1436	lck_rw_lock_shared(&ni->lock);
1437	for (i = 0; i < nr_mirr_recs; i++) {
1438		/* Get the next $MFTMirr record. */
1439		err = buf_meta_bread(ni->vn, i, rec_size, NOCRED, &buf);
1440		if (err) {
1441			ntfs_error(vol->mp, "Failed to read $MFTMirr record "
1442					"%d (error %d).", i, err);
1443			goto brelse;
1444		}
1445		err = buf_map(buf, (caddr_t*)&m);
1446		if (err) {
1447			ntfs_error(vol->mp, "Failed to map buffer of $MFTMirr "
1448					"record %d (error %d).", i, err);
1449			goto brelse;
1450		}
1451		/*
1452		 * Copy the mirror record, drop the buffer, and remove the MST
1453		 * fixups.
1454		 */
1455		memcpy(mirr, m, rec_size);
1456		err = buf_unmap(buf);
1457		if (err) {
1458			ntfs_error(vol->mp, "Failed to unmap buffer of "
1459					"$MFTMirr record %d (error %d).", i,
1460					err);
1461			goto brelse;
1462		}
1463		buf_brelse(buf);
1464		err = ntfs_mst_fixup_post_read((NTFS_RECORD*)mirr, rec_size);
1465		/* Do not check the mirror record if it is not in use. */
1466		if (mirr->flags & MFT_RECORD_IN_USE) {
1467			if (err || ntfs_is_baad_record(mirr->magic)) {
1468				ntfs_error(vol->mp, "Incomplete multi sector "
1469						"transfer detected in mft "
1470						"mirror record %d.", i);
1471				if (!err)
1472					err = EIO;
1473				goto unlock;
1474			}
1475		}
1476		mirr = (MFT_RECORD*)((u8*)mirr + rec_size);
1477	}
1478	/*
1479	 * Because we have just read at least the beginning of the mft mirror,
1480	 * we know we have mapped at least the beginning of the runlist for it.
1481	 */
1482	lck_rw_lock_shared(&ni->rl.lock);
1483	/*
1484	 * The runlist for the mft mirror must contain at least @nr_mirr_recs
1485	 * mft records and they must be in the first run, i.e. consecutive on
1486	 * disk.
1487	 */
1488	if (ni->rl.rl->lcn != vol->mftmirr_lcn ||
1489			ni->rl.rl->length < (((s64)vol->mftmirr_size <<
1490			vol->mft_record_size_shift) +
1491			vol->cluster_size_mask) >> vol->cluster_size_shift) {
1492		ntfs_error(vol->mp, "$MFTMirr location mismatch.  Run "
1493				"chkdsk.");
1494		err = EIO;
1495	} else
1496		ntfs_debug("Done.");
1497	lck_rw_unlock_shared(&ni->rl.lock);
1498	lck_rw_unlock_shared(&ni->lock);
1499	(void)vnode_put(ni->vn);
1500	/*
1501	 * Now read the $MFT records one at a time and compare each against the
1502	 * already read $MFTMirr records.
1503	 */
1504	ni = vol->mft_ni;
1505	err = vnode_get(ni->vn);
1506	if (err) {
1507		ntfs_error(vol->mp, "Failed to get vnode for $MFT.");
1508		goto err;
1509	}
1510	lck_rw_lock_shared(&ni->lock);
1511	mirr = (MFT_RECORD*)mirr_start;
1512	for (i = 0; i < nr_mirr_recs; i++) {
1513		unsigned bytes;
1514
1515		/* Get the current $MFT record. */
1516		err = buf_meta_bread(ni->vn, i, rec_size, NOCRED, &buf);
1517		if (err) {
1518			ntfs_error(vol->mp, "Failed to read $MFT record %d "
1519					"(error %d).", i, err);
1520			goto brelse;
1521		}
1522		err = buf_map(buf, (caddr_t*)&m);
1523		if (err) {
1524			ntfs_error(vol->mp, "Failed to map buffer of $MFT "
1525					"record %d (error %d).", i, err);
1526			goto brelse;
1527		}
1528		/* Do not check the mft record if it is not in use. */
1529		if (m->flags & MFT_RECORD_IN_USE) {
1530			/* Make sure the record is ok. */
1531			if (ntfs_is_baad_record(m->magic)) {
1532				ntfs_error(vol->mp, "Incomplete multi sector "
1533						"transfer detected in mft "
1534						"record %d.", i);
1535				err = EIO;
1536				goto unmap;
1537			}
1538		}
1539		/* Get the amount of data in the current record. */
1540		bytes = le32_to_cpu(m->bytes_in_use);
1541		if (bytes < sizeof(MFT_RECORD_OLD) || bytes > rec_size ||
1542				ntfs_is_baad_record(m->magic)) {
1543			bytes = le32_to_cpu(mirr->bytes_in_use);
1544			if (bytes < sizeof(MFT_RECORD_OLD) ||
1545					bytes > rec_size ||
1546					ntfs_is_baad_record(mirr->magic))
1547				bytes = rec_size;
1548		}
1549		/* Compare the two records. */
1550		if (bcmp(m, mirr, bytes)) {
1551			ntfs_error(vol->mp, "$MFT and $MFTMirr (record %d) do "
1552					"not match.  Run chkdsk.", i);
1553			err = EIO;
1554			goto unmap;
1555		}
1556		mirr = (MFT_RECORD*)((u8*)mirr + rec_size);
1557		err = buf_unmap(buf);
1558		if (err) {
1559			ntfs_error(vol->mp, "Failed to unmap buffer of $MFT "
1560					"record %d (error %d).", i, err);
1561			goto brelse;
1562		}
1563		buf_brelse(buf);
1564	}
1565unlock:
1566	lck_rw_unlock_shared(&ni->lock);
1567	(void)vnode_put(ni->vn);
1568err:
1569	OSFree(mirr_start, alloc_size, ntfs_malloc_tag);
1570	return err;
1571unmap:
1572	err2 = buf_unmap(buf);
1573	if (err2)
1574		ntfs_error(vol->mp, "Failed to unmap buffer of mft record %d "
1575				"in error code path (error %d).", i, err2);
1576brelse:
1577	buf_brelse(buf);
1578	goto unlock;
1579}
1580
1581/**
1582 * ntfs_upcase_load - load the upcase table for an ntfs volume
1583 * @vol:	ntfs volume whose upcase to load
1584 *
1585 * Read the upcase table and setup @vol->upcase and @vol->upcase_len.
1586 *
1587 * Return 0 on success and errno on error.
1588 */
1589static errno_t ntfs_upcase_load(ntfs_volume *vol)
1590{
1591	s64 ofs, data_size = 0;
1592	ntfs_inode *ni;
1593	upl_t upl;
1594	upl_page_info_array_t pl;
1595	u8 *kaddr;
1596	errno_t err;
1597	unsigned u;
1598
1599	ntfs_debug("Entering.");
1600	err = ntfs_inode_get(vol, FILE_UpCase, TRUE, LCK_RW_TYPE_SHARED, &ni,
1601			vol->root_ni->vn, NULL);
1602	if (err) {
1603		ni = NULL;
1604		goto err;
1605	}
1606	/*
1607	 * The upcase size must not be above 64k Unicode characters, must not
1608	 * be zero, and must be a multiple of sizeof(ntfschar).
1609	 */
1610	lck_spin_lock(&ni->size_lock);
1611	data_size = ni->data_size;
1612	lck_spin_unlock(&ni->size_lock);
1613	if (data_size <= 0 || data_size & (sizeof(ntfschar) - 1) ||
1614			data_size > (s64)(64 * 1024 * sizeof(ntfschar))) {
1615		err = EINVAL;
1616		goto err;
1617	}
1618	/* Allocate memory to hold the $UpCase data. */
1619	vol->upcase = OSMalloc(data_size, ntfs_malloc_tag);
1620	if (!vol->upcase) {
1621		err = ENOMEM;
1622		goto err;
1623	}
1624	/*
1625	 * Read the whole $UpCase file a page at a time and copy the contents
1626	 * over.
1627	 */
1628	u = PAGE_SIZE;
1629	for (ofs = 0; ofs < data_size; ofs += PAGE_SIZE) {
1630		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, FALSE);
1631		if (err)
1632			goto err;
1633		if (ofs + u > data_size)
1634			u = data_size - ofs;
1635		memcpy((u8*)vol->upcase + ofs, kaddr, u);
1636		ntfs_page_unmap(ni, upl, pl, FALSE);
1637	}
1638	lck_rw_unlock_shared(&ni->lock);
1639	(void)vnode_recycle(ni->vn);
1640	(void)vnode_put(ni->vn);
1641	vol->upcase_len = data_size >> NTFSCHAR_SIZE_SHIFT;
1642	ntfs_debug("Read %lld bytes from $UpCase (expected %lu bytes).",
1643			(long long)data_size, 64LU * 1024 * sizeof(ntfschar));
1644	lck_mtx_lock(&ntfs_lock);
1645	if (!ntfs_default_upcase) {
1646		ntfs_debug("Using volume specified $UpCase since default is "
1647				"not present.");
1648	} else {
1649		unsigned max_size;
1650
1651		max_size = ntfs_default_upcase_size >> NTFSCHAR_SIZE_SHIFT;
1652		if (max_size > vol->upcase_len)
1653			max_size = vol->upcase_len;
1654		for (u = 0; u < max_size; u++)
1655			if (vol->upcase[u] != ntfs_default_upcase[u])
1656				break;
1657		if (u == max_size) {
1658			OSFree(vol->upcase, data_size, ntfs_malloc_tag);
1659			vol->upcase = ntfs_default_upcase;
1660			vol->upcase_len = ntfs_default_upcase_size >>
1661					NTFSCHAR_SIZE_SHIFT;
1662			ntfs_default_upcase_users++;
1663			ntfs_debug("Volume specified $UpCase matches "
1664					"default.  Using default.");
1665		} else
1666			ntfs_debug("Using volume specified $UpCase since it "
1667					"does not match the default.");
1668	}
1669	lck_mtx_unlock(&ntfs_lock);
1670	ntfs_debug("Done.");
1671	return 0;
1672err:
1673	if (vol->upcase) {
1674		OSFree(vol->upcase, data_size, ntfs_malloc_tag);
1675		vol->upcase = NULL;
1676		vol->upcase_len = 0;
1677	}
1678	if (ni) {
1679		lck_rw_unlock_shared(&ni->lock);
1680		(void)vnode_recycle(ni->vn);
1681		(void)vnode_put(ni->vn);
1682	}
1683	lck_mtx_lock(&ntfs_lock);
1684	if (ntfs_default_upcase) {
1685		vol->upcase = ntfs_default_upcase;
1686		vol->upcase_len = ntfs_default_upcase_size >>
1687				NTFSCHAR_SIZE_SHIFT;
1688		ntfs_default_upcase_users++;
1689		ntfs_error(vol->mp, "Failed to load $UpCase from the volume "
1690				"(error %d).  Using NTFS driver default "
1691				"upcase table instead.", err);
1692		err = 0;
1693	} else
1694		ntfs_error(vol->mp, "Failed to initialize upcase table.");
1695	lck_mtx_unlock(&ntfs_lock);
1696	return err;
1697}
1698
1699/**
1700 * ntfs_attrdef_load - load the attribute definitions table for a volume
1701 * @vol:	ntfs volume whose attrdef to load
1702 *
1703 * Read the attribute definitions table and setup @vol->attrdef and
1704 * @vol->attrdef_size.
1705 *
1706 * Return 0 on success and errno on error.
1707 */
1708static errno_t ntfs_attrdef_load(ntfs_volume *vol)
1709{
1710	s64 ofs, data_size = 0;
1711	ntfs_inode *ni;
1712	upl_t upl;
1713	upl_page_info_array_t pl;
1714	u8 *kaddr;
1715	errno_t err;
1716	unsigned u;
1717
1718	ntfs_debug("Entering.");
1719	err = ntfs_inode_get(vol, FILE_AttrDef, TRUE, LCK_RW_TYPE_SHARED, &ni,
1720			vol->root_ni->vn, NULL);
1721	if (err) {
1722		ni = NULL;
1723		goto err;
1724	}
1725	/*
1726	 * The attribute definitions size must be above 0 and fit inside 31
1727	 * bits.
1728	 */
1729	lck_spin_lock(&ni->size_lock);
1730	data_size = ni->data_size;
1731	lck_spin_unlock(&ni->size_lock);
1732	if (data_size <= 0 || data_size > 0x7fffffff) {
1733		err = EINVAL;
1734		goto err;
1735	}
1736	vol->attrdef = OSMalloc(data_size, ntfs_malloc_tag);
1737	if (!vol->attrdef) {
1738		err = ENOMEM;
1739		goto err;
1740	}
1741	/*
1742	 * Read the whole attribute definitions table a page at a time and copy
1743	 * the contents over.
1744	 */
1745	u = PAGE_SIZE;
1746	for (ofs = 0; ofs < data_size; ofs += PAGE_SIZE) {
1747		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, FALSE);
1748		if (err)
1749			goto err;
1750		if (ofs + u > data_size)
1751			u = data_size - ofs;
1752		memcpy((u8*)vol->attrdef + ofs, kaddr, u);
1753		ntfs_page_unmap(ni, upl, pl, FALSE);
1754	}
1755	lck_rw_unlock_shared(&ni->lock);
1756	(void)vnode_recycle(ni->vn);
1757	(void)vnode_put(ni->vn);
1758	vol->attrdef_size = data_size;
1759	ntfs_debug("Done.  Read %lld bytes from $AttrDef.",
1760			(long long)data_size);
1761	return 0;
1762err:
1763	if (vol->attrdef) {
1764		OSFree(vol->attrdef, data_size, ntfs_malloc_tag);
1765		vol->attrdef = NULL;
1766	}
1767	if (ni) {
1768		lck_rw_unlock_shared(&ni->lock);
1769		(void)vnode_recycle(ni->vn);
1770		(void)vnode_put(ni->vn);
1771	}
1772	ntfs_error(vol->mp, "Failed to initialize attribute definitions "
1773			"table.");
1774	return err;
1775}
1776
1777/**
1778 * ntfs_volume_load - load the $Volume inode and setup the ntfs volume
1779 * @vol:	ntfs volume whose $Volume to load
1780 *
1781 * Load the $Volume system file and setup the volume flags (@vol->flags), the
1782 * volume major and minor version (@vol->major_ver and @vol->minor_ver,
1783 * respectively), and the volume name converted to decomposed utf-8 (@vol->name
1784 * and @vol->name_size).
1785 *
1786 * Return 0 on success and errno on error.
1787 */
1788static errno_t ntfs_volume_load(ntfs_volume *vol)
1789{
1790	ntfs_inode *ni;
1791	MFT_RECORD *m;
1792	ntfs_attr_search_ctx *ctx;
1793	ATTR_RECORD *a;
1794	VOLUME_INFORMATION *vi;
1795	errno_t err;
1796
1797	ntfs_debug("Entering.");
1798	err = ntfs_inode_attach(vol, FILE_Volume, &ni, vol->root_ni->vn);
1799	if (err) {
1800		ntfs_error(vol->mp, "Failed to load $Volume.");
1801		return err;
1802	}
1803	vol->vol_ni = ni;
1804	err = vnode_get(ni->vn);
1805	if (err) {
1806		ntfs_error(vol->mp, "Failed to get vnode for $Volume.");
1807		return err;
1808	}
1809	err = ntfs_mft_record_map(ni, &m);
1810	if (err) {
1811		ntfs_error(vol->mp, "Failed to map mft record for $Volume.");
1812		goto err;
1813	}
1814	ctx = ntfs_attr_search_ctx_get(ni, m);
1815	if (!ctx) {
1816		ntfs_error(vol->mp, "Failed to get attribute search context "
1817				"for $Volume.");
1818		err = ENOMEM;
1819		goto unm_err;
1820	}
1821	err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, AT_UNNAMED, 0, 0, NULL,
1822			0, ctx);
1823	a = ctx->a;
1824	if (err || a->non_resident || a->flags) {
1825		if (err)
1826			ntfs_error(vol->mp, "Failed to lookup volume "
1827					"information attribute in $Volume.");
1828		else {
1829info_err:
1830			ntfs_error(vol->mp, "Volume information attribute in "
1831					"$Volume is corrupt.  Run chkdsk.");
1832		}
1833		goto put_err;
1834	}
1835	vi = (VOLUME_INFORMATION*)((u8*)a + le16_to_cpu(a->value_offset));
1836	/* Some bounds checks. */
1837	if ((u8*)vi < (u8*)a || (u8*)vi + le32_to_cpu(a->value_length) >
1838			(u8*)a + le32_to_cpu(a->length) ||
1839			(u8*)a + le32_to_cpu(a->length) > (u8*)ctx->m +
1840			vol->mft_record_size)
1841		goto info_err;
1842	/* Copy the volume flags and version to the ntfs_volume structure. */
1843	vol->vol_flags = vi->flags;
1844	vol->major_ver = vi->major_ver;
1845	vol->minor_ver = vi->minor_ver;
1846	ntfs_attr_search_ctx_reinit(ctx);
1847	err = ntfs_attr_lookup(AT_VOLUME_NAME, AT_UNNAMED, 0, 0, NULL, 0, ctx);
1848	if (err == ENOENT) {
1849		ntfs_debug("Volume has no name, using empty string.");
1850no_name:
1851		/* No volume name, i.e. the name is "". */
1852		vol->name = OSMalloc(sizeof(char), ntfs_malloc_tag);
1853		if (!vol->name) {
1854			ntfs_error(vol->mp, "Failed to allocate memory for "
1855					"volume name.");
1856			err = ENOMEM;
1857			goto put_err;
1858		}
1859		vol->name[0] = '\0';
1860	} else {
1861		ntfschar *ntfs_name;
1862		u8 *utf8_name;
1863		size_t ntfs_size, utf8_size;
1864		signed res_size;
1865
1866		a = ctx->a;
1867		if (err || a->non_resident || a->flags) {
1868			if (err)
1869				ntfs_error(vol->mp, "Failed to lookup volume "
1870						"name attribute in $Volume.");
1871			else {
1872name_err:
1873				ntfs_error(vol->mp, "Volume name attribute in "
1874						"$Volume is corrupt.  Run "
1875						"chkdsk.");
1876			}
1877put_err:
1878			ntfs_attr_search_ctx_put(ctx);
1879			if (!err)
1880				err = EIO;
1881			goto unm_err;
1882		}
1883		ntfs_name = (ntfschar*)((u8*)a + le16_to_cpu(a->value_offset));
1884		ntfs_size = le32_to_cpu(a->value_length);
1885		if (!ntfs_size) {
1886			ntfs_debug("Volume has empty name, using empty "
1887					"string.");
1888			goto no_name;
1889		}
1890		/* Some bounds checks. */
1891		if ((u8*)ntfs_name < (u8*)a || (u8*)ntfs_name + ntfs_size >
1892				(u8*)a + le32_to_cpu(a->length) ||
1893				(u8*)a + le32_to_cpu(a->length) > (u8*)ctx->m +
1894				vol->mft_record_size)
1895			goto name_err;
1896		/* Convert the name to decomposed utf-8 (NUL terminated). */
1897		utf8_name = NULL;
1898		res_size = ntfs_to_utf8(vol, ntfs_name, ntfs_size, &utf8_name,
1899				&utf8_size);
1900		if (res_size < 0) {
1901			err = -res_size;
1902			ntfs_error(vol->mp, "Failed to convert volume name to "
1903					"decomposed UTF-8 (error %d).",
1904					(int)err);
1905			goto put_err;
1906		}
1907		vol->name = (char*)utf8_name;
1908		vol->name_size = utf8_size;
1909	}
1910	ntfs_attr_search_ctx_put(ctx);
1911	ntfs_mft_record_unmap(ni);
1912	(void)vnode_put(ni->vn);
1913	ntfs_debug("Done.");
1914	return 0;
1915unm_err:
1916	ntfs_mft_record_unmap(ni);
1917err:
1918	(void)vnode_put(ni->vn);
1919	/* Obtained inode will be released by the call to ntfs_unmount(). */
1920	return err;
1921}
1922
/*
 * Number of bytes at the start of hiberfil.sys that are treated as the
 * hibernation header by ntfs_windows_hibernation_status_check().
 */
#define NTFS_HIBERFIL_HEADER_SIZE	4096
1924
1925/**
1926 * ntfs_windows_hibernation_status_check - check if Windows is suspended
1927 * @vol:		ntfs volume to check
1928 * @is_hibernated:	pointer in which to return the hibernation status
1929 *
1930 * Check if Windows is hibernated on the ntfs volume @vol.  This is done by
1931 * looking for the file hiberfil.sys in the root directory of the volume.  If
1932 * the file is not present Windows is definitely not suspended and if it is
1933 * then the $LogFile will be marked dirty/still open so we will already have
1934 * caught that case.
1935 *
1936 * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
1937 * definitely suspended (this volume is not the system volume).  Caveat:  on a
1938 * system with many volumes it is possible that the < 4kiB check is bogus but
1939 * for now this should do fine.
1940 *
1941 * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
1942 * hiberfil header (which is the first 4kiB).  If this begins with "hibr",
1943 * Windows is definitely suspended.  If it is completely full of zeroes,
1944 * Windows is definitely not hibernated.  Any other case is treated as if
1945 * Windows is suspended.  This caters for the above mentioned caveat of a
1946 * system with many volumes where no "hibr" magic would be present and there is
1947 * no zero header.
1948 *
1949 * If Windows is not hibernated on the volume *@is_hibernated is false and if
1950 * Windows is hibernated on the volume it is set to true.
1951 *
1952 * Return 0 on success and errno on error.  On error, *@is_hibernated is
1953 * undefined.
1954 */
1955static errno_t ntfs_windows_hibernation_status_check(ntfs_volume *vol,
1956		BOOL *is_hibernated)
1957{
1958	s64 data_size;
1959	MFT_REF mref;
1960	ntfs_dir_lookup_name *name = NULL;
1961	ntfs_inode *ni;
1962	upl_t upl = NULL;
1963	upl_page_info_array_t pl;
1964	le32 *kaddr, *kend;
1965	errno_t err;
1966	static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'),
1967			const_cpu_to_le16('i'), const_cpu_to_le16('b'),
1968			const_cpu_to_le16('e'), const_cpu_to_le16('r'),
1969			const_cpu_to_le16('f'), const_cpu_to_le16('i'),
1970			const_cpu_to_le16('l'), const_cpu_to_le16('.'),
1971			const_cpu_to_le16('s'), const_cpu_to_le16('y'),
1972			const_cpu_to_le16('s'), 0 };
1973
1974	ntfs_debug("Entering.");
1975	*is_hibernated = FALSE;
1976	/*
1977	 * Find the inode number for the hibernation file by looking up the
1978	 * filename hiberfil.sys in the root directory.
1979	 */
1980	lck_rw_lock_shared(&vol->root_ni->lock);
1981	err = ntfs_lookup_inode_by_name(vol->root_ni, hiberfil, 12, &mref,
1982			&name);
1983	lck_rw_unlock_shared(&vol->root_ni->lock);
1984	if (err) {
1985		/* If the file does not exist, Windows is not hibernated. */
1986		if (err == ENOENT) {
1987			ntfs_debug("hiberfil.sys not present.  Windows is not "
1988					"hibernated on the volume.");
1989			return 0;
1990		}
1991		/* A real error occured. */
1992		ntfs_error(vol->mp, "Failed to find inode number for "
1993				"hiberfil.sys.");
1994		return err;
1995	}
1996	/* We do not care for the type of match that was found. */
1997	if (name)
1998		OSFree(name, sizeof(*name), ntfs_malloc_tag);
1999	/* Get the inode. */
2000	err = ntfs_inode_get(vol, MREF(mref), FALSE, LCK_RW_TYPE_SHARED, &ni,
2001			vol->root_ni->vn, NULL);
2002	if (err) {
2003		ntfs_error(vol->mp, "Failed to load hiberfil.sys.");
2004		return err;
2005	}
2006	lck_spin_lock(&ni->size_lock);
2007	data_size = ni->data_size;
2008	lck_spin_unlock(&ni->size_lock);
2009	if (data_size < NTFS_HIBERFIL_HEADER_SIZE) {
2010		ntfs_debug("Hiberfil.sys is present and smaller than the "
2011				"hibernation header size.  Windows is "
2012				"hibernated on the volume.  This is not the "
2013				"system volume.");
2014		*is_hibernated = TRUE;
2015		goto put;
2016	}
2017	err = ntfs_page_map(ni, 0, &upl, &pl, (u8**)&kaddr, FALSE);
2018	if (err) {
2019		ntfs_error(vol->mp, "Failed to read from hiberfil.sys.");
2020		goto put;
2021	}
2022	if (*kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) {
2023		ntfs_debug("Magic \"hibr\" found in hiberfil.sys.  Windows is "
2024				"hibernated on the volume.  This is the "
2025				"system volume.");
2026		*is_hibernated = TRUE;
2027		goto unm;
2028	}
2029	kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
2030	do {
2031		if (*kaddr) {
2032			ntfs_debug("hiberfil.sys is larger than 4kiB "
2033					"(0x%llx), does not contain the "
2034					"\"hibr\" magic, and does not have a "
2035					"zero header.  Windows is hibernated "
2036					"on the volume.  This is not the "
2037					"system volume.", data_size);
2038			*is_hibernated = TRUE;
2039			goto unm;
2040		}
2041	} while (++kaddr < kend);
2042	ntfs_debug("hiberfil.sys contains a zero header.  Windows is not "
2043			"hibernated on the volume.  This is the system "
2044			"volume.");
2045	/* @err is currently zero. */
2046unm:
2047	ntfs_page_unmap(ni, upl, pl, FALSE);
2048put:
2049	lck_rw_unlock_shared(&ni->lock);
2050	(void)vnode_recycle(ni->vn);
2051	(void)vnode_put(ni->vn);
2052	return err;
2053}
2054
2055/**
2056 * ntfs_volume_flags_write - write new flags to the volume information flags
2057 * @vol:	ntfs volume on which to modify the flags
2058 * @flags:	new flags value for the volume information flags
2059 *
2060 * Internal function.  You probably want to use ntfs_volume_flags_{set,clear}()
2061 * instead (see below).
2062 *
2063 * Replace the volume information flags on the volume @vol with the value
2064 * supplied in @flags.  Note, this overwrites the volume information flags, so
2065 * make sure to combine the flags you want to modify with the old flags and use
2066 * the result when calling ntfs_volume_flags_write().
2067 *
2068 * Return 0 on success and errno on error.
2069 */
2070static errno_t ntfs_volume_flags_write(ntfs_volume *vol,
2071		const VOLUME_FLAGS flags)
2072{
2073	ntfs_inode *ni;
2074	MFT_RECORD *m;
2075	VOLUME_INFORMATION *vi;
2076	ntfs_attr_search_ctx *ctx;
2077	errno_t err;
2078
2079	ntfs_debug("Entering, old flags = 0x%x, new flags = 0x%x.",
2080			le16_to_cpu(vol->vol_flags), le16_to_cpu(flags));
2081	if (vol->vol_flags == flags)
2082		goto done;
2083	ni = vol->vol_ni;
2084	if (!ni)
2085		panic("%s(): Volume inode is not loaded.\n", __FUNCTION__);
2086	err = ntfs_mft_record_map(ni, &m);
2087	if (err)
2088		goto err;
2089	ctx = ntfs_attr_search_ctx_get(ni, m);
2090	if (!ctx) {
2091		err = ENOMEM;
2092		goto put;
2093	}
2094	err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, AT_UNNAMED, 0, 0, NULL,
2095			0, ctx);
2096	if (err)
2097		goto put;
2098	vi = (VOLUME_INFORMATION*)((u8*)ctx->a +
2099			le16_to_cpu(ctx->a->value_offset));
2100	vol->vol_flags = vi->flags = flags;
2101	/* Mark the mft record dirty to ensure it gets written out. */
2102	NInoSetMrecNeedsDirtying(ctx->ni);
2103	ntfs_attr_search_ctx_put(ctx);
2104	ntfs_mft_record_unmap(ni);
2105done:
2106	ntfs_debug("Done.");
2107	return 0;
2108put:
2109	if (ctx)
2110		ntfs_attr_search_ctx_put(ctx);
2111	ntfs_mft_record_unmap(ni);
2112err:
2113	ntfs_error(vol->mp, "Failed with error code %d.", err);
2114	return err;
2115}
2116
2117/**
2118 * ntfs_volume_flags_set - set bits in the volume information flags
2119 * @vol:	ntfs volume on which to modify the flags
2120 * @flags:	flags to set on the volume
2121 *
2122 * Set the bits in @flags in the volume information flags on the volume @vol.
2123 *
2124 * Return 0 on success and errno on error.
2125 */
2126static inline errno_t ntfs_volume_flags_set(ntfs_volume *vol,
2127		VOLUME_FLAGS flags)
2128{
2129	flags &= VOLUME_FLAGS_MASK;
2130	return ntfs_volume_flags_write(vol, vol->vol_flags | flags);
2131}
2132
2133/**
2134 * ntfs_volume_flags_clear - clear bits in the volume information flags
2135 * @vol:	ntfs volume on which to modify the flags
2136 * @flags:	flags to clear on the volume
2137 *
2138 * Clear the bits in @flags in the volume information flags on the volume @vol.
2139 *
2140 * Return 0 on success and errno on error.
2141 */
2142static inline errno_t ntfs_volume_flags_clear(ntfs_volume *vol,
2143		VOLUME_FLAGS flags)
2144{
2145	flags &= VOLUME_FLAGS_MASK;
2146	return ntfs_volume_flags_write(vol, vol->vol_flags & ~flags);
2147}
2148
2149/**
2150 * ntfs_secure_load - load and setup the security file for a volume
2151 * @vol:	ntfs volume whose security file to load
2152 *
2153 * Return 0 on success and errno on error.
2154 */
2155static errno_t ntfs_secure_load(ntfs_volume *vol)
2156{
2157	ntfs_inode *ni;
2158	MFT_RECORD *m;
2159	ntfs_attr_search_ctx *ctx;
2160	FILENAME_ATTR *fn;
2161	errno_t err;
2162	static const ntfschar Secure[8] = { const_cpu_to_le16('$'),
2163			const_cpu_to_le16('S'), const_cpu_to_le16('e'),
2164			const_cpu_to_le16('c'), const_cpu_to_le16('u'),
2165			const_cpu_to_le16('r'), const_cpu_to_le16('e'), 0 };
2166	static ntfschar SDS[5] = { const_cpu_to_le16('$'),
2167			const_cpu_to_le16('S'), const_cpu_to_le16('D'),
2168			const_cpu_to_le16('S'), 0 };
2169	static ntfschar SDH[5] = { const_cpu_to_le16('$'),
2170			const_cpu_to_le16('S'), const_cpu_to_le16('D'),
2171			const_cpu_to_le16('H'), 0 };
2172	static ntfschar SII[5] = { const_cpu_to_le16('$'),
2173			const_cpu_to_le16('S'), const_cpu_to_le16('I'),
2174			const_cpu_to_le16('I'), 0 };
2175
2176	ntfs_debug("Entering.");
2177	/* Get the security descriptors inode. */
2178	err = ntfs_inode_attach(vol, FILE_Secure, &ni, vol->root_ni->vn);
2179	if (err) {
2180		ntfs_error(vol->mp, "Failed to load $Secure.");
2181		return err;
2182	}
2183	vol->secure_ni = ni;
2184	/*
2185	 * Check this really is $Secure rather than $Quota remaining from a
2186	 * partially converted ntfs 1.x volume.
2187	 */
2188	err = ntfs_mft_record_map(ni, &m);
2189	if (err) {
2190		ntfs_error(vol->mp, "Failed to map mft record for $Secure.");
2191		return err;
2192	}
2193	if (!(m->flags & MFT_RECORD_IN_USE)) {
2194not_in_use:
2195		ntfs_debug("Done ($Secure is not in use).");
2196		ntfs_mft_record_unmap(ni);
2197		NVolSetUseSDAttr(vol);
2198		return 0;
2199	}
2200	ctx = ntfs_attr_search_ctx_get(ni, m);
2201	if (!ctx) {
2202		ntfs_error(vol->mp, "Failed to allocate search context for "
2203				"$Secure.");
2204		ntfs_mft_record_unmap(ni);
2205		return ENOMEM;
2206	}
2207	err = ntfs_attr_lookup(AT_FILENAME, AT_UNNAMED, 0, 0, NULL, 0, ctx);
2208	if (err) {
2209		ntfs_error(vol->mp, "Failed to look up filename attribute in "
2210				"$Secure (error %d).", err);
2211		ntfs_attr_search_ctx_put(ctx);
2212		ntfs_mft_record_unmap(ni);
2213		return err;
2214	}
2215	fn = (FILENAME_ATTR*)((u8*)ctx->a + le16_to_cpu(ctx->a->value_offset));
2216	if (!ntfs_are_names_equal(fn->filename, fn->filename_length,
2217			Secure, 7, NVolCaseSensitive(vol), NULL, 0)) {
2218		ntfs_attr_search_ctx_put(ctx);
2219		goto not_in_use;
2220	}
2221	ntfs_attr_search_ctx_put(ctx);
2222	ntfs_mft_record_unmap(ni);
2223	ntfs_debug("Verified identity of $Secure system file.");
2224	/* Get the $SDS data attribute. */
2225	err = ntfs_attr_inode_attach(vol->secure_ni, AT_DATA, SDS, 4,
2226			&vol->secure_sds_ni);
2227	if (err) {
2228		ntfs_error(vol->mp, "Failed to load $Secure/$SDS data "
2229				"attribute (error %d).", err);
2230		return err;
2231	}
2232	/* Get the $SDH index attribute. */
2233	err = ntfs_index_inode_attach(vol->secure_ni, SDH, 4,
2234			&vol->secure_sdh_ni);
2235	if (err) {
2236		ntfs_error(vol->mp, "Failed to load $Secure/$SDH index "
2237				"(error %d).", err);
2238		return err;
2239	}
2240	/* Get the $SII index attribute. */
2241	err = ntfs_index_inode_attach(vol->secure_ni, SII, 4,
2242			&vol->secure_sii_ni);
2243	if (err) {
2244		ntfs_error(vol->mp, "Failed to load $Secure/$SII index "
2245				"(error %d).", err);
2246		return err;
2247	}
2248	/*
2249	 * We need to find the highest security_id on the volume by finding the
2250	 * last entry in the $SII index and record it so we know which
2251	 * security_id to assign to the next security descriptor.
2252	 */
2253	err = ntfs_next_security_id_init(vol, &vol->next_security_id);
2254	if (err) {
2255		ntfs_error(vol->mp, "Failed to determine next security_id "
2256				"(error %d).", err);
2257		return err;
2258	}
2259	// TODO: Initialize security.
2260	//
2261	// We need to look for our default security descriptors (for creating
2262	// directories and files) and if present record their security_ids and
2263	// set the appropriate flag on the volume.  If not present they will be
2264	// added when the first file/directory is created and the volume flag
2265	// will be set then.  (Do we need two flags, one for files and one for
2266	// directories?)
2267	//
2268	// Set up our default security descriptors for files and directories
2269	// so they can be used when creating files/directories on volumes
2270	// without $Secure and in the case that we fail to add our security
2271	// descriptors to $Secure in which case we just place them in the
2272	// old-style security descriptor attribute and do NVolSetUseSDAttr().
2273	// FIXME: We then need to use old-style standard information attribute!
2274	//
2275	// For now just always force creation of security descriptor attributes.
2276	NVolSetUseSDAttr(vol);
2277	ntfs_debug("Done.");
2278	return 0;
2279}
2280
2281/**
2282 * ntfs_objid_load - load and setup the object id file for a volume if present
2283 * @vol:	ntfs volume whose object id file to load
2284 *
2285 * Return 0 on success and errno on error.  If $ObjId is not present, we leave
2286 * vol->objid_ni as NULL and return success.
2287 */
2288static errno_t ntfs_objid_load(ntfs_volume *vol)
2289{
2290	MFT_REF mref;
2291	ntfs_inode *ni;
2292	ntfs_dir_lookup_name *name = NULL;
2293	int err;
2294	static const ntfschar ObjId[7] = { const_cpu_to_le16('$'),
2295			const_cpu_to_le16('O'), const_cpu_to_le16('b'),
2296			const_cpu_to_le16('j'), const_cpu_to_le16('I'),
2297			const_cpu_to_le16('d'), 0 };
2298	static ntfschar O[3] = { const_cpu_to_le16('$'),
2299			const_cpu_to_le16('O'), 0 };
2300
2301	ntfs_debug("Entering.");
2302	/*
2303	 * Find the inode number for the object id file by looking up the
2304	 * filename $ObjId in the extended system files directory $Extend.
2305	 */
2306	lck_rw_lock_shared(&vol->extend_ni->lock);
2307	err = ntfs_lookup_inode_by_name(vol->extend_ni, ObjId, 6, &mref,
2308			&name);
2309	lck_rw_unlock_shared(&vol->extend_ni->lock);
2310	if (err) {
2311		/*
2312		 * If the file does not exist, there are no object ids in use
2313		 * on this volume, just return success.
2314		 */
2315		if (err == ENOENT) {
2316			ntfs_debug("$ObjId not present.  Volume does not have "
2317					"any object ids present.");
2318			return 0;
2319		}
2320		/* A real error occured. */
2321		ntfs_error(vol->mp, "Failed to find inode number for $ObjId.");
2322		return err;
2323	}
2324	/* We do not care for the type of match that was found. */
2325	if (name)
2326		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2327	/* Get the inode. */
2328	err = ntfs_inode_attach(vol, MREF(mref), &ni, vol->extend_ni->vn);
2329	if (err) {
2330		ntfs_error(vol->mp, "Failed to load $ObjId.");
2331		return err;
2332	}
2333	vol->objid_ni = ni;
2334	/* Get the $O index inode. */
2335	err = ntfs_index_inode_attach(vol->objid_ni, O, 2, &vol->objid_o_ni);
2336	if (err) {
2337		ntfs_error(vol->mp, "Failed to load $ObjId/$O index (error "
2338				"%d).", err);
2339		return err;
2340	}
2341	ntfs_debug("Done.");
2342	return 0;
2343}
2344
2345/**
2346 * ntfs_quota_load - load and setup the quota file for a volume if present
2347 * @vol:	ntfs volume whose quota file to load
2348 *
2349 * Return 0 on success and errno on error.  If $Quota is not present, we leave
2350 * vol->quota_ni as NULL and return success.
2351 */
2352static errno_t ntfs_quota_load(ntfs_volume *vol)
2353{
2354	MFT_REF mref;
2355	ntfs_dir_lookup_name *name = NULL;
2356	int err;
2357	static const ntfschar Quota[7] = { const_cpu_to_le16('$'),
2358			const_cpu_to_le16('Q'), const_cpu_to_le16('u'),
2359			const_cpu_to_le16('o'), const_cpu_to_le16('t'),
2360			const_cpu_to_le16('a'), 0 };
2361	static ntfschar Q[3] = { const_cpu_to_le16('$'),
2362			const_cpu_to_le16('Q'), 0 };
2363
2364	ntfs_debug("Entering.");
2365	/*
2366	 * Find the inode number for the quota file by looking up the filename
2367	 * $Quota in the extended system files directory $Extend.
2368	 */
2369	lck_rw_lock_shared(&vol->extend_ni->lock);
2370	err = ntfs_lookup_inode_by_name(vol->extend_ni, Quota, 6, &mref,
2371			&name);
2372	lck_rw_unlock_shared(&vol->extend_ni->lock);
2373	if (err) {
2374		/*
2375		 * If the file does not exist, quotas are disabled and have
2376		 * never been enabled on this volume, just return success.
2377		 */
2378		if (err == ENOENT) {
2379			ntfs_debug("$Quota not present.  Volume does not have "
2380					"quotas enabled.");
2381			/*
2382			 * No need to try to set quotas out of date if they are
2383			 * not enabled.
2384			 */
2385			NVolSetQuotaOutOfDate(vol);
2386			return 0;
2387		}
2388		/* A real error occured. */
2389		ntfs_error(vol->mp, "Failed to find inode number for $Quota.");
2390		return err;
2391	}
2392	/* We do not care for the type of match that was found. */
2393	if (name)
2394		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2395	/* Get the inode. */
2396	err = ntfs_inode_attach(vol, MREF(mref), &vol->quota_ni,
2397			vol->extend_ni->vn);
2398	if (err) {
2399		ntfs_error(vol->mp, "Failed to load $Quota.");
2400		return err;
2401	}
2402	/* Get the $Q index inode. */
2403	err = ntfs_index_inode_attach(vol->quota_ni, Q, 2, &vol->quota_q_ni);
2404	if (err) {
2405		ntfs_error(vol->mp, "Failed to load $Quota/$Q index (error "
2406				"%d).", err);
2407		return err;
2408	}
2409	ntfs_debug("Done.");
2410	return 0;
2411}
2412
2413/**
2414 * ntfs_usnjrnl_load - load and setup the transaction log if present
2415 * @vol:	ntfs volume whose usnjrnl file to load
2416 *
2417 * Return 0 on success and errno on error.  $UsnJrnl is not present or in the
2418 * process of being disabled, we set NVolUsnJrnlStamped() and return success.
2419 *
2420 * If the $UsnJrnl $DATA/$J attribute has a size equal to the lowest valid usn,
2421 * i.e. transaction logging has only just been enabled or the journal has been
2422 * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
2423 * and return success.
2424 */
2425static errno_t ntfs_usnjrnl_load(ntfs_volume *vol)
2426{
2427	s64 data_size;
2428	MFT_REF mref;
2429	ntfs_inode *ni, *max_ni;
2430	ntfs_dir_lookup_name *name = NULL;
2431	upl_t upl;
2432	upl_page_info_array_t pl;
2433	USN_HEADER *uh;
2434	errno_t err;
2435	static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'),
2436			const_cpu_to_le16('U'), const_cpu_to_le16('s'),
2437			const_cpu_to_le16('n'), const_cpu_to_le16('J'),
2438			const_cpu_to_le16('r'), const_cpu_to_le16('n'),
2439			const_cpu_to_le16('l'), 0 };
2440	static ntfschar Max[5] = { const_cpu_to_le16('$'),
2441			const_cpu_to_le16('M'), const_cpu_to_le16('a'),
2442			const_cpu_to_le16('x'), 0 };
2443	static ntfschar J[3] = { const_cpu_to_le16('$'),
2444			const_cpu_to_le16('J'), 0 };
2445
2446	ntfs_debug("Entering.");
2447	/*
2448	 * Find the inode number for the transaction log file by looking up the
2449	 * filename $UsnJrnl in the extended system files directory $Extend.
2450	 */
2451	lck_rw_lock_shared(&vol->extend_ni->lock);
2452	err = ntfs_lookup_inode_by_name(vol->extend_ni, UsnJrnl, 8, &mref,
2453			&name);
2454	lck_rw_unlock_shared(&vol->extend_ni->lock);
2455	if (err) {
2456		/*
2457		 * If the file does not exist, transaction logging is disabled,
2458		 * just return success.
2459		 */
2460		if (err == ENOENT) {
2461			ntfs_debug("$UsnJrnl not present.  Volume does not "
2462					"have transaction logging enabled.");
2463not_enabled:
2464			/*
2465			 * No need to try to stamp the transaction log if
2466			 * transaction logging is not enabled.
2467			 */
2468			NVolSetUsnJrnlStamped(vol);
2469			return 0;
2470		}
2471		/* A real error occured. */
2472		ntfs_error(vol->mp, "Failed to find inode number for "
2473				"$UsnJrnl.");
2474		return err;
2475	}
2476	/* We do not care for the type of match that was found. */
2477	if (name)
2478		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2479	/* Get the inode. */
2480	err = ntfs_inode_attach(vol, MREF(mref), &ni, vol->extend_ni->vn);
2481	if (err) {
2482		ntfs_error(vol->mp, "Failed to load $UsnJrnl.");
2483		return err;
2484	}
2485	vol->usnjrnl_ni = ni;
2486	/*
2487	 * If the transaction log is in the process of being deleted, we can
2488	 * ignore it.
2489	 */
2490	if (vol->vol_flags & VOLUME_DELETE_USN_UNDERWAY) {
2491		ntfs_debug("$UsnJrnl in the process of being disabled.  "
2492				"Volume does not have transaction logging "
2493				"enabled.");
2494		goto not_enabled;
2495	}
2496	/* Get the $DATA/$Max attribute. */
2497	err = ntfs_attr_inode_attach(vol->usnjrnl_ni, AT_DATA, Max, 4, &max_ni);
2498	if (err) {
2499		ntfs_error(vol->mp, "Failed to load $UsnJrnl/$DATA/$Max "
2500				"attribute.");
2501		return err;
2502	}
2503	vol->usnjrnl_max_ni = max_ni;
2504	lck_spin_lock(&max_ni->size_lock);
2505	data_size = max_ni->data_size;
2506	lck_spin_unlock(&max_ni->size_lock);
2507	if (data_size < (s64)sizeof(USN_HEADER)) {
2508		ntfs_error(vol->mp, "Found corrupt $UsnJrnl/$DATA/$Max "
2509				"attribute (size is 0x%llx but should be at "
2510				"least 0x%x bytes).",
2511				(unsigned long long)data_size,
2512				(unsigned)sizeof(USN_HEADER));
2513		return EIO;
2514	}
2515	err = vnode_get(max_ni->vn);
2516	if (err) {
2517		ntfs_error(vol->mp, "Failed to get vnode for "
2518				"$UsnJrnl/$DATA/$Max.");
2519		return err;
2520	}
2521	lck_rw_lock_shared(&max_ni->lock);
2522	/* Read the USN_HEADER from $DATA/$Max. */
2523	err = ntfs_page_map(max_ni, 0, &upl, &pl, (u8**)&uh, FALSE);
2524	if (err) {
2525		ntfs_error(vol->mp, "Failed to read from $UsnJrnl/$DATA/$Max "
2526				"attribute.");
2527		goto put_err;
2528	}
2529	/* Sanity check $Max. */
2530	if (sle64_to_cpu(uh->allocation_delta) >
2531			sle64_to_cpu(uh->maximum_size)) {
2532		ntfs_error(vol->mp, "Allocation delta (0x%llx) exceeds "
2533				"maximum size (0x%llx).  $UsnJrnl is corrupt.",
2534				(unsigned long long)
2535				sle64_to_cpu(uh->allocation_delta),
2536				(unsigned long long)
2537				sle64_to_cpu(uh->maximum_size));
2538		err = EIO;
2539		goto unm_err;
2540	}
2541	/* Get the $DATA/$J attribute. */
2542	err = ntfs_attr_inode_attach(vol->usnjrnl_ni, AT_DATA, J, 2, &ni);
2543	if (err) {
2544		ntfs_error(vol->mp, "Failed to load $UsnJrnl/$DATA/$J "
2545				"attribute.");
2546		goto unm_err;
2547	}
2548	vol->usnjrnl_j_ni = ni;
2549	/* Verify $J is non-resident and sparse. */
2550	if (!NInoNonResident(ni) || !NInoSparse(ni)) {
2551		ntfs_error(vol->mp, "$UsnJrnl/$DATA/$J attribute is resident "
2552				"and/or not sparse.");
2553		err = EIO;
2554		goto unm_err;
2555	}
2556	/*
2557	 * If the transaction log has been stamped and nothing has been written
2558	 * to it since, we do not need to stamp it.
2559	 */
2560	lck_spin_lock(&ni->size_lock);
2561	data_size = ni->data_size;
2562	lck_spin_unlock(&ni->size_lock);
2563	if (sle64_to_cpu(uh->lowest_valid_usn) >= data_size) {
2564		if (sle64_to_cpu(uh->lowest_valid_usn) == data_size) {
2565			ntfs_page_unmap(max_ni, upl, pl, FALSE);
2566			lck_rw_unlock_shared(&max_ni->lock);
2567			(void)vnode_put(max_ni->vn);
2568			ntfs_debug("$UsnJrnl is enabled but nothing has been "
2569					"logged since it was last stamped.  "
2570					"Treating this as if the volume does "
2571					"not have transaction logging "
2572					"enabled.");
2573			goto not_enabled;
2574		}
2575		ntfs_error(vol->mp, "$UsnJrnl has lowest valid usn (0x%llx) "
2576				"which is out of bounds (0x%llx).  $UsnJrnl "
2577				"is corrupt.", (unsigned long long)
2578				sle64_to_cpu(uh->lowest_valid_usn),
2579				(unsigned long long)data_size);
2580		err = EIO;
2581		goto unm_err;
2582	}
2583	ntfs_debug("Done.");
2584unm_err:
2585	ntfs_page_unmap(max_ni, upl, pl, FALSE);
2586put_err:
2587	lck_rw_unlock_shared(&max_ni->lock);
2588	(void)vnode_put(max_ni->vn);
2589	return err;
2590}
2591
2592/**
2593 * ntfs_system_inodes_get - load the system files at mount time
2594 * @vol:	ntfs volume being mounted
2595 *
2596 * Obtain the ntfs inodes corresponding to the system files and directories
2597 * needed for operation of a mounted ntfs file system and process their data
2598 * setting up any relevant in-memory structures in the process.
2599 *
2600 * It is assumed that ntfs_mft_inode_get() has already been called successfully
2601 * thus allowing us to simply use ntfs_inode_get(), ntfs_mft_record_map(), and
2602 * friends to do the work rather than having to do things by hand as is the
2603 * case when bootstrapping the volume in ntfs_mft_inode_get().
2604 *
2605 * Return 0 on success and errno on error.
2606 */
2607static errno_t ntfs_system_inodes_get(ntfs_volume *vol)
2608{
2609	s64 size;
2610	ntfs_inode *root_ni, *ni;
2611	vnode_t root_vn;
2612	errno_t err;
2613	BOOL is_hibernated;
2614
2615	ntfs_debug("Entering.");
2616	/*
2617	 * Get the root directory inode so we can do path lookups and so we can
2618	 * supply its vnode as the parent vnode for the other system vnodes.
2619	 */
2620	err = ntfs_inode_attach(vol, FILE_root, &root_ni, NULL);
2621	if (err) {
2622		ntfs_error(vol->mp, "Failed to load root directory.");
2623		goto err;
2624	}
2625	vol->root_ni = root_ni;
2626	root_vn = root_ni->vn;
2627	/*
2628	 * We already have the $MFT inode and vnode.  Add the root directory
2629	 * vnode as the parent vnode.  We also take an internal reference on
2630	 * the root inode because vnode_update_identity() takes a reference on
2631	 * the root vnode.
2632	 */
2633	vnode_update_identity(vol->mft_ni->vn, root_vn, NULL, 0, 0,
2634			VNODE_UPDATE_PARENT);
2635	OSIncrementAtomic(&root_ni->nr_refs);
2636	/*
2637	 * Get mft mirror inode and compare the contents of $MFT and $MFTMirr,
2638	 * then deal with any errors.
2639	 */
2640	err = ntfs_mft_mirror_load(vol);
2641	if (!err)
2642		err = ntfs_mft_mirror_check(vol);
2643	if (err) {
2644		static const char es1a[] = "Failed to load $MFTMirr";
2645		static const char es1b[] = "$MFTMirr does not match $MFT";
2646		static const char es2[] = ".  Run ntfsfix and/or chkdsk.";
2647		const char *es1;
2648
2649		es1 = !vol->mftmirr_ni ? es1a : es1b;
2650		/* If a read-write mount, convert it to a read-only mount. */
2651		if (!NVolReadOnly(vol)) {
2652			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2653				ntfs_error(vol->mp, "%s%s", es1, es2);
2654				err = EIO;
2655				goto err;
2656			}
2657			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2658					ON_ERRORS_CONTINUE))) {
2659				ntfs_error(vol->mp, "%s and neither on_errors="
2660						"continue nor on_errors="
2661						"remount-ro was specified%s",
2662						es1, es2);
2663				err = EIO;
2664				goto err;
2665			}
2666			vfs_setflags(vol->mp, MNT_RDONLY);
2667			NVolSetReadOnly(vol);
2668			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
2669					es2);
2670		} else
2671			ntfs_warning(vol->mp, "%s.  Will not be able to "
2672					"remount read-write%s", es1, es2);
2673		/* This will prevent a read-write remount. */
2674		NVolSetErrors(vol);
2675	}
2676	/*
2677	 * Get mft bitmap attribute inode and again, take an internal reference
2678	 * on the root inode to balance the reference taken on the root vnode
2679	 * in ntfs_attr_inode_get() and also take a reference on the vnode as
2680	 * we will be holding onto it for the duration of the mount.  Finally,
2681	 * we also release the iocount reference.  It will be taken as and when
2682	 * required when accessing the $MFT/$BITMAP attribute.
2683	 */
2684	err = ntfs_attr_inode_attach(vol->mft_ni, AT_BITMAP, NULL, 0,
2685			&vol->mftbmp_ni);
2686	if (err) {
2687		ntfs_error(vol->mp, "Failed to load $MFT/$BITMAP attribute.");
2688		goto err;
2689	}
2690	NInoSetSparseDisabled(vol->mftbmp_ni);
2691	/*
2692	 * If the mft bitmap attribute is non-resident (which it must be), read
2693	 * in the complete runlist.  This simplifies things when we need to
2694	 * allocate mft records as it guarantees that accessing the mft bitmap
2695	 * will not cause any of its mft records to be mapped.
2696	 */
2697	err = ntfs_attr_map_runlist(vol->mftbmp_ni);
2698	if (err) {
2699		ntfs_error(vol->mp, "Failed to map runlist of $MFT/$BITMAP "
2700				"attribute.");
2701		goto err;
2702	}
2703	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
2704	err = ntfs_upcase_load(vol);
2705	if (err)
2706		goto err;
2707	/*
2708	 * Read attribute definitions table and setup @vol->attrdef and
2709	 * @vol->attrdef_size.
2710	 */
2711	err = ntfs_attrdef_load(vol);
2712	if (err)
2713		goto err;
2714	/* Get the cluster allocation bitmap inode and verify the size. */
2715	err = ntfs_inode_attach(vol, FILE_Bitmap, &ni, root_vn);
2716	if (err) {
2717		ntfs_error(vol->mp, "Failed to load $Bitmap.");
2718		goto err;
2719	}
2720	NInoSetSparseDisabled(ni);
2721	vol->lcnbmp_ni = ni;
2722	lck_spin_lock(&ni->size_lock);
2723	size = ni->data_size;
2724	lck_spin_unlock(&ni->size_lock);
2725	if ((vol->nr_clusters + 7) >> 3 > size) {
2726		ntfs_error(vol->mp, "$Bitmap (%lld) is shorter than required "
2727				"length of volume (%lld) as specified in the "
2728				"boot sector.  Run chkdsk.", (long long)size,
2729				(long long)(vol->nr_clusters + 7) >> 3);
2730		err = EIO;
2731		goto err;
2732	}
2733	/*
2734	 * If the cluster bitmap data attribute is non-resident, read in the
2735	 * complete runlist.  This simplifies things when we need to allocate
2736	 * mft records as it guarantees that accessing the cluster bitmap will
2737	 * not cause any of its mft records to be mapped.
2738	 */
2739	err = ntfs_attr_map_runlist(ni);
2740	if (err) {
2741		ntfs_error(vol->mp, "Failed to map runlist of $Bitmap/$DATA "
2742				"attribute.");
2743		goto err;
2744	}
2745	/*
2746	 * Get the volume inode and setup our cache of the volume flags and
2747	 * version as well as of the volume name in decomposed utf-8.
2748	 */
2749	err = ntfs_volume_load(vol);
2750	if (err)
2751		goto err;
2752	printf("NTFS volume name %s, version %u.%u.\n", vol->name,
2753			(unsigned)vol->major_ver, (unsigned)vol->minor_ver);
2754	if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
2755		ntfs_warning(vol->mp, "Disabling sparse support due to NTFS "
2756				"volume version %u.%u (need at least "
2757				"version 3.0).", (unsigned)vol->major_ver,
2758				(unsigned)vol->minor_ver);
2759		NVolClearSparseEnabled(vol);
2760	}
2761	if (vol->vol_flags & VOLUME_IS_DIRTY) {
2762		ntfs_warning(vol->mp, "NTFS volume is dirty.  You should "
2763				"unmount it and run chkdsk.");
2764		NVolSetErrors(vol);
2765	}
2766	/* Make sure that no unsupported volume flags are set. */
2767	if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
2768		static const char es1[] = "Volume has unsupported flags set";
2769		static const char es2[] = ".  To fix this problem boot into "
2770				"Windows, run chkdsk c: /f /v /x from the "
2771				"command prompt (replace c: with the drive "
2772				"letter of this volume), then reboot into Mac "
2773				"OS X and mount the volume again.";
2774
2775		ntfs_warning(vol->mp, "Unsupported volume flags 0x%x "
2776				"encountered.",
2777				(unsigned)le16_to_cpu(vol->vol_flags));
2778		/* If a read-write mount, convert it to a read-only mount. */
2779		if (!NVolReadOnly(vol)) {
2780			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2781				ntfs_error(vol->mp, "%s%s", es1, es2);
2782				err = EINVAL;
2783				goto err;
2784			}
2785			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2786					ON_ERRORS_CONTINUE))) {
2787				ntfs_error(vol->mp, "%s and neither on_errors="
2788						"continue nor on_errors="
2789						"remount-ro was specified%s",
2790						es1, es2);
2791				err = EINVAL;
2792				goto err;
2793			}
2794			vfs_setflags(vol->mp, MNT_RDONLY);
2795			NVolSetReadOnly(vol);
2796			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
2797					es2);
2798		} else
2799			ntfs_warning(vol->mp, "%s.  Will not be able to "
2800					"remount read-write%s", es1, es2);
2801		/*
2802		 * Do not set NVolErrors() because ntfs_remount() re-checks the
2803		 * flags which we need to do in case any flags have changed.
2804		 */
2805	}
2806	/*
2807	 * Get the inode for the logfile, check it, and determine if the volume
2808	 * was shutdown cleanly, then deal with any errors.
2809	 */
2810	err = ntfs_inode_attach(vol, FILE_LogFile, &ni, root_vn);
2811	if (!err) {
2812		RESTART_PAGE_HEADER *rp;
2813
2814		NInoSetSparseDisabled(ni);
2815		vol->logfile_ni = ni;
2816		err = ntfs_logfile_check(ni, &rp);
2817		if (!err) {
2818			if (!ntfs_logfile_is_clean(ni, rp))
2819				err = EINVAL;
2820			if (rp)
2821				OSFree(rp, le32_to_cpu(rp->system_page_size),
2822						ntfs_malloc_tag);
2823		}
2824	}
2825	if (err) {
2826		static const char es1a[] = "Failed to load $LogFile";
2827		static const char es1b[] = "$LogFile is not clean";
2828		static const char es2[] = ".  Mount in Windows.";
2829		const char *es1;
2830
2831		es1 = !vol->logfile_ni ? es1a : es1b;
2832		/* If a read-write mount, convert it to a read-only mount. */
2833		if (!NVolReadOnly(vol)) {
2834			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2835				ntfs_error(vol->mp, "%s%s", es1, es2);
2836				err = EROFS;
2837				goto err;
2838			}
2839			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2840					ON_ERRORS_CONTINUE))) {
2841				ntfs_error(vol->mp, "%s and neither on_errors="
2842						"continue nor on_errors="
2843						"remount-ro was specified%s",
2844						es1, es2);
2845				goto err;
2846			}
2847			vfs_setflags(vol->mp, MNT_RDONLY);
2848			NVolSetReadOnly(vol);
2849			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
2850					es2);
2851		} else
2852			ntfs_warning(vol->mp, "%s.  Will not be able to "
2853					"remount read-write%s", es1, es2);
2854		NVolSetErrors(vol);
2855	}
2856	/*
2857	 * Check if Windows is suspended to disk on the target volume.  If it
2858	 * is hibernated, we must not write *anything* to the disk so set
2859	 * NVolErrors() without setting the dirty volume flag and mount
2860	 * read-only.  This will prevent read-write remounting and it will also
2861	 * prevent all writes.
2862	 */
2863	err = ntfs_windows_hibernation_status_check(vol, &is_hibernated);
2864	if (err || is_hibernated) {
2865		static const char es1a[] = "Failed to determine if Windows is "
2866				"hibernated";
2867		static const char es1b[] = "Windows is hibernated";
2868		static const char es2[] = ".  Run chkdsk.";
2869		const char *es1;
2870
2871		es1 = err ? es1a : es1b;
2872		/* If a read-write mount, convert it to a read-only mount. */
2873		if (!NVolReadOnly(vol)) {
2874			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2875				ntfs_error(vol->mp, "%s%s", es1, es2);
2876				err = EROFS;
2877				goto err;
2878			}
2879			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2880					ON_ERRORS_CONTINUE))) {
2881				ntfs_error(vol->mp, "%s and neither on_errors="
2882						"continue nor on_errors="
2883						"remount-ro was specified%s",
2884						es1, es2);
2885				if (!err)
2886					err = EINVAL;
2887				goto err;
2888			}
2889			vfs_setflags(vol->mp, MNT_RDONLY);
2890			NVolSetReadOnly(vol);
2891			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
2892					es2);
2893		} else
2894			ntfs_warning(vol->mp, "%s.  Will not be able to "
2895					"remount read-write%s", es1, es2);
2896		NVolSetErrors(vol);
2897	}
2898	/* If (still) a read-write mount, mark the volume dirty. */
2899	if (!NVolReadOnly(vol) &&
2900			(err = ntfs_volume_flags_set(vol, VOLUME_IS_DIRTY))) {
2901		static const char es1[] = "Failed to set dirty bit in volume "
2902				"information flags";
2903		static const char es2[] = ".  Run chkdsk.";
2904
2905		/* Convert to a read-only mount. */
2906		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2907			ntfs_error(vol->mp, "%s%s", es1, es2);
2908			err = EIO;
2909			goto err;
2910		}
2911		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2912				ON_ERRORS_CONTINUE))) {
2913			ntfs_error(vol->mp, "%s and neither on_errors="
2914					"continue nor on_errors=remount-ro "
2915					"was specified%s", es1, es2);
2916			goto err;
2917		}
2918		vfs_setflags(vol->mp, MNT_RDONLY);
2919		NVolSetReadOnly(vol);
2920		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
2921		/*
2922		 * Do not set NVolErrors() because ntfs_remount() might manage
2923		 * to set the dirty flag in which case all would be well.
2924		 */
2925	}
2926	/* If (still) a read-write mount, empty the logfile. */
2927	if (!NVolReadOnly(vol) &&
2928			(err = ntfs_logfile_empty(vol->logfile_ni))) {
2929		static const char es1[] = "Failed to empty journal $LogFile";
2930		static const char es2[] = ".  Mount in Windows.";
2931
2932		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2933			ntfs_error(vol->mp, "%s%s", es1, es2);
2934			err = EIO;
2935			goto err;
2936		}
2937		/* Convert to a read-only mount. */
2938		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2939				ON_ERRORS_CONTINUE))) {
2940			ntfs_error(vol->mp, "%s and neither on_errors="
2941					"continue nor on_errors=remount-ro "
2942					"was specified%s", es1, es2);
2943			goto err;
2944		}
2945		vfs_setflags(vol->mp, MNT_RDONLY);
2946		NVolSetReadOnly(vol);
2947		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
2948		NVolSetErrors(vol);
2949	}
2950	/* If the ntfs volume version is below 3.0, we are done. */
2951	if (vol->major_ver < 3) {
2952		/*
2953		 * Set NVolUseSDAttr() so we do not need to check both the
2954		 * volume version and NVolUseSDAttr() when creating inodes.
2955		 */
2956		NVolSetUseSDAttr(vol);
2957		ntfs_debug("Done (NTFS version < 3.0).");
2958		return 0;
2959	}
2960	/* Ntfs 3.0+ specific initialization. */
2961	/*
2962	 * Read the security descriptors file and initialize security on the
2963	 * volume.
2964	 */
2965	err = ntfs_secure_load(vol);
2966	if (err)
2967		goto err;
2968	/* Get the extended system files directory inode. */
2969	err = ntfs_inode_attach(vol, FILE_Extend, &vol->extend_ni, root_vn);
2970	if (err) {
2971		ntfs_error(vol->mp, "Failed to load $Extend directory.");
2972		goto err;
2973	}
2974	/* Find the object id file, load it if present, and set it up. */
2975	err = ntfs_objid_load(vol);
2976	if (err) {
2977		static const char es1[] = "Failed to load $ObjId";
2978		static const char es2[] = ".  Run chkdsk.";
2979
2980		/* If a read-write mount, convert it to a read-only mount. */
2981		if (!NVolReadOnly(vol)) {
2982			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
2983				ntfs_error(vol->mp, "%s%s", es1, es2);
2984				err = EIO;
2985				goto err;
2986			}
2987			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
2988					ON_ERRORS_CONTINUE))) {
2989				ntfs_error(vol->mp, "%s and neither on_errors="
2990						"continue nor on_errors="
2991						"remount-ro was specified%s",
2992						es1, es2);
2993				goto err;
2994			}
2995			vfs_setflags(vol->mp, MNT_RDONLY);
2996			NVolSetReadOnly(vol);
2997			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
2998					es2);
2999		} else
3000			ntfs_warning(vol->mp, "%s.  Will not be able to "
3001					"remount read-write%s", es1, es2);
3002		NVolSetErrors(vol);
3003	}
3004	/* Find the quota file, load it if present, and set it up. */
3005	err = ntfs_quota_load(vol);
3006	if (err) {
3007		static const char es1[] = "Failed to load $Quota";
3008		static const char es2[] = ".  Run chkdsk.";
3009
3010		/* If a read-write mount, convert it to a read-only mount. */
3011		if (!NVolReadOnly(vol)) {
3012			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
3013				ntfs_error(vol->mp, "%s%s", es1, es2);
3014				err = EIO;
3015				goto err;
3016			}
3017			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
3018					ON_ERRORS_CONTINUE))) {
3019				ntfs_error(vol->mp, "%s and neither on_errors="
3020						"continue nor on_errors="
3021						"remount-ro was specified%s",
3022						es1, es2);
3023				goto err;
3024			}
3025			vfs_setflags(vol->mp, MNT_RDONLY);
3026			NVolSetReadOnly(vol);
3027			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
3028					es2);
3029		} else
3030			ntfs_warning(vol->mp, "%s.  Will not be able to "
3031					"remount read-write%s", es1, es2);
3032		NVolSetErrors(vol);
3033	}
3034	/* If (still) a read-write mount, mark the quotas out of date. */
3035	if (!NVolReadOnly(vol) && (err = ntfs_quotas_mark_out_of_date(vol))) {
3036		static const char es1[] = "Failed to mark quotas out of date";
3037		static const char es2[] = ".  Run chkdsk.";
3038
3039		/* Convert to a read-only mount. */
3040		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
3041			ntfs_error(vol->mp, "%s%s", es1, es2);
3042			err = EIO;
3043			goto err;
3044		}
3045		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
3046				ON_ERRORS_CONTINUE))) {
3047			ntfs_error(vol->mp, "%s and neither on_errors="
3048					"continue nor on_errors=remount-ro "
3049					"was specified%s", es1, es2);
3050			goto err;
3051		}
3052		vfs_setflags(vol->mp, MNT_RDONLY);
3053		NVolSetReadOnly(vol);
3054		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
3055		NVolSetErrors(vol);
3056	}
3057	/*
3058	 * Find the transaction log file ($UsnJrnl), load it if present, check
3059	 * it, and set it up.
3060	 */
3061	err = ntfs_usnjrnl_load(vol);
3062	if (err) {
3063		static const char es1[] = "Failed to load $UsnJrnl";
3064		static const char es2[] = ".  Run chkdsk.";
3065
3066		/* If a read-write mount, convert it to a read-only mount. */
3067		if (!NVolReadOnly(vol)) {
3068			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
3069				ntfs_error(vol->mp, "%s%s", es1, es2);
3070				err = EIO;
3071				goto err;
3072			}
3073			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
3074					ON_ERRORS_CONTINUE))) {
3075				ntfs_error(vol->mp, "%s and neither on_errors="
3076						"continue nor on_errors="
3077						"remount-ro was specified%s",
3078						es1, es2);
3079				goto err;
3080			}
3081			vfs_setflags(vol->mp, MNT_RDONLY);
3082			NVolSetReadOnly(vol);
3083			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
3084					es2);
3085		} else
3086			ntfs_warning(vol->mp, "%s.  Will not be able to "
3087					"remount read-write%s", es1, es2);
3088		NVolSetErrors(vol);
3089	}
3090	/* If (still) a read-write mount, stamp the transaction log. */
3091	if (!NVolReadOnly(vol) && (err = ntfs_usnjrnl_stamp(vol))) {
3092		static const char es1[] = "Failed to stamp transaction log "
3093				"($UsnJrnl)";
3094		static const char es2[] = ".  Run chkdsk.";
3095
3096		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
3097			ntfs_error(vol->mp, "%s%s", es1, es2);
3098			err = EIO;
3099			goto err;
3100		}
3101		/* Convert to a read-only mount. */
3102		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
3103				ON_ERRORS_CONTINUE))) {
3104			ntfs_error(vol->mp, "%s and neither on_errors="
3105					"continue nor on_errors=remount-ro "
3106					"was specified%s", es1, es2);
3107			goto err;
3108		}
3109		vfs_setflags(vol->mp, MNT_RDONLY);
3110		NVolSetReadOnly(vol);
3111		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
3112		NVolSetErrors(vol);
3113	}
3114	ntfs_debug("Done (NTFS version >= 3.0).");
3115	return 0;
3116err:
3117	/* Obtained inodes will be released by the call to ntfs_unmount(). */
3118	return err;
3119}
3120
/**
 * ntfs_popcount32 - count the number of set bits in a 32-bit word
 * @v:		32-bit value whose set bits to count
 *
 * Return the number of bits that are set to one in the 32-bit word @v.
 *
 * The count is computed branch-free by summing adjacent bit fields of
 * increasing width in parallel.  The implementation follows Chapter 8, Section
 * 6, "Efficient Implementation of Population-Count Function in 32-Bit Mode",
 * pages 179-180 of the "Software Optimization Guide for AMD64 Processors":
 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25112.PDF
 *
 * TODO: Does xnu really not have an asm optimized version of the popcount (aka
 * bitcount) function?  Searches have failed to find one...  If it exists or
 * gets added at some point we should switch to using it instead of ours.
 */
static inline u32 ntfs_popcount32(u32 v)
{
	u32 sum;

	/* Every 2-bit field now holds the count of bits set in that field. */
	sum = v - ((v >> 1) & 0x55555555);
	/* Add neighbouring 2-bit counts together into 4-bit fields. */
	sum = (sum & 0x33333333) + ((sum >> 2) & 0x33333333);
	/* Add neighbouring 4-bit counts together into 8-bit fields. */
	sum = (sum + (sum >> 4)) & 0x0F0F0F0F;
	/* Accumulate the four byte counts in the top byte and extract it. */
	return (sum * 0x01010101) >> 24;
}
3141
3142/**
3143 * ntfs_get_nr_set_bits - get the number of set bits in a bitmap
3144 * @vn:		vnode of bitmap for which to get the number of set bits
3145 * @nr_bits:	number of bits in the bitmap
3146 * @res:	pointer to where the result should be written
3147 *
3148 * Calculate the number of set bits in the bitmap vnode @vn and return the
3149 * result in @res.  We do not care about partial buffers as these will be just
3150 * zero filled and hence not be counted as set bits.
3151 *
3152 * If any buffers cannot be read we assume all bits in the erroring buffers are
3153 * set.  This means we return an overestimate on errors which is better than
3154 * an underestimate.
3155 *
3156 * Return 0 on success amd errno if an iocount reference could not be obtained
3157 * on the bitmap vnode.
3158 */
3159static errno_t ntfs_get_nr_set_bits(vnode_t vn, const s64 nr_bits, s64 *res)
3160{
3161	s64 max_ofs, ofs, nr_set;
3162	ntfs_inode *ni = NTFS_I(vn);
3163	errno_t err;
3164
3165	ntfs_debug("Entering.");
3166	/* Get an iocount reference on the bitmap vnode. */
3167	err = vnode_get(vn);
3168	if (err)
3169		return err;
3170	lck_rw_lock_shared(&ni->lock);
3171	/* Convert the number of bits into bytes rounded up. */
3172	max_ofs = (nr_bits + 7) >> 3;
3173	ntfs_debug("Reading bitmap, max_ofs %lld.", (long long)max_ofs);
3174	for (nr_set = ofs = 0; ofs < max_ofs; ofs += PAGE_SIZE) {
3175		upl_t upl;
3176		upl_page_info_array_t pl;
3177		u32 *p;
3178		int i;
3179
3180		/* Map the page. */
3181		err = ntfs_page_map(ni, ofs, &upl, &pl, (u8**)&p, FALSE);
3182		if (err) {
3183			ntfs_debug("Failed to map page from bitmap (offset "
3184					"%lld, size %d, error %d).  Skipping "
3185					"page.", (long long)ofs, PAGE_SIZE,
3186					(int)err);
3187			/* Count the whole buffer contents as set bits. */
3188			nr_set += PAGE_SIZE * 8;
3189			continue;
3190		}
3191		/*
3192		 * For each 32-bit word, add the number of set bits.  If this
3193		 * is the last block and it is partial we do not really care as
3194		 * it just means we do a little extra work but it will not
3195		 * affect the result as all out of range bytes are set to zero
3196		 * by ntfs_page_map().
3197		 *
3198		 * Use multiples of 4 bytes, thus max size is PAGE_SIZE / 4.
3199		 */
3200	  	for (i = 0; i < (PAGE_SIZE / 4); i++)
3201			nr_set += ntfs_popcount32(p[i]);
3202		ntfs_page_unmap(ni, upl, pl, FALSE);
3203	}
3204	/*
3205	 * Release the iocount reference on the bitmap vnode.  We can ignore
3206	 * the return value as it always is zero.
3207	 */
3208	lck_rw_unlock_shared(&ni->lock);
3209	(void)vnode_put(vn);
3210	ntfs_debug("Done (nr_bits %lld, nr_set %lld).", (long long)nr_bits,
3211			(long long)nr_set);
3212	*res = nr_set;
3213	return 0;
3214}
3215
3216/**
3217 * ntfs_set_nr_free_clusters - set the number of free clusters on a volume
3218 * @vol:	ntfs volume for which to set the number of free clusters
3219 *
3220 * Calculate the number of free clusters on the mounted ntfs volume @vol and
3221 * cache the result in the @vol->nr_free_clusters.
3222 *
3223 * The only particularity is that clusters beyond the end of the logical ntfs
3224 * volume will be marked as in use to prevent errors which means we have to
3225 * discount those at the end.  This is important as the cluster bitmap always
3226 * has a size in multiples of 8 bytes, i.e. up to 63 clusters could be outside
3227 * the logical volume and marked in use when they are not as they do not exist.
3228 *
3229 * If any part of the bitmap cannot be read we assume all clusters in the
3230 * erroring part(s) are in use.  This means we return an underestimate of the
3231 * number of free clusters on errors which is better than an overrestimate.
3232 *
3233 * Return 0 on success or errno if an iocount reference could not be obtained
3234 * on the $Bitmap vnode.
3235 */
3236static errno_t ntfs_set_nr_free_clusters(ntfs_volume *vol)
3237{
3238	s64 nr_free;
3239	errno_t err;
3240
3241	ntfs_debug("Entering.");
3242	lck_rw_lock_exclusive(&vol->lcnbmp_lock);
3243	err = ntfs_get_nr_set_bits(vol->lcnbmp_ni->vn, vol->nr_clusters,
3244			&nr_free);
3245	if (err) {
3246		ntfs_error(vol->mp, "Failed to get vnode for $Bitmap.");
3247		lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
3248		return err;
3249	}
3250	/* Determine the number of zero bits from the number of set bits. */
3251	nr_free = vol->nr_clusters - nr_free;
3252	/*
3253	 * Fixup for eventual bits outside logical ntfs volume (see function
3254	 * description above).
3255	 */
3256	if (vol->nr_clusters & 63)
3257		nr_free += 64 - (vol->nr_clusters & 63);
3258	/* If errors occured we may have gone below zero, fix this. */
3259	if (nr_free < 0)
3260		nr_free = 0;
3261	vol->nr_free_clusters = nr_free;
3262	ntfs_debug("Done (nr_clusters %lld, nr_free_clusters %lld).",
3263			(long long)vol->nr_clusters, (long long)nr_free);
3264	lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
3265	return 0;
3266}
3267
3268/**
3269 * ntfs_set_nr_mft_records - set the number of total/free mft records
3270 * @vol:	volume for which to set the number of total/free mft records
3271 *
3272 * Calculate the number of mft records (inodes) as well as the number of free
3273 * mft records on the mounted ntfs volume @vol and cache the results in
3274 * @vol->nr_mft_records and @vol->nr_free_mft_records, respectively.
3275 *
3276 * If any part of the bitmap cannot be read we assume all mft records in the
3277 * erroring part(s) are in use.  This means we return an underestimate of the
3278 * number of free mft records on errors which is better than an overrestimate.
3279 *
3280 * FIXME: HFS uses the maximum ever possible by basing it on the volume size
3281 * rather than the current total/free.  Do we want to keep it the ntfsprogs and
3282 * Linux NTFS driver way or move to the HFS way?
3283 */
3284static errno_t ntfs_set_nr_mft_records(ntfs_volume *vol)
3285{
3286	s64 nr_free;
3287	errno_t err;
3288
3289	ntfs_debug("Entering.");
3290	/*
3291	 * First, determine the total number of mft records from the size of
3292	 * the $MFT/$DATA attribute.
3293	 */
3294	lck_rw_lock_exclusive(&vol->mftbmp_lock);
3295	lck_spin_lock(&vol->mft_ni->size_lock);
3296	vol->nr_mft_records = vol->mft_ni->data_size >>
3297			vol->mft_record_size_shift;
3298	lck_spin_unlock(&vol->mft_ni->size_lock);
3299	err = ntfs_get_nr_set_bits(vol->mftbmp_ni->vn,
3300			vol->mft_ni->initialized_size >>
3301			vol->mft_record_size_shift, &nr_free);
3302	if (err) {
3303		ntfs_error(vol->mp, "Failed to get vnode for $MFT/$BITMAP.");
3304		lck_rw_unlock_exclusive(&vol->mftbmp_lock);
3305		return err;
3306	}
3307	/* Determine the number of zero bits from the number of set bits. */
3308	nr_free = vol->nr_mft_records - nr_free;
3309	/* If errors occured we may well have gone below zero, fix this. */
3310	if (nr_free < 0)
3311		nr_free = 0;
3312	vol->nr_free_mft_records = nr_free;
3313	ntfs_debug("Done (nr_mft_records %lld, nr_free_mft_records %lld).",
3314			(long long)vol->nr_mft_records, (long long)nr_free);
3315	lck_rw_unlock_exclusive(&vol->mftbmp_lock);
3316	return 0;
3317}
3318
3319/**
3320 * ntfs_statfs - return information about a mounted ntfs volume
3321 * @vol:	ntfs volume about which to return information
3322 * @sfs:	vfsstatfs structure in which to return the information
3323 *
3324 * Return information about the mounted ntfs volume @vol in the vfsstatfs
3325 * structure @sfs.  We interpret the values to be correct of the moment in time
3326 * at which we are called.  Most values are variable otherwise and this is not
3327 * just the free values but the totals as well.  For example we can increase
3328 * the total number of file nodes if we run out and we can keep doing this
3329 * until there is no more space on the volume left at all.
3330 *
3331 * This is only called from ntfs_mount() hence we only need to set the
3332 * fields that are not already set.
3333 *
3334 * The mount() system call sets @sfs to zero and then sets up f_owner, f_flags,
3335 * f_fstypename, f_mntonname, f_mntfromname, and f_reserved.
3336 *
3337 * ntfs_mount() then sets f_fsid and calls ntfs_statfs() and the rest of @sfs
3338 * is set here.
3339 *
3340 * Note: No need for locking as this is only called from ntfs_mount().
3341 */
3342static void ntfs_statfs(ntfs_volume *vol, struct vfsstatfs *sfs)
3343{
3344	ntfs_debug("Entering.");
3345	/*
3346	 * Block size for the below size values.  We use the cluster size of
3347	 * the volume as that means we do not convert to a different unit.
3348	 * Alternatively, we could return the sector size instead.
3349	 */
3350	sfs->f_bsize = vol->cluster_size;
3351	/* Optimal transfer block size (in bytes). */
3352	sfs->f_iosize = ubc_upl_maxbufsize();
3353	/* Total data blocks in file system (in units of @f_bsize). */
3354	sfs->f_blocks = (u64)vol->nr_clusters;
3355	/* Free data blocks in file system (in units of @f_bsize). */
3356	sfs->f_bfree = (u64)vol->nr_free_clusters;
3357	/*
3358	 * Free blocks available to non-superuser (in units of @f_bsize), same
3359	 * as above for ntfs.
3360	 * FIXME: We could provide a mount option to cause a virtual, reserved
3361	 * percentage of total space for superuser and perhaps even use a
3362	 * non-zero default and enforce it in the cluster allocator.  If we do
3363	 * that we would need to subtract that percentage from
3364	 * @vol->nr_free_clusters and return the result in @sfs->f_bavail
3365	 * unless the result is below zero in which case we would just set
3366	 * @sfs->f_bavail to 0.
3367	 */
3368	sfs->f_bavail = (u64)vol->nr_free_clusters;
3369	/* Blocks in use (in units of @f_bsize). */
3370	sfs->f_bused = (u64)(vol->nr_clusters - vol->nr_free_clusters);
3371	/* Number of inodes in file system (at this point in time). */
3372	sfs->f_files = (u64)vol->nr_mft_records;
3373	/* Free inodes in file system (at this point in time). */
3374	sfs->f_ffree = (u64)vol->nr_free_mft_records;
3375	/*
3376	 * File system subtype.  Set this to the ntfs version encoded into 16
3377	 * bits, the high 8 bits being the major version and the low 8 bits
3378	 * being the minor version.  This is then extended to 32 bits, thus the
3379	 * higher 16 bits are currently zero.
3380	 */
3381	sfs->f_fssubtype = (u32)vol->major_ver << 8 | vol->minor_ver;
3382	ntfs_debug("Done.");
3383}
3384
3385/**
3386 * ntfs_unmount_callback_recycle - callback for vnode iterate in ntfs_unmount()
3387 * @vn:		vnode the callback is invoked with (has iocount reference)
3388 * @data:	for us always NULL and ignored
3389 *
3390 * This callback is called from vnode_iterate() which is called from
3391 * ntfs_unmount() for all in-core, non-dead, non-suspend vnodes belonging to
3392 * the mounted volume that still have an ntfs inode attached.
3393 *
3394 * We mark all vnodes for termination so they are reclaimed as soon as all
3395 * references to them are released.
3396 */
3397static int ntfs_unmount_callback_recycle(vnode_t vn, void *data __unused)
3398{
3399#ifdef DEBUG
3400	if (NTFS_I(vn))
3401		ntfs_debug("Entering for mft_no 0x%llx.",
3402				(unsigned long long)NTFS_I(vn)->mft_no);
3403#endif
3404	(void)vnode_recycle(vn);
3405	ntfs_debug("Done.");
3406	return VNODE_RETURNED;
3407}
3408
3409/**
3410 * ntfs_unmount_inode_detach - detach an inode at umount time
3411 * @pni:	pointer to the attached ntfs inode to detach
3412 * @parent_ni:	parent ntfs inode
3413 *
3414 * Mark the vnode of the ntfs inode *@pni for termination and detach the ntfs
3415 * inode *@pni from the mounted ntfs volume @vol by dropping the reference on
3416 * its vnode and setting *@pni to NULL.
3417 */
3418static void ntfs_unmount_inode_detach(ntfs_inode **pni, ntfs_inode *parent_ni)
3419{
3420	ntfs_inode *ni = *pni;
3421	if (ni) {
3422		ntfs_debug("Entering for mft_no 0x%llx.",
3423				(unsigned long long)ni->mft_no);
3424		/* Drop the internal reference on the parent inode. */
3425		if (parent_ni)
3426			OSDecrementAtomic(&parent_ni->nr_refs);
3427		OSDecrementAtomic(&ni->nr_refs);
3428		if (ni->vn) {
3429			(void)vnode_recycle(ni->vn);
3430			vnode_rele(ni->vn);
3431		} else
3432			ntfs_inode_reclaim(ni);
3433		*pni = NULL;
3434		ntfs_debug("Done.");
3435	}
3436}
3437
3438/**
3439 * ntfs_unmount_attr_inode_detach - detach an attribute inode at umount time
3440 * @pni:	pointer to the attached ntfs inode to detach
3441 *
3442 * Mark the vnode of the ntfs inode *@pni for termination and detach the ntfs
3443 * inode *@pni from the mounted ntfs volume @vol by dropping the reference on
3444 * its vnode and setting *@pni to NULL.
3445 */
3446static void ntfs_unmount_attr_inode_detach(ntfs_inode **pni)
3447{
3448	ntfs_inode *ni = *pni;
3449	if (ni) {
3450		ntfs_debug("Entering for mft_no 0x%llx.",
3451				(unsigned long long)ni->mft_no);
3452		/*
3453		 * Drop the internal reference on the base inode @base_ni
3454		 * (which is also the parent inode).
3455		 */
3456		if (NInoAttr(ni) && ni->base_ni)
3457			OSDecrementAtomic(&ni->base_ni->nr_refs);
3458		OSDecrementAtomic(&ni->nr_refs);
3459		if (ni->vn) {
3460			(void)vnode_recycle(ni->vn);
3461			vnode_rele(ni->vn);
3462		} else
3463			ntfs_inode_reclaim(ni);
3464		*pni = NULL;
3465		ntfs_debug("Done.");
3466	}
3467}
3468
3469/**
3470 * ntfs_do_postponed_release - release resources used by an ntfs volume
3471 * @vol:	ntfs volume to release
3472 *
3473 * Release resources used by the ntfs volume @vol.
3474 *
3475 * This is called either at unmount time or if there were still inodes active
3476 * then it is called when the last inode is freed.  This ensures the @vol
3477 * pointer in the ntfs_inode structure remains valid until all inodes are gone.
3478 */
3479void ntfs_do_postponed_release(ntfs_volume *vol)
3480{
3481	ntfs_debug("Doing postponed release of volume.");
3482	lck_mtx_lock(&ntfs_lock);
3483	if (vol->upcase && vol->upcase == ntfs_default_upcase) {
3484		vol->upcase = NULL;
3485		/*
3486		 * Drop our reference on the default upcase table and throw it
3487		 * away if we had the only reference.
3488		 */
3489		if (!--ntfs_default_upcase_users) {
3490			OSFree(ntfs_default_upcase, ntfs_default_upcase_size,
3491					ntfs_malloc_tag);
3492			ntfs_default_upcase = NULL;
3493		}
3494	}
3495	if (NVolCompressionEnabled(vol)) {
3496		/*
3497		 * Drop our reference on the compression buffer and throw it
3498		 * away if we had the only reference.
3499		 */
3500		if (!--ntfs_compression_users) {
3501			OSFree(ntfs_compression_buffer,
3502					ntfs_compression_buffer_size,
3503					ntfs_malloc_tag);
3504			ntfs_compression_buffer = NULL;
3505		}
3506	}
3507	lck_mtx_unlock(&ntfs_lock);
3508	/* If we loaded the attribute definitions table, throw it away now. */
3509	if (vol->attrdef)
3510		OSFree(vol->attrdef, vol->attrdef_size, ntfs_malloc_tag);
3511	/* If we used a volume specific upcase table, throw it away now. */
3512	if (vol->upcase)
3513		OSFree(vol->upcase, vol->upcase_len << NTFSCHAR_SIZE_SHIFT,
3514				ntfs_malloc_tag);
3515	/* If we cached a volume name, throw it away now. */
3516	if (vol->name)
3517		OSFree(vol->name, vol->name_size, ntfs_malloc_tag);
3518	/* Deinitialize the ntfs_volume locks. */
3519	lck_rw_destroy(&vol->mftbmp_lock, ntfs_lock_grp);
3520	lck_rw_destroy(&vol->lcnbmp_lock, ntfs_lock_grp);
3521	lck_mtx_destroy(&vol->rename_lock, ntfs_lock_grp);
3522	lck_rw_destroy(&vol->secure_lock, ntfs_lock_grp);
3523	lck_spin_destroy(&vol->security_id_lock, ntfs_lock_grp);
3524	lck_mtx_destroy(&vol->inodes_lock, ntfs_lock_grp);
3525	/* Finally, free the ntfs volume. */
3526	OSFree(vol, sizeof(ntfs_volume), ntfs_malloc_tag);
3527	OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag());
3528}
3529
/**
 * ntfs_unmount - unmount an ntfs file system
 * @mp:		mount point to unmount
 * @mnt_flags:	flags describing the unmount (MNT_FORCE is the only one)
 * @context:	vfs context (unused)
 *
 * The VFS calls this via VFS_UNMOUNT() when it wants to unmount an ntfs
 * volume.  We sync and release all held inodes as well as all other resources.
 *
 * For each held inode, if we have the vnode already, go through vfs reclaim
 * which will also get rid of the ntfs inode.  Otherwise kill the ntfs inode
 * directly.
 *
 * If the volume is successfully unmounted, we must call
 * OSKextReleaseKextWithLoadTag() to allow the KEXT to be unloaded when no
 * longer in use.  When the ntfs volume is freed via
 * ntfs_do_postponed_release() that function drops the KEXT reference,
 * otherwise it is dropped at the "unload" label below.
 *
 * Return 0 on success and errno on error.  The only error returned is the one
 * from the initial vflush() of the non-root, non-system vnodes, in which case
 * nothing has been released yet and the KEXT reference is kept.
 */
static int ntfs_unmount(mount_t mp, int mnt_flags,
		vfs_context_t context __unused)
{
	ntfs_volume *vol;
	int vflags, err;
	BOOL force;

	ntfs_debug("Entering.");
	vol = NTFS_MP(mp);
	/* No ntfs volume is attached to the mount, just drop the KEXT hold. */
	if (!vol)
		goto unload;
	/*
	 * Without an $MFT inode there are no inodes to release, thus only the
	 * locks and the ntfs volume itself need to be thrown away.
	 */
	if (!vol->mft_ni) {
		/* Split our ntfs_volume away from the mount. */
		vfs_setfsprivate(mp, NULL);
		goto no_mft;
	}
	vflags = 0;
	force = FALSE;
	if (mnt_flags & MNT_FORCE) {
		vflags |= FORCECLOSE;
		force = TRUE;
	}
	/* No root inode, skip straight to releasing the $MFT inode. */
	if (!vol->root_ni)
		goto no_root;
	/*
	 * Try to reclaim all non-root and non-system vnodes.  For a non-forced
	 * unmount, this will fail if there are any open files.
	 */
	err = vflush(mp, NULLVP, vflags|SKIPROOT|SKIPSYSTEM);
	if (err) {
		ntfs_warning(mp, "Cannot unmount (vflush() returned error "
				"%d).  Are there open files keeping the "
				"volume busy?\n", err);
		goto abort;
	}
	/*
	 * Once we get here, the only vnodes left are our system vnodes, which
	 * we will detach and vnode_put below.  At this point, the system
	 * directories may still have index attributes with references on the
	 * directory vnodes.  And we might have other system vnodes still
	 * hanging around, with no references.  So we will explicitly try to
	 * recycle all remaining vnodes so that they will all be reclaimed as
	 * soon as their last references are dropped.
	 */
	(void)vnode_iterate(mp, 0, ntfs_unmount_callback_recycle, NULL);
	/*
	 * If a read-write mount and no volume errors have been detected, mark
	 * the volume clean.
	 */
	if (!NVolReadOnly(vol) && vol->vol_ni) {
		if (!NVolErrors(vol)) {
			if (ntfs_volume_flags_clear(vol, VOLUME_IS_DIRTY))
				ntfs_warning(mp, "Failed to clear dirty bit "
						"in volume information "
						"flags.  Run chkdsk.");
		} else
			ntfs_warning(mp, "Volume has errors.  Leaving volume "
					"marked dirty.  Run chkdsk.");
	}
	/* Ntfs 3.0+ specific clean up. */
	if (vol->vol_ni && vol->major_ver >= 3) {
		ntfs_unmount_attr_inode_detach(&vol->usnjrnl_j_ni);
		ntfs_unmount_attr_inode_detach(&vol->usnjrnl_max_ni);
		ntfs_unmount_inode_detach(&vol->usnjrnl_ni, vol->extend_ni);
		ntfs_unmount_attr_inode_detach(&vol->quota_q_ni);
		ntfs_unmount_inode_detach(&vol->quota_ni, vol->extend_ni);
		ntfs_unmount_attr_inode_detach(&vol->objid_o_ni);
		ntfs_unmount_inode_detach(&vol->objid_ni, vol->extend_ni);
		ntfs_unmount_inode_detach(&vol->extend_ni, vol->root_ni);
		ntfs_unmount_attr_inode_detach(&vol->secure_sds_ni);
		ntfs_unmount_attr_inode_detach(&vol->secure_sdh_ni);
		ntfs_unmount_attr_inode_detach(&vol->secure_sii_ni);
		ntfs_unmount_inode_detach(&vol->secure_ni, vol->root_ni);
	}
	/* Clean up of the system inodes present on all ntfs versions. */
	ntfs_unmount_inode_detach(&vol->vol_ni, vol->root_ni);
	ntfs_unmount_inode_detach(&vol->lcnbmp_ni, vol->root_ni);
	ntfs_unmount_attr_inode_detach(&vol->mftbmp_ni);
	ntfs_unmount_inode_detach(&vol->logfile_ni, vol->root_ni);
	/*
	 * The root directory vnode is still held by the parent vnode
	 * references of the $MFT and $MFTMirr vnodes thus it will only be
	 * inactivated after those vnodes are reclaimed.  The problem with this
	 * is that when VNOP_INACTIVE() is called for the root directory vnode
	 * this in turn calls ntfs_inode_sync() which in turn calls
	 * ntfs_mft_record_sync() which in turn calls buf_getblk() followed by
	 * buf_bwrite() for the vnode of $MFT which fails as the vnode for $MFT
	 * has been reclaimed already.  The solution is thus to drop the parent
	 * vnode references held by $MFT and $MFTMirr now so that the root
	 * directory vnode can be recycled now.
	 */
	if (vol->mftmirr_ni && vol->mftmirr_ni->vn) {
		/* Drop the internal reference on the parent inode. */
		if (vol->root_ni)
			OSDecrementAtomic(&vol->root_ni->nr_refs);
		vnode_update_identity(vol->mftmirr_ni->vn, NULL, NULL, 0, 0,
				VNODE_UPDATE_PARENT);
	}
	if (vol->mft_ni && vol->mft_ni->vn) {
		/* Drop the internal reference on the parent inode. */
		if (vol->root_ni)
			OSDecrementAtomic(&vol->root_ni->nr_refs);
		vnode_update_identity(vol->mft_ni->vn, NULL, NULL, 0, 0,
				VNODE_UPDATE_PARENT);
	}
	/*
	 * Nothing references the root inode any more so we can release it.
	 * Note the VFS still holds a reference that it will drop after
	 * ntfs_unmount() completes thus the root vnode will be the last one to
	 * be reclaimed.
	 */
	ntfs_unmount_inode_detach(&vol->root_ni, NULL);
	/*
	 * Do a final flush to get rid of any vnodes that have not been
	 * inactivated/recycled yet.  Note this must be done without the force
	 * flag otherwise it blows away the mft mirror and mft inodes which we
	 * will recycle below.
	 */
	(void)vflush(mp, NULLVP, vflags & ~FORCECLOSE);
	ntfs_unmount_inode_detach(&vol->mftmirr_ni, NULL);
no_root:
	if (vol->mft_ni) {
		if (vol->mft_ni->vn)
			ntfs_unmount_inode_detach(&vol->mft_ni, NULL);
		else {
			/*
			 * There may be no vnode in the error code paths of
			 * ntfs_mount() which calls ntfs_unmount() to clean up.
			 */
			ntfs_inode_reclaim(vol->mft_ni);
			vol->mft_ni = NULL;
		}
	}
	/*
	 * We are holding no inodes at all now.  It is time to blow everything
	 * away that is remaining.  If this is a forced unmount, we immediately
	 * and forcibly blow everything away.  If not forced, we try to blow
	 * everything away that is not busy but if anything is busy vflush()
	 * does not do anything at all.  In that case we report an error, and
	 * then forcibly blow everything away anyway.  FIXME: We could undo the
	 * unmount by re-reading all the system inodes we just released, but do
	 * we want to?  It does not seem to be worth the hassle given it should
	 * never really happen...
	 */
	err = vflush(mp, NULLVP, vflags);
	if (err && !force) {
		ntfs_error(mp, "There are busy vnodes after unmounting!  "
				"Forcibly closing and reclaiming them.");
		(void)vflush(mp, NULLVP, FORCECLOSE);

	}
	/* Split our ntfs_volume away from the mount. */
	vol->mp = NULL;
	vfs_setfsprivate(mp, NULL);
	/* If there are still inodes attached, postpone freeing the volume. */
	lck_mtx_lock(&vol->inodes_lock);
	if (!LIST_EMPTY(&vol->inodes)) {
		/*
		 * Only flag the volume here.  Neither the volume nor the KEXT
		 * reference are released on this path.
		 */
		NVolSetPostponedRelease(vol);
		lck_mtx_unlock(&vol->inodes_lock);
		ntfs_debug("Scheduled postponed release of volume.");
		return 0;
	}
	lck_mtx_unlock(&vol->inodes_lock);
	/* This frees the volume and drops the KEXT reference. */
	ntfs_do_postponed_release(vol);
	ntfs_debug("Done.");
	return 0;
no_mft:
	/* Deinitialize the ntfs_volume locks. */
	lck_rw_destroy(&vol->mftbmp_lock, ntfs_lock_grp);
	lck_rw_destroy(&vol->lcnbmp_lock, ntfs_lock_grp);
	lck_mtx_destroy(&vol->rename_lock, ntfs_lock_grp);
	lck_rw_destroy(&vol->secure_lock, ntfs_lock_grp);
	lck_spin_destroy(&vol->security_id_lock, ntfs_lock_grp);
	lck_mtx_destroy(&vol->inodes_lock, ntfs_lock_grp);
	/* Finally, free the ntfs volume. */
	OSFree(vol, sizeof(ntfs_volume), ntfs_malloc_tag);
unload:
	/* Success, allow the KEXT to be unloaded again. */
	err = 0;
	OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag());
abort:
	/*
	 * When reached via goto, @err is the vflush() error and the KEXT
	 * reference has not been dropped.
	 */
	ntfs_debug("Done.");
	return err;
}
3731
/**
 * ntfs_sync_args - state passed from ntfs_sync() to its callback and helper
 * @sync:	IO_SYNC if the sync is to wait for all i/o to complete, else 0
 * @err:	error code recorded while syncing or 0 if no error was recorded
 */
struct ntfs_sync_args {
	int sync, err;
};
3741
3742/**
3743 * ntfs_sync_callback - callback for vnode iterate in ntfs_sync()
3744 * @vn:		vnode the callback is invoked with (has iocount reference)
3745 * @arg:	pointer to an ntfs_sync_args structure
3746 *
3747 * This callback is called from vnode_iterate() which is called from
3748 * ntfs_sync() for all in-core, non-dead, non-suspend vnodes belonging to the
3749 * mounted volume that still have an ntfs inode attached.
3750 *
3751 * We sync all dirty inodes to disk and if an error occurs we record it in the
3752 * @err field of the ntfs_sync_args structure pointed to by @arg.  Note we
3753 * preserve the old error code if an error is already recorded unless that
3754 * error code is ENOTSUP.
3755 *
3756 * If the @sync field of the ntfs_sync_args structure pointed to by @arg is
3757 * IO_SYNC, wait for all i/o to complete.
3758 */
3759static int ntfs_sync_callback(vnode_t vn, void *arg)
3760{
3761	ntfs_inode *ni = NTFS_I(vn);
3762	ntfs_volume *vol = ni->vol;
3763	struct ntfs_sync_args *args = (struct ntfs_sync_args*)arg;
3764
3765	/*
3766	 * Skip the inodes for $MFT and $MFTMirr.  They are done separately as
3767	 * the last ones to be synced.
3768	 */
3769	if (ni != vol->mft_ni && ni != vol->mftmirr_ni) {
3770		errno_t err;
3771
3772		/*
3773		 * Sync the inode data to disk and sync the ntfs inode to the
3774		 * mft record(s) but do not write the mft record(s) to disk.
3775		 */
3776		err = ntfs_inode_sync(ni, args->sync, TRUE);
3777		/*
3778		 * Only record the first error that is not ENOTSUP or record
3779		 * ENOTSUP if that is the only error.
3780		 *
3781		 * Skip deleted inodes.
3782		 */
3783		if (err && err != ENOENT) {
3784			if (!args->err || args->err == ENOTSUP)
3785				args->err = err;
3786		}
3787	}
3788	return VNODE_RETURNED;
3789}
3790
3791/**
3792 * ntfs_sync_helper - helper for ntfs_sync()
3793 * @ni:				ntfs inode the helper is invoked for
3794 * @args:			pointer to an ntfs_sync_args structure
3795 * @skip_mft_record_sync:	do not sync the mft record(s) to disk
3796 *
3797 * This helper is called from ntfs_sync() when syncing the $MFT and $MFTMirr
3798 * inodes.
3799 *
3800 * Any errors are returned in @args->err.
3801 */
3802static void ntfs_sync_helper(ntfs_inode *ni, struct ntfs_sync_args *args,
3803		const BOOL skip_mft_record_sync)
3804{
3805	errno_t err;
3806
3807	err = vnode_get(ni->vn);
3808	if (err) {
3809		ntfs_error(ni->vol->mp, "Failed to get vnode for $MFT%s "
3810				"(error %d).",
3811				(ni == ni->vol->mft_ni) ? "" : "Mirr",
3812				(int)err);
3813		goto err;
3814	}
3815	err = ntfs_inode_sync(ni, args->sync, skip_mft_record_sync);
3816	vnode_put(ni->vn);
3817	/* Skip deleted inodes. */
3818	if (err && err != ENOENT) {
3819		ntfs_error(ni->vol->mp, "Failed to sync $MFT%s (error %d).",
3820				(ni == ni->vol->mft_ni) ? "" : "Mirr",
3821				(int)err);
3822		goto err;
3823	}
3824	return;
3825err:
3826	if (!args->err || args->err == ENOTSUP)
3827		args->err = err;
3828	return;
3829}
3830
3831/**
3832 * ntfs_sync - sync a mounted volume to disk
3833 * @mp:		mount point of ntfs file system
3834 * @waitfor:	if MNT_WAIT wait fo i/o to complete
3835 * @context:	vfs context
3836 *
3837 * The VFS calls this via VFS_SYNC() when it wants to sync all cached data of
3838 * the mounted ntfs volume described by the mount @mp.
3839 *
3840 * If @waitfor is MNT_WAIT, wait for all i/o to complete before returning.
3841 *
3842 * Return 0 on success and errno on error.
3843 *
3844 * Note this function is only called for r/w mounted volumes so no need to
3845 * check if the volume is read-only.
3846 */
3847static int ntfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
3848{
3849	ntfs_volume *vol = NTFS_MP(mp);
3850	struct ntfs_sync_args args;
3851
3852	/* If we are mounted read-only, we do not need to sync anything. */
3853	if (NVolReadOnly(vol))
3854		return 0;
3855	ntfs_debug("Entering.");
3856	args.sync = (waitfor == MNT_WAIT) ? IO_SYNC : 0;
3857	args.err = 0;
3858	/* Iterate over all vnodes and run ntfs_inode_sync() on each of them. */
3859	(void)vnode_iterate(mp, 0, ntfs_sync_callback, (void*)&args);
3860	/*
3861	 * Finally, sync the inodes for $MFT and $MFTMirr to disk.  Note we do
3862	 * the sync twice to ensure that any interdependent changes that are
3863	 * flushed from one inode to the other are actually written to disk.
3864	 */
3865	ntfs_sync_helper(vol->mftmirr_ni, &args, TRUE);
3866	ntfs_sync_helper(vol->mft_ni, &args, TRUE);
3867	ntfs_sync_helper(vol->mftmirr_ni, &args, FALSE);
3868	ntfs_sync_helper(vol->mft_ni, &args, FALSE);
3869	if (!args.err)
3870		ntfs_debug("Done.");
3871	else
3872		ntfs_error(mp, "Failed to sync volume (error %d).", args.err);
3873	return args.err;
3874}
3875
3876/**
3877 * ntfs_remount - change the mount options of a mounted ntfs file system
3878 * @mp:		mount point of mounted ntfs file system
3879 * @opts:	ntfs specific mount options (already copied from user space)
3880 *
3881 * Change the mount options of an already mounted ntfs file system.
3882 *
3883 * Return 0 on success and errno on error.
3884 *
3885 * If the remount fails, we must call OSKextReleaseKextWithLoadTag
3886 * to allow the KEXT to be unloaded when no longer in use.
3887 *
3888 *
3889 * Note we are at mount protocol version 0.0 where we do not have any ntfs
3890 * specific mount options so we annotate @opts as __unused to make gcc happy.
3891 */
3892static errno_t ntfs_remount(mount_t mp,
3893		ntfs_mount_options_1_0 *opts)
3894{
3895	errno_t err = 0;
3896	ntfs_volume *vol = NTFS_MP(mp);
3897
3898	ntfs_debug("Entering.");
3899	/*
3900	 * Check for a change in the case sensitivity semantics and abort if
3901	 * one is requested as things could get very confused if we allow a
3902	 * remount to switch from case sensitive to case insensitive or vice
3903	 * versa.
3904	 */
3905	if (((opts->flags & NTFS_MNT_OPT_CASE_SENSITIVE) &&
3906			!NVolCaseSensitive(vol)) ||
3907			(!(opts->flags & NTFS_MNT_OPT_CASE_SENSITIVE) &&
3908			NVolCaseSensitive(vol))) {
3909		ntfs_error(mp, "Cannot change case sensitivity semantics via "
3910				"remount.  You need to unmount and then mount "
3911				"again with the desired options.");
3912		err = EINVAL;
3913		goto err_exit;
3914	}
3915	/*
3916	 * If we are remounting read-write, make sure there are no volume
3917	 * errors and that no unsupported volume flags are set.  Also, empty
3918	 * the logfile journal as it would become stale as soon as something is
3919	 * written to the volume and mark the volume dirty so that chkdsk is
3920	 * run if the volume is not umounted cleanly.  Finally, mark the quotas
3921	 * out of date so Windows rescans the volume on boot and updates them.
3922	 *
3923	 * When remounting read-only, mark the volume clean if no volume errors
3924	 * have occured.
3925	 */
3926	if (vfs_iswriteupgrade(mp)) {
3927		/* We no longer allow (re-)mounting read/write. */
3928		ntfs_error(mp, "Remounting read/write is not supported");
3929		goto EROFS_exit;
3930#if 0
3931		static const char es[] = ".  Cannot remount read-write.  To "
3932				"fix this problem boot into Windows, run "
3933				"chkdsk c: /f /v /x from the command prompt "
3934				"(replace c: with the drive letter of this "
3935				"volume), then reboot into Mac OS X and mount "
3936				"the volume again.";
3937
3938		/* Remounting read-write. */
3939		if (NVolErrors(vol)) {
3940			ntfs_error(mp, "Volume has errors and is read-only%s",
3941					es);
3942			goto EROFS_exit;
3943		}
3944		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
3945			ntfs_error(mp, "Volume has unsupported flags set "
3946					"(0x%x) and is read-only%s",
3947					(unsigned)le16_to_cpu(vol->vol_flags),
3948					es);
3949			goto EROFS_exit;
3950		}
3951		if (ntfs_volume_flags_set(vol, VOLUME_IS_DIRTY)) {
3952			ntfs_error(mp, "Failed to set dirty bit in volume "
3953					"information flags%s", es);
3954			goto EROFS_exit;
3955		}
3956		if (ntfs_logfile_empty(vol->logfile_ni)) {
3957			ntfs_error(mp, "Failed to empty journal $LogFile%s",
3958					es);
3959			NVolSetErrors(vol);
3960			goto EROFS_exit;
3961		}
3962		if (ntfs_quotas_mark_out_of_date(vol)) {
3963			ntfs_error(mp, "Failed to mark quotas out of date%s",
3964					es);
3965			NVolSetErrors(vol);
3966			goto EROFS_exit;
3967		}
3968		if (ntfs_usnjrnl_stamp(vol)) {
3969			ntfs_error(mp, "Failed to stamp transation log "
3970					"($UsnJrnl)%s", es);
3971			NVolSetErrors(vol);
3972			goto EROFS_exit;
3973		}
3974		NVolClearReadOnly(vol);
3975#endif /* r/w upgrade not supported */
3976	} else if (!NVolReadOnly(vol) && vfs_isrdonly(mp)) {
3977		/* Remounting read-only, flush all pending writes. */
3978		err = ntfs_sync(mp, MNT_WAIT, NULL);
3979		if (err) {
3980			ntfs_error(mp, "Failed to sync volume (error %d).  "
3981					"Cannot remount read-only.", err);
3982			goto err_exit;
3983		}
3984		/* If no volume errors have occured, mark the volume clean. */
3985		if (!NVolErrors(vol)) {
3986			if (ntfs_volume_flags_clear(vol, VOLUME_IS_DIRTY))
3987				ntfs_warning(mp, "Failed to clear dirty bit "
3988						"in volume information "
3989						"flags.  Run chkdsk.");
3990			/* Flush the changes to disk. */
3991			err = ntfs_sync(mp, MNT_WAIT, NULL);
3992			if (err) {
3993				ntfs_error(mp, "Failed to sync volume (error "
3994						"%d).  Cannot remount "
3995						"read-only.", err);
3996				/*
3997				 * Try to set the dirty flag again in case we
3998				 * did clear it but something else failed.  We
3999				 * do not care about any errors as we almost
4000				 * expect them to happen if we got here.
4001				 */
4002				(void)ntfs_volume_flags_set(vol,
4003						VOLUME_IS_DIRTY);
4004				goto err_exit;
4005			}
4006		} else
4007			ntfs_warning(mp, "Volume has errors.  Leaving volume "
4008					"marked dirty.  Run chkdsk.");
4009		NVolSetReadOnly(vol);
4010	}
4011	/* Don't allow the user to clear MNT_DONTBROWSE for read/write volumes. */
4012	if (vfs_isrdwr(mp))
4013		vfs_setflags(mp, MNT_DONTBROWSE);
4014	// TODO: Copy mount options from @opts to @vol.
4015	ntfs_debug("Done.");
4016	return 0;
4017EROFS_exit:
4018	err = EROFS;
4019err_exit:
4020	OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag());
4021	return err;
4022}
4023
4024/**
4025 * ntfs_mount - mount an ntfs file system
4026 * @mp:		mount point to initialize/mount
4027 * @dev_vn:	vnode of the device we are mounting
4028 * @data:	mount options (in user space)
4029 * @context:	vfs context
4030 *
4031 * The VFS calls this via VFS_MOUNT() when it wants to mount an ntfs volume.
4032 *
4033 * Note: @dev_vn is NULLVP if this is a MNT_UPDATE or MNT_RELOAD mount but of
4034 * course in those cases it can be retrieved from the NTFS_MP(mp)->dev_vn.
4035 *
4036 * Return 0 on success and errno on error.
4037 *
4038 * We call OSKextRetainKextWithLoadTag() to prevent the KEXT from being
4039 * unloaded automatically while in use.  If the mount fails, we must call
4040 * OSKextReleaseKextWithLoadTag() to allow the KEXT to be unloaded.
4041 */
4042static int ntfs_mount(mount_t mp, vnode_t dev_vn, user_addr_t data,
4043		vfs_context_t context)
4044{
4045	daddr64_t nr_blocks;
4046	struct vfsstatfs *sfs = vfs_statfs(mp);
4047	ntfs_volume *vol;
4048	buf_t buf;
4049	kauth_cred_t cred;
4050	dev_t dev;
4051	NTFS_BOOT_SECTOR *bs;
4052	errno_t err, err2;
4053	u32 blocksize;
4054	ntfs_mount_options_header opts_hdr;
4055	ntfs_mount_options_1_0 opts;
4056
4057	ntfs_debug("Entering.");
4058	OSKextRetainKextWithLoadTag(OSKextGetCurrentLoadTag());
4059	/*
4060	 * FIXME: Not convinced that this is necessary.  It may well be
4061	 * sufficient to set cred = vfs_context_ucred(context) as some file
4062	 * systems do (e.g. msdosfs, old ntfs), but HFS does it this way so we
4063	 * follow suit.  Also, some file systems even simply set cred = NOCRED
4064	 * (e.g. udf).  Should investigate or ask someone...
4065	 */
4066	cred = vfs_context_proc(context) ? vfs_context_ucred(context) : NOCRED;
4067	/* Copy our mount options header from user space. */
4068	err = copyin(data, (caddr_t)&opts_hdr, sizeof(opts_hdr));
4069	if (err) {
4070		ntfs_error(mp, "Failed to copy mount options header from user "
4071				"space (error %d).", err);
4072		goto unload;
4073	}
4074	ntfs_debug("Mount options header version %d.%d.", opts_hdr.major_ver,
4075			opts_hdr.minor_ver);
4076	/* Get and check options. */
4077	switch (opts_hdr.major_ver) {
4078	case 1:
4079		if (opts_hdr.minor_ver != 0)
4080			ntfs_warning(mp, "Your version of /sbin/mount_ntfs is "
4081					"newer than this driver, ignoring any "
4082					"new options.");
4083		/* Version 1.x has one option so copy it from user space. */
4084		err = copyin((data + sizeof(opts_hdr) + 7) & ~7,
4085				(caddr_t)&opts, sizeof(opts));
4086		if (err) {
4087			ntfs_error(mp, "Failed to copy NTFS mount options "
4088					"from user space (error %d).", err);
4089			goto unload;
4090		}
4091		break;
4092	case 0:
4093		/* Version 0.x has no options at all. */
4094		bzero(&opts, sizeof(opts));
4095		break;
4096	default:
4097		ntfs_warning(mp, "Your version of /sbin/mount_ntfs is not "
4098				"compatible with this driver, ignoring NTFS "
4099				"specific mount options.");
4100		bzero(&opts, sizeof(opts));
4101		break;
4102	}
4103	/*
4104	 * We only allow read/write mounts if the "nobrowse" option was also
4105	 * given.  This is to discourage end users from mounting read/write,
4106	 * but still allows our utilities (such as an OS install) to make
4107	 * changes to an NTFS volume.  Without the "nobrowse" option, we force
4108	 * a read-only mount.  Note that we also check for non-update mounts
4109	 * here.  In the case of an update mount, ntfs_remount() will do the
4110	 * appropriate checking for changing the writability of the mount.
4111	 */
4112	if ((vfs_flags(mp) & MNT_DONTBROWSE) == 0 && !vfs_isupdate(mp))
4113		vfs_setflags(mp, MNT_RDONLY);
4114	/*
4115	 * TODO: For now we do not implement ACLs thus we force the "noowners"
4116	 * mount option.
4117	 */
4118	vfs_setflags(mp, MNT_IGNORE_OWNERSHIP);
4119	/*
4120	 * We do not support MNT_RELOAD yet.  Note, MNT_RELOAD implies the
4121	 * file system is currently read-only.
4122	 */
4123	if (vfs_isreload(mp)) {
4124		ntfs_error(mp, "MNT_RELOAD is not supported yet.");
4125		err = ENOTSUP;
4126		goto unload;
4127	}
4128	/*
4129	 * If this is a remount request, handle this elsewhere.  Note this
4130	 * check has to come after the vfs_isreload() check as vfs_isupdate()
4131	 * is always true when vfs_isreload() is true but this is not true the
4132	 * other way round.
4133	 */
4134	if (vfs_isupdate(mp))
4135		return ntfs_remount(mp, &opts);
4136	/* We know this is a real mount request thus @dev_vn is not NULL. */
4137	dev = vnode_specrdev(dev_vn);
4138	/* Let the VFS do advisory locking for us. */
4139	vfs_setlocklocal(mp);
4140	/*
4141	 * Tell old-style applications that we support VolFS style lookups.
4142	 *
4143	 * Note we do not set MNT_DOVOLFS because then various things start
4144	 * breaking like for example the Finder "Empty Trash" command always
4145	 * fails silently unless we also support va_nchildren in
4146	 * ntfs_vnop_getattr() and set ATTR_DIR_ENTRYCOUNT in our valid
4147	 * directory attributes in ntfs_getattr().
4148	 */
4149	//vfs_setflags(mp, MNT_DOVOLFS);
4150	/*
4151	 * Set the file system id in the fsstat part of the mount structure.
4152	 * We use the device @dev for the first 32-bit value and the dynamic
4153	 * file system number assigned by the VFS to us for the second 32-bit
4154	 * value.  This is important because the VFS uses the first 32-bit
4155	 * value to satisfy the ATTR_CMN_DEVID request in getattrlist() and
4156	 * getvolattrlist() thus it must be the device.
4157	 */
4158	sfs->f_fsid.val[0] = (int32_t)dev;
4159	sfs->f_fsid.val[1] = (int32_t)vfs_typenum(mp);
4160	/*
4161	 * Allocate and initialize an ntfs volume and attach it to the vfs
4162	 * mount.
4163	 */
4164	vol = OSMalloc(sizeof(ntfs_volume), ntfs_malloc_tag);
4165	if (!vol) {
4166		ntfs_error(mp, "Failed to allocate ntfs volume buffer.");
4167		err = ENOMEM;
4168		goto unload;
4169	}
4170	*vol = (ntfs_volume) {
4171		.mp = mp,
4172		.dev = dev,
4173		.dev_vn = dev_vn,
4174		/*
4175		 * Default is group and other have read-only access to files
4176		 * and directories while owner has full access.  Everyone gets
4177		 * directory search and file execute permission.  The latter is
4178		 * so people can execute binaries from NTFS volumes.
4179		 *
4180		 * In reality it does not matter as we set MNT_IGNORE_OWNERSHIP
4181		 * thus everyone can fully access the NTFS volume.  The only
4182		 * reason to set the umask this way is that when people copy
4183		 * files with the Finder or "cp -p" from an NTFS volume to a
4184		 * HFS for example, the file does not end up being world
4185		 * writable.
4186		 */
4187		.fmask = 0022,
4188		.dmask = 0022,
4189		.mft_zone_multiplier = 1,
4190		.on_errors = ON_ERRORS_CONTINUE|ON_ERRORS_FAIL_DIRTY,
4191	};
4192	lck_rw_init(&vol->mftbmp_lock, ntfs_lock_grp, ntfs_lock_attr);
4193	lck_rw_init(&vol->lcnbmp_lock, ntfs_lock_grp, ntfs_lock_attr);
4194	lck_mtx_init(&vol->rename_lock, ntfs_lock_grp, ntfs_lock_attr);
4195	lck_rw_init(&vol->secure_lock, ntfs_lock_grp, ntfs_lock_attr);
4196	lck_spin_init(&vol->security_id_lock, ntfs_lock_grp, ntfs_lock_attr);
4197	lck_mtx_init(&vol->inodes_lock, ntfs_lock_grp, ntfs_lock_attr);
4198	vfs_setfsprivate(mp, vol);
4199	if (vfs_isrdonly(mp))
4200		NVolSetReadOnly(vol);
4201	/* Check for the requested case sensitivity semantics. */
4202	if (opts.flags & NTFS_MNT_OPT_CASE_SENSITIVE) {
4203		ntfs_debug("Mounting volume case sensitive.");
4204		NVolSetCaseSensitive(vol);
4205	}
4206// FIXME: For now disable sparse support as it is not done yet...
4207#if 0
4208	/* By default, enable sparse support. */
4209	NVolSetSparseEnabled(vol);
4210#endif
4211	/* By default, enable compression support. */
4212	NVolSetCompressionEnabled(vol);
4213	blocksize = vfs_devblocksize(mp);
4214	/* We support device sector sizes up to the PAGE_SIZE. */
4215	if (blocksize > PAGE_SIZE) {
4216		ntfs_error(mp, "Device has unsupported sector size (%u).  "
4217				"The maximum supported sector size on this "
4218				"system is %u bytes.", blocksize, PAGE_SIZE);
4219		err = ENOTSUP;
4220		goto err;
4221	}
4222	/*
4223	 * If the block size of the device we are to mount is less than
4224	 * NTFS_BLOCK_SIZE, change the block size to NTFS_BLOCK_SIZE.
4225	 */
4226	if (blocksize < NTFS_BLOCK_SIZE) {
4227		ntfs_debug("Setting device block size to NTFS_BLOCK_SIZE.");
4228		err = ntfs_blocksize_set(mp, dev_vn, NTFS_BLOCK_SIZE, context);
4229		if (err) {
4230			ntfs_error(mp, "Failed to set device block size to "
4231					"NTFS_BLOCK_SIZE (512 bytes) because "
4232					"the DKIOCSETBLOCKSIZE ioctl returned "
4233					"error %d).", err);
4234			goto err;
4235		}
4236		blocksize = NTFS_BLOCK_SIZE;
4237	} else
4238		ntfs_debug("Device block size (%u) is greater than or equal "
4239				"to NTFS_BLOCK_SIZE.", blocksize);
4240	/* Get the size of the device in units of blocksize bytes. */
4241	err = VNOP_IOCTL(dev_vn, DKIOCGETBLOCKCOUNT, (caddr_t)&nr_blocks, 0,
4242			context);
4243	if (err) {
4244		ntfs_error(mp, "Failed to determine the size of the device "
4245				"(DKIOCGETBLOCKCOUNT ioctl returned error "
4246				"%d).", err);
4247		err = ENXIO;
4248		goto err;
4249	}
4250	vol->nr_blocks = nr_blocks;
4251#ifdef DEBUG
4252	{
4253		u64 dev_size, u;
4254		char *suffix;
4255		int shift = 0;
4256		u8 blocksize_shift = ffs(blocksize) - 1;
4257
4258		dev_size = u = (u64)nr_blocks << blocksize_shift;
4259		while ((u >>= 10) > 10 && shift < 40)
4260			shift += 10;
4261		switch (shift) {
4262		case 0:
4263			suffix = "bytes";
4264			break;
4265		case 10:
4266			suffix = "kiB";
4267			break;
4268		case 20:
4269			suffix = "MiB";
4270			break;
4271		case 30:
4272			suffix = "GiB";
4273			break;
4274		default:
4275			suffix = "TiB";
4276			break;
4277		}
4278		ntfs_debug("Device size is %llu%s (%llu bytes).",
4279				(unsigned long long)dev_size >> shift, suffix,
4280				(unsigned long long)dev_size);
4281	}
4282#endif
4283	/* Read the boot sector and return the buffer containing it. */
4284	buf = NULL;
4285	bs = NULL;
4286	err = ntfs_boot_sector_read(vol, cred, &buf, &bs);
4287	if (err) {
4288		ntfs_error(mp, "Not an NTFS volume.");
4289		goto err;
4290	}
4291	/*
4292	 * Extract the data from the boot sector and setup the ntfs volume
4293	 * using it.
4294	 */
4295	err = ntfs_boot_sector_parse(vol, bs);
4296	err2 = buf_unmap(buf);
4297	if (err2)
4298		ntfs_error(mp, "Failed to unmap buffer of boot sector (error "
4299				"%d).", err2);
4300	buf_brelse(buf);
4301	if (err) {
4302		ntfs_error(mp, "%s NTFS file system.",
4303				err == ENOTSUP ? "Unsupported" : "Invalid");
4304		goto err;
4305	}
4306	/*
4307	 * If the boot sector indicates a sector size bigger than the current
4308	 * device block size, switch the device block size to the sector size.
4309	 * TODO: It may be possible to support this case even when the set
4310	 * below fails, we would just be breaking up the i/o for each sector
4311	 * into multiple blocks for i/o purposes but otherwise it should just
4312	 * work.  However it is safer to leave disabled until someone hits this
4313	 * error message and then we can get them to try it without the setting
4314	 * so we know for sure that it works.  We would then want to set
4315	 * vol->sector_size* to the current blocksize or add vol->blocksize*...
4316	 * No, cannot do that or will break directory operations.  We will need
4317	 * to move to using vol->blocksize* instead of vol->sector_size in most
4318	 * places and stick with vol->sector_size where we really want its
4319	 * actual value.
4320	 */
4321	if (vol->sector_size > blocksize) {
4322		ntfs_debug("Setting device block size to sector size.");
4323		err = ntfs_blocksize_set(mp, dev_vn, vol->sector_size, context);
4324		if (err) {
4325			ntfs_error(mp, "Failed to set device block size to "
4326					"sector size (%u bytes) because "
4327					"the DKIOCSETBLOCKSIZE ioctl returned "
4328					"error %d).", vol->sector_size, err);
4329			goto err;
4330		}
4331		blocksize = vol->sector_size;
4332	}
4333	/* Initialize the cluster and mft allocators. */
4334	ntfs_setup_allocators(vol);
4335	/*
4336	 * Get the $MFT inode and bootstrap the volume sufficiently so we can
4337	 * get other inodes and map (extent) mft records.
4338	 */
4339	err = ntfs_mft_inode_get(vol);
4340	if (err)
4341		goto err;
4342	lck_mtx_lock(&ntfs_lock);
4343	if (NVolCompressionEnabled(vol)) {
4344		/*
4345		 * The current mount may be a compression user if the cluster
4346		 * size is less than or equal to 4kiB.
4347		 */
4348		if (vol->cluster_size <= 4096) {
4349			if (!ntfs_compression_buffer) {
4350				ntfs_compression_buffer = OSMalloc(
4351						ntfs_compression_buffer_size,
4352						ntfs_malloc_tag);
4353				if (!ntfs_compression_buffer) {
4354					// FIXME: We could continue with
4355					// compression disabled.  But do we
4356					// want to do that given the system is
4357					// that low on memory?
4358					ntfs_error(mp, "Failed to allocate "
4359							"buffer for "
4360							"compression engine.");
4361					NVolClearCompressionEnabled(vol);
4362					lck_mtx_unlock(&ntfs_lock);
4363					goto err;
4364				}
4365			}
4366			ntfs_compression_users++;
4367		} else {
4368			ntfs_debug("Disabling compression because the cluster "
4369					"size of %u bytes is above the "
4370					"allowed maximum of 4096 bytes.",
4371					(unsigned)vol->cluster_size);
4372			NVolClearCompressionEnabled(vol);
4373		}
4374	}
4375	/* Generate the global default upcase table if necessary. */
4376	if (!ntfs_default_upcase) {
4377		ntfs_default_upcase = OSMalloc(ntfs_default_upcase_size,
4378				ntfs_malloc_tag);
4379		if (!ntfs_default_upcase) {
4380			// FIXME: We could continue without a default upcase
4381			// table.  But do we want to do that given the system
4382			// is that low on memory?
4383			ntfs_error(mp, "Failed to allocate memory for default "
4384					"upcase table.");
4385			lck_mtx_unlock(&ntfs_lock);
4386			err = ENOMEM;
4387			goto err;
4388		}
4389		ntfs_upcase_table_generate(ntfs_default_upcase,
4390				ntfs_default_upcase_size);
4391	}
4392	/*
4393	 * Temporarily take a reference on the default upcase table to avoid
4394	 * race conditions with concurrent (u)mounts.
4395	 */
4396	ntfs_default_upcase_users++;
4397	lck_mtx_unlock(&ntfs_lock);
4398	/* Process the system inodes. */
4399	err = ntfs_system_inodes_get(vol);
4400	/*
4401	 * We now have the volume upcase table (either having read it from disk
4402	 * or using the default, in which case we have taken a reference on the
4403	 * default upcase table) or there was an error and we are going to bail
4404	 * out.  In any case, we can drop our temporary reference on the
4405	 * default upcase table and throw it away if we had the only reference.
4406	 */
4407	lck_mtx_lock(&ntfs_lock);
4408	if (!--ntfs_default_upcase_users) {
4409		OSFree(ntfs_default_upcase, ntfs_default_upcase_size,
4410				ntfs_malloc_tag);
4411		ntfs_default_upcase = NULL;
4412	}
4413	lck_mtx_unlock(&ntfs_lock);
4414	/* If we failed to process the system inodes, abort the mount. */
4415	if (err) {
4416		ntfs_error(mp, "Failed to load system files (error %d).", err);
4417		goto err;
4418	}
4419	/*
4420	 * Determine the number of free clusters and cache it in the volume (in
4421	 * @vol->nr_free_clusters).
4422	 */
4423	err = ntfs_set_nr_free_clusters(vol);
4424	if (err)
4425		goto err;
4426	/*
4427	 * Determine the number of both total and free mft records and cache
4428	 * them in the volume (in @vol->nr_mft_records and
4429	 * @vol->nr_free_mft_records, respectively).
4430	 */
4431	err = ntfs_set_nr_mft_records(vol);
4432	if (err)
4433		goto err;
4434	/*
4435	 * Finally, determine the statfs information for the volume and cache
4436	 * it in the vfs mount structure.
4437	 */
4438	ntfs_statfs(vol, sfs);
4439	ntfs_debug("Done.");
4440	return 0;
4441unload:
4442	/* Ensure NTFS_MP(mp) is NULL so it is safe to call ntfs_unmount(). */
4443	vfs_setfsprivate(mp, NULL);
4444err:
4445	ntfs_error(mp, "Mount failed (error %d).", err);
4446	/*
4447	 * ntfs_unmount() will clean up everything we did until we encountered
4448	 * the error condition including calling OSKextReleaseKextWithLoadTag().
4449	 *
4450	 * Note we need to pass MNT_FORCE to ensure ntfs_unmount() definitely
4451	 * ends up calling OSKextReleaseKextWithLoadTag().
4452	 */
4453	ntfs_unmount(mp, MNT_FORCE, context);
4454	return err;
4455}
4456
4457/**
4458 * ntfs_root - get the vnode of the root directory of an ntfs file system
4459 * @mp:		mount point of ntfs file system
4460 * @vpp:	destination pointer for the obtained file system root vnode
4461 * @context:	vfs context
4462 *
4463 * The VFS calls this via VFS_ROOT() when it wants to have the root directory
4464 * of a mounted ntfs volume.  We already have the root vnode/inode due to
4465 * ntfs_mount() so just get an iocount reference on the vnode and return the
4466 * vnode.
4467 *
4468 * Return 0 on success and errno on error.
4469 *
 * Warning: We get a panic() if we return an error here!  This is due to the
 * function checkdirs(), which is called after ntfs_mount() but before
 * VFS_START() (which we do not implement).
4473 */
4474static int ntfs_root(mount_t mp, struct vnode **vpp,
4475		vfs_context_t context __unused)
4476{
4477	ntfs_volume *vol = NTFS_MP(mp);
4478	vnode_t vn;
4479	int err;
4480
4481	ntfs_debug("Entering.");
4482	if (!vol || !vol->root_ni || !vol->root_ni->vn)
4483		panic("%s(): Mount and/or root inode and/or vnode is not "
4484				"loaded.\n", __FUNCTION__);
4485	vn = vol->root_ni->vn;
4486	/*
4487	 * Simulate an ntfs_inode_get() by taking an iocount reference on the
4488	 * vnode of the ntfs inode.  It is ok to do this here because we know
4489	 * the root directory is loaded and attached to the ntfs volume (thus
4490	 * we already hold a use count reference on the vnode).
4491	 */
4492	err = vnode_get(vn);
4493	if (!err) {
4494		*vpp = vn;
4495		ntfs_debug("Done.");
4496	} else {
4497		*vpp = NULL;
4498		ntfs_error(mp, "Cannot return root vnode because vnode_get() "
4499				"failed (error %d).", err);
4500	}
4501	return err;
4502}
4503
4504/**
4505 * ntfs_vget - get the vnode corresponding to an inode number
4506 * @mp:		mount point of ntfs file system
4507 * @ino:	inode number / mft record number to obtain
4508 * @vpp:	destination pointer for the obtained vnode
4509 * @context:	vfs context
4510 *
4511 * Volfs and other strange places where no further path or name context is
4512 * available call this via VFS_VGET() to obtain the vnode with the inode number
4513 * @ino.
4514 *
4515 * The vnode is returned with an iocount reference.
4516 *
4517 * Return 0 on success and errno on error.
4518 *
4519 * FIXME: The only potential problem is that using only the inode / mft record
4520 * number only allows ntfs_vget() to return the file or directory vnode itself
4521 * but not for example the vnode of a named stream or other attribute.  Perhaps
4522 * this does not matter for volfs in which case everything is fine...
4523 */
4524static int ntfs_vget(mount_t mp, ino64_t ino, struct vnode **vpp,
4525		vfs_context_t context __unused)
4526{
4527	ntfs_inode *ni;
4528	errno_t err;
4529
4530	ntfs_debug("Entering for ino 0x%llx.", (unsigned long long)ino);
4531	/*
4532	 * Remove all NTFS core system files from the name space so we do not
4533	 * need to worry about users damaging a volume by writing to them or
4534	 * deleting/renaming them and so that we can return fsRtParID (1) as
4535	 * the inode number of the parent of the volume root directory and
4536	 * fsRtDirID (2) as the inode number of the volume root directory which
4537	 * are both expected by Carbon and various applications.
4538	 *
4539	 * Note we thus have to remap inode number 2 (fsRtDirID) to FILE_root
4540	 * here.
4541	 */
4542	if (ino < FILE_first_user) {
4543		if (ino != 2) {
4544			ntfs_debug("Removing core NTFS system file (mft_no "
4545					"0x%x) from name space.",
4546					(unsigned)ino);
4547			err = ENOENT;
4548			goto err;
4549		}
4550		/*
4551		 * @ino is 2, i.e. fsRtDirID, thus return the vnode of the root
4552		 * directory inode (FILE_root).
4553		 *
4554		 * First try to use the already loaded root directory inode and
4555		 * if that fails for some reason go and get it the slow way.
4556		 */
4557		ni = NTFS_MP(mp)->root_ni;
4558		if (ni) {
4559			err = vnode_get(ni->vn);
4560			if (!err)
4561				goto done;
4562		}
4563		ino = FILE_root;
4564	}
4565	err = ntfs_inode_get(NTFS_MP(mp), ino, FALSE, LCK_RW_TYPE_SHARED, &ni,
4566			NULL, NULL);
4567	if (!err) {
4568		lck_rw_unlock_shared(&ni->lock);
4569done:
4570		ntfs_debug("Done.");
4571		*vpp = ni->vn;
4572		return err;
4573	}
4574err:
4575	*vpp = NULL;
4576	if (err != ENOENT)
4577		ntfs_error(mp, "Failed to get mft_no 0x%llx (error %d).",
4578				(unsigned long long)ino, err);
4579	else
4580		ntfs_debug("Mft_no 0x%llx does not exist, returning ENOENT.",
4581				(unsigned long long)ino);
4582	return err;
4583}
4584
4585/**
4586 * ntfs_getattr - obtain information about a mounted ntfs volume
4587 * @mp:		mount point of ntfs file system
4588 * @fsa:	requested information and destination in which to return it
4589 * @context:	vfs context
4590 *
4591 * The VFS calls this via VFS_GETATTR() when it wants to obtain some
4592 * information about the mounted ntfs volume described by the mount @mp.
4593 *
4594 * Which information is requested is described by the vfs attribute structure
4595 * pointed to by @fsa, which is also the destination pointer in which the
4596 * requested information is returned.
4597 *
4598 * Return 0 on success and errno on error.
4599 *
4600 * Note: Further details are in the man page for the getattrlist function and
4601 * in the header files xnu/bsd/sys/{mount,attr}.h.
4602 */
4603static int ntfs_getattr(mount_t mp, struct vfs_attr *fsa,
4604		vfs_context_t context __unused)
4605{
4606	u64 nr_clusters, nr_free_clusters, nr_used_mft_records;
4607	u64 nr_free_mft_records;
4608	ntfs_volume *vol = NTFS_MP(mp);
4609	struct vfsstatfs *sfs = vfs_statfs(mp);
4610	ntfs_inode *ni;
4611
4612	ntfs_debug("Entering.");
4613	/* Get a fully consistent snapshot of this point in time. */
4614	lck_rw_lock_shared(&vol->mftbmp_lock);
4615	lck_rw_lock_shared(&vol->lcnbmp_lock);
4616	nr_clusters = vol->nr_clusters;
4617	nr_free_clusters = vol->nr_free_clusters;
4618	lck_rw_unlock_shared(&vol->lcnbmp_lock);
4619	nr_free_mft_records = vol->nr_free_mft_records;
4620	nr_used_mft_records = vol->nr_mft_records - nr_free_mft_records;
4621	lck_rw_unlock_shared(&vol->mftbmp_lock);
4622	/* Number of file system objects on volume (at this point in time). */
4623	VFSATTR_RETURN(fsa, f_objcount, nr_used_mft_records);
4624	/*
4625	 * Number of files on volume (at this point in time).
4626	 * FIXME: We cannot easily support this and the number of directories,
4627	 * below) as these two fields require reading the entirety of
4628	 * $MFT/$DATA, and checking each record if it is in use and if so,
4629	 * check if it is a file or directory and then return that here.  Note
4630	 * we would take all special files as files, and only real directories
4631	 * as directories.  Instead of reading all of $MFT/$DATA it may be
4632	 * worth only reading mft records that are set as in use in the
4633	 * $MFT/$BITMAP.  Also, need to check if the mft record is a base mft
4634	 * record or not and only if it is one should it be marked as
4635	 * file/directory.  Or should it be counted towards files, just like
4636	 * other special files?
4637	 *
4638	 * A quote from ZFS:
4639	 *
4640	 * <quote>Carbon depends on f_filecount and f_dircount so make up some
4641	 * values based on total objects.</quote>
4642	 *
4643	 * Thus at least for now we behave like ZFS does.
4644	 */
4645	VFSATTR_RETURN(fsa, f_filecount, nr_used_mft_records -
4646			(nr_used_mft_records / 4));
4647	/* Number of directories on volume (at this point in time). */
4648	VFSATTR_RETURN(fsa, f_dircount, nr_used_mft_records / 4);
4649	/*
4650	 * Maximum number of file system objects given infinite free space.
4651	 * The actual number will be likely smaller as it is limited by the
4652	 * amount of free space but both HFS and ZFS return the theoretical
4653	 * maximum so we do the same.
4654	 */
4655	VFSATTR_RETURN(fsa, f_maxobjcount, NTFS_MAX_NR_MFT_RECORDS);
4656	/*
4657	 * Block size for the below size values.  We use the cluster size of
4658	 * the volume as that means we do not convert to a different unit.
4659	 * Alternatively, we could return the sector size instead.
4660	 */
4661	VFSATTR_RETURN(fsa, f_bsize, vol->cluster_size);
4662	/* Optimal transfer block size (in bytes). */
4663	VFSATTR_RETURN(fsa, f_iosize, ubc_upl_maxbufsize());
4664	/* Total data blocks in file system (in units of @f_bsize). */
4665	VFSATTR_RETURN(fsa, f_blocks, nr_clusters);
4666	/* Free data blocks in file system (in units of @f_bsize). */
4667	VFSATTR_RETURN(fsa, f_bfree, nr_free_clusters);
4668	/*
4669	 * Free blocks available to non-superuser (in units of @f_bsize), same
4670	 * as the free data blocks as NTFS, like ZFS, does not support root
4671	 * reservation.
4672	 */
4673	VFSATTR_RETURN(fsa, f_bavail, nr_free_clusters);
4674	/* Blocks in use (in units of @f_bsize). */
4675	VFSATTR_RETURN(fsa, f_bused, nr_clusters - nr_free_clusters);
4676	/*
4677	 * Free inodes in file system (at this point in time).  This is made up
4678	 * of both the current number of free mft records and the amount of
4679	 * available free space for new mft records.  The number is then capped
4680	 * to the maximum allowed number of mft records.  This is what ZFS
4681	 * does, too.
4682	 */
4683	nr_free_mft_records += (nr_free_clusters << vol->cluster_size_shift) >>
4684			vol->mft_record_size_shift;
4685	if (nr_free_mft_records > NTFS_MAX_NR_MFT_RECORDS - nr_used_mft_records)
4686		nr_free_mft_records = NTFS_MAX_NR_MFT_RECORDS -
4687			nr_used_mft_records;
4688	VFSATTR_RETURN(fsa, f_ffree, nr_free_mft_records);
4689	/*
4690	 * Number of inodes in file system (at this point in time).  This is
4691	 * the number of available files we returned above plus the number of
4692	 * mft records currently in use.
4693	 */
4694	VFSATTR_RETURN(fsa, f_files, nr_used_mft_records + nr_free_mft_records);
4695	/*
4696	 * We set the file system id in the statfs part of the mount structure
4697	 * in ntfs_mount(), so just return that.
4698	 */
4699	VFSATTR_RETURN(fsa, f_fsid, sfs->f_fsid);
4700	/*
4701	 * The mount syscall sets the f_owner in the statfs structure of the
4702	 * mount structure to the uid of the user performing the mount, so just
4703	 * return that.
4704	 */
4705	VFSATTR_RETURN(fsa, f_owner, sfs->f_owner);
4706	/*
4707	 * Optional features supported by the volume.  Note, ->valid indicates
4708	 * which bits in the ->capabilities are valid whilst ->capabilities
4709	 * indicates the capabilities of the driver implementation.  An
4710	 * example: Ntfs is journalled but we do not implement journalling so
4711	 * we do not set that bit in ->capabilities, but we do set it in
4712	 * ->valid thus stating that we do not support journalling.
4713	 */
4714	if (VFSATTR_IS_ACTIVE(fsa, f_capabilities)) {
4715		vol_capabilities_attr_t *ca = &fsa->f_capabilities;
4716
4717		/* Volume format capabilities. */
4718		ca->capabilities[VOL_CAPABILITIES_FORMAT] =
4719				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
4720				VOL_CAP_FMT_SYMBOLICLINKS |
4721				VOL_CAP_FMT_HARDLINKS |
4722				VOL_CAP_FMT_JOURNAL |
4723				/* We do not support journalling. */
4724				//VOL_CAP_FMT_JOURNAL_ACTIVE |
4725				VOL_CAP_FMT_SPARSE_FILES |
4726				VOL_CAP_FMT_ZERO_RUNS |
4727				/*
4728				 * Whether to be case sensitive or not is a
4729				 * mount option.
4730				 */
4731				(NVolCaseSensitive(vol) ?
4732					VOL_CAP_FMT_CASE_SENSITIVE : 0) |
4733				VOL_CAP_FMT_CASE_PRESERVING |
4734				VOL_CAP_FMT_FAST_STATFS |
4735				VOL_CAP_FMT_2TB_FILESIZE |
4736				// TODO: What do we need to do to implement
4737				// open deny modes?  And do we want to?
4738				// VOL_CAP_FMT_OPENDENYMODES |
4739				VOL_CAP_FMT_HIDDEN_FILES |
4740				/*
4741				 * VOL_CAP_FMT_PATH_FROM_ID is disabled until
4742				 * <rdar://problem/10685403> is fixed.  Use
4743				 * <rdar://problem/10685404> to re-enable.
4744				 */
4745				// VOL_CAP_FMT_PATH_FROM_ID |
4746				0;
4747		ca->valid[VOL_CAPABILITIES_FORMAT] =
4748				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
4749				VOL_CAP_FMT_SYMBOLICLINKS |
4750				VOL_CAP_FMT_HARDLINKS |
4751				VOL_CAP_FMT_JOURNAL |
4752				VOL_CAP_FMT_JOURNAL_ACTIVE |
4753				VOL_CAP_FMT_NO_ROOT_TIMES |
4754				VOL_CAP_FMT_SPARSE_FILES |
4755				VOL_CAP_FMT_ZERO_RUNS |
4756				VOL_CAP_FMT_CASE_SENSITIVE |
4757				VOL_CAP_FMT_CASE_PRESERVING |
4758				VOL_CAP_FMT_FAST_STATFS |
4759				VOL_CAP_FMT_2TB_FILESIZE |
4760				VOL_CAP_FMT_OPENDENYMODES |
4761				VOL_CAP_FMT_HIDDEN_FILES |
4762				VOL_CAP_FMT_PATH_FROM_ID |
4763				0;
4764		/* File system driver capabilities. */
4765		ca->capabilities[VOL_CAPABILITIES_INTERFACES] =
4766				/* TODO: These are not implemented yet. */
4767				// VOL_CAP_INT_SEARCHFS |
4768				VOL_CAP_INT_ATTRLIST |
4769				// VOL_CAP_INT_NFSEXPORT |
4770				// VOL_CAP_INT_READDIRATTR |
4771				// VOL_CAP_INT_EXCHANGEDATA |
4772				/*
4773				 * Nothing supports copyfile in current xnu and
4774				 * it is not documented so we do not support it
4775				 * either.
4776				 */
4777				// VOL_CAP_INT_COPYFILE |
4778				// VOL_CAP_INT_ALLOCATE |
4779				VOL_CAP_INT_VOL_RENAME |
4780				VOL_CAP_INT_ADVLOCK |
4781				VOL_CAP_INT_FLOCK |
4782				// VOL_CAP_INT_EXTENDED_SECURITY |
4783				// VOL_CAP_INT_USERACCESS |
4784				// VOL_CAP_INT_MANLOCK |
4785				VOL_CAP_INT_NAMEDSTREAMS |
4786				VOL_CAP_INT_EXTENDED_ATTR |
4787				0;
4788		ca->valid[VOL_CAPABILITIES_INTERFACES] =
4789				VOL_CAP_INT_SEARCHFS |
4790				VOL_CAP_INT_ATTRLIST |
4791				VOL_CAP_INT_NFSEXPORT |
4792				VOL_CAP_INT_READDIRATTR |
4793				VOL_CAP_INT_EXCHANGEDATA |
4794				VOL_CAP_INT_COPYFILE |
4795				VOL_CAP_INT_ALLOCATE |
4796				VOL_CAP_INT_VOL_RENAME |
4797				VOL_CAP_INT_ADVLOCK |
4798				VOL_CAP_INT_FLOCK |
4799				VOL_CAP_INT_EXTENDED_SECURITY |
4800				VOL_CAP_INT_USERACCESS |
4801				VOL_CAP_INT_MANLOCK |
4802				VOL_CAP_INT_NAMEDSTREAMS |
4803				VOL_CAP_INT_EXTENDED_ATTR |
4804				0;
4805		/* Reserved, set to zero. */
4806		ca->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
4807		ca->valid[VOL_CAPABILITIES_RESERVED1] = 0;
4808		ca->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
4809		ca->valid[VOL_CAPABILITIES_RESERVED2] = 0;
4810		VFSATTR_SET_SUPPORTED(fsa, f_capabilities);
4811	}
4812	/*
4813	 * Attributes supported by the volume.  Note, ->validattr indicates the
4814	 * capabilities of the file system driver whilst ->nativeattr indicates
4815	 * the native capabilities of the volume format itself.
4816	 */
4817	if (VFSATTR_IS_ACTIVE(fsa, f_attributes)) {
4818		vol_attributes_attr_t *aa = &fsa->f_attributes;
4819
4820		/*
4821		 * Common attribute group (these attributes apply to all of the
4822		 * below groups).
4823		 */
4824		aa->validattr.commonattr =
4825				ATTR_CMN_NAME |
4826				/*
4827				 * ATTR_CMN_DEVID, ATTR_CMN_OBJTYPE, and
4828				 * ATTR_CMN_OBJTAG are supplied by the VFS.
4829				 */
4830				ATTR_CMN_DEVID |
4831				ATTR_CMN_FSID |
4832				ATTR_CMN_OBJTYPE |
4833				ATTR_CMN_OBJTAG |
4834				ATTR_CMN_OBJID |
4835				ATTR_CMN_OBJPERMANENTID |
4836				ATTR_CMN_PAROBJID |
4837				ATTR_CMN_SCRIPT |
4838				ATTR_CMN_CRTIME |
4839				ATTR_CMN_MODTIME |
4840				ATTR_CMN_CHGTIME |
4841				ATTR_CMN_ACCTIME |
4842				ATTR_CMN_BKUPTIME |
4843				/*
4844				 * Supplied by the VFS via a call to
4845				 * vn_getxattr(XATTR_FINDERINFO_NAME).
4846				 */
4847				ATTR_CMN_FNDRINFO |
4848				ATTR_CMN_OWNERID |
4849				ATTR_CMN_GRPID |
4850				ATTR_CMN_ACCESSMASK |
4851				ATTR_CMN_FLAGS |
4852				//ATTR_CMN_NAMEDATTRCOUNT /* not implemented */ |
4853				//ATTR_CMN_NAMEDATTRLIST /* not implemented */ |
4854				/*
4855				 * Supplied by the VFS via calls to
4856				 * vnode_authorize().
4857				 */
4858				ATTR_CMN_USERACCESS |
4859				//ATTR_CMN_EXTENDED_SECURITY |
4860				//ATTR_CMN_UUID |
4861				//ATTR_CMN_GRPUUID |
4862				ATTR_CMN_FILEID |
4863				ATTR_CMN_PARENTID;
4864		aa->nativeattr.commonattr =
4865				ATTR_CMN_NAME |
4866				ATTR_CMN_DEVID |
4867				ATTR_CMN_FSID |
4868				ATTR_CMN_OBJTYPE |
4869				ATTR_CMN_OBJTAG |
4870				ATTR_CMN_OBJID |
4871				ATTR_CMN_OBJPERMANENTID |
4872				ATTR_CMN_PAROBJID |
4873				ATTR_CMN_SCRIPT |
4874				ATTR_CMN_CRTIME |
4875				ATTR_CMN_MODTIME |
4876				ATTR_CMN_CHGTIME |
4877				ATTR_CMN_ACCTIME |
4878				ATTR_CMN_BKUPTIME |
4879				ATTR_CMN_FNDRINFO |
4880				ATTR_CMN_OWNERID |
4881				ATTR_CMN_GRPID |
4882				ATTR_CMN_ACCESSMASK |
4883				ATTR_CMN_FLAGS |
4884				ATTR_CMN_NAMEDATTRCOUNT |
4885				ATTR_CMN_NAMEDATTRLIST |
4886				ATTR_CMN_USERACCESS |
4887				ATTR_CMN_EXTENDED_SECURITY |
4888				ATTR_CMN_UUID |
4889				ATTR_CMN_GRPUUID |
4890				ATTR_CMN_FILEID |
4891				ATTR_CMN_PARENTID;
4892		/* Volume attribute group. */
4893		aa->validattr.volattr =
4894				/*
4895				 * ATTR_VOL_FSTYPE, ATTR_VOL_MOUNTPOINT,
4896				 * ATTR_VOL_MOUNTFLAGS, ATTR_VOL_MOUNTEDDEVICE,
4897				 * and ATTR_VOL_ENCODINGSUSED are supplied by
4898				 * the VFS.
4899				 */
4900				ATTR_VOL_FSTYPE |
4901				ATTR_VOL_SIGNATURE |
4902				ATTR_VOL_SIZE |
4903				ATTR_VOL_SPACEFREE |
4904				ATTR_VOL_SPACEAVAIL |
4905				ATTR_VOL_MINALLOCATION |
4906				ATTR_VOL_ALLOCATIONCLUMP |
4907				ATTR_VOL_IOBLOCKSIZE |
4908				ATTR_VOL_OBJCOUNT |
4909				ATTR_VOL_FILECOUNT |
4910				ATTR_VOL_DIRCOUNT |
4911				ATTR_VOL_MAXOBJCOUNT |
4912				ATTR_VOL_MOUNTPOINT |
4913				ATTR_VOL_NAME |
4914				ATTR_VOL_MOUNTFLAGS |
4915				ATTR_VOL_MOUNTEDDEVICE |
4916				ATTR_VOL_ENCODINGSUSED |
4917				ATTR_VOL_CAPABILITIES |
4918				ATTR_VOL_ATTRIBUTES;
4919		aa->nativeattr.volattr =
4920				ATTR_VOL_FSTYPE |
4921				ATTR_VOL_SIGNATURE |
4922				ATTR_VOL_SIZE |
4923				ATTR_VOL_SPACEFREE |
4924				ATTR_VOL_SPACEAVAIL |
4925				ATTR_VOL_MINALLOCATION |
4926				ATTR_VOL_ALLOCATIONCLUMP |
4927				ATTR_VOL_IOBLOCKSIZE |
4928				ATTR_VOL_OBJCOUNT |
4929				/*
4930				 * NTFS does not provide ATTR_VOL_FILECOUNT and
4931				 * ATTR_VOL_DIRCOUNT on disk.
4932				 */
4933				//ATTR_VOL_FILECOUNT |
4934				//ATTR_VOL_DIRCOUNT |
4935				ATTR_VOL_MAXOBJCOUNT |
4936				ATTR_VOL_MOUNTPOINT |
4937				ATTR_VOL_NAME |
4938				ATTR_VOL_MOUNTFLAGS |
4939				ATTR_VOL_MOUNTEDDEVICE |
4940				ATTR_VOL_ENCODINGSUSED |
4941				ATTR_VOL_CAPABILITIES |
4942				ATTR_VOL_ATTRIBUTES;
4943		/* Directory attribute group. */
4944		aa->validattr.dirattr =
4945				/*
4946				 * ATTR_DIR_LINKCOUNT and ATTR_DIR_ENTRYCOUNT
4947				 * are hard to work out on NTFS and the
4948				 * getattrlist(2) man page states that a file
4949				 * system should not implement
4950				 * ATTR_DIR_LINKCOUNT in this case.  We choose
4951				 * not to implement ATTR_DIR_ENTRYCOUNT either.
4952				 */
4953				//ATTR_DIR_LINKCOUNT |
4954				//ATTR_DIR_ENTRYCOUNT |
4955				/* This is supplied by the VFS. */
4956				ATTR_DIR_MOUNTSTATUS;
4957		aa->nativeattr.dirattr =
4958				/*
4959				 * NTFS does not provide ATTR_DIR_LINKCOUNT and
4960				 * ATTR_DIR_ENTRYCOUNT on disk.
4961				 */
4962				//ATTR_DIR_LINKCOUNT |
4963				//ATTR_DIR_ENTRYCOUNT |
4964				ATTR_DIR_MOUNTSTATUS;
4965		/* File attribute group. */
4966		aa->validattr.fileattr =
4967				ATTR_FILE_LINKCOUNT |
4968				ATTR_FILE_TOTALSIZE |
4969				ATTR_FILE_ALLOCSIZE |
4970				ATTR_FILE_IOBLOCKSIZE |
4971				/* This is supplied by the VFS. */
4972				ATTR_FILE_CLUMPSIZE |
4973				ATTR_FILE_DEVTYPE |
4974				//ATTR_FILE_FILETYPE |
4975				//ATTR_FILE_FORKCOUNT |
4976				//ATTR_FILE_FORKLIST |
4977				ATTR_FILE_DATALENGTH |
4978				ATTR_FILE_DATAALLOCSIZE |
4979				//ATTR_FILE_DATAEXTENTS |
4980				/*
4981				 * Both ATTR_FILE_RSRCLENGTH and
4982				 * ATTR_FILE_RSRCALLOCSIZE are supplied by the
4983				 * VFS via a call to
4984				 * vn_getxattr(XATTR_RESOURCEFORK_NAME).
4985				 *
4986				 * FIXME: The VFS supplies
4987				 * ATTR_FILE_RSRCALLOCSIZE by rounding up
4988				 * ATTR_FILE_RSRCLENGTH to the the next logical
4989				 * block size boundary (for NTFS the cluster
4990				 * this is the next cluster boundary) which is
4991				 * not correct if the resource fork named
4992				 * stream is sparse which can be the case on
4993				 * NTFS.
4994				 */
4995				ATTR_FILE_RSRCLENGTH |
4996				ATTR_FILE_RSRCALLOCSIZE |
4997				//ATTR_FILE_RSRCEXTENTS |
4998				0;
4999		aa->nativeattr.fileattr =
5000				ATTR_FILE_LINKCOUNT |
5001				/*
5002				 * NTFS does not provide ATTR_FILE_TOTALSIZE
5003				 * and ATTR_FILE_ALLOCSIZE on disk or at least
5004				 * not in an easy to determine way.
5005				 */
5006				//ATTR_FILE_TOTALSIZE |
5007				//ATTR_FILE_ALLOCSIZE |
5008				ATTR_FILE_IOBLOCKSIZE |
5009				ATTR_FILE_CLUMPSIZE /* obsolete */ |
5010				ATTR_FILE_DEVTYPE |
5011				/*
5012				 * VFS does not allow setting of
5013				 * ATTR_FILE_FILETYPE, ATTR_FILE_FORKCOUNT,
5014				 * ATTR_FILE_FORKLIST, ATTR_FILE_DATAEXTENTS,
5015				 * and ATTR_FILE_RSRCEXTENTS.
5016				 */
5017				//ATTR_FILE_FILETYPE /* always zero */ |
5018				//ATTR_FILE_FORKCOUNT |
5019				//ATTR_FILE_FORKLIST |
5020				ATTR_FILE_DATALENGTH |
5021				ATTR_FILE_DATAALLOCSIZE |
5022				//ATTR_FILE_DATAEXTENTS /* obsolete, HFS-specific */ |
5023				ATTR_FILE_RSRCLENGTH |
5024				ATTR_FILE_RSRCALLOCSIZE |
5025				//ATTR_FILE_RSRCEXTENTS /* obsolete, HFS-specific */ |
5026				0;
5027		/* Fork attribute group. */
5028		aa->validattr.forkattr =
5029				/*
5030				 * getattrlist(2) man page says that we should
5031				 * not implement any fork attributes.
5032				 */
5033				//ATTR_FORK_TOTALSIZE |
5034				//ATTR_FORK_ALLOCSIZE |
5035				0;
5036		aa->nativeattr.forkattr =
5037				/* VFS does not allow setting of these. */
5038				//ATTR_FORK_TOTALSIZE |
5039				//ATTR_FORK_ALLOCSIZE |
5040				0;
5041		VFSATTR_SET_SUPPORTED(fsa, f_attributes);
5042	}
5043	ni = vol->root_ni;
5044	lck_rw_lock_shared(&ni->lock);
5045	/*
5046	 * For the volume times, we use the corresponding times from the
5047	 * standard information attribute of the root directory inode.
5048	 */
5049	/* Creation time. */
5050	VFSATTR_RETURN(fsa, f_create_time, ni->creation_time);
5051	/*
5052	 * Last modification time.  We use the last mft change time as this
5053	 * changes every time the directory is changed in any way, thus it
5054	 * reflects the volume change time the best.
5055	 */
5056	VFSATTR_RETURN(fsa, f_modify_time, ni->last_mft_change_time);
5057	/* Time of last access. */
5058	VFSATTR_RETURN(fsa, f_access_time, ni->last_access_time);
5059	/* Time of last backup. */
5060	if (VFSATTR_IS_ACTIVE(fsa, f_backup_time)) {
5061		if (NInoValidBackupTime(ni)) {
5062			VFSATTR_RETURN(fsa, f_backup_time, ni->backup_time);
5063			lck_rw_unlock_shared(&ni->lock);
5064		} else {
5065			errno_t err;
5066
5067			if (!lck_rw_lock_shared_to_exclusive(&ni->lock))
5068				lck_rw_lock_exclusive(&ni->lock);
5069			/*
5070			 * Load the AFP_AfpInfo stream and initialize the
5071			 * backup time and Finder Info (if they are not already
5072			 * valid).
5073			 */
5074			err = ntfs_inode_afpinfo_read(ni);
5075			if (err) {
5076				ntfs_error(vol->mp, "Failed to obtain AfpInfo "
5077						"for mft_no 0x%llx (error "
5078						"%d).",
5079						(unsigned long long)ni->mft_no,
5080						err);
5081				lck_rw_unlock_exclusive(&ni->lock);
5082				return err;
5083			}
5084			if (!NInoValidBackupTime(ni))
5085				panic("%s(): !NInoValidBackupTime(base_ni)\n",
5086						__FUNCTION__);
5087			VFSATTR_RETURN(fsa, f_backup_time, ni->backup_time);
5088			lck_rw_unlock_exclusive(&ni->lock);
5089		}
5090	} else
5091		lck_rw_unlock_shared(&ni->lock);
5092	/*
5093	 * File system subtype.  Set this to the ntfs version encoded into 16
5094	 * bits, the high 8 bits being the major version and the low 8 bits
5095	 * being the minor version.  This is then extended to 32 bits, thus the
5096	 * higher 16 bits are currently zero.  The latter could be used at a
5097	 * later point in time to return more information about the mount
5098	 * options of the mounted volume (e.g. enable/disable sparse creation,
5099	 * compression, encryption, quotas, acls, usnjournal, case sensitivity,
5100	 * etc).
5101	 */
5102	VFSATTR_RETURN(fsa, f_fssubtype, (u32)vol->major_ver << 8 |
5103			vol->minor_ver);
5104	/* NUL terminated volume name in decomposed UTF-8. */
5105	if (VFSATTR_IS_ACTIVE(fsa, f_vol_name)) {
5106		/* Copy the cached name from the ntfs_volume structure. */
5107		(void)strlcpy(fsa->f_vol_name, vol->name, MAXPATHLEN - 1);
5108		VFSATTR_SET_SUPPORTED(fsa, f_vol_name);
5109	}
5110	/*
5111	 * Used for ATTR_VOL_SIGNATURE, Carbon's FSVolumeInfo.signature.  The
5112	 * kernel's getvolattrlist() function will default this to 'BD' which
5113	 * is apparently the generic signature that most Carbon file systems
5114	 * should be returning.
5115	 *
5116	 * ZFS returns 'Z!' so we return 'NT'.
5117	 */
5118	VFSATTR_RETURN(fsa, f_signature, 0x4e54); /* 'NT' */
5119	/*
5120	 * Same as Carbon's FSVolumeInfo.filesystemID.  HFS and HFS Plus use a
5121	 * value of zero.  ZFS also returns zero so we do that, too.
5122	 */
5123	VFSATTR_RETURN(fsa, f_carbon_fsid, 0);
5124	ntfs_debug("Done.");
5125	return 0;
5126}
5127
5128/**
5129 * ntfs_volume_rename - rename an ntfs volume
5130 * @vol:	ntfs volume to rename
5131 * @name:	new name for the ntfs volume
5132 *
5133 * Rename the ntfs volume @vol to @name which is a decomposed, NUL-terminated,
5134 * UTF-8 string as used on OS X.
5135 *
5136 * Return 0 on success and errno on error.
5137 */
5138static errno_t ntfs_volume_rename(ntfs_volume *vol, char *name)
5139{
5140	ntfs_inode *ni = vol->vol_ni;
5141	MFT_RECORD *m;
5142	ntfs_attr_search_ctx *ctx;
5143	ATTR_RECORD *a;
5144	u8 *utf8_name = NULL;
5145	ntfschar *ntfs_name = NULL;
5146	size_t utf8_name_size, ntfs_name_size;
5147	signed ntfs_name_len = 0;
5148	errno_t err;
5149
5150	ntfs_debug("Entering (old name: %s, new name: %s).", vol->name, name);
5151	/*
5152	 * We do not need to do anything if the new name is the same as the old
5153	 * name.
5154	 */
5155	utf8_name_size = strlen(name) + 1;
5156	if (utf8_name_size == vol->name_size &&
5157			!strncmp(vol->name, name, vol->name_size)) {
5158		ntfs_debug("The new name is the same as the old name, "
5159				"ignoring the rename request.");
5160		return 0;
5161	}
5162	/*
5163	 * If the new name is the empty string "", no need to convert it.  We
5164	 * will simply delete the $VOLUME_NAME attribute altogether.
5165	 *
5166	 * Otherwise, convert the name from the decomposed, UTF-8 format used
5167	 * by OS X into the little endian, 2-byte, composed Unicode format used
5168	 * by NTFS.
5169	 */
5170	if (utf8_name_size > 1) {
5171		ntfs_name_len = utf8_to_ntfs(vol, (u8*)name, utf8_name_size,
5172				&ntfs_name, &ntfs_name_size);
5173		if (ntfs_name_len < 0) {
5174			err = -ntfs_name_len;
5175			ntfs_error(vol->mp, "Failed to convert volume name to "
5176					"little endian, 2-byte, composed "
5177					"Unicode (error %d).", (int)err);
5178			goto err;
5179		}
5180		/* Switch @ntfs_name_len to be the name length in bytes. */
5181		ntfs_name_len <<= NTFSCHAR_SIZE_SHIFT;
5182		/*
5183		 * Verify that the length of the new name is in the allowed
5184		 * range.
5185		 */
5186		err = ntfs_attr_size_bounds_check(vol, AT_VOLUME_NAME,
5187				ntfs_name_len);
5188		if (err) {
5189			if (err == ERANGE) {
5190				ntfs_error(vol->mp, "Specified name is too "
5191						"long (%d little endian, "
5192						"2-byte, composed Unicode "
5193						"characters).",
5194						ntfs_name_len <<
5195						NTFSCHAR_SIZE_SHIFT);
5196				err = ENAMETOOLONG;
5197			} else {
5198				ntfs_error(vol->mp, "$VOLUME_NAME attribute "
5199						"is not defined on the NTFS "
5200						"volume.  Possible "
5201						"corruption!  You should run "
5202						"chkdsk.");
5203				err = EIO;
5204			}
5205			goto err;
5206		}
5207	}
5208	/* Make a copy of the new volume name to be placed in @vol->name. */
5209	utf8_name = OSMalloc(utf8_name_size, ntfs_malloc_tag);
5210	if (!utf8_name) {
5211		ntfs_error(vol->mp, "Not enough memory to make a copy of the "
5212				"new name.");
5213		err = ENOMEM;
5214		goto err;
5215	}
5216	if (strlcpy((char*)utf8_name, name, utf8_name_size) >= utf8_name_size)
5217		panic("%s(): strlcpy() failed\n", __FUNCTION__);
5218	err = vnode_get(ni->vn);
5219	if (err) {
5220		ntfs_error(vol->mp, "Failed to get vnode for $Volume.");
5221		goto err;
5222	}
5223	err = ntfs_mft_record_map(ni, &m);
5224	if (err) {
5225		ntfs_error(vol->mp, "Failed to map mft record for $Volume "
5226				"(error %d).", err);
5227		m = NULL;
5228		ctx = NULL;
5229		goto put_err;
5230	}
5231	ctx = ntfs_attr_search_ctx_get(ni, m);
5232	if (!ctx) {
5233		ntfs_error(vol->mp, "Not enough memory to get attribute "
5234				"search context.");
5235		err = ENOMEM;
5236		goto put_err;
5237	}
5238	err = ntfs_attr_lookup(AT_VOLUME_NAME, AT_UNNAMED, 0, 0, NULL, 0, ctx);
5239	m = ctx->m;
5240	a = ctx->a;
5241	if (err || a->non_resident || a->flags) {
5242		if (err != ENOENT) {
5243			/* Real lookup error or corrupt attribute. */
5244			if (!err)
5245				goto name_err;
5246			ntfs_error(vol->mp, "Failed to lookup volume name "
5247					"attribute (error %d).", err);
5248			goto put_err;
5249		}
5250		if (!ntfs_name) {
5251			ntfs_debug("Volume has no name and new name is the "
5252					"empty string, nothing to do.");
5253			goto done;
5254		}
5255		ntfs_debug("Volume has no name.  Creating new volume name "
5256				"attribute.");
5257		err = ntfs_resident_attr_record_insert(ni, ctx, AT_VOLUME_NAME,
5258				NULL, 0, ntfs_name, ntfs_name_len);
5259		if (err || ctx->is_error) {
5260			if (!err)
5261				err = ctx->error;
5262			ntfs_error(vol->mp, "Failed to %s $Volume (error %d).",
5263					ctx->is_error ?
5264					"remap extent mft record of" :
5265					"insert volume name attribute in", err);
5266			goto put_err;
5267		}
5268	} else {
5269		u8 *val = (u8*)a + le16_to_cpu(a->value_offset);
5270		/* Some bounds checks. */
5271		if (val < (u8*)a || val + le32_to_cpu(a->value_length) >
5272				(u8*)a + le32_to_cpu(a->length) ||
5273				(u8*)a + le32_to_cpu(a->length) >
5274				(u8*)m + vol->mft_record_size)
5275			goto name_err;
5276		if (!ntfs_name) {
5277			/*
5278			 * The new name is the empty string, thus remove the
5279			 * $VOLUME_NAME attribute altogether.
5280			 */
5281			ntfs_debug("New name is the empty string.  Removing "
5282					"the existing $VOLUME_NAME attribute.");
5283			err = ntfs_attr_record_delete(ni, ctx);
5284			if (!err)
5285				goto done;
5286			ntfs_warning(vol->mp, "Failed to delete volume name "
5287					"attribute (error %d).  Truncating it "
5288					"to zero length instead.", err);
5289		}
5290		/* Resize the existing attribute to fit the new name. */
5291retry_resize:
5292		err = ntfs_resident_attr_value_resize(m, a, ntfs_name_len);
5293		if (err) {
5294			if (err != ENOSPC)
5295				panic("%s(): err != ENOSPC\n", __FUNCTION__);
5296			/*
5297			 * If the base mft record does not have an attribute
5298			 * list attribute, add it now.
5299			 */
5300			if (!NInoAttrList(ni)) {
5301				err = ntfs_attr_list_add(ni, m, ctx);
5302				if (err || ctx->is_error) {
5303					if (!err)
5304						err = ctx->error;
5305					ntfs_error(vol->mp, "Failed to %s "
5306							"$Volume (error %d).",
5307							ctx->is_error ?
5308							"remap extent mft "
5309							"record of" :
5310							"add attribute list "
5311							"attribute to", err);
5312					goto put_err;
5313				}
5314				/*
5315				 * The attribute location will have changed so
5316				 * update it from the search context.
5317				 */
5318				m = ctx->m;
5319				a = ctx->a;
5320				/*
5321				 * We now have an attribute list attribute.
				 * This may have caused the attribute to be
5323				 * moved out to an extent mft record in which
5324				 * case there would now be enough space to
5325				 * resize the attribute.
5326				 *
5327				 * Alternatively some other large attribute may
5328				 * have been moved out to an extent mft record
5329				 * thus generating enough space in the base mft
5330				 * record to resize the attribute.
5331				 *
5332				 * In either case we simply want to retry the
5333				 * resize.
5334				 */
5335				goto retry_resize;
5336			}
5337			/*
5338			 * If the attribute record is the only one in the mft
5339			 * record then there must have been enough space.
5340			 */
5341			if (ntfs_attr_record_is_only_one(m, a))
5342				panic("%s(): err == ENOSPC && "
5343						"ntfs_attr_record_is_only_one"
5344						"()\n", __FUNCTION__);
5345			/*
5346			 * The attribute record is not the only one in the mft
5347			 * record.  Move it out to an extent mft record which
5348			 * will cause enough space to be generated.
5349			 */
5350			lck_rw_lock_shared(&ni->attr_list_rl.lock);
5351			err = ntfs_attr_record_move(ctx);
5352			lck_rw_unlock_shared(&ni->attr_list_rl.lock);
5353			if (err) {
5354				ntfs_error(vol->mp, "Failed to move volume "
5355						"name attribute to an extent "
5356						"mft record (error %d).", err);
5357				goto put_err;
5358			}
5359			/*
5360			 * The attribute location will have changed so update
5361			 * it from the search context.
5362			 */
5363			m = ctx->m;
5364			a = ctx->a;
5365			/*
5366			 * Retry the original attribute record resize as we
5367			 * will now have enough space to do it.
5368			 */
5369			goto retry_resize;
5370		}
5371		/* Copy the new name into the resized attribute record. */
5372		if (ntfs_name)
5373			memcpy((u8*)a + le16_to_cpu(a->value_offset),
5374					ntfs_name, ntfs_name_len);
5375	}
5376	/* Free the no longer needed temporary copy of the new name. */
5377	if (ntfs_name)
5378		OSFree(ntfs_name, ntfs_name_size, ntfs_malloc_tag);
5379	/* Mark the mft record dirty to ensure it gets written out. */
5380	NInoSetMrecNeedsDirtying(ctx->ni);
5381done:
5382	/*
5383	 * Finally set the new name to be the volume name releasing the old one
5384	 * first.  Since we have no locking around accesses to the volume name,
5385	 * we have to be careful about how we update it here, i.e. we have to
5386	 * set the size to the smaller of the two, then switch the pointers,
5387	 * then set the size to the new size and only then free the old
5388	 * pointer.  This is also why we do this under the protection of the
5389	 * mapped mft record so there cannot be two concurrent
5390	 * ntfs_volume_rename()s running.
5391	 */
5392	name = vol->name;
5393	ntfs_name_size = vol->name_size;
5394	if (utf8_name_size < vol->name_size)
5395		vol->name_size = utf8_name_size;
5396	vol->name = (char*)utf8_name;
5397	vol->name_size = utf8_name_size;
5398	ntfs_attr_search_ctx_put(ctx);
5399	ntfs_mft_record_unmap(ni);
5400	(void)vnode_put(ni->vn);
5401	OSFree(name, ntfs_name_size, ntfs_malloc_tag);
5402	ntfs_debug("Done.");
5403	return 0;
5404name_err:
5405	ntfs_error(vol->mp, "Volume name attribute is corrupt.  Run chkdsk.");
5406	NVolSetErrors(vol);
5407	err = EIO;
5408put_err:
5409	if (ctx)
5410		ntfs_attr_search_ctx_put(ctx);
5411	if (m)
5412		ntfs_mft_record_unmap(ni);
5413	(void)vnode_put(ni->vn);
5414err:
5415	if (utf8_name)
5416		OSFree(utf8_name, utf8_name_size, ntfs_malloc_tag);
5417	if (ntfs_name)
5418		OSFree(ntfs_name, ntfs_name_size, ntfs_malloc_tag);
5419	return err;
5420}
5421
5422/**
5423 * ntfs_setattr - set information about a mounted ntfs volume
5424 * @mp:		mount point of ntfs file system
5425 * @fsa:	information to set
5426 * @context:	vfs context
5427 *
5428 * The VFS calls this via VFS_SETATTR() when it wants to set some information
5429 * about the mounted ntfs volume described by the mount @mp.
5430 *
5431 * Which information is to be set is described by the vfs attribute structure
5432 * pointed to by @fsa, which is also the source pointer from which the
5433 * information to be set is copied.
5434 *
5435 * At present the kernel will only ever call this function for ATTR_VOL_NAME,
5436 * i.e. to set the name of the volume.
5437 *
5438 * Return 0 on success and errno on error.
5439 *
5440 * Note: Further details are in the man pages for the getattrlist and
5441 * setattrlist functions and in the header files xnu/bsd/sys/{mount,attr}.h.
5442 *
5443 * Note this function is only called for r/w mounted volumes so no need to
5444 * check if the volume is read-only.
5445 */
5446static int ntfs_setattr(struct mount *mp, struct vfs_attr *fsa,
5447		vfs_context_t context)
5448{
5449	kauth_cred_t cred = vfs_context_ucred(context);
5450	errno_t err;
5451
5452	ntfs_debug("Entering.");
5453	/*
5454	 * Must be superuser or owner of file system to change volume
5455	 * attributes.
5456	 */
5457	if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) !=
5458			vfs_statfs(mp)->f_owner))
5459		return EACCES;
5460	/*
5461	 * Only the volume name is settable (ATTR_VOL_NAME) at present so if
5462	 * this is not requested return success.  The VFS enforces that we are
5463	 * never called with any other flags set.
5464	 */
5465	if (!VFSATTR_IS_ACTIVE(fsa, f_vol_name))
5466		return 0;
5467	if (!fsa->f_vol_name)
5468		panic("%s(): !fsa->f_vol_name\n", __FUNCTION__);
5469	err = ntfs_volume_rename(NTFS_MP(mp), fsa->f_vol_name);
5470	if (err) {
5471		ntfs_error(mp, "Failed to set the name of the volume to %s "
5472				"(error %d).", fsa->f_vol_name, err);
5473		return err;
5474	}
5475	VFSATTR_SET_SUPPORTED(fsa, f_vol_name);
5476	ntfs_debug("Done.");
5477	return 0;
5478}
5479
5480static struct vfsops ntfs_vfsops = {
5481	.vfs_mount	= ntfs_mount,
5482	.vfs_unmount	= ntfs_unmount,
5483	.vfs_root	= ntfs_root,
5484	.vfs_getattr	= ntfs_getattr,
5485	.vfs_sync	= ntfs_sync,
5486	.vfs_vget	= ntfs_vget,
5487	.vfs_setattr	= ntfs_setattr,
5488};
5489
5490static struct vnodeopv_desc *ntfs_vnodeopv_desc_list[1] = {
5491	&ntfs_vnodeopv_desc,
5492};
5493
5494/* Lock group and lock attribute for allocation and freeing of locks. */
5495static lck_grp_attr_t *ntfs_lock_grp_attr;
5496lck_grp_t *ntfs_lock_grp;
5497lck_attr_t *ntfs_lock_attr;
5498
5499/* A tag to allow allocation and freeing of memory. */
5500OSMallocTag ntfs_malloc_tag;
5501
5502static vfstable_t ntfs_vfstable;
5503
5504extern kern_return_t ntfs_module_start(kmod_info_t *ki __unused,
5505		void *data __unused);
5506kern_return_t ntfs_module_start(kmod_info_t *ki __unused, void *data __unused)
5507{
5508	errno_t err;
5509	struct vfs_fsentry vfe;
5510
5511    printf("NTFS driver " NTFS_VERSION_STRING " [Flags: R/W"
5512#ifdef DEBUG
5513			" DEBUG"
5514#endif
5515			"].\n");
5516	/* This should never happen. */
5517	if (ntfs_lock_grp_attr || ntfs_lock_grp || ntfs_lock_attr ||
5518			ntfs_malloc_tag)
5519		panic("%s(): Lock(s) and/or malloc tag already initialized.\n",
5520				__FUNCTION__);
5521	/* First initialize the lock group so we can initialize debugging. */
5522	ntfs_lock_grp_attr = lck_grp_attr_alloc_init();
5523	if (!ntfs_lock_grp_attr) {
5524lck_err:
5525		printf("NTFS: Failed to allocate a lock element.\n");
5526		goto dbg_err;
5527	}
5528#ifdef DEBUG
5529	lck_grp_attr_setstat(ntfs_lock_grp_attr);
5530#endif
5531	ntfs_lock_grp = lck_grp_alloc_init("com.apple.filesystems.ntfs",
5532			ntfs_lock_grp_attr);
5533	if (!ntfs_lock_grp)
5534		goto lck_err;
5535	ntfs_lock_attr = lck_attr_alloc_init();
5536	if (!ntfs_lock_attr)
5537		goto lck_err;
5538#ifdef DEBUG
5539	lck_attr_setdebug(ntfs_lock_attr);
5540#endif
5541	/* Allocate a tag so we can allocate memory. */
5542	ntfs_malloc_tag = OSMalloc_Tagalloc("com.apple.filesystems.ntfs",
5543			OSMT_DEFAULT);
5544	if (!ntfs_malloc_tag) {
5545		printf("NTFS: OSMalloc_Tagalloc() failed.\n");
5546		goto dbg_err;
5547	}
5548	/* Initialize the driver wide lock. */
5549	lck_mtx_init(&ntfs_lock, ntfs_lock_grp, ntfs_lock_attr);
5550	/*
5551	 * This call must happen before we can use ntfs_debug(),
5552	 * ntfs_warning(), and ntfs_error().
5553	 */
5554	ntfs_debug_init();
5555	ntfs_debug("Debug messages are enabled.");
5556	err = ntfs_default_sds_entries_init();
5557	if (err)
5558		goto sds_err;
5559	err = ntfs_inode_hash_init();
5560	if (err)
5561		goto hash_err;
5562	vfe = (struct vfs_fsentry) {
5563		.vfe_vfsops	= &ntfs_vfsops,
5564		.vfe_vopcnt	= 1,	/* For now we just use one set of vnode
5565					   operations for all file types.
5566					   Note: Current max is 5 due to (not
5567					   needed) hard-coded limit in xnu. */
5568		.vfe_opvdescs	= ntfs_vnodeopv_desc_list,
5569		.vfe_fsname	= "ntfs",
5570// TODO: Implement VFS_TBLREADDIR_EXTENDED and set it here.
5571		.vfe_flags	= VFS_TBLNATIVEXATTR | VFS_TBL64BITREADY |
5572				  VFS_TBLLOCALVOL | VFS_TBLNOTYPENUM |
5573				  VFS_TBLFSNODELOCK | VFS_TBLTHREADSAFE,
5574	};
5575	err = vfs_fsadd(&vfe, &ntfs_vfstable);
5576	if (!err) {
5577		ntfs_debug("NTFS driver registered successfully.");
5578		return KERN_SUCCESS;
5579	}
5580	ntfs_error(NULL, "vfs_fsadd() failed (error %d).", (int)err);
5581	ntfs_inode_hash_deinit();
5582hash_err:
5583	OSFree(ntfs_file_sds_entry, 0x60 * 4, ntfs_malloc_tag);
5584	ntfs_file_sds_entry = NULL;
5585sds_err:
5586	ntfs_debug_deinit();
5587	lck_mtx_destroy(&ntfs_lock, ntfs_lock_grp);
5588dbg_err:
5589	if (ntfs_malloc_tag) {
5590		OSMalloc_Tagfree(ntfs_malloc_tag);
5591		ntfs_malloc_tag = NULL;
5592	}
5593	if (ntfs_lock_attr) {
5594		lck_attr_free(ntfs_lock_attr);
5595		ntfs_lock_attr = NULL;
5596	}
5597	if (ntfs_lock_grp) {
5598		lck_grp_free(ntfs_lock_grp);
5599		ntfs_lock_grp = NULL;
5600	}
5601	if (ntfs_lock_grp_attr) {
5602		lck_grp_attr_free(ntfs_lock_grp_attr);
5603		ntfs_lock_grp_attr = NULL;
5604	}
5605	printf("NTFS: Failed to register the NTFS driver.\n");
5606	return KERN_FAILURE;
5607}
5608
5609extern kern_return_t ntfs_module_stop(kmod_info_t *ki __unused,
5610		void *data __unused);
5611kern_return_t ntfs_module_stop(kmod_info_t *ki __unused, void *data __unused)
5612{
5613	errno_t err;
5614
5615	if (!ntfs_lock_grp_attr || !ntfs_lock_grp || !ntfs_lock_attr ||
5616			!ntfs_malloc_tag)
5617		panic("%s(): Lock(s) and/or malloc tag not yet initialized.\n",
5618				__FUNCTION__);
5619	ntfs_debug("Unregistering NTFS driver.");
5620	err = vfs_fsremove(ntfs_vfstable);
5621	if (err) {
5622		if (err == EBUSY)
5623			printf("NTFS: Failed to unregister the NTFS driver "
5624					"because there are mounted NTFS "
5625					"volumes.\n");
5626		else
5627			printf("NTFS: Failed to unregister the NTFS driver "
5628					"because vfs_fsremove() failed (error "
5629					"%d).\n", err);
5630		return KERN_FAILURE;
5631	}
5632	ntfs_inode_hash_deinit();
5633	OSFree(ntfs_file_sds_entry, 0x60 * 4, ntfs_malloc_tag);
5634	ntfs_file_sds_entry = NULL;
5635	ntfs_debug("Done.");
5636	/*
5637	 * Once this completes, we cannot use ntfs_debug(), ntfs_warning(), and
5638	 * ntfs_error() any more.  Since it cannot fail we cheat and report
5639	 * "Done." before the call.
5640	 */
5641	ntfs_debug_deinit();
5642	lck_mtx_destroy(&ntfs_lock, ntfs_lock_grp);
5643	OSMalloc_Tagfree(ntfs_malloc_tag);
5644	ntfs_malloc_tag = NULL;
5645	lck_attr_free(ntfs_lock_attr);
5646	ntfs_lock_attr = NULL;
5647	lck_grp_free(ntfs_lock_grp);
5648	ntfs_lock_grp = NULL;
5649	lck_grp_attr_free(ntfs_lock_grp_attr);
5650	ntfs_lock_grp_attr = NULL;
5651	return KERN_SUCCESS;
5652}
5653