1/*
2 * ntfs_vfsops.c - NTFS kernel vfs operations.
3 *
4 * Copyright (c) 2006-2011 Anton Altaparmakov.  All Rights Reserved.
5 * Portions Copyright (c) 2006-2014 Apple Inc.  All Rights Reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 *    this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright notice,
13 *    this list of conditions and the following disclaimer in the documentation
14 *    and/or other materials provided with the distribution.
15 * 3. Neither the name of Apple Inc. ("Apple") nor the names of its
16 *    contributors may be used to endorse or promote products derived from this
17 *    software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
20 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * ALTERNATIVELY, provided that this notice and licensing terms are retained in
31 * full, this file may be redistributed and/or modified under the terms of the
32 * GNU General Public License (GPL) Version 2, in which case the provisions of
33 * that version of the GPL will apply to you instead of the license terms
34 * above.  You can obtain a copy of the GPL Version 2 at
35 * http://developer.apple.com/opensource/licenses/gpl-2.txt.
36 */
37
38#include <sys/cdefs.h>
39#include <sys/attr.h>
40#include <sys/buf.h>
41#include <sys/disk.h>
42#include <sys/errno.h>
43#include <sys/fcntl.h>
44#include <sys/kauth.h>
45#include <sys/kernel_types.h>
46#include <sys/mount.h>
47#include <sys/param.h>
48#include <sys/systm.h>
49#include <sys/stat.h>
50#include <sys/types.h>
51#include <sys/ubc.h>
52#include <sys/ucred.h>
53#include <sys/vnode.h>
54
55#include <mach/kern_return.h>
56#include <mach/kmod.h>
57#include <mach/machine/vm_param.h>
58
59#include <string.h>
60
61#include <libkern/libkern.h>
62#include <libkern/OSMalloc.h>
63#include <libkern/OSKextLib.h>
64
65#include <kern/debug.h>
66#include <kern/locks.h>
67
68#include <miscfs/specfs/specdev.h>
69
70#include "ntfs.h"
71#include "ntfs_attr.h"
72#include "ntfs_attr_list.h"
73#include "ntfs_debug.h"
74#include "ntfs_dir.h"
75#include "ntfs_hash.h"
76#include "ntfs_inode.h"
77#include "ntfs_layout.h"
78#include "ntfs_logfile.h"
79#include "ntfs_mft.h"
80#include "ntfs_mst.h"
81#include "ntfs_page.h"
82#include "ntfs_quota.h"
83#include "ntfs_secure.h"
84#include "ntfs_time.h"
85#include "ntfs_unistr.h"
86#include "ntfs_usnjrnl.h"
87#include "ntfs_vnops.h"
88#include "ntfs_volume.h"
89
/*
 * Contact strings: a bare mailing list address and a ready-made sentence
 * asking the user to report that they saw a message.
 */
// FIXME: Change email address but to what?
const char ntfs_dev_email[] = "linux-ntfs-dev@lists.sourceforge.net";
const char ntfs_please_email[] = "Please email "
		"linux-ntfs-dev@lists.sourceforge.net and say that you saw "
		"this message.  Thank you.";

/* A driver wide lock protecting the below global data structures. */
static lck_mtx_t ntfs_lock;

/*
 * Number of mounted file systems which have compression enabled, the global
 * compression buffer, and the size of that buffer in bytes (16 * 4096, i.e.
 * 64kiB).
 *
 * NOTE(review): presumably the buffer is shared between all compression users
 * and is allocated/freed as the user count goes from/to zero -- confirm
 * against the code that manipulates ntfs_compression_users.
 */
static unsigned long ntfs_compression_users;
static u8 *ntfs_compression_buffer;
#define ntfs_compression_buffer_size (16 * 4096)

/*
 * The global default upcase table and corresponding reference count.  The
 * table size is expressed in bytes and corresponds to 64 * 1024 entries of
 * type ntfschar.
 */
static unsigned long ntfs_default_upcase_users;
static ntfschar *ntfs_default_upcase;
#define ntfs_default_upcase_size (64 * 1024 * sizeof(ntfschar))
108
109static errno_t ntfs_blocksize_set(mount_t mp, vnode_t dev_vn, u32 blocksize,
110		vfs_context_t context)
111{
112	errno_t err;
113	struct vfsioattr ia;
114
115	err = VNOP_IOCTL(dev_vn, DKIOCSETBLOCKSIZE, (caddr_t)&blocksize,
116			FWRITE, context);
117	if (err)
118		return ENXIO;
119	/*
120	 * Update the cached block size in the mount point, i.e. the value
121	 * returned by vfs_devblocksize().
122	 */
123	ntfs_debug("Updating io attributes with new block size.");
124	vfs_ioattr(mp, &ia);
125	ia.io_devblocksize = blocksize;
126	vfs_setioattr(mp, &ia);
127	/*
128	 * Update the block size in the block device, i.e. the
129	 * v_specsize of the device vnode.
130	 */
131	ntfs_debug("Updating device vnode with new block size.");
132	set_fsblocksize(dev_vn);
133	return 0;
134}
135
136/**
137 * ntfs_boot_sector_is_valid - check if @b contains a valid ntfs boot sector
138 * @mp:		Mount of the device to which @b belongs.
139 * @b:		Boot sector of device @mp to check.
140 *
141 * Check whether the boot sector @b is a valid ntfs boot sector.
142 *
143 * Return TRUE if it is valid and FALSE if not.
144 *
145 * @mp is only needed for warning/error output, i.e. it can be NULL.
146 */
147static BOOL ntfs_boot_sector_is_valid(const mount_t mp,
148		const NTFS_BOOT_SECTOR *b)
149{
150	ntfs_debug("Entering.");
151	/*
152	 * Check that checksum == sum of u32 values from b to the checksum
153	 * field.  If checksum is zero, no checking is done.  We will work when
154	 * the checksum test fails, since some utilities update the boot sector
155	 * ignoring the checksum which leaves the checksum out-of-date.  We
156	 * report a warning if this is the case.
157	 */
158	if ((void*)b < (void*)&b->checksum && b->checksum) {
159		le32 *u;
160		u32 i;
161
162		for (i = 0, u = (le32*)b; u < (le32*)(&b->checksum); ++u)
163			i += le32_to_cpup(u);
164		if (le32_to_cpu(b->checksum) != i)
165			ntfs_warning(mp, "Invalid boot sector checksum.");
166	}
167	/* Check OEMidentifier is "NTFS    " */
168	if (b->oem_id != magicNTFS)
169		goto not_ntfs;
170	/*
171	 * Check bytes per sector value is between 256 and
172	 * NTFS_MAX_SECTOR_SIZE.
173	 */
174	if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 ||
175			le16_to_cpu(b->bpb.bytes_per_sector) >
176			NTFS_MAX_SECTOR_SIZE)
177		goto not_ntfs;
178	/* Check sectors per cluster value is valid. */
179	switch (b->bpb.sectors_per_cluster) {
180	case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128:
181		break;
182	default:
183		goto not_ntfs;
184	}
185	/* Check the cluster size is not above the maximum (64kiB). */
186	if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) *
187			b->bpb.sectors_per_cluster > NTFS_MAX_CLUSTER_SIZE)
188		goto not_ntfs;
189	/* Check reserved/unused fields are really zero. */
190	if (le16_to_cpu(b->bpb.reserved_sectors) ||
191			le16_to_cpu(b->bpb.root_entries) ||
192			le16_to_cpu(b->bpb.sectors) ||
193			le16_to_cpu(b->bpb.sectors_per_fat) ||
194			le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats)
195		goto not_ntfs;
196	/*
197	 * Check clusters per file mft record value is valid.  It can be either
198	 * between -31 and -9 (in which case the actual mft record size is
199	 * -log2() of the absolute value) or a positive power of two.
200	 */
201	if ((u8)b->clusters_per_mft_record < 0xe1 ||
202			(u8)b->clusters_per_mft_record > 0xf7)
203		switch (b->clusters_per_mft_record) {
204		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
205			break;
206		default:
207			goto not_ntfs;
208		}
209	/* Check clusters per index block value is valid. */
210	if ((u8)b->clusters_per_index_block < 0xe1 ||
211			(u8)b->clusters_per_index_block > 0xf7)
212		switch (b->clusters_per_index_block) {
213		case 1: case 2: case 4: case 8: case 16: case 32: case 64:
214			break;
215		default:
216			goto not_ntfs;
217		}
218	/*
219	 * Check for valid end of sector marker.  We will work without it, but
220	 * many BIOSes will refuse to boot from a bootsector if the magic is
221	 * incorrect, so we emit a warning.
222	 */
223	if (b->end_of_sector_marker != const_cpu_to_le16(0xaa55))
224		ntfs_warning(mp, "Invalid end of sector marker.");
225	ntfs_debug("Done.");
226	return TRUE;
227not_ntfs:
228	ntfs_debug("Not an NTFS boot sector.");
229	return FALSE;
230}
231
232/**
233 * ntfs_boot_sector_read - read the ntfs boot sector of a device
234 * @vol:	ntfs_volume of device to read the boot sector from
235 * @cred:	credentials of running process
236 * @buf:	destination pointer for buffer containing boot sector
237 * @bs:		destination pointer for boot sector data
238 *
239 * Read the boot sector from the device and validate it.  If that fails, try to
240 * read the backup boot sector, first from the end of the device a-la NT4 and
241 * later and then from the middle of the device a-la NT3.51 and earlier.
242 *
243 * If a valid boot sector is found but it is not the primary boot sector, we
244 * repair the primary boot sector silently (unless the device is read-only or
245 * the primary boot sector is not accessible).
246 *
247 * On success return 0 and set *@buf to the buffer containing the boot sector
248 * and *@bs to the boot sector data.  The caller then has to buf_unmap() and
249 * buf_brelse() the buffer.
250 *
251 * On error return the error code.
252 *
253 * Note: We set the B_NOCACHE flag on the buffer(s), thus effectively
254 * invalidating them when we release them.  This is needed because the
255 * buffer(s) may get read later using a different vnode ($Boot for example).
256 */
257static errno_t ntfs_boot_sector_read(ntfs_volume *vol, kauth_cred_t cred,
258		buf_t *buf, NTFS_BOOT_SECTOR **bs)
259{
260	daddr64_t nr_blocks = vol->nr_blocks;
261	static const char read_err_str[] =
262			"Unable to read %s boot sector (error %d).";
263	mount_t mp = vol->mp;
264	vnode_t dev_vn = vol->dev_vn;
265	buf_t primary, backup;
266	NTFS_BOOT_SECTOR *bs1, *bs2;
267	errno_t err, err2;
268	u32 blocksize = vfs_devblocksize(mp);
269
270	ntfs_debug("Entering.");
271	/* Try to read primary boot sector. */
272	err = buf_meta_bread(dev_vn, 0, blocksize, cred, &primary);
273	buf_setflags(primary, B_NOCACHE);
274	if (!err) {
275		err = buf_map(primary, (caddr_t*)&bs1);
276		if (err) {
277			ntfs_error(mp, "Failed to map buffer of primary boot "
278					"sector (error %d).", err);
279			bs1 = NULL;
280		} else {
281			if (ntfs_boot_sector_is_valid(mp, bs1)) {
282				*buf = primary;
283				*bs = bs1;
284				ntfs_debug("Done.");
285				return 0;
286			}
287			ntfs_error(mp, "Primary boot sector is invalid.");
288			err = EIO;
289		}
290	} else {
291		ntfs_error(mp, read_err_str, "primary", err);
292		bs1 = NULL;
293	}
294	if (!(vol->on_errors & ON_ERRORS_RECOVER)) {
295		ntfs_error(mp, "Mount option errors=recover not used.  "
296				"Aborting without trying to recover.");
297		if (bs1) {
298			err2 = buf_unmap(primary);
299			if (err2)
300				ntfs_error(mp, "Failed to unmap buffer of "
301						"primary boot sector (error "
302						"%d).", err2);
303		}
304		buf_brelse(primary);
305		return err;
306	}
307	/* Try to read NT4+ backup boot sector. */
308	err = buf_meta_bread(dev_vn, nr_blocks - 1, blocksize, cred, &backup);
309	buf_setflags(backup, B_NOCACHE);
310	if (!err) {
311		err = buf_map(backup, (caddr_t*)&bs2);
312		if (err)
313			ntfs_error(mp, "Failed to map buffer of backup boot "
314					"sector (error %d).", err);
315		else {
316			if (ntfs_boot_sector_is_valid(mp, bs2))
317				goto hotfix_primary_boot_sector;
318			err = buf_unmap(backup);
319			if (err)
320				ntfs_error(mp, "Failed to unmap buffer of "
321						"backup boot sector (error "
322						"%d).", err);
323		}
324	} else
325		ntfs_error(mp, read_err_str, "backup", err);
326	buf_brelse(backup);
327	/* Try to read NT3.51- backup boot sector. */
328	err = buf_meta_bread(dev_vn, nr_blocks >> 1, blocksize, cred, &backup);
329	buf_setflags(backup, B_NOCACHE);
330	if (!err) {
331		err = buf_map(backup, (caddr_t*)&bs2);
332		if (err)
333			ntfs_error(mp, "Failed to map buffer of old backup "
334					"boot sector (error %d).", err);
335		else {
336			if (ntfs_boot_sector_is_valid(mp, bs2))
337				goto hotfix_primary_boot_sector;
338			err = buf_unmap(backup);
339			if (err)
340				ntfs_error(mp, "Failed to unmap buffer of old "
341						"backup boot sector (error "
342						"%d).", err);
343			err = EIO;
344		}
345		ntfs_error(mp, "Could not find a valid backup boot sector.");
346	} else
347		ntfs_error(mp, read_err_str, "backup", err);
348	buf_brelse(backup);
349	/* We failed.  Clean up and return. */
350	if (bs1) {
351		err2 = buf_unmap(primary);
352		if (err2)
353			ntfs_error(mp, "Failed to unmap buffer of primary "
354					"boot sector (error %d).", err2);
355	}
356	buf_brelse(primary);
357	return err;
358hotfix_primary_boot_sector:
359	ntfs_warning(mp, "Using backup boot sector.");
360	/*
361	 * If we managed to read sector zero and the volume is not read-only,
362	 * copy the found, valid, backup boot sector to the primary boot
363	 * sector.  Note we copy the complete sector, not just the boot sector
364	 * structure as the sector size may be bigger and in this case it
365	 * contains the correct boot loader code in the backup boot sector.
366	 */
367	if (bs1 && !NVolReadOnly(vol)) {
368		ntfs_warning(mp, "Hot-fix: Recovering invalid primary boot "
369				"sector from backup copy.");
370		memcpy(bs1, bs2, blocksize);
371		err = buf_bwrite(primary);
372		if (err)
373			ntfs_error(mp, "Hot-fix: Device write error while "
374					"recovering primary boot sector "
375					"(error %d).", err);
376	} else {
377		if (bs1) {
378			ntfs_warning(mp, "Hot-fix: Recovery of primary boot "
379					"sector failed: Read-only mount.");
380			err = buf_unmap(primary);
381			if (err)
382				ntfs_error(mp, "Failed to unmap buffer of "
383						"primary boot sector (error "
384						"%d).", err);
385		} else
386			ntfs_warning(mp, "Hot-fix: Recovery of primary boot "
387					"sector failed as it could not be "
388					"mapped.");
389		buf_brelse(primary);
390	}
391	*buf = backup;
392	*bs = bs2;
393	return 0;
394}
395
396/**
397 * ntfs_boot_sector_parse - parse the boot sector and store the data in @vol
398 * @vol:	volume structure to initialise with data from boot sector
399 * @b:		boot sector to parse
400 *
401 * Parse the ntfs boot sector @b and store all imporant information therein in
402 * the ntfs_volume @vol.
403 *
404 * Return 0 on success and errno on error.  The following error codes are
405 * defined:
406 *	EINVAL	- Boot sector is invalid.
407 *	ENOTSUP - Volume is not supported by this ntfs driver.
408 */
409static errno_t ntfs_boot_sector_parse(ntfs_volume *vol,
410		const NTFS_BOOT_SECTOR *b)
411{
412	s64 ll;
413	mount_t mp = vol->mp;
414	unsigned sectors_per_cluster_shift, nr_hidden_sects;
415	int clusters_per_mft_record, clusters_per_index_block;
416
417	ntfs_debug("Entering.");
418	vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector);
419	vol->sector_size_mask = vol->sector_size - 1;
420	vol->sector_size_shift = ffs(vol->sector_size) - 1;
421	ntfs_debug("vol->sector_size = %u (0x%x)", vol->sector_size,
422			vol->sector_size);
423	ntfs_debug("vol->sector_size_shift = %u", vol->sector_size_shift);
424	if (vol->sector_size < (u32)vfs_devblocksize(mp)) {
425		ntfs_error(mp, "Sector size (%u) is smaller than the device "
426				"block size (%d).  This is not supported.  "
427				"Sorry.", vol->sector_size,
428				vfs_devblocksize(mp));
429		return ENOTSUP;
430	}
431	ntfs_debug("sectors_per_cluster = %u", b->bpb.sectors_per_cluster);
432	sectors_per_cluster_shift = ffs(b->bpb.sectors_per_cluster) - 1;
433	ntfs_debug("sectors_per_cluster_shift = %u", sectors_per_cluster_shift);
434	nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors);
435	ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects);
436	vol->cluster_size = vol->sector_size << sectors_per_cluster_shift;
437	vol->cluster_size_mask = vol->cluster_size - 1;
438	vol->cluster_size_shift = ffs(vol->cluster_size) - 1;
439	ntfs_debug("vol->cluster_size = %u (0x%x)", vol->cluster_size,
440			vol->cluster_size);
441	ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask);
442	ntfs_debug("vol->cluster_size_shift = %u", vol->cluster_size_shift);
443	if (vol->cluster_size < vol->sector_size) {
444		ntfs_error(mp, "Cluster size (%u) is smaller than the sector "
445				"size (%u).  This is not supported.  Sorry.",
446				vol->cluster_size, vol->sector_size);
447		return ENOTSUP;
448	}
449	clusters_per_mft_record = b->clusters_per_mft_record;
450	ntfs_debug("clusters_per_mft_record = %u (0x%x)",
451			clusters_per_mft_record, clusters_per_mft_record);
452	if (clusters_per_mft_record > 0)
453		vol->mft_record_size = vol->cluster_size *
454				clusters_per_mft_record;
455	else
456		/*
457		 * When mft_record_size < cluster_size, clusters_per_mft_record
458		 * = -log2(mft_record_size) bytes.  mft_record_size normaly is
459		 * 1024 bytes, which is encoded as 0xF6 (-10 in decimal).
460		 */
461		vol->mft_record_size = 1 << -clusters_per_mft_record;
462	vol->mft_record_size_mask = vol->mft_record_size - 1;
463	vol->mft_record_size_shift = ffs(vol->mft_record_size) - 1;
464	ntfs_debug("vol->mft_record_size = %u (0x%x)", vol->mft_record_size,
465			vol->mft_record_size);
466	ntfs_debug("vol->mft_record_size_mask = 0x%x",
467			vol->mft_record_size_mask);
468	ntfs_debug("vol->mft_record_size_shift = %u)",
469			vol->mft_record_size_shift);
470	/*
471	 * We cannot support mft record sizes above the PAGE_SIZE since we
472	 * store $MFT/$DATA, i.e. the table of mft records, in the unified
473	 * buffer cache and thus in pages.
474	 */
475	if (vol->mft_record_size > PAGE_SIZE) {
476		ntfs_error(mp, "Mft record size (%u) exceeds the PAGE_SIZE on "
477				"your system (%u).  This is not supported.  "
478				"Sorry.", vol->mft_record_size, PAGE_SIZE);
479		return ENOTSUP;
480	}
481	/* We cannot support mft record sizes below the sector size. */
482	if (vol->mft_record_size < vol->sector_size) {
483		ntfs_error(mp, "Mft record size (%u) is smaller than the "
484				"sector size (%u).  This is not supported.  "
485				"Sorry.", vol->mft_record_size,
486				vol->sector_size);
487		return ENOTSUP;
488	}
489	clusters_per_index_block = b->clusters_per_index_block;
490	ntfs_debug("clusters_per_index_block = %d (0x%x)",
491			clusters_per_index_block, clusters_per_index_block);
492	if (clusters_per_index_block > 0) {
493		vol->index_block_size = vol->cluster_size *
494				clusters_per_index_block;
495		vol->blocks_per_index_block = clusters_per_index_block;
496	} else {
497		/*
498		 * When index_block_size < cluster_size,
499		 * clusters_per_index_block = -log2(index_block_size) bytes.
500		 * index_block_size normaly equals 4096 bytes, which is encoded
501		 * as 0xF4 (-12 in decimal).
502		 */
503		vol->index_block_size = 1 << -clusters_per_index_block;
504		vol->blocks_per_index_block = vol->index_block_size /
505				vol->sector_size;
506	}
507	vol->index_block_size_mask = vol->index_block_size - 1;
508	vol->index_block_size_shift = ffs(vol->index_block_size) - 1;
509	ntfs_debug("vol->index_block_size = %u (0x%x)",
510			vol->index_block_size, vol->index_block_size);
511	ntfs_debug("vol->index_block_size_mask = 0x%x",
512			vol->index_block_size_mask);
513	ntfs_debug("vol->index_block_size_shift = %u",
514			vol->index_block_size_shift);
515	ntfs_debug("vol->blocks_per_index_block = %u",
516			vol->blocks_per_index_block);
517	/* We cannot support index block sizes below the sector size. */
518	if (vol->index_block_size < vol->sector_size) {
519		ntfs_error(mp, "Index block size (%u) is smaller than the "
520				"sector size (%u).  This is not supported.  "
521				"Sorry.", vol->index_block_size,
522				vol->sector_size);
523		return ENOTSUP;
524	}
525	/*
526	 * Get the size of the volume in clusters and check for 64-bit-ness.
527	 * Windows currently only uses 32 bits to save the clusters so we do
528	 * the same as we do not really want to break compatibility.  We could
529	 * perhaps add a mount option to allow this one day but it would render
530	 * such volumes incompatible with Windows.
531	 */
532	ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_shift;
533	if ((u64)ll >= (u64)1 << 32) {
534		ntfs_error(mp, "Volume specifies 64-bit clusters but only "
535				"32-bit clusters are allowed by Microsoft "
536				"Windows.  Weird.");
537		return EINVAL;
538	}
539	vol->nr_clusters = ll;
540	ntfs_debug("vol->nr_clusters = 0x%llx",
541			(unsigned long long)vol->nr_clusters);
542	ll = sle64_to_cpu(b->mft_lcn);
543	if (ll >= vol->nr_clusters) {
544		ntfs_error(mp, "MFT LCN (%lld, 0x%llx) is beyond end of "
545				"volume.  Weird.", (unsigned long long)ll,
546				(unsigned long long)ll);
547		return EINVAL;
548	}
549	vol->mft_lcn = ll;
550	ntfs_debug("vol->mft_lcn = 0x%llx", (unsigned long long)vol->mft_lcn);
551	ll = sle64_to_cpu(b->mftmirr_lcn);
552	if (ll >= vol->nr_clusters) {
553		ntfs_error(mp, "MFTMirr LCN (%lld, 0x%llx) is beyond end of "
554				"volume.  Weird.", (unsigned long long)ll,
555				(unsigned long long)ll);
556		return EINVAL;
557	}
558	vol->mftmirr_lcn = ll;
559	ntfs_debug("vol->mftmirr_lcn = 0x%llx",
560			(unsigned long long)vol->mftmirr_lcn);
561	/*
562	 * Work out the size of the mft mirror in number of mft records.  If
563	 * the cluster size is less than or equal to the size taken by four mft
564	 * records, the mft mirror stores the first four mft records.  If the
565	 * cluster size is bigger than the size taken by four mft records, the
566	 * mft mirror contains as many mft records as will fit into one
567	 * cluster.
568	 *
569	 * Having said that Windows only keeps in sync and cares about the
570	 * consistency of the first four mft records so we do the same.
571	 */
572#if 0
573	if (vol->cluster_size <= ((u32)4 << vol->mft_record_size_shift))
574		vol->mftmirr_size = 4;
575	else
576		vol->mftmirr_size = vol->cluster_size >>
577				vol->mft_record_size_shift;
578#else
579	vol->mftmirr_size = 4;
580#endif
581	ntfs_debug("vol->mftmirr_size = 0x%x", vol->mftmirr_size);
582	vol->serial_no = le64_to_cpu(b->volume_serial_number);
583	ntfs_debug("vol->serial_no = 0x%llx",
584			(unsigned long long)vol->serial_no);
585	ntfs_debug("Done.");
586	return 0;
587}
588
589/**
590 * ntfs_setup_allocators - initialize the cluster and mft allocators
591 * @vol:	volume structure for which to setup the allocators
592 *
593 * Setup the cluster (lcn) and mft allocators to the starting values.
594 */
595static void ntfs_setup_allocators(ntfs_volume *vol)
596{
597	LCN mft_zone_size, mft_lcn;
598
599	ntfs_debug("Entering.");
600	ntfs_debug("vol->mft_zone_multiplier = 0x%x",
601			vol->mft_zone_multiplier);
602	/* Determine the size of the MFT zone. */
603	mft_zone_size = vol->nr_clusters;
604	switch (vol->mft_zone_multiplier) {  /* % of volume size in clusters */
605	case 4:
606		mft_zone_size >>= 1;			/* 50%   */
607		break;
608	case 3:
609		mft_zone_size = (mft_zone_size +
610				(mft_zone_size >> 1)) >> 2;	/* 37.5% */
611		break;
612	case 2:
613		mft_zone_size >>= 2;			/* 25%   */
614		break;
615	/* case 1: */
616	default:
617		mft_zone_size >>= 3;			/* 12.5% */
618		break;
619	}
620	/* Setup the mft zone. */
621	vol->mft_zone_start = vol->mft_zone_pos = vol->mft_lcn;
622	ntfs_debug("vol->mft_zone_pos = 0x%llx",
623			(unsigned long long)vol->mft_zone_pos);
624	/*
625	 * Calculate the mft_lcn for an unmodified ntfs volume (see mkntfs
626	 * source) and if the actual mft_lcn is in the expected place or even
627	 * further to the front of the volume, extend the mft_zone to cover the
628	 * beginning of the volume as well.  This is in order to protect the
629	 * area reserved for the mft bitmap as well within the mft_zone itself.
630	 * On non-standard volumes we do not protect it as the overhead would
631	 * be higher than the speed increase we would get by doing it.
632	 */
633	mft_lcn = (8192 + 2 * vol->cluster_size - 1) / vol->cluster_size;
634	if (mft_lcn * vol->cluster_size < 16 * 1024)
635		mft_lcn = (16 * 1024 + vol->cluster_size - 1) /
636				vol->cluster_size;
637	if (vol->mft_zone_start <= mft_lcn)
638		vol->mft_zone_start = 0;
639	ntfs_debug("vol->mft_zone_start = 0x%llx",
640			(unsigned long long)vol->mft_zone_start);
641	/*
642	 * Need to cap the mft zone on non-standard volumes so that it does
643	 * not point outside the boundaries of the volume.  We do this by
644	 * halving the zone size until we are inside the volume.
645	 */
646	vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
647	while (vol->mft_zone_end >= vol->nr_clusters) {
648		mft_zone_size >>= 1;
649		vol->mft_zone_end = vol->mft_lcn + mft_zone_size;
650	}
651	ntfs_debug("vol->mft_zone_end = 0x%llx",
652			(unsigned long long)vol->mft_zone_end);
653	/*
654	 * Set the current position within each data zone to the start of the
655	 * respective zone.
656	 */
657	vol->data1_zone_pos = vol->mft_zone_end;
658	ntfs_debug("vol->data1_zone_pos = 0x%llx",
659			(unsigned long long)vol->data1_zone_pos);
660	vol->data2_zone_pos = 0;
661	ntfs_debug("vol->data2_zone_pos = 0x%llx",
662			(unsigned long long)vol->data2_zone_pos);
663
664	/* Set the mft data allocation position to mft record 24. */
665	vol->mft_data_pos = 24;
666	ntfs_debug("vol->mft_data_pos = 0x%llx",
667			(unsigned long long)vol->mft_data_pos);
668	ntfs_debug("Done.");
669}
670
671/**
672 * ntfs_mft_inode_get - obtain the ntfs inode for $MFT at mount time
673 * @vol:	ntfs volume being mounted
674 *
675 * Obtain the ntfs inode corresponding to the system file $MFT (unnamed $DATA
676 * attribute) in the process bootstrapping the volume so that further inodes
677 * can be obtained and (extent) mft records can be mapped.
678 *
679 * A new ntfs inode is allocated and initialized, the base mft record of $MFT
680 * is read by hand from the device and this is then used to bootstrap the
681 * volume so that mft record mapping/unmapping is working and therefore inodes
682 * can be read in general.  To do so a new vnode is created and attached to the
683 * new ntfs inode and the runlist for the $DATA attribute is fully mapped.
684 *
685 * Return 0 on success and errno on error.
686 */
687static errno_t ntfs_mft_inode_get(ntfs_volume *vol)
688{
689	daddr64_t block;
690	VCN next_vcn, last_vcn, highest_vcn;
691	ntfs_inode *ni;
692	MFT_RECORD *m = NULL;
693	vnode_t dev_vn = vol->dev_vn;
694	buf_t buf;
695	ntfs_attr_search_ctx *ctx = NULL;
696	ATTR_RECORD *a;
697	STANDARD_INFORMATION *si;
698	errno_t err;
699	const int block_size = vol->sector_size;
700	unsigned nr_blocks, u;
701	ntfs_attr na;
702	char *es = "  $MFT is corrupt.  Run chkdsk.";
703	const u8 block_size_shift = vol->sector_size_shift;
704
705	ntfs_debug("Entering.");
706	na = (ntfs_attr) {
707		.mft_no = FILE_MFT,
708		.type = AT_UNUSED,
709		.raw = FALSE,
710	};
711	ni = ntfs_inode_hash_get(vol, &na);
712	if (!ni) {
713		ntfs_error(vol->mp, "Failed to allocate new inode.");
714		return ENOMEM;
715	}
716	if (!NInoAlloc(ni)) {
717		ntfs_error(vol->mp, "Failed (found stale inode in cache).");
718		err = ESTALE;
719		goto err;
720	}
721	/*
722	 * We allocated a new inode, now set it up as the unnamed data
723	 * attribute.  It is special as it is mst protected.
724	 */
725	NInoSetNonResident(ni);
726	NInoSetMstProtected(ni);
727	NInoSetSparseDisabled(ni);
728	ni->type = AT_DATA;
729	ni->block_size = vol->mft_record_size;
730	ni->block_size_shift = vol->mft_record_size_shift;
731	/* No-one is allowed to access $MFT directly. */
732	ni->uid = 0;
733	ni->gid = 0;
734	ni->mode = S_IFREG;
735	/* Allocate enough memory to read the first mft record. */
736	m = OSMalloc(vol->mft_record_size, ntfs_malloc_tag);
737	if (!m) {
738		ntfs_error(vol->mp, "Failed to allocate buffer for $MFT "
739				"record 0.");
740		err = ENOMEM;
741		goto err;
742	}
743	/* Determine the first physical block of the $MFT/$DATA attribute. */
744	block = vol->mft_lcn << (vol->cluster_size_shift - block_size_shift);
745	nr_blocks = vol->mft_record_size >> block_size_shift;
746	if (!nr_blocks)
747		nr_blocks = 1;
748	/* Load $MFT/$DATA's first mft record, one block at a time. */
749	for (u = 0; u < nr_blocks; u++, block++) {
750		u8 *src;
751
752		err = buf_meta_bread(dev_vn, block, block_size, NOCRED, &buf);
753		/*
754		 * We set the B_NOCACHE flag on the buffer(s), thus effectively
755		 * invalidating them when we release them.  This is needed
756		 * because the buffer(s) will get read later using the $MFT
757		 * base vnode.
758		 */
759		buf_setflags(buf, B_NOCACHE);
760		if (err) {
761			ntfs_error(vol->mp, "Failed to read $MFT record 0 "
762					"(block %u, physical block 0x%llx, "
763					"physical block size %d).", u,
764					(unsigned long long)block, block_size);
765			buf_brelse(buf);
766			goto err;
767		}
768		err = buf_map(buf, (caddr_t*)&src);
769		if (err) {
770			ntfs_error(vol->mp, "Failed to map buffer of mft "
771					"record 0 (block %u, physical block "
772					"0x%llx, physical block size %d).", u,
773					(unsigned long long)block, block_size);
774			buf_brelse(buf);
775			goto err;
776		}
777		memcpy((u8*)m + (u << block_size_shift), src, block_size);
778		err = buf_unmap(buf);
779		if (err)
780			ntfs_error(vol->mp, "Failed to unmap buffer of mft "
781					"record 0 (error %d).", err);
782		buf_brelse(buf);
783	}
784	/* Apply the mst fixups. */
785	err = ntfs_mst_fixup_post_read((NTFS_RECORD*)m, vol->mft_record_size);
786	if (err) {
787		/* TODO: Try to use the $MFTMirr now. */
788		ntfs_error(vol->mp, "MST fixup failed.%s", es);
789		goto io_err;
790	}
791	/*
792	 * Need this to be able to sanity check attribute list references to
793	 * $MFT.
794	 */
795	ni->seq_no = le16_to_cpu(m->sequence_number);
796	/* Get the number of hard links, too. */
797	ni->link_count = le16_to_cpu(m->link_count);
798	ctx = ntfs_attr_search_ctx_get(ni, m);
799	if (!ctx) {
800		err = ENOMEM;
801		goto err;
802	}
803	/*
804	 * Find the standard information attribute in the mft record.  At this
805	 * stage we have not setup the attribute list stuff yet, so this could
806	 * in fact fail if the standard information is in an extent record, but
807	 * this is not allowed hence not a problem.
808	 */
809	err = ntfs_attr_lookup(AT_STANDARD_INFORMATION, AT_UNNAMED, 0, 0, NULL,
810			0, ctx);
811	a = ctx->a;
812	if (err || a->non_resident || a->flags) {
813		if (err) {
814			if (err == ENOENT) {
815				/*
816				 * TODO: We should be performing a hot fix here
817				 * (if the recover mount option is set) by
818				 * creating a new attribute.
819				 */
820				ntfs_error(vol->mp, "Standard information "
821						"attribute is missing.");
822			} else
823				ntfs_error(vol->mp, "Failed to lookup "
824						"standard information "
825						"attribute.");
826		} else {
827info_err:
828			ntfs_error(vol->mp, "Standard information attribute "
829					"is corrupt.");
830			err = EIO;
831		}
832		goto err;
833	}
834	si = (STANDARD_INFORMATION*)((u8*)a +
835			le16_to_cpu(a->value_offset));
836	/* Some bounds checks. */
837	if ((u8*)si < (u8*)a || (u8*)si + le32_to_cpu(a->value_length) >
838			(u8*)a + le32_to_cpu(a->length) ||
839			(u8*)a + le32_to_cpu(a->length) > (u8*)ctx->m +
840			vol->mft_record_size)
841		goto info_err;
842	/*
843	 * Cache the create, the last data and mft modified, and the last
844	 * access times in the ntfs inode.
845	 */
846	ni->creation_time = ntfs2utc(si->creation_time);
847	ni->last_data_change_time = ntfs2utc(si->last_data_change_time);
848	ni->last_mft_change_time = ntfs2utc(si->last_mft_change_time);
849	ni->last_access_time = ntfs2utc(si->last_access_time);
850	/* Find the attribute list attribute if present. */
851	ntfs_attr_search_ctx_reinit(ctx);
852	err = ntfs_attr_lookup(AT_ATTRIBUTE_LIST, AT_UNNAMED, 0, 0, NULL, 0,
853			ctx);
854	if (err) {
855		if (err != ENOENT) {
856			ntfs_error(vol->mp, "Failed to lookup attribute list "
857					"attribute.%s", es);
858			goto err;
859		}
860		ntfs_debug("$MFT does not have an attribute list attribute.");
861	} else /* if (!err) */ {
862		ATTR_LIST_ENTRY *al_entry, *next_al_entry;
863		u8 *al_end;
864
865		ntfs_debug("Attribute list attribute found in $MFT.");
866		NInoSetAttrList(ni);
867		a = ctx->a;
868		if (a->flags & ATTR_COMPRESSION_MASK) {
869			ntfs_error(vol->mp, "Attribute list attribute is "
870					"compressed.  Not allowed.%s", es);
871			goto io_err;
872		}
873		if (a->flags & (ATTR_IS_ENCRYPTED | ATTR_IS_SPARSE)) {
874			if (a->non_resident) {
875				ntfs_error(vol->mp, "Non-resident attribute "
876						"list attribute is encrypted/"
877						"sparse.  Not allowed.%s", es);
878				goto io_err;
879			}
880			ntfs_warning(vol->mp, "Resident attribute list "
881					"attribute is marked encrypted/sparse "
882					"which is not true.  However, Windows "
883					"allows this and chkdsk does not "
884					"detect or correct it so we will just "
885					"ignore the invalid flags and pretend "
886					"they are not set.");
887		}
888		/* Now allocate memory for the attribute list. */
889		ni->attr_list_size = (u32)ntfs_attr_size(a);
890		ni->attr_list_alloc = (ni->attr_list_size + NTFS_ALLOC_BLOCK -
891				1) & ~(NTFS_ALLOC_BLOCK - 1);
892		ni->attr_list = OSMalloc(ni->attr_list_alloc, ntfs_malloc_tag);
893		if (!ni->attr_list) {
894			ni->attr_list_alloc = 0;
895			ntfs_error(vol->mp, "Not enough memory to allocate "
896					"buffer for attribute list.");
897			err = ENOMEM;
898			goto err;
899		}
900		if (a->non_resident) {
901			NInoSetAttrListNonResident(ni);
902			if (a->lowest_vcn) {
903				ntfs_error(vol->mp, "Attribute list has non-"
904						"zero lowest_vcn.%s", es);
905				goto io_err;
906			}
907			/* Setup the runlist. */
908			err = ntfs_mapping_pairs_decompress(vol, a,
909					&ni->attr_list_rl);
910			if (err) {
911				ntfs_error(vol->mp, "Mapping pairs "
912						"decompression failed with "
913						"error code %d.%s", err, es);
914				goto err;
915			}
916			/* Now read in the attribute list. */
917			err = ntfs_rl_read(vol, &ni->attr_list_rl,
918					ni->attr_list, (s64)ni->attr_list_size,
919					sle64_to_cpu(a->initialized_size));
920			if (err) {
921				ntfs_error(vol->mp, "Failed to load attribute "
922						"list attribute with error "
923						"code %d.", err);
924				goto err;
925			}
926		} else /* if (!a->non_resident) */ {
927			u8 *al = (u8*)a + le16_to_cpu(a->value_offset);
928			u8 *a_end = (u8*)a + le32_to_cpu(a->length);
929			if (al < (u8*)a || al + le32_to_cpu(a->value_length) >
930					a_end || (u8*)a_end > (u8*)ctx->m +
931					vol->mft_record_size) {
932				ntfs_error(vol->mp, "Corrupt attribute list "
933						"attribute.%s", es);
934				goto io_err;
935			}
936			/* Now copy the attribute list. */
937			memcpy(ni->attr_list, (u8*)a +
938					le16_to_cpu(a->value_offset),
939					ni->attr_list_size);
940		}
941		/* The attribute list is now setup in memory. */
942		/*
943		 * FIXME: I do not know if this case is actually possible.
944		 * According to logic it is not possible but I have seen too
945		 * many weird things in MS software to rely on logic.  Thus we
946		 * perform a manual search and make sure the first $MFT/$DATA
947		 * extent is in the base inode.  If it is not we abort with an
948		 * error and if we ever see a report of this error we will need
949		 * to do some magic in order to have the necessary mft record
950		 * loaded and in the right place.  But hopefully logic will
951		 * prevail and this never happens...
952		 */
953		al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
954		al_end = (u8*)al_entry + ni->attr_list_size;
955		for (;; al_entry = next_al_entry) {
956			/* Out of bounds check. */
957			if ((u8*)al_entry < ni->attr_list ||
958					(u8*)al_entry > al_end)
959				goto em_err;
960			/* Catch the end of the attribute list. */
961			if ((u8*)al_entry == al_end)
962				goto em_err;
963			if (!al_entry->length)
964				goto em_err;
965			if ((u8*)al_entry + 6 > al_end || (u8*)al_entry +
966					le16_to_cpu(al_entry->length) > al_end)
967				goto em_err;
968			next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry +
969					le16_to_cpu(al_entry->length));
970			if (le32_to_cpu(al_entry->type) >
971					const_le32_to_cpu(AT_DATA))
972				goto em_err;
973			if (al_entry->type != AT_DATA)
974				continue;
975			/* We want an unnamed attribute. */
976			if (al_entry->name_length)
977				goto em_err;
978			/* Want the first entry, i.e. lowest_vcn == 0. */
979			if (al_entry->lowest_vcn)
980				goto em_err;
981			/* First entry has to be in the base mft record. */
982			if (MREF_LE(al_entry->mft_reference) != ni->mft_no) {
983				/* MFT references do not match, logic fails. */
984				ntfs_error(vol->mp, "BUG: The first $DATA "
985						"extent of $MFT is not in the "
986						"base mft record.  Please "
987						"report you saw this message "
988						"to %s.", ntfs_dev_email);
989				goto io_err;
990			}
991			/* Sequence numbers must match. */
992			if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no)
993				goto em_err;
994			/* Done: Found first extent of $DATA as expected. */
995			break;
996		}
997	}
998	ntfs_attr_search_ctx_reinit(ctx);
999	/* Now load all attribute extents. */
1000	a = NULL;
1001	next_vcn = last_vcn = highest_vcn = 0;
1002	while (!(err = ntfs_attr_lookup(AT_DATA, AT_UNNAMED, 0, next_vcn, NULL,
1003			0, ctx))) {
1004		/* Cache the current attribute. */
1005		a = ctx->a;
1006		/* $MFT must be non-resident. */
1007		if (!a->non_resident) {
1008			ntfs_error(vol->mp, "$MFT must be non-resident but a "
1009					"resident extent was found.%s", es);
1010			goto io_err;
1011		}
1012		/* $MFT must be uncompressed and unencrypted. */
1013		if (a->flags & ATTR_COMPRESSION_MASK ||
1014				a->flags & ATTR_IS_ENCRYPTED ||
1015				a->flags & ATTR_IS_SPARSE) {
1016			ntfs_error(vol->mp, "$MFT must be uncompressed, "
1017					"non-sparse, and unencrypted but a "
1018					"compressed/sparse/encrypted extent "
1019					"was found.%s", es);
1020			goto io_err;
1021		}
1022		/*
1023		 * Decompress the mapping pairs array of this extent and merge
1024		 * the result into the existing runlist.  No need for locking
1025		 * as we have exclusive access to the inode at this time and we
1026		 * are a mount in progress task, too.
1027		 */
1028		err = ntfs_mapping_pairs_decompress(vol, a, &ni->rl);
1029		if (err) {
1030			ntfs_error(vol->mp, "Mapping pairs decompression "
1031					"failed with error code %d.%s", err,
1032					es);
1033			goto err;
1034		}
1035		/* Get the lowest vcn for the next extent. */
1036		highest_vcn = sle64_to_cpu(a->highest_vcn);
1037		/*
1038		 * If we are in the first extent, bootstrap the volume so we
1039		 * can load other inodes and map (extent) mft records.
1040		 */
1041		if (!next_vcn) {
1042			if (a->lowest_vcn) {
1043				ntfs_error(vol->mp, "First extent of $DATA "
1044						"attribute has non zero "
1045						"lowest_vcn.%s", es);
1046				goto io_err;
1047			}
1048			/* Get the last vcn in the $DATA attribute. */
1049			last_vcn = sle64_to_cpu(a->allocated_size)
1050					>> vol->cluster_size_shift;
1051			/* Fill in the sizes. */
1052			ni->allocated_size = sle64_to_cpu(a->allocated_size);
1053			ni->data_size = sle64_to_cpu(a->data_size);
1054			ni->initialized_size = sle64_to_cpu(
1055					a->initialized_size);
1056			/*
1057			 * Verify the sizes are sane.  In particular both the
1058			 * data size and the initialized size must be multiples
1059			 * of the mft record size or we will panic() when
1060			 * reading the boundary in ntfs_cluster_iodone().
1061			 *
1062			 * Also the allocated size must be a multiple of the
1063			 * volume cluster size.
1064			 */
1065			if (ni->allocated_size & vol->cluster_size_mask ||
1066					ni->data_size &
1067					vol->mft_record_size_mask ||
1068					ni->initialized_size &
1069					vol->mft_record_size_mask) {
1070				ntfs_error(vol->mp, "$DATA attribute contains "
1071						"invalid size.%s", es);
1072				goto io_err;
1073			}
1074			/*
1075			 * Verify the number of mft records does not exceed
1076			 * 2^32 - 1.
1077			 */
1078			if (ni->data_size >> vol->mft_record_size_shift >=
1079					1LL << 32) {
1080				ntfs_error(vol->mp, "$MFT is too big.  "
1081						"Aborting.");
1082				goto io_err;
1083			}
1084			/* We have the size now so we can add the vnode. */
1085			err = ntfs_inode_add_vnode(ni, TRUE, NULL, NULL);
1086			if (err) {
1087				ntfs_error(vol->mp, "Failed to create a "
1088						"system vnode for $MFT (error "
1089						"%d).", err);
1090				goto err;
1091			}
1092			/*
1093			 * We will hold on to the $MFT inode for the duration
1094			 * of the mount thus we need to take a reference on the
1095			 * vnode.  Note we need to attach the inode to the
1096			 * volume here so that ntfs_read_inode() can call
1097			 * ntfs_attr_lookup() which needs to be able to map
1098			 * extent mft records which requires vol->mft_ni to be
1099			 * setup.
1100			 */
1101			err = vnode_ref(ni->vn);
1102			if (err)
1103				ntfs_error(vol->mp, "vnode_ref() failed!");
1104			OSIncrementAtomic(&ni->nr_refs);
1105			vol->mft_ni = ni;
1106			/* The $MFT inode is fully setup now, so unlock it. */
1107			ntfs_inode_unlock_alloc(ni);
1108			/*
1109			 * We can release the iocount reference now.  It will
1110			 * be taken as and when required in the low level code.
1111			 * We can ignore the return value as it always is zero.
1112			 */
1113			(void)vnode_put(ni->vn);
1114			/* If $MFT/$DATA has only one extent, we are done. */
1115			if (highest_vcn == last_vcn - 1)
1116				break;
1117		}
1118		next_vcn = highest_vcn + 1;
1119		if (next_vcn <= 0) {
1120			ntfs_error(vol->mp, "Invalid highest vcn in attribute "
1121					"extent.%s", es);
1122			goto io_err;
1123		}
1124		/* Avoid endless loops due to corruption. */
1125		if (next_vcn < sle64_to_cpu(a->lowest_vcn)) {
1126			ntfs_error(vol->mp, "Corrupt attribute extent would "
1127					"cause endless loop, aborting.%s", es);
1128			goto io_err;
1129		}
1130	}
1131	if (err && err != ENOENT) {
1132		ntfs_error(vol->mp, "Failed to lookup $MFT/$DATA attribute "
1133				"extent.%s", es);
1134		goto err;
1135	}
1136	if (!a) {
1137		ntfs_error(vol->mp, "$MFT/$DATA attribute not found.%s", es);
1138		err = ENOENT;
1139		goto err;
1140	}
1141	if (highest_vcn != last_vcn - 1) {
1142		ntfs_error(vol->mp, "Failed to load the complete runlist for "
1143				"$MFT/$DATA.  Driver bug or corrupt $MFT.  "
1144				"Run chkdsk.");
1145		ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
1146				(unsigned long long)highest_vcn,
1147				(unsigned long long)(last_vcn - 1));
1148		goto io_err;
1149	}
1150	ntfs_attr_search_ctx_put(ctx);
1151	OSFree(m, vol->mft_record_size, ntfs_malloc_tag);
1152	ntfs_debug("Done.");
1153	return 0;
1154em_err:
1155	ntfs_error(vol->mp, "Could not find first extent of $DATA attribute "
1156			"in attribute list.%s", es);
1157io_err:
1158	err = EIO;
1159err:
1160	if (ctx)
1161		ntfs_attr_search_ctx_put(ctx);
1162	if (m)
1163		OSFree(m, vol->mft_record_size, ntfs_malloc_tag);
1164	/* vol->mft_ni will be cleaned up by the caller. */
1165	if (!vol->mft_ni)
1166		ntfs_inode_reclaim(ni);
1167	return err;
1168}
1169
1170/**
1171 * ntfs_inode_attach - load and attach an inode to an ntfs structure
1172 * @vol:	ntfs volume to which the inode to load belongs
1173 * @mft_no:	mft record number / inode number to obtain
1174 * @ni:		pointer in which to return the obtained ntfs inode
1175 * @parent_vn:	vnode of directory containing the inode to return or NULL
1176 *
1177 * Load the ntfs inode @mft_no from the mounted ntfs volume @vol, attach it by
1178 * getting a reference on it and return the ntfs inode in @ni.
1179 *
1180 * The created vnode is marked as a system vnoded so that the volume can be
1181 * unmounted.  (VSYSTEM vnodes are skipped during vflush()).)
1182 *
1183 * If @parent_vn is not NULL, it is set up as the parent directory vnode of the
1184 * vnode of the obtained inode.
1185 *
1186 * Return 0 on success and errno on error.  On error *@ni is set to NULL.
1187 */
1188static errno_t ntfs_inode_attach(ntfs_volume *vol, const ino64_t mft_no,
1189		ntfs_inode **ni, vnode_t parent_vn)
1190{
1191	vnode_t vn;
1192	errno_t err;
1193
1194	ntfs_debug("Entering.");
1195	err = ntfs_inode_get(vol, mft_no, TRUE, LCK_RW_TYPE_SHARED, ni,
1196			parent_vn, NULL);
1197	if (err) {
1198		ntfs_error(vol->mp, "Failed to load inode 0x%llx.",
1199				(unsigned long long)mft_no);
1200		*ni = NULL;
1201		return err;
1202	}
1203	/*
1204	 * Take an internal reference on the parent inode to balance the
1205	 * reference taken on the parent vnode in vnode_create().
1206	 */
1207	if (parent_vn)
1208		OSIncrementAtomic(&NTFS_I(parent_vn)->nr_refs);
1209	vn = (*ni)->vn;
1210	err = vnode_ref(vn);
1211	if (err)
1212		ntfs_error(vol->mp, "vnode_ref() failed!");
1213	OSIncrementAtomic(&(*ni)->nr_refs);
1214	lck_rw_unlock_shared(&(*ni)->lock);
1215	(void)vnode_put(vn);
1216	ntfs_debug("Done.");
1217	return 0;
1218}
1219
1220/**
1221 * ntfs_attr_inode_attach - load and attach an attribute inode to a structure
1222 * @base_ni:	ntfs base inode containing the attribute
1223 * @type:	attribute type
1224 * @name:	Unicode name of the attribute (NULL if unnamed)
1225 * @name_len:	length of @name in Unicode characters (0 if unnamed)
1226 * @ni:		pointer in which to return the obtained ntfs inode
1227 *
1228 * Load the attribute inode described by @type, @name, and @name_len belonging
1229 * to the base inode @base_ni, attach it by getting a reference on it and
1230 * return the ntfs inode in @ni.
1231 *
1232 * The created vnode is marked as a system vnode so that the volume can be
1233 * unmounted.  (VSYSTEM vnodes are skipped during vflush()).)
1234 *
1235 * The vnode of the base inode @base_ni is set up as the parent vnode of the
1236 * vnode of the obtained inode.
1237 *
1238 * Return 0 on success and errno on error.  On error *@ni is set to NULL.
1239 */
1240static errno_t ntfs_attr_inode_attach(ntfs_inode *base_ni,
1241		const ATTR_TYPE type, ntfschar *name, const u32 name_len,
1242		ntfs_inode **ni)
1243{
1244	vnode_t vn;
1245	errno_t err;
1246
1247	ntfs_debug("Entering.");
1248	err = ntfs_attr_inode_get(base_ni, type, name, name_len, TRUE,
1249			LCK_RW_TYPE_SHARED, ni);
1250	if (err) {
1251		ntfs_error(base_ni->vol->mp, "Failed to load attribute inode "
1252				"0x%llx, attribute type 0x%x, name length "
1253				"0x%x.", (unsigned long long)base_ni->mft_no,
1254				(unsigned)le32_to_cpu(type),
1255				(unsigned)name_len);
1256		*ni = NULL;
1257		return err;
1258	}
1259	/*
1260	 * Take an internal reference on the base inode @base_ni (which is also
1261	 * the parent inode) to balance the reference taken on the parent vnode
1262	 * in vnode_create().
1263	 */
1264	OSIncrementAtomic(&base_ni->nr_refs);
1265	vn = (*ni)->vn;
1266	err = vnode_ref(vn);
1267	if (err)
1268		ntfs_error(base_ni->vol->mp, "vnode_ref() failed!");
1269	OSIncrementAtomic(&(*ni)->nr_refs);
1270	lck_rw_unlock_shared(&(*ni)->lock);
1271	(void)vnode_put(vn);
1272	ntfs_debug("Done.");
1273	return 0;
1274}
1275
1276/**
1277 * ntfs_index_inode_attach - load and attach an attribute inode to a structure
1278 * @base_ni:	ntfs base inode containing the index
1279 * @name:	Unicode name of the index
1280 * @name_len:	length of @name in Unicode characters
1281 * @ni:		pointer in which to return the obtained ntfs inode
1282 *
1283 * Load the index inode described by @name and @name_len belonging to the base
1284 * inode @base_ni, attach it by getting a reference on it and return the ntfs
1285 * inode in @ni.
1286 *
1287 * The created vnode is marked as a system vnode so that the volume can be
1288 * unmounted.  (VSYSTEM vnodes are skipped during vflush()).)
1289 *
1290 * The vnode of the base inode @base_ni is set up as the parent vnode of the
1291 * vnode of the obtained inode.
1292 *
1293 * Return 0 on success and errno on error.  On error *@ni is set to NULL.
1294 */
1295static errno_t ntfs_index_inode_attach(ntfs_inode *base_ni, ntfschar *name,
1296		const u32 name_len, ntfs_inode **ni)
1297{
1298	vnode_t vn;
1299	errno_t err;
1300
1301	ntfs_debug("Entering.");
1302	err = ntfs_index_inode_get(base_ni, name, name_len, TRUE, ni);
1303	if (err) {
1304		ntfs_error(base_ni->vol->mp, "Failed to load index inode "
1305				"0x%llx, name length 0x%x.",
1306				(unsigned long long)base_ni->mft_no,
1307				(unsigned)name_len);
1308		*ni = NULL;
1309		return err;
1310	}
1311	/*
1312	 * Take an internal reference on the base inode @base_ni (which is also
1313	 * the parent inode) to balance the reference taken on the parent vnode
1314	 * in vnode_create().
1315	 */
1316	OSIncrementAtomic(&base_ni->nr_refs);
1317	vn = (*ni)->vn;
1318	err = vnode_ref(vn);
1319	if (err)
1320		ntfs_error(base_ni->vol->mp, "vnode_ref() failed!");
1321	OSIncrementAtomic(&(*ni)->nr_refs);
1322	(void)vnode_put(vn);
1323	ntfs_debug("Done.");
1324	return 0;
1325}
1326
1327/**
1328 * ntfs_mft_mirror_load - load and setup the mft mirror inode
1329 * @vol:	ntfs volume describing device whose mft mirror to load
1330 *
1331 * Return 0 on success and errno on error.
1332 */
1333static errno_t ntfs_mft_mirror_load(ntfs_volume *vol)
1334{
1335	ntfs_inode *ni;
1336	vnode_t vn;
1337	errno_t err;
1338
1339	ntfs_debug("Entering.");
1340	err = ntfs_inode_get(vol, FILE_MFTMirr, TRUE, LCK_RW_TYPE_SHARED, &ni,
1341			vol->root_ni->vn, NULL);
1342	if (err) {
1343		ntfs_error(vol->mp, "Failed to load inode 0x%llx.",
1344				(unsigned long long)FILE_MFTMirr);
1345		return err;
1346	}
1347	vn = ni->vn;
1348	/*
1349	 * Re-initialize some specifics about the inode of $MFTMirr as
1350	 * ntfs_inode_get() will have set up the default ones.
1351	 */
1352	/* Set uid and gid to root. */
1353	ni->uid = 0;
1354	ni->gid = 0;
1355	/* Regular file.  No access for anyone. */
1356	ni->mode = S_IFREG;
1357	/*
1358	 * The $MFTMirr, like the $MFT is multi sector transfer protected but
1359	 * we do not mark it as such as we want to have the buffers directly
1360	 * copied from the mft thus we do not want to mess about with MST
1361	 * fixups on the mft mirror.
1362	 */
1363	NInoSetSparseDisabled(ni);
1364	ni->block_size = vol->mft_record_size;
1365	ni->block_size_shift = vol->mft_record_size_shift;
1366	/*
1367	 * Verify the sizes are sane.  In particular both the data size and the
1368	 * initialized size must be multiples of the mft record size or we will
1369	 * panic() when reading the boundary in ntfs_cluster_iodone().
1370	 *
1371	 * Also the allocated size must be a multiple of the volume cluster
1372	 * size.
1373	 */
1374	if (ni->allocated_size & vol->cluster_size_mask ||
1375			ni->data_size & vol->mft_record_size_mask ||
1376			ni->initialized_size & vol->mft_record_size_mask) {
1377		ntfs_error(vol->mp, "$DATA attribute contains invalid size.  "
1378				"$MFTMirr is corrupt.  Run chkdsk.");
1379		(void)vnode_recycle(vn);
1380		(void)vnode_put(vn);
1381		return EIO;
1382	}
1383	OSIncrementAtomic(&vol->root_ni->nr_refs);
1384	err = vnode_ref(vn);
1385	if (err)
1386		ntfs_error(vol->mp, "vnode_ref() failed!");
1387	OSIncrementAtomic(&ni->nr_refs);
1388	lck_rw_unlock_shared(&ni->lock);
1389	(void)vnode_put(vn);
1390	vol->mftmirr_ni = ni;
1391	ntfs_debug("Done.");
1392	return 0;
1393}
1394
1395/**
1396 * ntfs_mft_mirror_check - compare contents of the mft mirror with the mft
1397 * @vol:	ntfs volume describing device whose mft mirror to check
1398 *
1399 * Return 0 on success and errno on error.
1400 *
1401 * Note, this function also results in the mft mirror runlist being completely
1402 * mapped into memory.  The mft mirror write code requires this and will
1403 * panic() should it find an unmapped runlist element.
1404 */
1405static errno_t ntfs_mft_mirror_check(ntfs_volume *vol)
1406{
1407	ntfs_inode *ni;
1408	buf_t buf;
1409	u8 *mirr_start;
1410	MFT_RECORD *mirr, *m;
1411	unsigned nr_mirr_recs, alloc_size, rec_size, i;
1412	errno_t err, err2;
1413
1414	ntfs_debug("Entering.");
1415	if (!vol->mftmirr_size)
1416		panic("%s(): !vol->mftmirr_size\n", __FUNCTION__);
1417	nr_mirr_recs = vol->mftmirr_size;
1418	if (!nr_mirr_recs)
1419		panic("%s(): !nr_mirr_recs\n", __FUNCTION__);
1420	rec_size = vol->mft_record_size;
1421	/* Allocate a buffer and read all mft mirror records into it. */
1422	alloc_size = nr_mirr_recs << vol->mft_record_size_shift;
1423	mirr_start = OSMalloc(alloc_size, ntfs_malloc_tag);
1424	if (!mirr_start) {
1425		ntfs_error(vol->mp, "Failed to allocate temporary mft mirror "
1426				"buffer.");
1427		return ENOMEM;
1428	}
1429	mirr = (MFT_RECORD*)mirr_start;
1430	ni = vol->mftmirr_ni;
1431	err = vnode_get(ni->vn);
1432	if (err) {
1433		ntfs_error(vol->mp, "Failed to get vnode for $MFTMirr.");
1434		goto err;
1435	}
1436	lck_rw_lock_shared(&ni->lock);
1437	for (i = 0; i < nr_mirr_recs; i++) {
1438		/* Get the next $MFTMirr record. */
1439		err = buf_meta_bread(ni->vn, i, rec_size, NOCRED, &buf);
1440		if (err) {
1441			ntfs_error(vol->mp, "Failed to read $MFTMirr record "
1442					"%d (error %d).", i, err);
1443			goto brelse;
1444		}
1445		err = buf_map(buf, (caddr_t*)&m);
1446		if (err) {
1447			ntfs_error(vol->mp, "Failed to map buffer of $MFTMirr "
1448					"record %d (error %d).", i, err);
1449			goto brelse;
1450		}
1451		/*
1452		 * Copy the mirror record, drop the buffer, and remove the MST
1453		 * fixups.
1454		 */
1455		memcpy(mirr, m, rec_size);
1456		err = buf_unmap(buf);
1457		if (err) {
1458			ntfs_error(vol->mp, "Failed to unmap buffer of "
1459					"$MFTMirr record %d (error %d).", i,
1460					err);
1461			goto brelse;
1462		}
1463		buf_brelse(buf);
1464		err = ntfs_mst_fixup_post_read((NTFS_RECORD*)mirr, rec_size);
1465		/* Do not check the mirror record if it is not in use. */
1466		if (mirr->flags & MFT_RECORD_IN_USE) {
1467			if (err || ntfs_is_baad_record(mirr->magic)) {
1468				ntfs_error(vol->mp, "Incomplete multi sector "
1469						"transfer detected in mft "
1470						"mirror record %d.", i);
1471				if (!err)
1472					err = EIO;
1473				goto unlock;
1474			}
1475		}
1476		mirr = (MFT_RECORD*)((u8*)mirr + rec_size);
1477	}
1478	/*
1479	 * Because we have just read at least the beginning of the mft mirror,
1480	 * we know we have mapped at least the beginning of the runlist for it.
1481	 */
1482	lck_rw_lock_shared(&ni->rl.lock);
1483	/*
1484	 * The runlist for the mft mirror must contain at least @nr_mirr_recs
1485	 * mft records and they must be in the first run, i.e. consecutive on
1486	 * disk.
1487	 */
1488	if (ni->rl.rl->lcn != vol->mftmirr_lcn ||
1489			ni->rl.rl->length < (((s64)vol->mftmirr_size <<
1490			vol->mft_record_size_shift) +
1491			vol->cluster_size_mask) >> vol->cluster_size_shift) {
1492		ntfs_error(vol->mp, "$MFTMirr location mismatch.  Run "
1493				"chkdsk.");
1494		err = EIO;
1495	} else
1496		ntfs_debug("Done.");
1497	lck_rw_unlock_shared(&ni->rl.lock);
1498	lck_rw_unlock_shared(&ni->lock);
1499	(void)vnode_put(ni->vn);
1500	/*
1501	 * Now read the $MFT records one at a time and compare each against the
1502	 * already read $MFTMirr records.
1503	 */
1504	ni = vol->mft_ni;
1505	err = vnode_get(ni->vn);
1506	if (err) {
1507		ntfs_error(vol->mp, "Failed to get vnode for $MFT.");
1508		goto err;
1509	}
1510	lck_rw_lock_shared(&ni->lock);
1511	mirr = (MFT_RECORD*)mirr_start;
1512	for (i = 0; i < nr_mirr_recs; i++) {
1513		unsigned bytes;
1514
1515		/* Get the current $MFT record. */
1516		err = buf_meta_bread(ni->vn, i, rec_size, NOCRED, &buf);
1517		if (err) {
1518			ntfs_error(vol->mp, "Failed to read $MFT record %d "
1519					"(error %d).", i, err);
1520			goto brelse;
1521		}
1522		err = buf_map(buf, (caddr_t*)&m);
1523		if (err) {
1524			ntfs_error(vol->mp, "Failed to map buffer of $MFT "
1525					"record %d (error %d).", i, err);
1526			goto brelse;
1527		}
1528		/* Do not check the mft record if it is not in use. */
1529		if (m->flags & MFT_RECORD_IN_USE) {
1530			/* Make sure the record is ok. */
1531			if (ntfs_is_baad_record(m->magic)) {
1532				ntfs_error(vol->mp, "Incomplete multi sector "
1533						"transfer detected in mft "
1534						"record %d.", i);
1535				err = EIO;
1536				goto unmap;
1537			}
1538		}
1539		/* Get the amount of data in the current record. */
1540		bytes = le32_to_cpu(m->bytes_in_use);
1541		if (bytes < sizeof(MFT_RECORD_OLD) || bytes > rec_size ||
1542				ntfs_is_baad_record(m->magic)) {
1543			bytes = le32_to_cpu(mirr->bytes_in_use);
1544			if (bytes < sizeof(MFT_RECORD_OLD) ||
1545					bytes > rec_size ||
1546					ntfs_is_baad_record(mirr->magic))
1547				bytes = rec_size;
1548		}
1549		/* Compare the two records. */
1550		if (bcmp(m, mirr, bytes)) {
1551			ntfs_error(vol->mp, "$MFT and $MFTMirr (record %d) do "
1552					"not match.  Run chkdsk.", i);
1553			err = EIO;
1554			goto unmap;
1555		}
1556		mirr = (MFT_RECORD*)((u8*)mirr + rec_size);
1557		err = buf_unmap(buf);
1558		if (err) {
1559			ntfs_error(vol->mp, "Failed to unmap buffer of $MFT "
1560					"record %d (error %d).", i, err);
1561			goto brelse;
1562		}
1563		buf_brelse(buf);
1564	}
1565unlock:
1566	lck_rw_unlock_shared(&ni->lock);
1567	(void)vnode_put(ni->vn);
1568err:
1569	OSFree(mirr_start, alloc_size, ntfs_malloc_tag);
1570	return err;
1571unmap:
1572	err2 = buf_unmap(buf);
1573	if (err2)
1574		ntfs_error(vol->mp, "Failed to unmap buffer of mft record %d "
1575				"in error code path (error %d).", i, err2);
1576brelse:
1577	buf_brelse(buf);
1578	goto unlock;
1579}
1580
1581/**
1582 * ntfs_upcase_load - load the upcase table for an ntfs volume
1583 * @vol:	ntfs volume whose upcase to load
1584 *
1585 * Read the upcase table and setup @vol->upcase and @vol->upcase_len.
1586 *
1587 * Return 0 on success and errno on error.
1588 */
1589static errno_t ntfs_upcase_load(ntfs_volume *vol)
1590{
1591	s64 ofs, data_size = 0;
1592	ntfs_inode *ni;
1593	upl_t upl;
1594	upl_page_info_array_t pl;
1595	u8 *kaddr;
1596	errno_t err;
1597	unsigned u;
1598
1599	ntfs_debug("Entering.");
1600	err = ntfs_inode_get(vol, FILE_UpCase, TRUE, LCK_RW_TYPE_SHARED, &ni,
1601			vol->root_ni->vn, NULL);
1602	if (err) {
1603		ni = NULL;
1604		goto err;
1605	}
1606	/*
1607	 * The upcase size must not be above 64k Unicode characters, must not
1608	 * be zero, and must be a multiple of sizeof(ntfschar).
1609	 */
1610	lck_spin_lock(&ni->size_lock);
1611	data_size = ni->data_size;
1612	lck_spin_unlock(&ni->size_lock);
1613	if (data_size <= 0 || data_size & (sizeof(ntfschar) - 1) ||
1614			data_size > (s64)(64 * 1024 * sizeof(ntfschar))) {
1615		err = EINVAL;
1616		goto err;
1617	}
1618	/* Allocate memory to hold the $UpCase data. */
1619	vol->upcase = OSMalloc(data_size, ntfs_malloc_tag);
1620	if (!vol->upcase) {
1621		err = ENOMEM;
1622		goto err;
1623	}
1624	/*
1625	 * Read the whole $UpCase file a page at a time and copy the contents
1626	 * over.
1627	 */
1628	u = PAGE_SIZE;
1629	for (ofs = 0; ofs < data_size; ofs += PAGE_SIZE) {
1630		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, FALSE);
1631		if (err)
1632			goto err;
1633		if (ofs + u > data_size)
1634			u = data_size - ofs;
1635		memcpy((u8*)vol->upcase + ofs, kaddr, u);
1636		ntfs_page_unmap(ni, upl, pl, FALSE);
1637	}
1638	lck_rw_unlock_shared(&ni->lock);
1639	(void)vnode_recycle(ni->vn);
1640	(void)vnode_put(ni->vn);
1641	vol->upcase_len = data_size >> NTFSCHAR_SIZE_SHIFT;
1642	ntfs_debug("Read %lld bytes from $UpCase (expected %lu bytes).",
1643			(long long)data_size, 64LU * 1024 * sizeof(ntfschar));
1644	lck_mtx_lock(&ntfs_lock);
1645	if (!ntfs_default_upcase) {
1646		ntfs_debug("Using volume specified $UpCase since default is "
1647				"not present.");
1648	} else {
1649		unsigned max_size;
1650
1651		max_size = ntfs_default_upcase_size >> NTFSCHAR_SIZE_SHIFT;
1652		if (max_size > vol->upcase_len)
1653			max_size = vol->upcase_len;
1654		for (u = 0; u < max_size; u++)
1655			if (vol->upcase[u] != ntfs_default_upcase[u])
1656				break;
1657		if (u == max_size) {
1658			OSFree(vol->upcase, data_size, ntfs_malloc_tag);
1659			vol->upcase = ntfs_default_upcase;
1660			vol->upcase_len = ntfs_default_upcase_size >>
1661					NTFSCHAR_SIZE_SHIFT;
1662			ntfs_default_upcase_users++;
1663			ntfs_debug("Volume specified $UpCase matches "
1664					"default.  Using default.");
1665		} else
1666			ntfs_debug("Using volume specified $UpCase since it "
1667					"does not match the default.");
1668	}
1669	lck_mtx_unlock(&ntfs_lock);
1670	ntfs_debug("Done.");
1671	return 0;
1672err:
1673	if (vol->upcase) {
1674		OSFree(vol->upcase, data_size, ntfs_malloc_tag);
1675		vol->upcase = NULL;
1676		vol->upcase_len = 0;
1677	}
1678	if (ni) {
1679		lck_rw_unlock_shared(&ni->lock);
1680		(void)vnode_recycle(ni->vn);
1681		(void)vnode_put(ni->vn);
1682	}
1683	lck_mtx_lock(&ntfs_lock);
1684	if (ntfs_default_upcase) {
1685		vol->upcase = ntfs_default_upcase;
1686		vol->upcase_len = ntfs_default_upcase_size >>
1687				NTFSCHAR_SIZE_SHIFT;
1688		ntfs_default_upcase_users++;
1689		ntfs_error(vol->mp, "Failed to load $UpCase from the volume "
1690				"(error %d).  Using NTFS driver default "
1691				"upcase table instead.", err);
1692		err = 0;
1693	} else
1694		ntfs_error(vol->mp, "Failed to initialize upcase table.");
1695	lck_mtx_unlock(&ntfs_lock);
1696	return err;
1697}
1698
1699/**
1700 * ntfs_attrdef_load - load the attribute definitions table for a volume
1701 * @vol:	ntfs volume whose attrdef to load
1702 *
1703 * Read the attribute definitions table and setup @vol->attrdef and
1704 * @vol->attrdef_size.
1705 *
1706 * Return 0 on success and errno on error.
1707 */
1708static errno_t ntfs_attrdef_load(ntfs_volume *vol)
1709{
1710	s64 ofs, data_size = 0;
1711	ntfs_inode *ni;
1712	upl_t upl;
1713	upl_page_info_array_t pl;
1714	u8 *kaddr;
1715	errno_t err;
1716	unsigned u;
1717
1718	ntfs_debug("Entering.");
1719	err = ntfs_inode_get(vol, FILE_AttrDef, TRUE, LCK_RW_TYPE_SHARED, &ni,
1720			vol->root_ni->vn, NULL);
1721	if (err) {
1722		ni = NULL;
1723		goto err;
1724	}
1725	/*
1726	 * The attribute definitions size must be above 0 and fit inside 31
1727	 * bits.
1728	 */
1729	lck_spin_lock(&ni->size_lock);
1730	data_size = ni->data_size;
1731	lck_spin_unlock(&ni->size_lock);
1732	if (data_size <= 0 || data_size > 0x7fffffff) {
1733		err = EINVAL;
1734		goto err;
1735	}
1736	vol->attrdef = OSMalloc(data_size, ntfs_malloc_tag);
1737	if (!vol->attrdef) {
1738		err = ENOMEM;
1739		goto err;
1740	}
1741	/*
1742	 * Read the whole attribute definitions table a page at a time and copy
1743	 * the contents over.
1744	 */
1745	u = PAGE_SIZE;
1746	for (ofs = 0; ofs < data_size; ofs += PAGE_SIZE) {
1747		err = ntfs_page_map(ni, ofs, &upl, &pl, &kaddr, FALSE);
1748		if (err)
1749			goto err;
1750		if (ofs + u > data_size)
1751			u = data_size - ofs;
1752		memcpy((u8*)vol->attrdef + ofs, kaddr, u);
1753		ntfs_page_unmap(ni, upl, pl, FALSE);
1754	}
1755	lck_rw_unlock_shared(&ni->lock);
1756	(void)vnode_recycle(ni->vn);
1757	(void)vnode_put(ni->vn);
1758	vol->attrdef_size = data_size;
1759	ntfs_debug("Done.  Read %lld bytes from $AttrDef.",
1760			(long long)data_size);
1761	return 0;
1762err:
1763	if (vol->attrdef) {
1764		OSFree(vol->attrdef, data_size, ntfs_malloc_tag);
1765		vol->attrdef = NULL;
1766	}
1767	if (ni) {
1768		lck_rw_unlock_shared(&ni->lock);
1769		(void)vnode_recycle(ni->vn);
1770		(void)vnode_put(ni->vn);
1771	}
1772	ntfs_error(vol->mp, "Failed to initialize attribute definitions "
1773			"table.");
1774	return err;
1775}
1776
1777/**
1778 * ntfs_volume_load - load the $Volume inode and setup the ntfs volume
1779 * @vol:	ntfs volume whose $Volume to load
1780 *
1781 * Load the $Volume system file and setup the volume flags (@vol->flags), the
1782 * volume major and minor version (@vol->major_ver and @vol->minor_ver,
1783 * respectively), and the volume name converted to decomposed utf-8 (@vol->name
1784 * and @vol->name_size).
1785 *
1786 * Return 0 on success and errno on error.
1787 */
1788static errno_t ntfs_volume_load(ntfs_volume *vol)
1789{
1790	ntfs_inode *ni;
1791	MFT_RECORD *m;
1792	ntfs_attr_search_ctx *ctx;
1793	ATTR_RECORD *a;
1794	VOLUME_INFORMATION *vi;
1795	errno_t err;
1796
1797	ntfs_debug("Entering.");
1798	err = ntfs_inode_attach(vol, FILE_Volume, &ni, vol->root_ni->vn);
1799	if (err) {
1800		ntfs_error(vol->mp, "Failed to load $Volume.");
1801		return err;
1802	}
1803	vol->vol_ni = ni;
1804	err = vnode_get(ni->vn);
1805	if (err) {
1806		ntfs_error(vol->mp, "Failed to get vnode for $Volume.");
1807		return err;
1808	}
1809	err = ntfs_mft_record_map(ni, &m);
1810	if (err) {
1811		ntfs_error(vol->mp, "Failed to map mft record for $Volume.");
1812		goto err;
1813	}
1814	ctx = ntfs_attr_search_ctx_get(ni, m);
1815	if (!ctx) {
1816		ntfs_error(vol->mp, "Failed to get attribute search context "
1817				"for $Volume.");
1818		err = ENOMEM;
1819		goto unm_err;
1820	}
1821	err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, AT_UNNAMED, 0, 0, NULL,
1822			0, ctx);
1823	a = ctx->a;
1824	if (err || a->non_resident || a->flags) {
1825		if (err)
1826			ntfs_error(vol->mp, "Failed to lookup volume "
1827					"information attribute in $Volume.");
1828		else {
1829info_err:
1830			ntfs_error(vol->mp, "Volume information attribute in "
1831					"$Volume is corrupt.  Run chkdsk.");
1832		}
1833		goto put_err;
1834	}
1835	vi = (VOLUME_INFORMATION*)((u8*)a + le16_to_cpu(a->value_offset));
1836	/* Some bounds checks. */
1837	if ((u8*)vi < (u8*)a || (u8*)vi + le32_to_cpu(a->value_length) >
1838			(u8*)a + le32_to_cpu(a->length) ||
1839			(u8*)a + le32_to_cpu(a->length) > (u8*)ctx->m +
1840			vol->mft_record_size)
1841		goto info_err;
1842	/* Copy the volume flags and version to the ntfs_volume structure. */
1843	vol->vol_flags = vi->flags;
1844	vol->major_ver = vi->major_ver;
1845	vol->minor_ver = vi->minor_ver;
1846	ntfs_attr_search_ctx_reinit(ctx);
1847	err = ntfs_attr_lookup(AT_VOLUME_NAME, AT_UNNAMED, 0, 0, NULL, 0, ctx);
1848	if (err == ENOENT) {
1849		ntfs_debug("Volume has no name, using empty string.");
1850no_name:
1851		/* No volume name, i.e. the name is "". */
1852		vol->name = OSMalloc(sizeof(char), ntfs_malloc_tag);
1853		if (!vol->name) {
1854			ntfs_error(vol->mp, "Failed to allocate memory for "
1855					"volume name.");
1856			err = ENOMEM;
1857			goto put_err;
1858		}
1859		vol->name[0] = '\0';
1860	} else {
1861		ntfschar *ntfs_name;
1862		u8 *utf8_name;
1863		size_t ntfs_size, utf8_size;
1864		signed res_size;
1865
1866		a = ctx->a;
1867		if (err || a->non_resident || a->flags) {
1868			if (err)
1869				ntfs_error(vol->mp, "Failed to lookup volume "
1870						"name attribute in $Volume.");
1871			else {
1872name_err:
1873				ntfs_error(vol->mp, "Volume name attribute in "
1874						"$Volume is corrupt.  Run "
1875						"chkdsk.");
1876			}
1877put_err:
1878			ntfs_attr_search_ctx_put(ctx);
1879			if (!err)
1880				err = EIO;
1881			goto unm_err;
1882		}
1883		ntfs_name = (ntfschar*)((u8*)a + le16_to_cpu(a->value_offset));
1884		ntfs_size = le32_to_cpu(a->value_length);
1885		if (!ntfs_size) {
1886			ntfs_debug("Volume has empty name, using empty "
1887					"string.");
1888			goto no_name;
1889		}
1890		/* Some bounds checks. */
1891		if ((u8*)ntfs_name < (u8*)a || (u8*)ntfs_name + ntfs_size >
1892				(u8*)a + le32_to_cpu(a->length) ||
1893				(u8*)a + le32_to_cpu(a->length) > (u8*)ctx->m +
1894				vol->mft_record_size)
1895			goto name_err;
1896		/* Convert the name to decomposed utf-8 (NUL terminated). */
1897		utf8_name = NULL;
1898		res_size = ntfs_to_utf8(vol, ntfs_name, ntfs_size, &utf8_name,
1899				&utf8_size);
1900		if (res_size < 0) {
1901			err = -res_size;
1902			ntfs_error(vol->mp, "Failed to convert volume name to "
1903					"decomposed UTF-8 (error %d).",
1904					(int)err);
1905			goto put_err;
1906		}
1907		vol->name = (char*)utf8_name;
1908		vol->name_size = utf8_size;
1909	}
1910	/* Get the volume UUID (GUID), if there is one. */
1911	ntfs_attr_search_ctx_reinit(ctx);
1912	err = ntfs_attr_lookup(AT_OBJECT_ID, AT_UNNAMED, 0, 0, NULL, 0, ctx);
1913	a = ctx->a;
1914	if (!err && !a->non_resident &&
1915	    le32_to_cpu(a->value_length) >= offsetof(OBJECT_ID_ATTR, extended_info)) {
1916		OBJECT_ID_ATTR *object_id;
1917		object_id = (OBJECT_ID_ATTR*)((u8*)a + le16_to_cpu(a->value_offset));
1918
1919		/*
1920		 * In the on-disk GUID, the first three fields are little
1921		 * endian.  We want to be able to return a big endian UUID.
1922		 * So we unconditionally swap those fields now.  We'll do this
1923		 * in a local copy of the GUID for safety.
1924		 */
1925		GUID guid = object_id->object_id;
1926		guid.data1 = OSSwapInt32(guid.data1);
1927		guid.data2 = OSSwapInt16(guid.data2);
1928		guid.data3 = OSSwapInt16(guid.data3);
1929		bcopy(&guid, vol->uuid, sizeof(guid));
1930		NVolSetHasGUID(vol);
1931	}
1932	ntfs_attr_search_ctx_put(ctx);
1933	ntfs_mft_record_unmap(ni);
1934	(void)vnode_put(ni->vn);
1935	ntfs_debug("Done.");
1936	return 0;
1937unm_err:
1938	ntfs_mft_record_unmap(ni);
1939err:
1940	(void)vnode_put(ni->vn);
1941	/* Obtained inode will be released by the call to ntfs_unmount(). */
1942	return err;
1943}
1944
1945#define NTFS_HIBERFIL_HEADER_SIZE	4096
1946
1947/**
1948 * ntfs_windows_hibernation_status_check - check if Windows is suspended
1949 * @vol:		ntfs volume to check
1950 * @is_hibernated:	pointer in which to return the hibernation status
1951 *
1952 * Check if Windows is hibernated on the ntfs volume @vol.  This is done by
1953 * looking for the file hiberfil.sys in the root directory of the volume.  If
1954 * the file is not present Windows is definitely not suspended and if it is
1955 * then the $LogFile will be marked dirty/still open so we will already have
1956 * caught that case.
1957 *
1958 * If hiberfil.sys exists and is less than 4kiB in size it means Windows is
1959 * definitely suspended (this volume is not the system volume).  Caveat:  on a
1960 * system with many volumes it is possible that the < 4kiB check is bogus but
1961 * for now this should do fine.
1962 *
1963 * If hiberfil.sys exists and is larger than 4kiB in size, we need to read the
1964 * hiberfil header (which is the first 4kiB).  If this begins with "hibr",
1965 * Windows is definitely suspended.  If it is completely full of zeroes,
1966 * Windows is definitely not hibernated.  Any other case is treated as if
1967 * Windows is suspended.  This caters for the above mentioned caveat of a
1968 * system with many volumes where no "hibr" magic would be present and there is
1969 * no zero header.
1970 *
1971 * If Windows is not hibernated on the volume *@is_hibernated is false and if
1972 * Windows is hibernated on the volume it is set to true.
1973 *
1974 * Return 0 on success and errno on error.  On error, *@is_hibernated is
1975 * undefined.
1976 */
1977static errno_t ntfs_windows_hibernation_status_check(ntfs_volume *vol,
1978		BOOL *is_hibernated)
1979{
1980	s64 data_size;
1981	MFT_REF mref;
1982	ntfs_dir_lookup_name *name = NULL;
1983	ntfs_inode *ni;
1984	upl_t upl = NULL;
1985	upl_page_info_array_t pl;
1986	le32 *kaddr, *kend;
1987	errno_t err;
1988	static const ntfschar hiberfil[13] = { const_cpu_to_le16('h'),
1989			const_cpu_to_le16('i'), const_cpu_to_le16('b'),
1990			const_cpu_to_le16('e'), const_cpu_to_le16('r'),
1991			const_cpu_to_le16('f'), const_cpu_to_le16('i'),
1992			const_cpu_to_le16('l'), const_cpu_to_le16('.'),
1993			const_cpu_to_le16('s'), const_cpu_to_le16('y'),
1994			const_cpu_to_le16('s'), 0 };
1995
1996	ntfs_debug("Entering.");
1997	*is_hibernated = FALSE;
1998	/*
1999	 * Find the inode number for the hibernation file by looking up the
2000	 * filename hiberfil.sys in the root directory.
2001	 */
2002	lck_rw_lock_shared(&vol->root_ni->lock);
2003	err = ntfs_lookup_inode_by_name(vol->root_ni, hiberfil, 12, &mref,
2004			&name);
2005	lck_rw_unlock_shared(&vol->root_ni->lock);
2006	if (err) {
2007		/* If the file does not exist, Windows is not hibernated. */
2008		if (err == ENOENT) {
2009			ntfs_debug("hiberfil.sys not present.  Windows is not "
2010					"hibernated on the volume.");
2011			return 0;
2012		}
2013		/* A real error occured. */
2014		ntfs_error(vol->mp, "Failed to find inode number for "
2015				"hiberfil.sys.");
2016		return err;
2017	}
2018	/* We do not care for the type of match that was found. */
2019	if (name)
2020		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2021	/* Get the inode. */
2022	err = ntfs_inode_get(vol, MREF(mref), FALSE, LCK_RW_TYPE_SHARED, &ni,
2023			vol->root_ni->vn, NULL);
2024	if (err) {
2025		ntfs_error(vol->mp, "Failed to load hiberfil.sys.");
2026		return err;
2027	}
2028	lck_spin_lock(&ni->size_lock);
2029	data_size = ni->data_size;
2030	lck_spin_unlock(&ni->size_lock);
2031	if (data_size < NTFS_HIBERFIL_HEADER_SIZE) {
2032		ntfs_debug("Hiberfil.sys is present and smaller than the "
2033				"hibernation header size.  Windows is "
2034				"hibernated on the volume.  This is not the "
2035				"system volume.");
2036		*is_hibernated = TRUE;
2037		goto put;
2038	}
2039	err = ntfs_page_map(ni, 0, &upl, &pl, (u8**)&kaddr, FALSE);
2040	if (err) {
2041		ntfs_error(vol->mp, "Failed to read from hiberfil.sys.");
2042		goto put;
2043	}
2044	if (*kaddr == const_cpu_to_le32(0x72626968)/*'hibr'*/) {
2045		ntfs_debug("Magic \"hibr\" found in hiberfil.sys.  Windows is "
2046				"hibernated on the volume.  This is the "
2047				"system volume.");
2048		*is_hibernated = TRUE;
2049		goto unm;
2050	}
2051	kend = kaddr + NTFS_HIBERFIL_HEADER_SIZE/sizeof(*kaddr);
2052	do {
2053		if (*kaddr) {
2054			ntfs_debug("hiberfil.sys is larger than 4kiB "
2055					"(0x%llx), does not contain the "
2056					"\"hibr\" magic, and does not have a "
2057					"zero header.  Windows is hibernated "
2058					"on the volume.  This is not the "
2059					"system volume.", data_size);
2060			*is_hibernated = TRUE;
2061			goto unm;
2062		}
2063	} while (++kaddr < kend);
2064	ntfs_debug("hiberfil.sys contains a zero header.  Windows is not "
2065			"hibernated on the volume.  This is the system "
2066			"volume.");
2067	/* @err is currently zero. */
2068unm:
2069	ntfs_page_unmap(ni, upl, pl, FALSE);
2070put:
2071	lck_rw_unlock_shared(&ni->lock);
2072	(void)vnode_recycle(ni->vn);
2073	(void)vnode_put(ni->vn);
2074	return err;
2075}
2076
2077/**
2078 * ntfs_volume_flags_write - write new flags to the volume information flags
2079 * @vol:	ntfs volume on which to modify the flags
2080 * @flags:	new flags value for the volume information flags
2081 *
2082 * Internal function.  You probably want to use ntfs_volume_flags_{set,clear}()
2083 * instead (see below).
2084 *
2085 * Replace the volume information flags on the volume @vol with the value
2086 * supplied in @flags.  Note, this overwrites the volume information flags, so
2087 * make sure to combine the flags you want to modify with the old flags and use
2088 * the result when calling ntfs_volume_flags_write().
2089 *
2090 * Return 0 on success and errno on error.
2091 */
2092static errno_t ntfs_volume_flags_write(ntfs_volume *vol,
2093		const VOLUME_FLAGS flags)
2094{
2095	ntfs_inode *ni;
2096	MFT_RECORD *m;
2097	VOLUME_INFORMATION *vi;
2098	ntfs_attr_search_ctx *ctx;
2099	errno_t err;
2100
2101	ntfs_debug("Entering, old flags = 0x%x, new flags = 0x%x.",
2102			le16_to_cpu(vol->vol_flags), le16_to_cpu(flags));
2103	if (vol->vol_flags == flags)
2104		goto done;
2105	ni = vol->vol_ni;
2106	if (!ni)
2107		panic("%s(): Volume inode is not loaded.\n", __FUNCTION__);
2108	err = ntfs_mft_record_map(ni, &m);
2109	if (err)
2110		goto err;
2111	ctx = ntfs_attr_search_ctx_get(ni, m);
2112	if (!ctx) {
2113		err = ENOMEM;
2114		goto put;
2115	}
2116	err = ntfs_attr_lookup(AT_VOLUME_INFORMATION, AT_UNNAMED, 0, 0, NULL,
2117			0, ctx);
2118	if (err)
2119		goto put;
2120	vi = (VOLUME_INFORMATION*)((u8*)ctx->a +
2121			le16_to_cpu(ctx->a->value_offset));
2122	vol->vol_flags = vi->flags = flags;
2123	/* Mark the mft record dirty to ensure it gets written out. */
2124	NInoSetMrecNeedsDirtying(ctx->ni);
2125	ntfs_attr_search_ctx_put(ctx);
2126	ntfs_mft_record_unmap(ni);
2127done:
2128	ntfs_debug("Done.");
2129	return 0;
2130put:
2131	if (ctx)
2132		ntfs_attr_search_ctx_put(ctx);
2133	ntfs_mft_record_unmap(ni);
2134err:
2135	ntfs_error(vol->mp, "Failed with error code %d.", err);
2136	return err;
2137}
2138
2139/**
2140 * ntfs_volume_flags_set - set bits in the volume information flags
2141 * @vol:	ntfs volume on which to modify the flags
2142 * @flags:	flags to set on the volume
2143 *
2144 * Set the bits in @flags in the volume information flags on the volume @vol.
2145 *
2146 * Return 0 on success and errno on error.
2147 */
2148static inline errno_t ntfs_volume_flags_set(ntfs_volume *vol,
2149		VOLUME_FLAGS flags)
2150{
2151	flags &= VOLUME_FLAGS_MASK;
2152	return ntfs_volume_flags_write(vol, vol->vol_flags | flags);
2153}
2154
2155/**
2156 * ntfs_volume_flags_clear - clear bits in the volume information flags
2157 * @vol:	ntfs volume on which to modify the flags
2158 * @flags:	flags to clear on the volume
2159 *
2160 * Clear the bits in @flags in the volume information flags on the volume @vol.
2161 *
2162 * Return 0 on success and errno on error.
2163 */
2164static inline errno_t ntfs_volume_flags_clear(ntfs_volume *vol,
2165		VOLUME_FLAGS flags)
2166{
2167	flags &= VOLUME_FLAGS_MASK;
2168	return ntfs_volume_flags_write(vol, vol->vol_flags & ~flags);
2169}
2170
2171/**
2172 * ntfs_secure_load - load and setup the security file for a volume
2173 * @vol:	ntfs volume whose security file to load
2174 *
2175 * Return 0 on success and errno on error.
2176 */
2177static errno_t ntfs_secure_load(ntfs_volume *vol)
2178{
2179	ntfs_inode *ni;
2180	MFT_RECORD *m;
2181	ntfs_attr_search_ctx *ctx;
2182	FILENAME_ATTR *fn;
2183	errno_t err;
2184	static const ntfschar Secure[8] = { const_cpu_to_le16('$'),
2185			const_cpu_to_le16('S'), const_cpu_to_le16('e'),
2186			const_cpu_to_le16('c'), const_cpu_to_le16('u'),
2187			const_cpu_to_le16('r'), const_cpu_to_le16('e'), 0 };
2188	static ntfschar SDS[5] = { const_cpu_to_le16('$'),
2189			const_cpu_to_le16('S'), const_cpu_to_le16('D'),
2190			const_cpu_to_le16('S'), 0 };
2191	static ntfschar SDH[5] = { const_cpu_to_le16('$'),
2192			const_cpu_to_le16('S'), const_cpu_to_le16('D'),
2193			const_cpu_to_le16('H'), 0 };
2194	static ntfschar SII[5] = { const_cpu_to_le16('$'),
2195			const_cpu_to_le16('S'), const_cpu_to_le16('I'),
2196			const_cpu_to_le16('I'), 0 };
2197
2198	ntfs_debug("Entering.");
2199	/* Get the security descriptors inode. */
2200	err = ntfs_inode_attach(vol, FILE_Secure, &ni, vol->root_ni->vn);
2201	if (err) {
2202		ntfs_error(vol->mp, "Failed to load $Secure.");
2203		return err;
2204	}
2205	vol->secure_ni = ni;
2206	/*
2207	 * Check this really is $Secure rather than $Quota remaining from a
2208	 * partially converted ntfs 1.x volume.
2209	 */
2210	err = ntfs_mft_record_map(ni, &m);
2211	if (err) {
2212		ntfs_error(vol->mp, "Failed to map mft record for $Secure.");
2213		return err;
2214	}
2215	if (!(m->flags & MFT_RECORD_IN_USE)) {
2216not_in_use:
2217		ntfs_debug("Done ($Secure is not in use).");
2218		ntfs_mft_record_unmap(ni);
2219		NVolSetUseSDAttr(vol);
2220		return 0;
2221	}
2222	ctx = ntfs_attr_search_ctx_get(ni, m);
2223	if (!ctx) {
2224		ntfs_error(vol->mp, "Failed to allocate search context for "
2225				"$Secure.");
2226		ntfs_mft_record_unmap(ni);
2227		return ENOMEM;
2228	}
2229	err = ntfs_attr_lookup(AT_FILENAME, AT_UNNAMED, 0, 0, NULL, 0, ctx);
2230	if (err) {
2231		ntfs_error(vol->mp, "Failed to look up filename attribute in "
2232				"$Secure (error %d).", err);
2233		ntfs_attr_search_ctx_put(ctx);
2234		ntfs_mft_record_unmap(ni);
2235		return err;
2236	}
2237	fn = (FILENAME_ATTR*)((u8*)ctx->a + le16_to_cpu(ctx->a->value_offset));
2238	if (!ntfs_are_names_equal(fn->filename, fn->filename_length,
2239			Secure, 7, NVolCaseSensitive(vol), NULL, 0)) {
2240		ntfs_attr_search_ctx_put(ctx);
2241		goto not_in_use;
2242	}
2243	ntfs_attr_search_ctx_put(ctx);
2244	ntfs_mft_record_unmap(ni);
2245	ntfs_debug("Verified identity of $Secure system file.");
2246	/* Get the $SDS data attribute. */
2247	err = ntfs_attr_inode_attach(vol->secure_ni, AT_DATA, SDS, 4,
2248			&vol->secure_sds_ni);
2249	if (err) {
2250		ntfs_error(vol->mp, "Failed to load $Secure/$SDS data "
2251				"attribute (error %d).", err);
2252		return err;
2253	}
2254	/* Get the $SDH index attribute. */
2255	err = ntfs_index_inode_attach(vol->secure_ni, SDH, 4,
2256			&vol->secure_sdh_ni);
2257	if (err) {
2258		ntfs_error(vol->mp, "Failed to load $Secure/$SDH index "
2259				"(error %d).", err);
2260		return err;
2261	}
2262	/* Get the $SII index attribute. */
2263	err = ntfs_index_inode_attach(vol->secure_ni, SII, 4,
2264			&vol->secure_sii_ni);
2265	if (err) {
2266		ntfs_error(vol->mp, "Failed to load $Secure/$SII index "
2267				"(error %d).", err);
2268		return err;
2269	}
2270	/*
2271	 * We need to find the highest security_id on the volume by finding the
2272	 * last entry in the $SII index and record it so we know which
2273	 * security_id to assign to the next security descriptor.
2274	 */
2275	err = ntfs_next_security_id_init(vol, &vol->next_security_id);
2276	if (err) {
2277		ntfs_error(vol->mp, "Failed to determine next security_id "
2278				"(error %d).", err);
2279		return err;
2280	}
2281	// TODO: Initialize security.
2282	//
2283	// We need to look for our default security descriptors (for creating
2284	// directories and files) and if present record their security_ids and
2285	// set the appropriate flag on the volume.  If not present they will be
2286	// added when the first file/directory is created and the volume flag
2287	// will be set then.  (Do we need two flags, one for files and one for
2288	// directories?)
2289	//
2290	// Set up our default security descriptors for files and directories
2291	// so they can be used when creating files/directories on volumes
2292	// without $Secure and in the case that we fail to add our security
2293	// descriptors to $Secure in which case we just place them in the
2294	// old-style security descriptor attribute and do NVolSetUseSDAttr().
2295	// FIXME: We then need to use old-style standard information attribute!
2296	//
2297	// For now just always force creation of security descriptor attributes.
2298	NVolSetUseSDAttr(vol);
2299	ntfs_debug("Done.");
2300	return 0;
2301}
2302
2303/**
2304 * ntfs_objid_load - load and setup the object id file for a volume if present
2305 * @vol:	ntfs volume whose object id file to load
2306 *
2307 * Return 0 on success and errno on error.  If $ObjId is not present, we leave
2308 * vol->objid_ni as NULL and return success.
2309 */
2310static errno_t ntfs_objid_load(ntfs_volume *vol)
2311{
2312	MFT_REF mref;
2313	ntfs_inode *ni;
2314	ntfs_dir_lookup_name *name = NULL;
2315	int err;
2316	static const ntfschar ObjId[7] = { const_cpu_to_le16('$'),
2317			const_cpu_to_le16('O'), const_cpu_to_le16('b'),
2318			const_cpu_to_le16('j'), const_cpu_to_le16('I'),
2319			const_cpu_to_le16('d'), 0 };
2320	static ntfschar O[3] = { const_cpu_to_le16('$'),
2321			const_cpu_to_le16('O'), 0 };
2322
2323	ntfs_debug("Entering.");
2324	/*
2325	 * Find the inode number for the object id file by looking up the
2326	 * filename $ObjId in the extended system files directory $Extend.
2327	 */
2328	lck_rw_lock_shared(&vol->extend_ni->lock);
2329	err = ntfs_lookup_inode_by_name(vol->extend_ni, ObjId, 6, &mref,
2330			&name);
2331	lck_rw_unlock_shared(&vol->extend_ni->lock);
2332	if (err) {
2333		/*
2334		 * If the file does not exist, there are no object ids in use
2335		 * on this volume, just return success.
2336		 */
2337		if (err == ENOENT) {
2338			ntfs_debug("$ObjId not present.  Volume does not have "
2339					"any object ids present.");
2340			return 0;
2341		}
2342		/* A real error occured. */
2343		ntfs_error(vol->mp, "Failed to find inode number for $ObjId.");
2344		return err;
2345	}
2346	/* We do not care for the type of match that was found. */
2347	if (name)
2348		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2349	/* Get the inode. */
2350	err = ntfs_inode_attach(vol, MREF(mref), &ni, vol->extend_ni->vn);
2351	if (err) {
2352		ntfs_error(vol->mp, "Failed to load $ObjId.");
2353		return err;
2354	}
2355	vol->objid_ni = ni;
2356	/* Get the $O index inode. */
2357	err = ntfs_index_inode_attach(vol->objid_ni, O, 2, &vol->objid_o_ni);
2358	if (err) {
2359		ntfs_error(vol->mp, "Failed to load $ObjId/$O index (error "
2360				"%d).", err);
2361		return err;
2362	}
2363	ntfs_debug("Done.");
2364	return 0;
2365}
2366
2367/**
2368 * ntfs_quota_load - load and setup the quota file for a volume if present
2369 * @vol:	ntfs volume whose quota file to load
2370 *
2371 * Return 0 on success and errno on error.  If $Quota is not present, we leave
2372 * vol->quota_ni as NULL and return success.
2373 */
2374static errno_t ntfs_quota_load(ntfs_volume *vol)
2375{
2376	MFT_REF mref;
2377	ntfs_dir_lookup_name *name = NULL;
2378	int err;
2379	static const ntfschar Quota[7] = { const_cpu_to_le16('$'),
2380			const_cpu_to_le16('Q'), const_cpu_to_le16('u'),
2381			const_cpu_to_le16('o'), const_cpu_to_le16('t'),
2382			const_cpu_to_le16('a'), 0 };
2383	static ntfschar Q[3] = { const_cpu_to_le16('$'),
2384			const_cpu_to_le16('Q'), 0 };
2385
2386	ntfs_debug("Entering.");
2387	/*
2388	 * Find the inode number for the quota file by looking up the filename
2389	 * $Quota in the extended system files directory $Extend.
2390	 */
2391	lck_rw_lock_shared(&vol->extend_ni->lock);
2392	err = ntfs_lookup_inode_by_name(vol->extend_ni, Quota, 6, &mref,
2393			&name);
2394	lck_rw_unlock_shared(&vol->extend_ni->lock);
2395	if (err) {
2396		/*
2397		 * If the file does not exist, quotas are disabled and have
2398		 * never been enabled on this volume, just return success.
2399		 */
2400		if (err == ENOENT) {
2401			ntfs_debug("$Quota not present.  Volume does not have "
2402					"quotas enabled.");
2403			/*
2404			 * No need to try to set quotas out of date if they are
2405			 * not enabled.
2406			 */
2407			NVolSetQuotaOutOfDate(vol);
2408			return 0;
2409		}
2410		/* A real error occured. */
2411		ntfs_error(vol->mp, "Failed to find inode number for $Quota.");
2412		return err;
2413	}
2414	/* We do not care for the type of match that was found. */
2415	if (name)
2416		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2417	/* Get the inode. */
2418	err = ntfs_inode_attach(vol, MREF(mref), &vol->quota_ni,
2419			vol->extend_ni->vn);
2420	if (err) {
2421		ntfs_error(vol->mp, "Failed to load $Quota.");
2422		return err;
2423	}
2424	/* Get the $Q index inode. */
2425	err = ntfs_index_inode_attach(vol->quota_ni, Q, 2, &vol->quota_q_ni);
2426	if (err) {
2427		ntfs_error(vol->mp, "Failed to load $Quota/$Q index (error "
2428				"%d).", err);
2429		return err;
2430	}
2431	ntfs_debug("Done.");
2432	return 0;
2433}
2434
2435/**
2436 * ntfs_usnjrnl_load - load and setup the transaction log if present
2437 * @vol:	ntfs volume whose usnjrnl file to load
2438 *
2439 * Return 0 on success and errno on error.  $UsnJrnl is not present or in the
2440 * process of being disabled, we set NVolUsnJrnlStamped() and return success.
2441 *
2442 * If the $UsnJrnl $DATA/$J attribute has a size equal to the lowest valid usn,
2443 * i.e. transaction logging has only just been enabled or the journal has been
2444 * stamped and nothing has been logged since, we also set NVolUsnJrnlStamped()
2445 * and return success.
2446 */
2447static errno_t ntfs_usnjrnl_load(ntfs_volume *vol)
2448{
2449	s64 data_size;
2450	MFT_REF mref;
2451	ntfs_inode *ni, *max_ni;
2452	ntfs_dir_lookup_name *name = NULL;
2453	upl_t upl;
2454	upl_page_info_array_t pl;
2455	USN_HEADER *uh;
2456	errno_t err;
2457	static const ntfschar UsnJrnl[9] = { const_cpu_to_le16('$'),
2458			const_cpu_to_le16('U'), const_cpu_to_le16('s'),
2459			const_cpu_to_le16('n'), const_cpu_to_le16('J'),
2460			const_cpu_to_le16('r'), const_cpu_to_le16('n'),
2461			const_cpu_to_le16('l'), 0 };
2462	static ntfschar Max[5] = { const_cpu_to_le16('$'),
2463			const_cpu_to_le16('M'), const_cpu_to_le16('a'),
2464			const_cpu_to_le16('x'), 0 };
2465	static ntfschar J[3] = { const_cpu_to_le16('$'),
2466			const_cpu_to_le16('J'), 0 };
2467
2468	ntfs_debug("Entering.");
2469	/*
2470	 * Find the inode number for the transaction log file by looking up the
2471	 * filename $UsnJrnl in the extended system files directory $Extend.
2472	 */
2473	lck_rw_lock_shared(&vol->extend_ni->lock);
2474	err = ntfs_lookup_inode_by_name(vol->extend_ni, UsnJrnl, 8, &mref,
2475			&name);
2476	lck_rw_unlock_shared(&vol->extend_ni->lock);
2477	if (err) {
2478		/*
2479		 * If the file does not exist, transaction logging is disabled,
2480		 * just return success.
2481		 */
2482		if (err == ENOENT) {
2483			ntfs_debug("$UsnJrnl not present.  Volume does not "
2484					"have transaction logging enabled.");
2485not_enabled:
2486			/*
2487			 * No need to try to stamp the transaction log if
2488			 * transaction logging is not enabled.
2489			 */
2490			NVolSetUsnJrnlStamped(vol);
2491			return 0;
2492		}
2493		/* A real error occured. */
2494		ntfs_error(vol->mp, "Failed to find inode number for "
2495				"$UsnJrnl.");
2496		return err;
2497	}
2498	/* We do not care for the type of match that was found. */
2499	if (name)
2500		OSFree(name, sizeof(*name), ntfs_malloc_tag);
2501	/* Get the inode. */
2502	err = ntfs_inode_attach(vol, MREF(mref), &ni, vol->extend_ni->vn);
2503	if (err) {
2504		ntfs_error(vol->mp, "Failed to load $UsnJrnl.");
2505		return err;
2506	}
2507	vol->usnjrnl_ni = ni;
2508	/*
2509	 * If the transaction log is in the process of being deleted, we can
2510	 * ignore it.
2511	 */
2512	if (vol->vol_flags & VOLUME_DELETE_USN_UNDERWAY) {
2513		ntfs_debug("$UsnJrnl in the process of being disabled.  "
2514				"Volume does not have transaction logging "
2515				"enabled.");
2516		goto not_enabled;
2517	}
2518	/* Get the $DATA/$Max attribute. */
2519	err = ntfs_attr_inode_attach(vol->usnjrnl_ni, AT_DATA, Max, 4, &max_ni);
2520	if (err) {
2521		ntfs_error(vol->mp, "Failed to load $UsnJrnl/$DATA/$Max "
2522				"attribute.");
2523		return err;
2524	}
2525	vol->usnjrnl_max_ni = max_ni;
2526	lck_spin_lock(&max_ni->size_lock);
2527	data_size = max_ni->data_size;
2528	lck_spin_unlock(&max_ni->size_lock);
2529	if (data_size < (s64)sizeof(USN_HEADER)) {
2530		ntfs_error(vol->mp, "Found corrupt $UsnJrnl/$DATA/$Max "
2531				"attribute (size is 0x%llx but should be at "
2532				"least 0x%x bytes).",
2533				(unsigned long long)data_size,
2534				(unsigned)sizeof(USN_HEADER));
2535		return EIO;
2536	}
2537	err = vnode_get(max_ni->vn);
2538	if (err) {
2539		ntfs_error(vol->mp, "Failed to get vnode for "
2540				"$UsnJrnl/$DATA/$Max.");
2541		return err;
2542	}
2543	lck_rw_lock_shared(&max_ni->lock);
2544	/* Read the USN_HEADER from $DATA/$Max. */
2545	err = ntfs_page_map(max_ni, 0, &upl, &pl, (u8**)&uh, FALSE);
2546	if (err) {
2547		ntfs_error(vol->mp, "Failed to read from $UsnJrnl/$DATA/$Max "
2548				"attribute.");
2549		goto put_err;
2550	}
2551	/* Sanity check $Max. */
2552	if (sle64_to_cpu(uh->allocation_delta) >
2553			sle64_to_cpu(uh->maximum_size)) {
2554		ntfs_error(vol->mp, "Allocation delta (0x%llx) exceeds "
2555				"maximum size (0x%llx).  $UsnJrnl is corrupt.",
2556				(unsigned long long)
2557				sle64_to_cpu(uh->allocation_delta),
2558				(unsigned long long)
2559				sle64_to_cpu(uh->maximum_size));
2560		err = EIO;
2561		goto unm_err;
2562	}
2563	/* Get the $DATA/$J attribute. */
2564	err = ntfs_attr_inode_attach(vol->usnjrnl_ni, AT_DATA, J, 2, &ni);
2565	if (err) {
2566		ntfs_error(vol->mp, "Failed to load $UsnJrnl/$DATA/$J "
2567				"attribute.");
2568		goto unm_err;
2569	}
2570	vol->usnjrnl_j_ni = ni;
2571	/* Verify $J is non-resident and sparse. */
2572	if (!NInoNonResident(ni) || !NInoSparse(ni)) {
2573		ntfs_error(vol->mp, "$UsnJrnl/$DATA/$J attribute is resident "
2574				"and/or not sparse.");
2575		err = EIO;
2576		goto unm_err;
2577	}
2578	/*
2579	 * If the transaction log has been stamped and nothing has been written
2580	 * to it since, we do not need to stamp it.
2581	 */
2582	lck_spin_lock(&ni->size_lock);
2583	data_size = ni->data_size;
2584	lck_spin_unlock(&ni->size_lock);
2585	if (sle64_to_cpu(uh->lowest_valid_usn) >= data_size) {
2586		if (sle64_to_cpu(uh->lowest_valid_usn) == data_size) {
2587			ntfs_page_unmap(max_ni, upl, pl, FALSE);
2588			lck_rw_unlock_shared(&max_ni->lock);
2589			(void)vnode_put(max_ni->vn);
2590			ntfs_debug("$UsnJrnl is enabled but nothing has been "
2591					"logged since it was last stamped.  "
2592					"Treating this as if the volume does "
2593					"not have transaction logging "
2594					"enabled.");
2595			goto not_enabled;
2596		}
2597		ntfs_error(vol->mp, "$UsnJrnl has lowest valid usn (0x%llx) "
2598				"which is out of bounds (0x%llx).  $UsnJrnl "
2599				"is corrupt.", (unsigned long long)
2600				sle64_to_cpu(uh->lowest_valid_usn),
2601				(unsigned long long)data_size);
2602		err = EIO;
2603		goto unm_err;
2604	}
2605	ntfs_debug("Done.");
2606unm_err:
2607	ntfs_page_unmap(max_ni, upl, pl, FALSE);
2608put_err:
2609	lck_rw_unlock_shared(&max_ni->lock);
2610	(void)vnode_put(max_ni->vn);
2611	return err;
2612}
2613
2614/**
2615 * ntfs_system_inodes_get - load the system files at mount time
2616 * @vol:	ntfs volume being mounted
2617 *
2618 * Obtain the ntfs inodes corresponding to the system files and directories
2619 * needed for operation of a mounted ntfs file system and process their data
2620 * setting up any relevant in-memory structures in the process.
2621 *
2622 * It is assumed that ntfs_mft_inode_get() has already been called successfully
2623 * thus allowing us to simply use ntfs_inode_get(), ntfs_mft_record_map(), and
2624 * friends to do the work rather than having to do things by hand as is the
2625 * case when bootstrapping the volume in ntfs_mft_inode_get().
2626 *
2627 * Return 0 on success and errno on error.
2628 */
static errno_t ntfs_system_inodes_get(ntfs_volume *vol)
{
	s64 size;
	ntfs_inode *root_ni, *ni;
	vnode_t root_vn;
	errno_t err;
	BOOL is_hibernated;

	/*
	 * Overview of the code below: the root directory, the mft mirror, the
	 * mft bitmap, the upcase table, the attribute definitions table, the
	 * cluster bitmap, the volume information, and the logfile are set up
	 * in that order, followed for NTFS 3.0+ volumes by the security
	 * descriptors file, the $Extend directory, $ObjId, $Quota, and
	 * $UsnJrnl.
	 *
	 * Problems that need not abort the mount are all handled with the same
	 * policy.  On a read-write mount, the mount is failed if
	 * ON_ERRORS_FAIL_DIRTY is set or if neither ON_ERRORS_REMOUNT_RO nor
	 * ON_ERRORS_CONTINUE is set; otherwise the mount is switched to
	 * read-only.  On a mount that is already read-only only a warning is
	 * emitted.  The blocks differ in the error code they return and in
	 * whether they call NVolSetErrors() afterwards.
	 */
	ntfs_debug("Entering.");
	/*
	 * Get the root directory inode so we can do path lookups and so we can
	 * supply its vnode as the parent vnode for the other system vnodes.
	 */
	err = ntfs_inode_attach(vol, FILE_root, &root_ni, NULL);
	if (err) {
		ntfs_error(vol->mp, "Failed to load root directory.");
		goto err;
	}
	vol->root_ni = root_ni;
	root_vn = root_ni->vn;
	/*
	 * We already have the $MFT inode and vnode.  Add the root directory
	 * vnode as the parent vnode.  We also take an internal reference on
	 * the root inode because vnode_update_identity() takes a reference on
	 * the root vnode.
	 */
	vnode_update_identity(vol->mft_ni->vn, root_vn, NULL, 0, 0,
			VNODE_UPDATE_PARENT);
	OSIncrementAtomic(&root_ni->nr_refs);
	/*
	 * Get mft mirror inode and compare the contents of $MFT and $MFTMirr,
	 * then deal with any errors.
	 */
	err = ntfs_mft_mirror_load(vol);
	if (!err)
		err = ntfs_mft_mirror_check(vol);
	if (err) {
		static const char es1a[] = "Failed to load $MFTMirr";
		static const char es1b[] = "$MFTMirr does not match $MFT";
		static const char es2[] = ".  Run ntfsfix and/or chkdsk.";
		const char *es1;

		/*
		 * A NULL @vol->mftmirr_ni selects the "failed to load" message,
		 * otherwise the "does not match" message is used.
		 */
		es1 = !vol->mftmirr_ni ? es1a : es1b;
		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EIO;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				err = EIO;
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		/* This will prevent a read-write remount. */
		NVolSetErrors(vol);
	}
	/*
	 * Get mft bitmap attribute inode and again, take an internal reference
	 * on the root inode to balance the reference taken on the root vnode
	 * in ntfs_attr_inode_get() and also take a reference on the vnode as
	 * we will be holding onto it for the duration of the mount.  Finally,
	 * we also release the iocount reference.  It will be taken as and when
	 * required when accessing the $MFT/$BITMAP attribute.
	 */
	err = ntfs_attr_inode_attach(vol->mft_ni, AT_BITMAP, NULL, 0,
			&vol->mftbmp_ni);
	if (err) {
		ntfs_error(vol->mp, "Failed to load $MFT/$BITMAP attribute.");
		goto err;
	}
	NInoSetSparseDisabled(vol->mftbmp_ni);
	/*
	 * If the mft bitmap attribute is non-resident (which it must be), read
	 * in the complete runlist.  This simplifies things when we need to
	 * allocate mft records as it guarantees that accessing the mft bitmap
	 * will not cause any of its mft records to be mapped.
	 */
	err = ntfs_attr_map_runlist(vol->mftbmp_ni);
	if (err) {
		ntfs_error(vol->mp, "Failed to map runlist of $MFT/$BITMAP "
				"attribute.");
		goto err;
	}
	/* Read upcase table and setup @vol->upcase and @vol->upcase_len. */
	err = ntfs_upcase_load(vol);
	if (err)
		goto err;
	/*
	 * Read attribute definitions table and setup @vol->attrdef and
	 * @vol->attrdef_size.
	 */
	err = ntfs_attrdef_load(vol);
	if (err)
		goto err;
	/* Get the cluster allocation bitmap inode and verify the size. */
	err = ntfs_inode_attach(vol, FILE_Bitmap, &ni, root_vn);
	if (err) {
		ntfs_error(vol->mp, "Failed to load $Bitmap.");
		goto err;
	}
	NInoSetSparseDisabled(ni);
	vol->lcnbmp_ni = ni;
	/* Sample the data size under the inode size lock. */
	lck_spin_lock(&ni->size_lock);
	size = ni->data_size;
	lck_spin_unlock(&ni->size_lock);
	/*
	 * The check below requires one bit per cluster, i.e. at least
	 * (nr_clusters + 7) / 8 bytes of bitmap data.
	 */
	if ((vol->nr_clusters + 7) >> 3 > size) {
		ntfs_error(vol->mp, "$Bitmap (%lld) is shorter than required "
				"length of volume (%lld) as specified in the "
				"boot sector.  Run chkdsk.", (long long)size,
				(long long)(vol->nr_clusters + 7) >> 3);
		err = EIO;
		goto err;
	}
	/*
	 * If the cluster bitmap data attribute is non-resident, read in the
	 * complete runlist.  This simplifies things when we need to allocate
	 * mft records as it guarantees that accessing the cluster bitmap will
	 * not cause any of its mft records to be mapped.
	 */
	err = ntfs_attr_map_runlist(ni);
	if (err) {
		ntfs_error(vol->mp, "Failed to map runlist of $Bitmap/$DATA "
				"attribute.");
		goto err;
	}
	/*
	 * Get the volume inode and setup our cache of the volume flags and
	 * version as well as of the volume name in decomposed utf-8.
	 */
	err = ntfs_volume_load(vol);
	if (err)
		goto err;
	printf("NTFS volume name %s, version %u.%u.\n", vol->name,
			(unsigned)vol->major_ver, (unsigned)vol->minor_ver);
	if (vol->major_ver < 3 && NVolSparseEnabled(vol)) {
		ntfs_warning(vol->mp, "Disabling sparse support due to NTFS "
				"volume version %u.%u (need at least "
				"version 3.0).", (unsigned)vol->major_ver,
				(unsigned)vol->minor_ver);
		NVolClearSparseEnabled(vol);
	}
	/* A dirty volume is only warned about but it does set NVolErrors(). */
	if (vol->vol_flags & VOLUME_IS_DIRTY) {
		ntfs_warning(vol->mp, "NTFS volume is dirty.  You should "
				"unmount it and run chkdsk.");
		NVolSetErrors(vol);
	}
	/* Make sure that no unsupported volume flags are set. */
	if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
		static const char es1[] = "Volume has unsupported flags set";
		static const char es2[] = ".  To fix this problem boot into "
				"Windows, run chkdsk c: /f /v /x from the "
				"command prompt (replace c: with the drive "
				"letter of this volume), then reboot into Mac "
				"OS X and mount the volume again.";

		ntfs_warning(vol->mp, "Unsupported volume flags 0x%x "
				"encountered.",
				(unsigned)le16_to_cpu(vol->vol_flags));
		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EINVAL;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				err = EINVAL;
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		/*
		 * Do not set NVolErrors() because ntfs_remount() re-checks the
		 * flags which we need to do in case any flags have changed.
		 */
	}
	/*
	 * Get the inode for the logfile, check it, and determine if the volume
	 * was shutdown cleanly, then deal with any errors.
	 */
	err = ntfs_inode_attach(vol, FILE_LogFile, &ni, root_vn);
	if (!err) {
		RESTART_PAGE_HEADER *rp;

		NInoSetSparseDisabled(ni);
		vol->logfile_ni = ni;
		err = ntfs_logfile_check(ni, &rp);
		if (!err) {
			/*
			 * An unclean logfile is turned into EINVAL so that it
			 * is handled by the common error block below.
			 */
			if (!ntfs_logfile_is_clean(ni, rp))
				err = EINVAL;
			/*
			 * Free the restart page handed back by
			 * ntfs_logfile_check(); the size passed to OSFree() is
			 * read from the page's own system_page_size field.
			 */
			if (rp)
				OSFree(rp, le32_to_cpu(rp->system_page_size),
						ntfs_malloc_tag);
		}
	}
	if (err) {
		static const char es1a[] = "Failed to load $LogFile";
		static const char es1b[] = "$LogFile is not clean";
		static const char es2[] = ".  Mount in Windows.";
		const char *es1;

		/*
		 * @vol->logfile_ni is only set above once the attach
		 * succeeded, so it distinguishes a load failure from a failed
		 * check or an unclean logfile.
		 */
		es1 = !vol->logfile_ni ? es1a : es1b;
		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EROFS;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				/* @err still holds the error set above. */
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		NVolSetErrors(vol);
	}
	/*
	 * Check if Windows is suspended to disk on the target volume.  If it
	 * is hibernated, we must not write *anything* to the disk so set
	 * NVolErrors() without setting the dirty volume flag and mount
	 * read-only.  This will prevent read-write remounting and it will also
	 * prevent all writes.
	 */
	err = ntfs_windows_hibernation_status_check(vol, &is_hibernated);
	/* @is_hibernated is only looked at if the check itself succeeded. */
	if (err || is_hibernated) {
		static const char es1a[] = "Failed to determine if Windows is "
				"hibernated";
		static const char es1b[] = "Windows is hibernated";
		static const char es2[] = ".  Run chkdsk.";
		const char *es1;

		es1 = err ? es1a : es1b;
		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EROFS;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				/*
				 * If we got here only because Windows is
				 * hibernated there is no error code yet so
				 * supply one.
				 */
				if (!err)
					err = EINVAL;
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		NVolSetErrors(vol);
	}
	/* If (still) a read-write mount, mark the volume dirty. */
	if (!NVolReadOnly(vol) &&
			(err = ntfs_volume_flags_set(vol, VOLUME_IS_DIRTY))) {
		static const char es1[] = "Failed to set dirty bit in volume "
				"information flags";
		static const char es2[] = ".  Run chkdsk.";

		/* Convert to a read-only mount. */
		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
			ntfs_error(vol->mp, "%s%s", es1, es2);
			err = EIO;
			goto err;
		}
		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
				ON_ERRORS_CONTINUE))) {
			ntfs_error(vol->mp, "%s and neither on_errors="
					"continue nor on_errors=remount-ro "
					"was specified%s", es1, es2);
			/* Return the error from ntfs_volume_flags_set(). */
			goto err;
		}
		vfs_setflags(vol->mp, MNT_RDONLY);
		NVolSetReadOnly(vol);
		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
		/*
		 * Do not set NVolErrors() because ntfs_remount() might manage
		 * to set the dirty flag in which case all would be well.
		 */
	}
	/* If (still) a read-write mount, empty the logfile. */
	if (!NVolReadOnly(vol) &&
			(err = ntfs_logfile_empty(vol->logfile_ni))) {
		static const char es1[] = "Failed to empty journal $LogFile";
		static const char es2[] = ".  Mount in Windows.";

		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
			ntfs_error(vol->mp, "%s%s", es1, es2);
			err = EIO;
			goto err;
		}
		/* Convert to a read-only mount. */
		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
				ON_ERRORS_CONTINUE))) {
			ntfs_error(vol->mp, "%s and neither on_errors="
					"continue nor on_errors=remount-ro "
					"was specified%s", es1, es2);
			/* Return the error from ntfs_logfile_empty(). */
			goto err;
		}
		vfs_setflags(vol->mp, MNT_RDONLY);
		NVolSetReadOnly(vol);
		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
		NVolSetErrors(vol);
	}
	/* If the ntfs volume version is below 3.0, we are done. */
	if (vol->major_ver < 3) {
		/*
		 * Set NVolUseSDAttr() so we do not need to check both the
		 * volume version and NVolUseSDAttr() when creating inodes.
		 */
		NVolSetUseSDAttr(vol);
		ntfs_debug("Done (NTFS version < 3.0).");
		return 0;
	}
	/* Ntfs 3.0+ specific initialization. */
	/*
	 * Read the security descriptors file and initialize security on the
	 * volume.
	 */
	err = ntfs_secure_load(vol);
	if (err)
		goto err;
	/* Get the extended system files directory inode. */
	err = ntfs_inode_attach(vol, FILE_Extend, &vol->extend_ni, root_vn);
	if (err) {
		ntfs_error(vol->mp, "Failed to load $Extend directory.");
		goto err;
	}
	/* Find the object id file, load it if present, and set it up. */
	err = ntfs_objid_load(vol);
	if (err) {
		static const char es1[] = "Failed to load $ObjId";
		static const char es2[] = ".  Run chkdsk.";

		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EIO;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				/* Return the error from ntfs_objid_load(). */
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		NVolSetErrors(vol);
	}
	/* Find the quota file, load it if present, and set it up. */
	err = ntfs_quota_load(vol);
	if (err) {
		static const char es1[] = "Failed to load $Quota";
		static const char es2[] = ".  Run chkdsk.";

		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EIO;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				/* Return the error from ntfs_quota_load(). */
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		NVolSetErrors(vol);
	}
	/* If (still) a read-write mount, mark the quotas out of date. */
	if (!NVolReadOnly(vol) && (err = ntfs_quotas_mark_out_of_date(vol))) {
		static const char es1[] = "Failed to mark quotas out of date";
		static const char es2[] = ".  Run chkdsk.";

		/* Convert to a read-only mount. */
		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
			ntfs_error(vol->mp, "%s%s", es1, es2);
			err = EIO;
			goto err;
		}
		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
				ON_ERRORS_CONTINUE))) {
			ntfs_error(vol->mp, "%s and neither on_errors="
					"continue nor on_errors=remount-ro "
					"was specified%s", es1, es2);
			/* Return the error from the failed call above. */
			goto err;
		}
		vfs_setflags(vol->mp, MNT_RDONLY);
		NVolSetReadOnly(vol);
		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
		NVolSetErrors(vol);
	}
	/*
	 * Find the transaction log file ($UsnJrnl), load it if present, check
	 * it, and set it up.
	 */
	err = ntfs_usnjrnl_load(vol);
	if (err) {
		static const char es1[] = "Failed to load $UsnJrnl";
		static const char es2[] = ".  Run chkdsk.";

		/* If a read-write mount, convert it to a read-only mount. */
		if (!NVolReadOnly(vol)) {
			if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
				ntfs_error(vol->mp, "%s%s", es1, es2);
				err = EIO;
				goto err;
			}
			if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
					ON_ERRORS_CONTINUE))) {
				ntfs_error(vol->mp, "%s and neither on_errors="
						"continue nor on_errors="
						"remount-ro was specified%s",
						es1, es2);
				/* Return the error from ntfs_usnjrnl_load(). */
				goto err;
			}
			vfs_setflags(vol->mp, MNT_RDONLY);
			NVolSetReadOnly(vol);
			ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1,
					es2);
		} else
			ntfs_warning(vol->mp, "%s.  Will not be able to "
					"remount read-write%s", es1, es2);
		NVolSetErrors(vol);
	}
	/* If (still) a read-write mount, stamp the transaction log. */
	if (!NVolReadOnly(vol) && (err = ntfs_usnjrnl_stamp(vol))) {
		static const char es1[] = "Failed to stamp transaction log "
				"($UsnJrnl)";
		static const char es2[] = ".  Run chkdsk.";

		if (vol->on_errors & ON_ERRORS_FAIL_DIRTY) {
			ntfs_error(vol->mp, "%s%s", es1, es2);
			err = EIO;
			goto err;
		}
		/* Convert to a read-only mount. */
		if (!(vol->on_errors & (ON_ERRORS_REMOUNT_RO |
				ON_ERRORS_CONTINUE))) {
			ntfs_error(vol->mp, "%s and neither on_errors="
					"continue nor on_errors=remount-ro "
					"was specified%s", es1, es2);
			/* Return the error from ntfs_usnjrnl_stamp(). */
			goto err;
		}
		vfs_setflags(vol->mp, MNT_RDONLY);
		NVolSetReadOnly(vol);
		ntfs_error(vol->mp, "%s.  Mounting read-only%s", es1, es2);
		NVolSetErrors(vol);
	}
	ntfs_debug("Done (NTFS version >= 3.0).");
	return 0;
err:
	/* Obtained inodes will be released by the call to ntfs_unmount(). */
	return err;
}
3142
/**
 * ntfs_popcount32 - count the number of set bits in a 32-bit word
 * @v:		32-bit value whose set bits to count
 *
 * Return the number of bits that are set in the 32-bit word @v.
 *
 * The count is obtained by folding partial sums in parallel: first each pair
 * of bits is replaced by the number of set bits in that pair, then adjacent
 * pairs are added to give per-nibble counts, then adjacent nibbles are added
 * to give per-byte counts, and finally a multiplication adds the four byte
 * counts together into the top byte of the product.
 *
 * This is the algorithm described in Chapter 8, Section 6, "Efficient
 * Implementation of Population-Count Function in 32-Bit Mode", pages 179-180
 * of the "Software Optimization Guide for AMD64 Processors":
 * http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/25112.PDF
 *
 * TODO: If xnu has (or gains) an optimized popcount (aka bitcount) function
 * we should switch to using it instead of ours.
 */
static inline u32 ntfs_popcount32(u32 v)
{
	/* Each 2-bit field now holds the count of set bits in that field. */
	v -= (v >> 1) & 0x55555555;
	/* Add neighbouring 2-bit counts to get a count per nibble. */
	v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
	/* Add neighbouring nibble counts to get a count per byte. */
	v = (v + (v >> 4)) & 0x0F0F0F0F;
	/* Sum the four byte counts into the most significant byte. */
	v *= 0x01010101;
	return v >> 24;
}
3163
3164/**
3165 * ntfs_get_nr_set_bits - get the number of set bits in a bitmap
3166 * @vn:		vnode of bitmap for which to get the number of set bits
3167 * @nr_bits:	number of bits in the bitmap
3168 * @res:	pointer to where the result should be written
3169 *
3170 * Calculate the number of set bits in the bitmap vnode @vn and return the
3171 * result in @res.  We do not care about partial buffers as these will be just
3172 * zero filled and hence not be counted as set bits.
3173 *
3174 * If any buffers cannot be read we assume all bits in the erroring buffers are
3175 * set.  This means we return an overestimate on errors which is better than
3176 * an underestimate.
3177 *
3178 * Return 0 on success amd errno if an iocount reference could not be obtained
3179 * on the bitmap vnode.
3180 */
3181static errno_t ntfs_get_nr_set_bits(vnode_t vn, const s64 nr_bits, s64 *res)
3182{
3183	s64 max_ofs, ofs, nr_set;
3184	ntfs_inode *ni = NTFS_I(vn);
3185	errno_t err;
3186
3187	ntfs_debug("Entering.");
3188	/* Get an iocount reference on the bitmap vnode. */
3189	err = vnode_get(vn);
3190	if (err)
3191		return err;
3192	lck_rw_lock_shared(&ni->lock);
3193	/* Convert the number of bits into bytes rounded up. */
3194	max_ofs = (nr_bits + 7) >> 3;
3195	ntfs_debug("Reading bitmap, max_ofs %lld.", (long long)max_ofs);
3196	for (nr_set = ofs = 0; ofs < max_ofs; ofs += PAGE_SIZE) {
3197		upl_t upl;
3198		upl_page_info_array_t pl;
3199		u32 *p;
3200		int i;
3201
3202		/* Map the page. */
3203		err = ntfs_page_map(ni, ofs, &upl, &pl, (u8**)&p, FALSE);
3204		if (err) {
3205			ntfs_debug("Failed to map page from bitmap (offset "
3206					"%lld, size %d, error %d).  Skipping "
3207					"page.", (long long)ofs, PAGE_SIZE,
3208					(int)err);
3209			/* Count the whole buffer contents as set bits. */
3210			nr_set += PAGE_SIZE * 8;
3211			continue;
3212		}
3213		/*
3214		 * For each 32-bit word, add the number of set bits.  If this
3215		 * is the last block and it is partial we do not really care as
3216		 * it just means we do a little extra work but it will not
3217		 * affect the result as all out of range bytes are set to zero
3218		 * by ntfs_page_map().
3219		 *
3220		 * Use multiples of 4 bytes, thus max size is PAGE_SIZE / 4.
3221		 */
3222	  	for (i = 0; i < (PAGE_SIZE / 4); i++)
3223			nr_set += ntfs_popcount32(p[i]);
3224		ntfs_page_unmap(ni, upl, pl, FALSE);
3225	}
3226	/*
3227	 * Release the iocount reference on the bitmap vnode.  We can ignore
3228	 * the return value as it always is zero.
3229	 */
3230	lck_rw_unlock_shared(&ni->lock);
3231	(void)vnode_put(vn);
3232	ntfs_debug("Done (nr_bits %lld, nr_set %lld).", (long long)nr_bits,
3233			(long long)nr_set);
3234	*res = nr_set;
3235	return 0;
3236}
3237
3238/**
3239 * ntfs_set_nr_free_clusters - set the number of free clusters on a volume
3240 * @vol:	ntfs volume for which to set the number of free clusters
3241 *
3242 * Calculate the number of free clusters on the mounted ntfs volume @vol and
3243 * cache the result in the @vol->nr_free_clusters.
3244 *
3245 * The only particularity is that clusters beyond the end of the logical ntfs
3246 * volume will be marked as in use to prevent errors which means we have to
3247 * discount those at the end.  This is important as the cluster bitmap always
3248 * has a size in multiples of 8 bytes, i.e. up to 63 clusters could be outside
3249 * the logical volume and marked in use when they are not as they do not exist.
3250 *
3251 * If any part of the bitmap cannot be read we assume all clusters in the
3252 * erroring part(s) are in use.  This means we return an underestimate of the
3253 * number of free clusters on errors which is better than an overrestimate.
3254 *
3255 * Return 0 on success or errno if an iocount reference could not be obtained
3256 * on the $Bitmap vnode.
3257 */
3258static errno_t ntfs_set_nr_free_clusters(ntfs_volume *vol)
3259{
3260	s64 nr_free;
3261	errno_t err;
3262
3263	ntfs_debug("Entering.");
3264	lck_rw_lock_exclusive(&vol->lcnbmp_lock);
3265	err = ntfs_get_nr_set_bits(vol->lcnbmp_ni->vn, vol->nr_clusters,
3266			&nr_free);
3267	if (err) {
3268		ntfs_error(vol->mp, "Failed to get vnode for $Bitmap.");
3269		lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
3270		return err;
3271	}
3272	/* Determine the number of zero bits from the number of set bits. */
3273	nr_free = vol->nr_clusters - nr_free;
3274	/*
3275	 * Fixup for eventual bits outside logical ntfs volume (see function
3276	 * description above).
3277	 */
3278	if (vol->nr_clusters & 63)
3279		nr_free += 64 - (vol->nr_clusters & 63);
3280	/* If errors occured we may have gone below zero, fix this. */
3281	if (nr_free < 0)
3282		nr_free = 0;
3283	vol->nr_free_clusters = nr_free;
3284	ntfs_debug("Done (nr_clusters %lld, nr_free_clusters %lld).",
3285			(long long)vol->nr_clusters, (long long)nr_free);
3286	lck_rw_unlock_exclusive(&vol->lcnbmp_lock);
3287	return 0;
3288}
3289
3290/**
3291 * ntfs_set_nr_mft_records - set the number of total/free mft records
3292 * @vol:	volume for which to set the number of total/free mft records
3293 *
3294 * Calculate the number of mft records (inodes) as well as the number of free
3295 * mft records on the mounted ntfs volume @vol and cache the results in
3296 * @vol->nr_mft_records and @vol->nr_free_mft_records, respectively.
3297 *
3298 * If any part of the bitmap cannot be read we assume all mft records in the
3299 * erroring part(s) are in use.  This means we return an underestimate of the
3300 * number of free mft records on errors which is better than an overrestimate.
3301 *
3302 * FIXME: HFS uses the maximum ever possible by basing it on the volume size
3303 * rather than the current total/free.  Do we want to keep it the ntfsprogs and
3304 * Linux NTFS driver way or move to the HFS way?
3305 */
3306static errno_t ntfs_set_nr_mft_records(ntfs_volume *vol)
3307{
3308	s64 nr_free;
3309	errno_t err;
3310
3311	ntfs_debug("Entering.");
3312	/*
3313	 * First, determine the total number of mft records from the size of
3314	 * the $MFT/$DATA attribute.
3315	 */
3316	lck_rw_lock_exclusive(&vol->mftbmp_lock);
3317	lck_spin_lock(&vol->mft_ni->size_lock);
3318	vol->nr_mft_records = vol->mft_ni->data_size >>
3319			vol->mft_record_size_shift;
3320	lck_spin_unlock(&vol->mft_ni->size_lock);
3321	err = ntfs_get_nr_set_bits(vol->mftbmp_ni->vn,
3322			vol->mft_ni->initialized_size >>
3323			vol->mft_record_size_shift, &nr_free);
3324	if (err) {
3325		ntfs_error(vol->mp, "Failed to get vnode for $MFT/$BITMAP.");
3326		lck_rw_unlock_exclusive(&vol->mftbmp_lock);
3327		return err;
3328	}
3329	/* Determine the number of zero bits from the number of set bits. */
3330	nr_free = vol->nr_mft_records - nr_free;
3331	/* If errors occured we may well have gone below zero, fix this. */
3332	if (nr_free < 0)
3333		nr_free = 0;
3334	vol->nr_free_mft_records = nr_free;
3335	ntfs_debug("Done (nr_mft_records %lld, nr_free_mft_records %lld).",
3336			(long long)vol->nr_mft_records, (long long)nr_free);
3337	lck_rw_unlock_exclusive(&vol->mftbmp_lock);
3338	return 0;
3339}
3340
3341/**
3342 * ntfs_statfs - return information about a mounted ntfs volume
3343 * @vol:	ntfs volume about which to return information
3344 * @sfs:	vfsstatfs structure in which to return the information
3345 *
3346 * Return information about the mounted ntfs volume @vol in the vfsstatfs
3347 * structure @sfs.  We interpret the values to be correct of the moment in time
3348 * at which we are called.  Most values are variable otherwise and this is not
3349 * just the free values but the totals as well.  For example we can increase
3350 * the total number of file nodes if we run out and we can keep doing this
3351 * until there is no more space on the volume left at all.
3352 *
3353 * This is only called from ntfs_mount() hence we only need to set the
3354 * fields that are not already set.
3355 *
3356 * The mount() system call sets @sfs to zero and then sets up f_owner, f_flags,
3357 * f_fstypename, f_mntonname, f_mntfromname, and f_reserved.
3358 *
3359 * ntfs_mount() then sets f_fsid and calls ntfs_statfs() and the rest of @sfs
3360 * is set here.
3361 *
3362 * Note: No need for locking as this is only called from ntfs_mount().
3363 */
3364static void ntfs_statfs(ntfs_volume *vol, struct vfsstatfs *sfs)
3365{
3366	ntfs_debug("Entering.");
3367	/*
3368	 * Block size for the below size values.  We use the cluster size of
3369	 * the volume as that means we do not convert to a different unit.
3370	 * Alternatively, we could return the sector size instead.
3371	 */
3372	sfs->f_bsize = vol->cluster_size;
3373	/* Optimal transfer block size (in bytes). */
3374	sfs->f_iosize = ubc_upl_maxbufsize();
3375	/* Total data blocks in file system (in units of @f_bsize). */
3376	sfs->f_blocks = (u64)vol->nr_clusters;
3377	/* Free data blocks in file system (in units of @f_bsize). */
3378	sfs->f_bfree = (u64)vol->nr_free_clusters;
3379	/*
3380	 * Free blocks available to non-superuser (in units of @f_bsize), same
3381	 * as above for ntfs.
3382	 * FIXME: We could provide a mount option to cause a virtual, reserved
3383	 * percentage of total space for superuser and perhaps even use a
3384	 * non-zero default and enforce it in the cluster allocator.  If we do
3385	 * that we would need to subtract that percentage from
3386	 * @vol->nr_free_clusters and return the result in @sfs->f_bavail
3387	 * unless the result is below zero in which case we would just set
3388	 * @sfs->f_bavail to 0.
3389	 */
3390	sfs->f_bavail = (u64)vol->nr_free_clusters;
3391	/* Blocks in use (in units of @f_bsize). */
3392	sfs->f_bused = (u64)(vol->nr_clusters - vol->nr_free_clusters);
3393	/* Number of inodes in file system (at this point in time). */
3394	sfs->f_files = (u64)vol->nr_mft_records;
3395	/* Free inodes in file system (at this point in time). */
3396	sfs->f_ffree = (u64)vol->nr_free_mft_records;
3397	/*
3398	 * File system subtype.  Set this to the ntfs version encoded into 16
3399	 * bits, the high 8 bits being the major version and the low 8 bits
3400	 * being the minor version.  This is then extended to 32 bits, thus the
3401	 * higher 16 bits are currently zero.
3402	 */
3403	sfs->f_fssubtype = (u32)vol->major_ver << 8 | vol->minor_ver;
3404	ntfs_debug("Done.");
3405}
3406
3407/**
3408 * ntfs_unmount_callback_recycle - callback for vnode iterate in ntfs_unmount()
3409 * @vn:		vnode the callback is invoked with (has iocount reference)
3410 * @data:	for us always NULL and ignored
3411 *
3412 * This callback is called from vnode_iterate() which is called from
3413 * ntfs_unmount() for all in-core, non-dead, non-suspend vnodes belonging to
3414 * the mounted volume that still have an ntfs inode attached.
3415 *
3416 * We mark all vnodes for termination so they are reclaimed as soon as all
3417 * references to them are released.
3418 */
3419static int ntfs_unmount_callback_recycle(vnode_t vn, void *data __unused)
3420{
3421#ifdef DEBUG
3422	if (NTFS_I(vn))
3423		ntfs_debug("Entering for mft_no 0x%llx.",
3424				(unsigned long long)NTFS_I(vn)->mft_no);
3425#endif
3426	(void)vnode_recycle(vn);
3427	ntfs_debug("Done.");
3428	return VNODE_RETURNED;
3429}
3430
3431/**
3432 * ntfs_unmount_inode_detach - detach an inode at umount time
3433 * @pni:	pointer to the attached ntfs inode to detach
3434 * @parent_ni:	parent ntfs inode
3435 *
3436 * Mark the vnode of the ntfs inode *@pni for termination and detach the ntfs
3437 * inode *@pni from the mounted ntfs volume @vol by dropping the reference on
3438 * its vnode and setting *@pni to NULL.
3439 */
3440static void ntfs_unmount_inode_detach(ntfs_inode **pni, ntfs_inode *parent_ni)
3441{
3442	ntfs_inode *ni = *pni;
3443	if (ni) {
3444		ntfs_debug("Entering for mft_no 0x%llx.",
3445				(unsigned long long)ni->mft_no);
3446		/* Drop the internal reference on the parent inode. */
3447		if (parent_ni)
3448			OSDecrementAtomic(&parent_ni->nr_refs);
3449		OSDecrementAtomic(&ni->nr_refs);
3450		if (ni->vn) {
3451			(void)vnode_recycle(ni->vn);
3452			vnode_rele(ni->vn);
3453		} else
3454			ntfs_inode_reclaim(ni);
3455		*pni = NULL;
3456		ntfs_debug("Done.");
3457	}
3458}
3459
3460/**
3461 * ntfs_unmount_attr_inode_detach - detach an attribute inode at umount time
3462 * @pni:	pointer to the attached ntfs inode to detach
3463 *
3464 * Mark the vnode of the ntfs inode *@pni for termination and detach the ntfs
3465 * inode *@pni from the mounted ntfs volume @vol by dropping the reference on
3466 * its vnode and setting *@pni to NULL.
3467 */
3468static void ntfs_unmount_attr_inode_detach(ntfs_inode **pni)
3469{
3470	ntfs_inode *ni = *pni;
3471	if (ni) {
3472		ntfs_debug("Entering for mft_no 0x%llx.",
3473				(unsigned long long)ni->mft_no);
3474		/*
3475		 * Drop the internal reference on the base inode @base_ni
3476		 * (which is also the parent inode).
3477		 */
3478		if (NInoAttr(ni) && ni->base_ni)
3479			OSDecrementAtomic(&ni->base_ni->nr_refs);
3480		OSDecrementAtomic(&ni->nr_refs);
3481		if (ni->vn) {
3482			(void)vnode_recycle(ni->vn);
3483			vnode_rele(ni->vn);
3484		} else
3485			ntfs_inode_reclaim(ni);
3486		*pni = NULL;
3487		ntfs_debug("Done.");
3488	}
3489}
3490
3491/**
3492 * ntfs_do_postponed_release - release resources used by an ntfs volume
3493 * @vol:	ntfs volume to release
3494 *
3495 * Release resources used by the ntfs volume @vol.
3496 *
3497 * This is called either at unmount time or if there were still inodes active
3498 * then it is called when the last inode is freed.  This ensures the @vol
3499 * pointer in the ntfs_inode structure remains valid until all inodes are gone.
3500 */
3501void ntfs_do_postponed_release(ntfs_volume *vol)
3502{
3503	ntfs_debug("Doing postponed release of volume.");
3504	lck_mtx_lock(&ntfs_lock);
3505	if (vol->upcase && vol->upcase == ntfs_default_upcase) {
3506		vol->upcase = NULL;
3507		/*
3508		 * Drop our reference on the default upcase table and throw it
3509		 * away if we had the only reference.
3510		 */
3511		if (!--ntfs_default_upcase_users) {
3512			OSFree(ntfs_default_upcase, ntfs_default_upcase_size,
3513					ntfs_malloc_tag);
3514			ntfs_default_upcase = NULL;
3515		}
3516	}
3517	if (NVolCompressionEnabled(vol)) {
3518		/*
3519		 * Drop our reference on the compression buffer and throw it
3520		 * away if we had the only reference.
3521		 */
3522		if (!--ntfs_compression_users) {
3523			OSFree(ntfs_compression_buffer,
3524					ntfs_compression_buffer_size,
3525					ntfs_malloc_tag);
3526			ntfs_compression_buffer = NULL;
3527		}
3528	}
3529	lck_mtx_unlock(&ntfs_lock);
3530	/* If we loaded the attribute definitions table, throw it away now. */
3531	if (vol->attrdef)
3532		OSFree(vol->attrdef, vol->attrdef_size, ntfs_malloc_tag);
3533	/* If we used a volume specific upcase table, throw it away now. */
3534	if (vol->upcase)
3535		OSFree(vol->upcase, vol->upcase_len << NTFSCHAR_SIZE_SHIFT,
3536				ntfs_malloc_tag);
3537	/* If we cached a volume name, throw it away now. */
3538	if (vol->name)
3539		OSFree(vol->name, vol->name_size, ntfs_malloc_tag);
3540	/* Deinitialize the ntfs_volume locks. */
3541	lck_rw_destroy(&vol->mftbmp_lock, ntfs_lock_grp);
3542	lck_rw_destroy(&vol->lcnbmp_lock, ntfs_lock_grp);
3543	lck_mtx_destroy(&vol->rename_lock, ntfs_lock_grp);
3544	lck_rw_destroy(&vol->secure_lock, ntfs_lock_grp);
3545	lck_spin_destroy(&vol->security_id_lock, ntfs_lock_grp);
3546	lck_mtx_destroy(&vol->inodes_lock, ntfs_lock_grp);
3547	/* Finally, free the ntfs volume. */
3548	OSFree(vol, sizeof(ntfs_volume), ntfs_malloc_tag);
3549	OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag());
3550}
3551
3552/**
3553 * ntfs_unmount - unmount an ntfs file system
3554 * @mp:		mount point to unmount
3555 * @mnt_flags:	flags describing the unmount (MNT_FORCE is the only one)
3556 * @context:	vfs context
3557 *
3558 * The VFS calls this via VFS_UNMOUNT() when it wants to unmount an ntfs
3559 * volume.  We sync and release all held inodes as well as all other resources.
3560 *
3561 * For each held inode, if we have the vnode already, go through vfs reclaim
3562 * which will also get rid off the ntfs inode.  Otherwise kill the ntfs inode
3563 * directly.
3564 *
3565 * If the volume is successfully unmounted, we must call
3566 * OSKextReleaseKextWithLoadTag() to allow the KEXT to be unloaded when no
3567 * longer in use.
3568 *
3569 * Return 0 on success and errno on error.
3570 */
3571static int ntfs_unmount(mount_t mp, int mnt_flags,
3572		vfs_context_t context __unused)
3573{
3574	ntfs_volume *vol;
3575	int vflags, err;
3576	BOOL force;
3577
3578	ntfs_debug("Entering.");
3579	vol = NTFS_MP(mp);
3580	if (!vol)
3581		goto unload;
3582	if (!vol->mft_ni) {
3583		/* Split our ntfs_volume away from the mount. */
3584		vfs_setfsprivate(mp, NULL);
3585		goto no_mft;
3586	}
3587	vflags = 0;
3588	force = FALSE;
3589	if (mnt_flags & MNT_FORCE) {
3590		vflags |= FORCECLOSE;
3591		force = TRUE;
3592	}
3593	if (!vol->root_ni)
3594		goto no_root;
3595	/*
3596	 * Try to reclaim all non-root and non-system vnodes.  For a non-forced
3597	 * unmount, this will fail if there are any open files.
3598	 */
3599	err = vflush(mp, NULLVP, vflags|SKIPROOT|SKIPSYSTEM);
3600	if (err) {
3601		ntfs_warning(mp, "Cannot unmount (vflush() returned error "
3602				"%d).  Are there open files keeping the "
3603				"volume busy?\n", err);
3604		goto abort;
3605	}
3606	/*
3607	 * Once we get here, the only vnodes left are our system vnodes, which
3608	 * we will detach and vnode_put below.  At this point, the system
3609	 * directories may still have index attributes with references on the
3610	 * directory vnodes.  And we might have other system vnodes still
3611	 * hanging around, with no references.  So we will explicitly try to
3612	 * recycle all remaining vnodes so that they will all be reclaimed as
3613	 * soon as their last references are dropped.
3614	 */
3615	(void)vnode_iterate(mp, 0, ntfs_unmount_callback_recycle, NULL);
3616	/*
3617	 * If a read-write mount and no volume errors have been detected, mark
3618	 * the volume clean.
3619	 */
3620	if (!NVolReadOnly(vol) && vol->vol_ni) {
3621		if (!NVolErrors(vol)) {
3622			if (ntfs_volume_flags_clear(vol, VOLUME_IS_DIRTY))
3623				ntfs_warning(mp, "Failed to clear dirty bit "
3624						"in volume information "
3625						"flags.  Run chkdsk.");
3626		} else
3627			ntfs_warning(mp, "Volume has errors.  Leaving volume "
3628					"marked dirty.  Run chkdsk.");
3629	}
3630	/* Ntfs 3.0+ specific clean up. */
3631	if (vol->vol_ni && vol->major_ver >= 3) {
3632		ntfs_unmount_attr_inode_detach(&vol->usnjrnl_j_ni);
3633		ntfs_unmount_attr_inode_detach(&vol->usnjrnl_max_ni);
3634		ntfs_unmount_inode_detach(&vol->usnjrnl_ni, vol->extend_ni);
3635		ntfs_unmount_attr_inode_detach(&vol->quota_q_ni);
3636		ntfs_unmount_inode_detach(&vol->quota_ni, vol->extend_ni);
3637		ntfs_unmount_attr_inode_detach(&vol->objid_o_ni);
3638		ntfs_unmount_inode_detach(&vol->objid_ni, vol->extend_ni);
3639		ntfs_unmount_inode_detach(&vol->extend_ni, vol->root_ni);
3640		ntfs_unmount_attr_inode_detach(&vol->secure_sds_ni);
3641		ntfs_unmount_attr_inode_detach(&vol->secure_sdh_ni);
3642		ntfs_unmount_attr_inode_detach(&vol->secure_sii_ni);
3643		ntfs_unmount_inode_detach(&vol->secure_ni, vol->root_ni);
3644	}
3645	ntfs_unmount_inode_detach(&vol->vol_ni, vol->root_ni);
3646	ntfs_unmount_inode_detach(&vol->lcnbmp_ni, vol->root_ni);
3647	ntfs_unmount_attr_inode_detach(&vol->mftbmp_ni);
3648	ntfs_unmount_inode_detach(&vol->logfile_ni, vol->root_ni);
3649	/*
3650	 * The root directory vnode is still held by the parent vnode
3651	 * references of the $MFT and $MFTMirr vnodes thus it will only be
3652	 * inactivated after those vnodes are reclaimed.  The problem with this
3653	 * is that when VNOP_INACTIVE() is called for the root directory vnode
3654	 * this in turn calls ntfs_inode_sync() which in turn calls
3655	 * ntfs_mft_record_sync() which in turn calls buf_getblk() followed by
3656	 * buf_bwrite() for the vnode of $MFT which fails as the vnode for $MFT
3657	 * has been reclaimed already.  The solution is thus to drop the parent
3658	 * vnode references held by $MFT and $MFTMirr now so that the root
3659	 * directory vnode can be recycled now.
3660	 */
3661	if (vol->mftmirr_ni && vol->mftmirr_ni->vn) {
3662		/* Drop the internal reference on the parent inode. */
3663		if (vol->root_ni)
3664			OSDecrementAtomic(&vol->root_ni->nr_refs);
3665		vnode_update_identity(vol->mftmirr_ni->vn, NULL, NULL, 0, 0,
3666				VNODE_UPDATE_PARENT);
3667	}
3668	if (vol->mft_ni && vol->mft_ni->vn) {
3669		/* Drop the internal reference on the parent inode. */
3670		if (vol->root_ni)
3671			OSDecrementAtomic(&vol->root_ni->nr_refs);
3672		vnode_update_identity(vol->mft_ni->vn, NULL, NULL, 0, 0,
3673				VNODE_UPDATE_PARENT);
3674	}
3675	/*
3676	 * Nothing references the root inode any more so we can release it.
3677	 * Note the VFS still holds a reference that it will drop after
3678	 * ntfs_unmount() completes thus the root vnode will be the last one to
3679	 * be reclaimed.
3680	 */
3681	ntfs_unmount_inode_detach(&vol->root_ni, NULL);
3682	/*
3683	 * Do a final flush to get rid of any vnodes that have not been
3684	 * inactivated/recycled yet.  Note this must be done without the force
3685	 * flag otherwise it blows away the mft mirror and mft inodes which we
3686	 * will recycle below.
3687	 */
3688	(void)vflush(mp, NULLVP, vflags & ~FORCECLOSE);
3689	ntfs_unmount_inode_detach(&vol->mftmirr_ni, NULL);
3690no_root:
3691	if (vol->mft_ni) {
3692		if (vol->mft_ni->vn)
3693			ntfs_unmount_inode_detach(&vol->mft_ni, NULL);
3694		else {
3695			/*
3696			 * There may be no vnode in the error code paths of
3697			 * ntfs_mount() which calls ntfs_unmount() to clean up.
3698			 */
3699			ntfs_inode_reclaim(vol->mft_ni);
3700			vol->mft_ni = NULL;
3701		}
3702	}
3703	/*
3704	 * We are holding no inodes at all now.  It is time to blow everything
3705	 * away that is remaining.  If this is a forced unmount, we immediately
3706	 * and forcibly blow everything away.  If not forced, we try to blow
3707	 * everything away that is not busy but if anything is busy vflush()
3708	 * does not do anything at all.  In that case we report an error, and
3709	 * then forcibly blow everything away anyway.  FIXME: We could undo the
3710	 * unmount by re-reading all the system inodes we just released, but do
3711	 * we want to?  It does not seem to be worth the hassle given it should
3712	 * never really happen...
3713	 */
3714	err = vflush(mp, NULLVP, vflags);
3715	if (err && !force) {
3716		ntfs_error(mp, "There are busy vnodes after unmounting!  "
3717				"Forcibly closing and reclaiming them.");
3718		(void)vflush(mp, NULLVP, FORCECLOSE);
3719
3720	}
3721	/* Split our ntfs_volume away from the mount. */
3722	vol->mp = NULL;
3723	vfs_setfsprivate(mp, NULL);
3724	/* If there are still inodes attached, postpone freeing the volume. */
3725	lck_mtx_lock(&vol->inodes_lock);
3726	if (!LIST_EMPTY(&vol->inodes)) {
3727		NVolSetPostponedRelease(vol);
3728		lck_mtx_unlock(&vol->inodes_lock);
3729		ntfs_debug("Scheduled postponed release of volume.");
3730		return 0;
3731	}
3732	lck_mtx_unlock(&vol->inodes_lock);
3733	ntfs_do_postponed_release(vol);
3734	ntfs_debug("Done.");
3735	return 0;
3736no_mft:
3737	/* Deinitialize the ntfs_volume locks. */
3738	lck_rw_destroy(&vol->mftbmp_lock, ntfs_lock_grp);
3739	lck_rw_destroy(&vol->lcnbmp_lock, ntfs_lock_grp);
3740	lck_mtx_destroy(&vol->rename_lock, ntfs_lock_grp);
3741	lck_rw_destroy(&vol->secure_lock, ntfs_lock_grp);
3742	lck_spin_destroy(&vol->security_id_lock, ntfs_lock_grp);
3743	lck_mtx_destroy(&vol->inodes_lock, ntfs_lock_grp);
3744	/* Finally, free the ntfs volume. */
3745	OSFree(vol, sizeof(ntfs_volume), ntfs_malloc_tag);
3746unload:
3747	err = 0;
3748	OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag());
3749abort:
3750	ntfs_debug("Done.");
3751	return err;
3752}
3753
/**
 * ntfs_sync_args - state shared between ntfs_sync() and its helpers
 * @sync:	IO_SYNC if the sync is to wait for all i/o to complete
 * @err:	error code recorded while syncing (0 if no error occurred)
 *
 * A pointer to this structure is handed to ntfs_sync_callback() and to
 * ntfs_sync_helper().
 */
struct ntfs_sync_args {
	int sync;	/* Passed on to ntfs_inode_sync(). */
	int err;	/* Filled in when syncing an inode fails. */
};
3763
3764/**
3765 * ntfs_sync_callback - callback for vnode iterate in ntfs_sync()
3766 * @vn:		vnode the callback is invoked with (has iocount reference)
3767 * @arg:	pointer to an ntfs_sync_args structure
3768 *
3769 * This callback is called from vnode_iterate() which is called from
3770 * ntfs_sync() for all in-core, non-dead, non-suspend vnodes belonging to the
3771 * mounted volume that still have an ntfs inode attached.
3772 *
3773 * We sync all dirty inodes to disk and if an error occurs we record it in the
3774 * @err field of the ntfs_sync_args structure pointed to by @arg.  Note we
3775 * preserve the old error code if an error is already recorded unless that
3776 * error code is ENOTSUP.
3777 *
3778 * If the @sync field of the ntfs_sync_args structure pointed to by @arg is
3779 * IO_SYNC, wait for all i/o to complete.
3780 */
3781static int ntfs_sync_callback(vnode_t vn, void *arg)
3782{
3783	ntfs_inode *ni = NTFS_I(vn);
3784	ntfs_volume *vol = ni->vol;
3785	struct ntfs_sync_args *args = (struct ntfs_sync_args*)arg;
3786
3787	/*
3788	 * Skip the inodes for $MFT and $MFTMirr.  They are done separately as
3789	 * the last ones to be synced.
3790	 */
3791	if (ni != vol->mft_ni && ni != vol->mftmirr_ni) {
3792		errno_t err;
3793
3794		/*
3795		 * Sync the inode data to disk and sync the ntfs inode to the
3796		 * mft record(s) but do not write the mft record(s) to disk.
3797		 */
3798		err = ntfs_inode_sync(ni, args->sync, TRUE);
3799		/*
3800		 * Only record the first error that is not ENOTSUP or record
3801		 * ENOTSUP if that is the only error.
3802		 *
3803		 * Skip deleted inodes.
3804		 */
3805		if (err && err != ENOENT) {
3806			if (!args->err || args->err == ENOTSUP)
3807				args->err = err;
3808		}
3809	}
3810	return VNODE_RETURNED;
3811}
3812
3813/**
3814 * ntfs_sync_helper - helper for ntfs_sync()
3815 * @ni:				ntfs inode the helper is invoked for
3816 * @args:			pointer to an ntfs_sync_args structure
3817 * @skip_mft_record_sync:	do not sync the mft record(s) to disk
3818 *
3819 * This helper is called from ntfs_sync() when syncing the $MFT and $MFTMirr
3820 * inodes.
3821 *
3822 * Any errors are returned in @args->err.
3823 */
3824static void ntfs_sync_helper(ntfs_inode *ni, struct ntfs_sync_args *args,
3825		const BOOL skip_mft_record_sync)
3826{
3827	errno_t err;
3828
3829	err = vnode_get(ni->vn);
3830	if (err) {
3831		ntfs_error(ni->vol->mp, "Failed to get vnode for $MFT%s "
3832				"(error %d).",
3833				(ni == ni->vol->mft_ni) ? "" : "Mirr",
3834				(int)err);
3835		goto err;
3836	}
3837	err = ntfs_inode_sync(ni, args->sync, skip_mft_record_sync);
3838	vnode_put(ni->vn);
3839	/* Skip deleted inodes. */
3840	if (err && err != ENOENT) {
3841		ntfs_error(ni->vol->mp, "Failed to sync $MFT%s (error %d).",
3842				(ni == ni->vol->mft_ni) ? "" : "Mirr",
3843				(int)err);
3844		goto err;
3845	}
3846	return;
3847err:
3848	if (!args->err || args->err == ENOTSUP)
3849		args->err = err;
3850	return;
3851}
3852
3853/**
3854 * ntfs_sync - sync a mounted volume to disk
3855 * @mp:		mount point of ntfs file system
3856 * @waitfor:	if MNT_WAIT wait fo i/o to complete
3857 * @context:	vfs context
3858 *
3859 * The VFS calls this via VFS_SYNC() when it wants to sync all cached data of
3860 * the mounted ntfs volume described by the mount @mp.
3861 *
3862 * If @waitfor is MNT_WAIT, wait for all i/o to complete before returning.
3863 *
3864 * Return 0 on success and errno on error.
3865 *
3866 * Note this function is only called for r/w mounted volumes so no need to
3867 * check if the volume is read-only.
3868 */
3869static int ntfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
3870{
3871	ntfs_volume *vol = NTFS_MP(mp);
3872	struct ntfs_sync_args args;
3873
3874	/* If we are mounted read-only, we do not need to sync anything. */
3875	if (NVolReadOnly(vol))
3876		return 0;
3877	ntfs_debug("Entering.");
3878	args.sync = (waitfor == MNT_WAIT) ? IO_SYNC : 0;
3879	args.err = 0;
3880	/* Iterate over all vnodes and run ntfs_inode_sync() on each of them. */
3881	(void)vnode_iterate(mp, 0, ntfs_sync_callback, (void*)&args);
3882	/*
3883	 * Finally, sync the inodes for $MFT and $MFTMirr to disk.  Note we do
3884	 * the sync twice to ensure that any interdependent changes that are
3885	 * flushed from one inode to the other are actually written to disk.
3886	 */
3887	ntfs_sync_helper(vol->mftmirr_ni, &args, TRUE);
3888	ntfs_sync_helper(vol->mft_ni, &args, TRUE);
3889	ntfs_sync_helper(vol->mftmirr_ni, &args, FALSE);
3890	ntfs_sync_helper(vol->mft_ni, &args, FALSE);
3891	if (!args.err)
3892		ntfs_debug("Done.");
3893	else
3894		ntfs_error(mp, "Failed to sync volume (error %d).", args.err);
3895	return args.err;
3896}
3897
3898/**
3899 * ntfs_remount - change the mount options of a mounted ntfs file system
3900 * @mp:		mount point of mounted ntfs file system
3901 * @opts:	ntfs specific mount options (already copied from user space)
3902 *
3903 * Change the mount options of an already mounted ntfs file system.
3904 *
3905 * Return 0 on success and errno on error.
3906 *
3907 * If the remount fails, we must call OSKextReleaseKextWithLoadTag
3908 * to allow the KEXT to be unloaded when no longer in use.
3909 *
3910 *
3911 * Note we are at mount protocol version 0.0 where we do not have any ntfs
3912 * specific mount options so we annotate @opts as __unused to make gcc happy.
3913 */
3914static errno_t ntfs_remount(mount_t mp,
3915		ntfs_mount_options_1_0 *opts)
3916{
3917	errno_t err = 0;
3918	ntfs_volume *vol = NTFS_MP(mp);
3919
3920	ntfs_debug("Entering.");
3921	/*
3922	 * Check for a change in the case sensitivity semantics and abort if
3923	 * one is requested as things could get very confused if we allow a
3924	 * remount to switch from case sensitive to case insensitive or vice
3925	 * versa.
3926	 */
3927	if (((opts->flags & NTFS_MNT_OPT_CASE_SENSITIVE) &&
3928			!NVolCaseSensitive(vol)) ||
3929			(!(opts->flags & NTFS_MNT_OPT_CASE_SENSITIVE) &&
3930			NVolCaseSensitive(vol))) {
3931		ntfs_error(mp, "Cannot change case sensitivity semantics via "
3932				"remount.  You need to unmount and then mount "
3933				"again with the desired options.");
3934		err = EINVAL;
3935		goto err_exit;
3936	}
3937	/*
3938	 * If we are remounting read-write, make sure there are no volume
3939	 * errors and that no unsupported volume flags are set.  Also, empty
3940	 * the logfile journal as it would become stale as soon as something is
3941	 * written to the volume and mark the volume dirty so that chkdsk is
3942	 * run if the volume is not umounted cleanly.  Finally, mark the quotas
3943	 * out of date so Windows rescans the volume on boot and updates them.
3944	 *
3945	 * When remounting read-only, mark the volume clean if no volume errors
3946	 * have occured.
3947	 */
3948	if (vfs_iswriteupgrade(mp)) {
3949		/* We no longer allow (re-)mounting read/write. */
3950		ntfs_error(mp, "Remounting read/write is not supported");
3951		goto EROFS_exit;
3952#if 0
3953		static const char es[] = ".  Cannot remount read-write.  To "
3954				"fix this problem boot into Windows, run "
3955				"chkdsk c: /f /v /x from the command prompt "
3956				"(replace c: with the drive letter of this "
3957				"volume), then reboot into Mac OS X and mount "
3958				"the volume again.";
3959
3960		/* Remounting read-write. */
3961		if (NVolErrors(vol)) {
3962			ntfs_error(mp, "Volume has errors and is read-only%s",
3963					es);
3964			goto EROFS_exit;
3965		}
3966		if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
3967			ntfs_error(mp, "Volume has unsupported flags set "
3968					"(0x%x) and is read-only%s",
3969					(unsigned)le16_to_cpu(vol->vol_flags),
3970					es);
3971			goto EROFS_exit;
3972		}
3973		if (ntfs_volume_flags_set(vol, VOLUME_IS_DIRTY)) {
3974			ntfs_error(mp, "Failed to set dirty bit in volume "
3975					"information flags%s", es);
3976			goto EROFS_exit;
3977		}
3978		if (ntfs_logfile_empty(vol->logfile_ni)) {
3979			ntfs_error(mp, "Failed to empty journal $LogFile%s",
3980					es);
3981			NVolSetErrors(vol);
3982			goto EROFS_exit;
3983		}
3984		if (ntfs_quotas_mark_out_of_date(vol)) {
3985			ntfs_error(mp, "Failed to mark quotas out of date%s",
3986					es);
3987			NVolSetErrors(vol);
3988			goto EROFS_exit;
3989		}
3990		if (ntfs_usnjrnl_stamp(vol)) {
3991			ntfs_error(mp, "Failed to stamp transation log "
3992					"($UsnJrnl)%s", es);
3993			NVolSetErrors(vol);
3994			goto EROFS_exit;
3995		}
3996		NVolClearReadOnly(vol);
3997#endif /* r/w upgrade not supported */
3998	} else if (!NVolReadOnly(vol) && vfs_isrdonly(mp)) {
3999		/* Remounting read-only, flush all pending writes. */
4000		err = ntfs_sync(mp, MNT_WAIT, NULL);
4001		if (err) {
4002			ntfs_error(mp, "Failed to sync volume (error %d).  "
4003					"Cannot remount read-only.", err);
4004			goto err_exit;
4005		}
4006		/* If no volume errors have occured, mark the volume clean. */
4007		if (!NVolErrors(vol)) {
4008			if (ntfs_volume_flags_clear(vol, VOLUME_IS_DIRTY))
4009				ntfs_warning(mp, "Failed to clear dirty bit "
4010						"in volume information "
4011						"flags.  Run chkdsk.");
4012			/* Flush the changes to disk. */
4013			err = ntfs_sync(mp, MNT_WAIT, NULL);
4014			if (err) {
4015				ntfs_error(mp, "Failed to sync volume (error "
4016						"%d).  Cannot remount "
4017						"read-only.", err);
4018				/*
4019				 * Try to set the dirty flag again in case we
4020				 * did clear it but something else failed.  We
4021				 * do not care about any errors as we almost
4022				 * expect them to happen if we got here.
4023				 */
4024				(void)ntfs_volume_flags_set(vol,
4025						VOLUME_IS_DIRTY);
4026				goto err_exit;
4027			}
4028		} else
4029			ntfs_warning(mp, "Volume has errors.  Leaving volume "
4030					"marked dirty.  Run chkdsk.");
4031		NVolSetReadOnly(vol);
4032	}
4033	/* Don't allow the user to clear MNT_DONTBROWSE for read/write volumes. */
4034	if (vfs_isrdwr(mp))
4035		vfs_setflags(mp, MNT_DONTBROWSE);
4036	// TODO: Copy mount options from @opts to @vol.
4037	ntfs_debug("Done.");
4038	return 0;
4039EROFS_exit:
4040	err = EROFS;
4041err_exit:
4042	OSKextReleaseKextWithLoadTag(OSKextGetCurrentLoadTag());
4043	return err;
4044}
4045
4046/**
4047 * ntfs_mount - mount an ntfs file system
4048 * @mp:		mount point to initialize/mount
4049 * @dev_vn:	vnode of the device we are mounting
4050 * @data:	mount options (in user space)
4051 * @context:	vfs context
4052 *
4053 * The VFS calls this via VFS_MOUNT() when it wants to mount an ntfs volume.
4054 *
4055 * Note: @dev_vn is NULLVP if this is a MNT_UPDATE or MNT_RELOAD mount but of
4056 * course in those cases it can be retrieved from the NTFS_MP(mp)->dev_vn.
4057 *
4058 * Return 0 on success and errno on error.
4059 *
4060 * We call OSKextRetainKextWithLoadTag() to prevent the KEXT from being
4061 * unloaded automatically while in use.  If the mount fails, we must call
4062 * OSKextReleaseKextWithLoadTag() to allow the KEXT to be unloaded.
4063 */
4064static int ntfs_mount(mount_t mp, vnode_t dev_vn, user_addr_t data,
4065		vfs_context_t context)
4066{
4067	daddr64_t nr_blocks;
4068	struct vfsstatfs *sfs = vfs_statfs(mp);
4069	ntfs_volume *vol;
4070	buf_t buf;
4071	kauth_cred_t cred;
4072	dev_t dev;
4073	NTFS_BOOT_SECTOR *bs;
4074	errno_t err, err2;
4075	u32 blocksize;
4076	ntfs_mount_options_header opts_hdr;
4077	ntfs_mount_options_1_0 opts;
4078
4079	ntfs_debug("Entering.");
4080	OSKextRetainKextWithLoadTag(OSKextGetCurrentLoadTag());
4081	/*
4082	 * FIXME: Not convinced that this is necessary.  It may well be
4083	 * sufficient to set cred = vfs_context_ucred(context) as some file
4084	 * systems do (e.g. msdosfs, old ntfs), but HFS does it this way so we
4085	 * follow suit.  Also, some file systems even simply set cred = NOCRED
4086	 * (e.g. udf).  Should investigate or ask someone...
4087	 */
4088	cred = vfs_context_proc(context) ? vfs_context_ucred(context) : NOCRED;
4089	/* Copy our mount options header from user space. */
4090	err = copyin(data, (caddr_t)&opts_hdr, sizeof(opts_hdr));
4091	if (err) {
4092		ntfs_error(mp, "Failed to copy mount options header from user "
4093				"space (error %d).", err);
4094		goto unload;
4095	}
4096	ntfs_debug("Mount options header version %d.%d.", opts_hdr.major_ver,
4097			opts_hdr.minor_ver);
4098	/* Get and check options. */
4099	switch (opts_hdr.major_ver) {
4100	case 1:
4101		if (opts_hdr.minor_ver != 0)
4102			ntfs_warning(mp, "Your version of /sbin/mount_ntfs is "
4103					"newer than this driver, ignoring any "
4104					"new options.");
4105		/* Version 1.x has one option so copy it from user space. */
4106		err = copyin((data + sizeof(opts_hdr) + 7) & ~7,
4107				(caddr_t)&opts, sizeof(opts));
4108		if (err) {
4109			ntfs_error(mp, "Failed to copy NTFS mount options "
4110					"from user space (error %d).", err);
4111			goto unload;
4112		}
4113		break;
4114	case 0:
4115		/* Version 0.x has no options at all. */
4116		bzero(&opts, sizeof(opts));
4117		break;
4118	default:
4119		ntfs_warning(mp, "Your version of /sbin/mount_ntfs is not "
4120				"compatible with this driver, ignoring NTFS "
4121				"specific mount options.");
4122		bzero(&opts, sizeof(opts));
4123		break;
4124	}
4125	/*
4126	 * We only allow read/write mounts if the "nobrowse" option was also
4127	 * given.  This is to discourage end users from mounting read/write,
4128	 * but still allows our utilities (such as an OS install) to make
4129	 * changes to an NTFS volume.  Without the "nobrowse" option, we force
4130	 * a read-only mount.  Note that we also check for non-update mounts
4131	 * here.  In the case of an update mount, ntfs_remount() will do the
4132	 * appropriate checking for changing the writability of the mount.
4133	 */
4134	if ((vfs_flags(mp) & MNT_DONTBROWSE) == 0 && !vfs_isupdate(mp))
4135		vfs_setflags(mp, MNT_RDONLY);
4136	/*
4137	 * TODO: For now we do not implement ACLs thus we force the "noowners"
4138	 * mount option.
4139	 */
4140	vfs_setflags(mp, MNT_IGNORE_OWNERSHIP);
4141	/*
4142	 * We do not support MNT_RELOAD yet.  Note, MNT_RELOAD implies the
4143	 * file system is currently read-only.
4144	 */
4145	if (vfs_isreload(mp)) {
4146		ntfs_error(mp, "MNT_RELOAD is not supported yet.");
4147		err = ENOTSUP;
4148		goto unload;
4149	}
4150	/*
4151	 * If this is a remount request, handle this elsewhere.  Note this
4152	 * check has to come after the vfs_isreload() check as vfs_isupdate()
4153	 * is always true when vfs_isreload() is true but this is not true the
4154	 * other way round.
4155	 */
4156	if (vfs_isupdate(mp))
4157		return ntfs_remount(mp, &opts);
4158	/* We know this is a real mount request thus @dev_vn is not NULL. */
4159	dev = vnode_specrdev(dev_vn);
4160	/* Let the VFS do advisory locking for us. */
4161	vfs_setlocklocal(mp);
4162	/*
4163	 * Tell old-style applications that we support VolFS style lookups.
4164	 *
4165	 * Note we do not set MNT_DOVOLFS because then various things start
4166	 * breaking like for example the Finder "Empty Trash" command always
4167	 * fails silently unless we also support va_nchildren in
4168	 * ntfs_vnop_getattr() and set ATTR_DIR_ENTRYCOUNT in our valid
4169	 * directory attributes in ntfs_getattr().
4170	 */
4171	//vfs_setflags(mp, MNT_DOVOLFS);
4172	/*
4173	 * Set the file system id in the fsstat part of the mount structure.
4174	 * We use the device @dev for the first 32-bit value and the dynamic
4175	 * file system number assigned by the VFS to us for the second 32-bit
4176	 * value.  This is important because the VFS uses the first 32-bit
4177	 * value to satisfy the ATTR_CMN_DEVID request in getattrlist() and
4178	 * getvolattrlist() thus it must be the device.
4179	 */
4180	sfs->f_fsid.val[0] = (int32_t)dev;
4181	sfs->f_fsid.val[1] = (int32_t)vfs_typenum(mp);
4182	/*
4183	 * Allocate and initialize an ntfs volume and attach it to the vfs
4184	 * mount.
4185	 */
4186	vol = OSMalloc(sizeof(ntfs_volume), ntfs_malloc_tag);
4187	if (!vol) {
4188		ntfs_error(mp, "Failed to allocate ntfs volume buffer.");
4189		err = ENOMEM;
4190		goto unload;
4191	}
4192	*vol = (ntfs_volume) {
4193		.mp = mp,
4194		.dev = dev,
4195		.dev_vn = dev_vn,
4196		/*
4197		 * Default is group and other have read-only access to files
4198		 * and directories while owner has full access.  Everyone gets
4199		 * directory search and file execute permission.  The latter is
4200		 * so people can execute binaries from NTFS volumes.
4201		 *
4202		 * In reality it does not matter as we set MNT_IGNORE_OWNERSHIP
4203		 * thus everyone can fully access the NTFS volume.  The only
4204		 * reason to set the umask this way is that when people copy
4205		 * files with the Finder or "cp -p" from an NTFS volume to a
4206		 * HFS for example, the file does not end up being world
4207		 * writable.
4208		 */
4209		.fmask = 0022,
4210		.dmask = 0022,
4211		.mft_zone_multiplier = 1,
4212		.on_errors = ON_ERRORS_CONTINUE|ON_ERRORS_FAIL_DIRTY,
4213	};
4214	lck_rw_init(&vol->mftbmp_lock, ntfs_lock_grp, ntfs_lock_attr);
4215	lck_rw_init(&vol->lcnbmp_lock, ntfs_lock_grp, ntfs_lock_attr);
4216	lck_mtx_init(&vol->rename_lock, ntfs_lock_grp, ntfs_lock_attr);
4217	lck_rw_init(&vol->secure_lock, ntfs_lock_grp, ntfs_lock_attr);
4218	lck_spin_init(&vol->security_id_lock, ntfs_lock_grp, ntfs_lock_attr);
4219	lck_mtx_init(&vol->inodes_lock, ntfs_lock_grp, ntfs_lock_attr);
4220	vfs_setfsprivate(mp, vol);
4221	if (vfs_isrdonly(mp))
4222		NVolSetReadOnly(vol);
4223	/* Check for the requested case sensitivity semantics. */
4224	if (opts.flags & NTFS_MNT_OPT_CASE_SENSITIVE) {
4225		ntfs_debug("Mounting volume case sensitive.");
4226		NVolSetCaseSensitive(vol);
4227	}
4228// FIXME: For now disable sparse support as it is not done yet...
4229#if 0
4230	/* By default, enable sparse support. */
4231	NVolSetSparseEnabled(vol);
4232#endif
4233	/* By default, enable compression support. */
4234	NVolSetCompressionEnabled(vol);
4235	blocksize = vfs_devblocksize(mp);
4236	/* We support device sector sizes up to the PAGE_SIZE. */
4237	if (blocksize > PAGE_SIZE) {
4238		ntfs_error(mp, "Device has unsupported sector size (%u).  "
4239				"The maximum supported sector size on this "
4240				"system is %u bytes.", blocksize, PAGE_SIZE);
4241		err = ENOTSUP;
4242		goto err;
4243	}
4244	/*
4245	 * If the block size of the device we are to mount is less than
4246	 * NTFS_BLOCK_SIZE, change the block size to NTFS_BLOCK_SIZE.
4247	 */
4248	if (blocksize < NTFS_BLOCK_SIZE) {
4249		ntfs_debug("Setting device block size to NTFS_BLOCK_SIZE.");
4250		err = ntfs_blocksize_set(mp, dev_vn, NTFS_BLOCK_SIZE, context);
4251		if (err) {
4252			ntfs_error(mp, "Failed to set device block size to "
4253					"NTFS_BLOCK_SIZE (512 bytes) because "
4254					"the DKIOCSETBLOCKSIZE ioctl returned "
4255					"error %d).", err);
4256			goto err;
4257		}
4258		blocksize = NTFS_BLOCK_SIZE;
4259	} else
4260		ntfs_debug("Device block size (%u) is greater than or equal "
4261				"to NTFS_BLOCK_SIZE.", blocksize);
4262	/* Get the size of the device in units of blocksize bytes. */
4263	err = VNOP_IOCTL(dev_vn, DKIOCGETBLOCKCOUNT, (caddr_t)&nr_blocks, 0,
4264			context);
4265	if (err) {
4266		ntfs_error(mp, "Failed to determine the size of the device "
4267				"(DKIOCGETBLOCKCOUNT ioctl returned error "
4268				"%d).", err);
4269		err = ENXIO;
4270		goto err;
4271	}
4272	vol->nr_blocks = nr_blocks;
4273#ifdef DEBUG
4274	{
4275		u64 dev_size, u;
4276		char *suffix;
4277		int shift = 0;
4278		u8 blocksize_shift = ffs(blocksize) - 1;
4279
4280		dev_size = u = (u64)nr_blocks << blocksize_shift;
4281		while ((u >>= 10) > 10 && shift < 40)
4282			shift += 10;
4283		switch (shift) {
4284		case 0:
4285			suffix = "bytes";
4286			break;
4287		case 10:
4288			suffix = "kiB";
4289			break;
4290		case 20:
4291			suffix = "MiB";
4292			break;
4293		case 30:
4294			suffix = "GiB";
4295			break;
4296		default:
4297			suffix = "TiB";
4298			break;
4299		}
4300		ntfs_debug("Device size is %llu%s (%llu bytes).",
4301				(unsigned long long)dev_size >> shift, suffix,
4302				(unsigned long long)dev_size);
4303	}
4304#endif
4305	/* Read the boot sector and return the buffer containing it. */
4306	buf = NULL;
4307	bs = NULL;
4308	err = ntfs_boot_sector_read(vol, cred, &buf, &bs);
4309	if (err) {
4310		ntfs_error(mp, "Not an NTFS volume.");
4311		goto err;
4312	}
4313	/*
4314	 * Extract the data from the boot sector and setup the ntfs volume
4315	 * using it.
4316	 */
4317	err = ntfs_boot_sector_parse(vol, bs);
4318	err2 = buf_unmap(buf);
4319	if (err2)
4320		ntfs_error(mp, "Failed to unmap buffer of boot sector (error "
4321				"%d).", err2);
4322	buf_brelse(buf);
4323	if (err) {
4324		ntfs_error(mp, "%s NTFS file system.",
4325				err == ENOTSUP ? "Unsupported" : "Invalid");
4326		goto err;
4327	}
4328	/*
4329	 * If the boot sector indicates a sector size bigger than the current
4330	 * device block size, switch the device block size to the sector size.
4331	 * TODO: It may be possible to support this case even when the set
4332	 * below fails, we would just be breaking up the i/o for each sector
4333	 * into multiple blocks for i/o purposes but otherwise it should just
4334	 * work.  However it is safer to leave disabled until someone hits this
4335	 * error message and then we can get them to try it without the setting
4336	 * so we know for sure that it works.  We would then want to set
4337	 * vol->sector_size* to the current blocksize or add vol->blocksize*...
4338	 * No, cannot do that or will break directory operations.  We will need
4339	 * to move to using vol->blocksize* instead of vol->sector_size in most
4340	 * places and stick with vol->sector_size where we really want its
4341	 * actual value.
4342	 */
4343	if (vol->sector_size > blocksize) {
4344		ntfs_debug("Setting device block size to sector size.");
4345		err = ntfs_blocksize_set(mp, dev_vn, vol->sector_size, context);
4346		if (err) {
4347			ntfs_error(mp, "Failed to set device block size to "
4348					"sector size (%u bytes) because "
4349					"the DKIOCSETBLOCKSIZE ioctl returned "
4350					"error %d).", vol->sector_size, err);
4351			goto err;
4352		}
4353		blocksize = vol->sector_size;
4354	}
4355	/* Initialize the cluster and mft allocators. */
4356	ntfs_setup_allocators(vol);
4357	/*
4358	 * Get the $MFT inode and bootstrap the volume sufficiently so we can
4359	 * get other inodes and map (extent) mft records.
4360	 */
4361	err = ntfs_mft_inode_get(vol);
4362	if (err)
4363		goto err;
4364	lck_mtx_lock(&ntfs_lock);
4365	if (NVolCompressionEnabled(vol)) {
4366		/*
4367		 * The current mount may be a compression user if the cluster
4368		 * size is less than or equal to 4kiB.
4369		 */
4370		if (vol->cluster_size <= 4096) {
4371			if (!ntfs_compression_buffer) {
4372				ntfs_compression_buffer = OSMalloc(
4373						ntfs_compression_buffer_size,
4374						ntfs_malloc_tag);
4375				if (!ntfs_compression_buffer) {
4376					// FIXME: We could continue with
4377					// compression disabled.  But do we
4378					// want to do that given the system is
4379					// that low on memory?
4380					ntfs_error(mp, "Failed to allocate "
4381							"buffer for "
4382							"compression engine.");
4383					NVolClearCompressionEnabled(vol);
4384					lck_mtx_unlock(&ntfs_lock);
4385					goto err;
4386				}
4387			}
4388			ntfs_compression_users++;
4389		} else {
4390			ntfs_debug("Disabling compression because the cluster "
4391					"size of %u bytes is above the "
4392					"allowed maximum of 4096 bytes.",
4393					(unsigned)vol->cluster_size);
4394			NVolClearCompressionEnabled(vol);
4395		}
4396	}
4397	/* Generate the global default upcase table if necessary. */
4398	if (!ntfs_default_upcase) {
4399		ntfs_default_upcase = OSMalloc(ntfs_default_upcase_size,
4400				ntfs_malloc_tag);
4401		if (!ntfs_default_upcase) {
4402			// FIXME: We could continue without a default upcase
4403			// table.  But do we want to do that given the system
4404			// is that low on memory?
4405			ntfs_error(mp, "Failed to allocate memory for default "
4406					"upcase table.");
4407			lck_mtx_unlock(&ntfs_lock);
4408			err = ENOMEM;
4409			goto err;
4410		}
4411		ntfs_upcase_table_generate(ntfs_default_upcase,
4412				ntfs_default_upcase_size);
4413	}
4414	/*
4415	 * Temporarily take a reference on the default upcase table to avoid
4416	 * race conditions with concurrent (u)mounts.
4417	 */
4418	ntfs_default_upcase_users++;
4419	lck_mtx_unlock(&ntfs_lock);
4420	/* Process the system inodes. */
4421	err = ntfs_system_inodes_get(vol);
4422	/*
4423	 * We now have the volume upcase table (either having read it from disk
4424	 * or using the default, in which case we have taken a reference on the
4425	 * default upcase table) or there was an error and we are going to bail
4426	 * out.  In any case, we can drop our temporary reference on the
4427	 * default upcase table and throw it away if we had the only reference.
4428	 */
4429	lck_mtx_lock(&ntfs_lock);
4430	if (!--ntfs_default_upcase_users) {
4431		OSFree(ntfs_default_upcase, ntfs_default_upcase_size,
4432				ntfs_malloc_tag);
4433		ntfs_default_upcase = NULL;
4434	}
4435	lck_mtx_unlock(&ntfs_lock);
4436	/* If we failed to process the system inodes, abort the mount. */
4437	if (err) {
4438		ntfs_error(mp, "Failed to load system files (error %d).", err);
4439		goto err;
4440	}
4441	/*
4442	 * Determine the number of free clusters and cache it in the volume (in
4443	 * @vol->nr_free_clusters).
4444	 */
4445	err = ntfs_set_nr_free_clusters(vol);
4446	if (err)
4447		goto err;
4448	/*
4449	 * Determine the number of both total and free mft records and cache
4450	 * them in the volume (in @vol->nr_mft_records and
4451	 * @vol->nr_free_mft_records, respectively).
4452	 */
4453	err = ntfs_set_nr_mft_records(vol);
4454	if (err)
4455		goto err;
4456	/*
4457	 * Finally, determine the statfs information for the volume and cache
4458	 * it in the vfs mount structure.
4459	 */
4460	ntfs_statfs(vol, sfs);
4461	ntfs_debug("Done.");
4462	return 0;
4463unload:
4464	/* Ensure NTFS_MP(mp) is NULL so it is safe to call ntfs_unmount(). */
4465	vfs_setfsprivate(mp, NULL);
4466err:
4467	ntfs_error(mp, "Mount failed (error %d).", err);
4468	/*
4469	 * ntfs_unmount() will clean up everything we did until we encountered
4470	 * the error condition including calling OSKextReleaseKextWithLoadTag().
4471	 *
4472	 * Note we need to pass MNT_FORCE to ensure ntfs_unmount() definitely
4473	 * ends up calling OSKextReleaseKextWithLoadTag().
4474	 */
4475	ntfs_unmount(mp, MNT_FORCE, context);
4476	return err;
4477}
4478
4479/**
4480 * ntfs_root - get the vnode of the root directory of an ntfs file system
4481 * @mp:		mount point of ntfs file system
4482 * @vpp:	destination pointer for the obtained file system root vnode
4483 * @context:	vfs context
4484 *
4485 * The VFS calls this via VFS_ROOT() when it wants to have the root directory
4486 * of a mounted ntfs volume.  We already have the root vnode/inode due to
4487 * ntfs_mount() so just get an iocount reference on the vnode and return the
4488 * vnode.
4489 *
4490 * Return 0 on success and errno on error.
4491 *
4492 * Warning: We get a panic() if we return error here!  Due to the function
4493 * checkdirs() which is called after ntfs_mount() but before VFS_START() (which
4494 * we do not implement).
4495 */
4496static int ntfs_root(mount_t mp, struct vnode **vpp,
4497		vfs_context_t context __unused)
4498{
4499	ntfs_volume *vol = NTFS_MP(mp);
4500	vnode_t vn;
4501	int err;
4502
4503	ntfs_debug("Entering.");
4504	if (!vol || !vol->root_ni || !vol->root_ni->vn)
4505		panic("%s(): Mount and/or root inode and/or vnode is not "
4506				"loaded.\n", __FUNCTION__);
4507	vn = vol->root_ni->vn;
4508	/*
4509	 * Simulate an ntfs_inode_get() by taking an iocount reference on the
4510	 * vnode of the ntfs inode.  It is ok to do this here because we know
4511	 * the root directory is loaded and attached to the ntfs volume (thus
4512	 * we already hold a use count reference on the vnode).
4513	 */
4514	err = vnode_get(vn);
4515	if (!err) {
4516		*vpp = vn;
4517		ntfs_debug("Done.");
4518	} else {
4519		*vpp = NULL;
4520		ntfs_error(mp, "Cannot return root vnode because vnode_get() "
4521				"failed (error %d).", err);
4522	}
4523	return err;
4524}
4525
4526/**
4527 * ntfs_vget - get the vnode corresponding to an inode number
4528 * @mp:		mount point of ntfs file system
4529 * @ino:	inode number / mft record number to obtain
4530 * @vpp:	destination pointer for the obtained vnode
4531 * @context:	vfs context
4532 *
4533 * Volfs and other strange places where no further path or name context is
4534 * available call this via VFS_VGET() to obtain the vnode with the inode number
4535 * @ino.
4536 *
4537 * The vnode is returned with an iocount reference.
4538 *
4539 * Return 0 on success and errno on error.
4540 *
4541 * FIXME: The only potential problem is that using only the inode / mft record
4542 * number only allows ntfs_vget() to return the file or directory vnode itself
4543 * but not for example the vnode of a named stream or other attribute.  Perhaps
4544 * this does not matter for volfs in which case everything is fine...
4545 */
4546static int ntfs_vget(mount_t mp, ino64_t ino, struct vnode **vpp,
4547		vfs_context_t context __unused)
4548{
4549	ntfs_inode *ni;
4550	errno_t err;
4551
4552	ntfs_debug("Entering for ino 0x%llx.", (unsigned long long)ino);
4553	/*
4554	 * Remove all NTFS core system files from the name space so we do not
4555	 * need to worry about users damaging a volume by writing to them or
4556	 * deleting/renaming them and so that we can return fsRtParID (1) as
4557	 * the inode number of the parent of the volume root directory and
4558	 * fsRtDirID (2) as the inode number of the volume root directory which
4559	 * are both expected by Carbon and various applications.
4560	 *
4561	 * Note we thus have to remap inode number 2 (fsRtDirID) to FILE_root
4562	 * here.
4563	 */
4564	if (ino < FILE_first_user) {
4565		if (ino != 2) {
4566			ntfs_debug("Removing core NTFS system file (mft_no "
4567					"0x%x) from name space.",
4568					(unsigned)ino);
4569			err = ENOENT;
4570			goto err;
4571		}
4572		/*
4573		 * @ino is 2, i.e. fsRtDirID, thus return the vnode of the root
4574		 * directory inode (FILE_root).
4575		 *
4576		 * First try to use the already loaded root directory inode and
4577		 * if that fails for some reason go and get it the slow way.
4578		 */
4579		ni = NTFS_MP(mp)->root_ni;
4580		if (ni) {
4581			err = vnode_get(ni->vn);
4582			if (!err)
4583				goto done;
4584		}
4585		ino = FILE_root;
4586	}
4587	err = ntfs_inode_get(NTFS_MP(mp), ino, FALSE, LCK_RW_TYPE_SHARED, &ni,
4588			NULL, NULL);
4589	if (!err) {
4590		lck_rw_unlock_shared(&ni->lock);
4591done:
4592		ntfs_debug("Done.");
4593		*vpp = ni->vn;
4594		return err;
4595	}
4596err:
4597	*vpp = NULL;
4598	if (err != ENOENT)
4599		ntfs_error(mp, "Failed to get mft_no 0x%llx (error %d).",
4600				(unsigned long long)ino, err);
4601	else
4602		ntfs_debug("Mft_no 0x%llx does not exist, returning ENOENT.",
4603				(unsigned long long)ino);
4604	return err;
4605}
4606
4607/**
4608 * ntfs_getattr - obtain information about a mounted ntfs volume
4609 * @mp:		mount point of ntfs file system
4610 * @fsa:	requested information and destination in which to return it
4611 * @context:	vfs context
4612 *
4613 * The VFS calls this via VFS_GETATTR() when it wants to obtain some
4614 * information about the mounted ntfs volume described by the mount @mp.
4615 *
4616 * Which information is requested is described by the vfs attribute structure
4617 * pointed to by @fsa, which is also the destination pointer in which the
4618 * requested information is returned.
4619 *
4620 * Return 0 on success and errno on error.
4621 *
4622 * Note: Further details are in the man page for the getattrlist function and
4623 * in the header files xnu/bsd/sys/{mount,attr}.h.
4624 */
4625static int ntfs_getattr(mount_t mp, struct vfs_attr *fsa,
4626		vfs_context_t context __unused)
4627{
4628	u64 nr_clusters, nr_free_clusters, nr_used_mft_records;
4629	u64 nr_free_mft_records;
4630	ntfs_volume *vol = NTFS_MP(mp);
4631	struct vfsstatfs *sfs = vfs_statfs(mp);
4632	ntfs_inode *ni;
4633
4634	ntfs_debug("Entering.");
4635	/* Get a fully consistent snapshot of this point in time. */
4636	lck_rw_lock_shared(&vol->mftbmp_lock);
4637	lck_rw_lock_shared(&vol->lcnbmp_lock);
4638	nr_clusters = vol->nr_clusters;
4639	nr_free_clusters = vol->nr_free_clusters;
4640	lck_rw_unlock_shared(&vol->lcnbmp_lock);
4641	nr_free_mft_records = vol->nr_free_mft_records;
4642	nr_used_mft_records = vol->nr_mft_records - nr_free_mft_records;
4643	lck_rw_unlock_shared(&vol->mftbmp_lock);
4644	/* Number of file system objects on volume (at this point in time). */
4645	VFSATTR_RETURN(fsa, f_objcount, nr_used_mft_records);
4646	/*
4647	 * Number of files on volume (at this point in time).
4648	 * FIXME: We cannot easily support this and the number of directories,
4649	 * below) as these two fields require reading the entirety of
4650	 * $MFT/$DATA, and checking each record if it is in use and if so,
4651	 * check if it is a file or directory and then return that here.  Note
4652	 * we would take all special files as files, and only real directories
4653	 * as directories.  Instead of reading all of $MFT/$DATA it may be
4654	 * worth only reading mft records that are set as in use in the
4655	 * $MFT/$BITMAP.  Also, need to check if the mft record is a base mft
4656	 * record or not and only if it is one should it be marked as
4657	 * file/directory.  Or should it be counted towards files, just like
4658	 * other special files?
4659	 *
4660	 * A quote from ZFS:
4661	 *
4662	 * <quote>Carbon depends on f_filecount and f_dircount so make up some
4663	 * values based on total objects.</quote>
4664	 *
4665	 * Thus at least for now we behave like ZFS does.
4666	 */
4667	VFSATTR_RETURN(fsa, f_filecount, nr_used_mft_records -
4668			(nr_used_mft_records / 4));
4669	/* Number of directories on volume (at this point in time). */
4670	VFSATTR_RETURN(fsa, f_dircount, nr_used_mft_records / 4);
4671	/*
4672	 * Maximum number of file system objects given infinite free space.
4673	 * The actual number will be likely smaller as it is limited by the
4674	 * amount of free space but both HFS and ZFS return the theoretical
4675	 * maximum so we do the same.
4676	 */
4677	VFSATTR_RETURN(fsa, f_maxobjcount, NTFS_MAX_NR_MFT_RECORDS);
4678	/*
4679	 * Block size for the below size values.  We use the cluster size of
4680	 * the volume as that means we do not convert to a different unit.
4681	 * Alternatively, we could return the sector size instead.
4682	 */
4683	VFSATTR_RETURN(fsa, f_bsize, vol->cluster_size);
4684	/* Optimal transfer block size (in bytes). */
4685	VFSATTR_RETURN(fsa, f_iosize, ubc_upl_maxbufsize());
4686	/* Total data blocks in file system (in units of @f_bsize). */
4687	VFSATTR_RETURN(fsa, f_blocks, nr_clusters);
4688	/* Free data blocks in file system (in units of @f_bsize). */
4689	VFSATTR_RETURN(fsa, f_bfree, nr_free_clusters);
4690	/*
4691	 * Free blocks available to non-superuser (in units of @f_bsize), same
4692	 * as the free data blocks as NTFS, like ZFS, does not support root
4693	 * reservation.
4694	 */
4695	VFSATTR_RETURN(fsa, f_bavail, nr_free_clusters);
4696	/* Blocks in use (in units of @f_bsize). */
4697	VFSATTR_RETURN(fsa, f_bused, nr_clusters - nr_free_clusters);
4698	/*
4699	 * Free inodes in file system (at this point in time).  This is made up
4700	 * of both the current number of free mft records and the amount of
4701	 * available free space for new mft records.  The number is then capped
4702	 * to the maximum allowed number of mft records.  This is what ZFS
4703	 * does, too.
4704	 */
4705	nr_free_mft_records += (nr_free_clusters << vol->cluster_size_shift) >>
4706			vol->mft_record_size_shift;
4707	if (nr_free_mft_records > NTFS_MAX_NR_MFT_RECORDS - nr_used_mft_records)
4708		nr_free_mft_records = NTFS_MAX_NR_MFT_RECORDS -
4709			nr_used_mft_records;
4710	VFSATTR_RETURN(fsa, f_ffree, nr_free_mft_records);
4711	/*
4712	 * Number of inodes in file system (at this point in time).  This is
4713	 * the number of available files we returned above plus the number of
4714	 * mft records currently in use.
4715	 */
4716	VFSATTR_RETURN(fsa, f_files, nr_used_mft_records + nr_free_mft_records);
4717	/*
4718	 * We set the file system id in the statfs part of the mount structure
4719	 * in ntfs_mount(), so just return that.
4720	 */
4721	VFSATTR_RETURN(fsa, f_fsid, sfs->f_fsid);
4722	/*
4723	 * The mount syscall sets the f_owner in the statfs structure of the
4724	 * mount structure to the uid of the user performing the mount, so just
4725	 * return that.
4726	 */
4727	VFSATTR_RETURN(fsa, f_owner, sfs->f_owner);
4728	/*
4729	 * Optional features supported by the volume.  Note, ->valid indicates
4730	 * which bits in the ->capabilities are valid whilst ->capabilities
4731	 * indicates the capabilities of the driver implementation.  An
4732	 * example: Ntfs is journalled but we do not implement journalling so
4733	 * we do not set that bit in ->capabilities, but we do set it in
4734	 * ->valid thus stating that we do not support journalling.
4735	 */
4736	if (VFSATTR_IS_ACTIVE(fsa, f_capabilities)) {
4737		vol_capabilities_attr_t *ca = &fsa->f_capabilities;
4738
4739		/* Volume format capabilities. */
4740		ca->capabilities[VOL_CAPABILITIES_FORMAT] =
4741				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
4742				VOL_CAP_FMT_SYMBOLICLINKS |
4743				VOL_CAP_FMT_HARDLINKS |
4744				VOL_CAP_FMT_JOURNAL |
4745				/* We do not support journalling. */
4746				//VOL_CAP_FMT_JOURNAL_ACTIVE |
4747				VOL_CAP_FMT_SPARSE_FILES |
4748				VOL_CAP_FMT_ZERO_RUNS |
4749				/*
4750				 * Whether to be case sensitive or not is a
4751				 * mount option.
4752				 */
4753				(NVolCaseSensitive(vol) ?
4754					VOL_CAP_FMT_CASE_SENSITIVE : 0) |
4755				VOL_CAP_FMT_CASE_PRESERVING |
4756				VOL_CAP_FMT_FAST_STATFS |
4757				VOL_CAP_FMT_2TB_FILESIZE |
4758				// TODO: What do we need to do to implement
4759				// open deny modes?  And do we want to?
4760				// VOL_CAP_FMT_OPENDENYMODES |
4761				VOL_CAP_FMT_HIDDEN_FILES |
4762				/*
4763				 * VOL_CAP_FMT_PATH_FROM_ID is disabled until
4764				 * <rdar://problem/10685403> is fixed.  Use
4765				 * <rdar://problem/10685404> to re-enable.
4766				 */
4767				// VOL_CAP_FMT_PATH_FROM_ID |
4768				0;
4769		ca->valid[VOL_CAPABILITIES_FORMAT] =
4770				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
4771				VOL_CAP_FMT_SYMBOLICLINKS |
4772				VOL_CAP_FMT_HARDLINKS |
4773				VOL_CAP_FMT_JOURNAL |
4774				VOL_CAP_FMT_JOURNAL_ACTIVE |
4775				VOL_CAP_FMT_NO_ROOT_TIMES |
4776				VOL_CAP_FMT_SPARSE_FILES |
4777				VOL_CAP_FMT_ZERO_RUNS |
4778				VOL_CAP_FMT_CASE_SENSITIVE |
4779				VOL_CAP_FMT_CASE_PRESERVING |
4780				VOL_CAP_FMT_FAST_STATFS |
4781				VOL_CAP_FMT_2TB_FILESIZE |
4782				VOL_CAP_FMT_OPENDENYMODES |
4783				VOL_CAP_FMT_HIDDEN_FILES |
4784				VOL_CAP_FMT_PATH_FROM_ID |
4785				0;
4786		/* File system driver capabilities. */
4787		ca->capabilities[VOL_CAPABILITIES_INTERFACES] =
4788				/* TODO: These are not implemented yet. */
4789				// VOL_CAP_INT_SEARCHFS |
4790				VOL_CAP_INT_ATTRLIST |
4791				// VOL_CAP_INT_NFSEXPORT |
4792				// VOL_CAP_INT_READDIRATTR |
4793				// VOL_CAP_INT_EXCHANGEDATA |
4794				/*
4795				 * Nothing supports copyfile in current xnu and
4796				 * it is not documented so we do not support it
4797				 * either.
4798				 */
4799				// VOL_CAP_INT_COPYFILE |
4800				// VOL_CAP_INT_ALLOCATE |
4801				VOL_CAP_INT_VOL_RENAME |
4802				VOL_CAP_INT_ADVLOCK |
4803				VOL_CAP_INT_FLOCK |
4804				// VOL_CAP_INT_EXTENDED_SECURITY |
4805				// VOL_CAP_INT_USERACCESS |
4806				// VOL_CAP_INT_MANLOCK |
4807				VOL_CAP_INT_NAMEDSTREAMS |
4808				VOL_CAP_INT_EXTENDED_ATTR |
4809				0;
4810		ca->valid[VOL_CAPABILITIES_INTERFACES] =
4811				VOL_CAP_INT_SEARCHFS |
4812				VOL_CAP_INT_ATTRLIST |
4813				VOL_CAP_INT_NFSEXPORT |
4814				VOL_CAP_INT_READDIRATTR |
4815				VOL_CAP_INT_EXCHANGEDATA |
4816				VOL_CAP_INT_COPYFILE |
4817				VOL_CAP_INT_ALLOCATE |
4818				VOL_CAP_INT_VOL_RENAME |
4819				VOL_CAP_INT_ADVLOCK |
4820				VOL_CAP_INT_FLOCK |
4821				VOL_CAP_INT_EXTENDED_SECURITY |
4822				VOL_CAP_INT_USERACCESS |
4823				VOL_CAP_INT_MANLOCK |
4824				VOL_CAP_INT_NAMEDSTREAMS |
4825				VOL_CAP_INT_EXTENDED_ATTR |
4826				0;
4827		/* Reserved, set to zero. */
4828		ca->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
4829		ca->valid[VOL_CAPABILITIES_RESERVED1] = 0;
4830		ca->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;
4831		ca->valid[VOL_CAPABILITIES_RESERVED2] = 0;
4832		VFSATTR_SET_SUPPORTED(fsa, f_capabilities);
4833	}
4834	/*
4835	 * Attributes supported by the volume.  Note, ->validattr indicates the
4836	 * capabilities of the file system driver whilst ->nativeattr indicates
4837	 * the native capabilities of the volume format itself.
4838	 */
4839	if (VFSATTR_IS_ACTIVE(fsa, f_attributes)) {
4840		vol_attributes_attr_t *aa = &fsa->f_attributes;
4841
4842		/*
4843		 * Common attribute group (these attributes apply to all of the
4844		 * below groups).
4845		 */
4846		aa->validattr.commonattr =
4847				ATTR_CMN_NAME |
4848				/*
4849				 * ATTR_CMN_DEVID, ATTR_CMN_OBJTYPE, and
4850				 * ATTR_CMN_OBJTAG are supplied by the VFS.
4851				 */
4852				ATTR_CMN_DEVID |
4853				ATTR_CMN_FSID |
4854				ATTR_CMN_OBJTYPE |
4855				ATTR_CMN_OBJTAG |
4856				ATTR_CMN_OBJID |
4857				ATTR_CMN_OBJPERMANENTID |
4858				ATTR_CMN_PAROBJID |
4859				ATTR_CMN_SCRIPT |
4860				ATTR_CMN_CRTIME |
4861				ATTR_CMN_MODTIME |
4862				ATTR_CMN_CHGTIME |
4863				ATTR_CMN_ACCTIME |
4864				ATTR_CMN_BKUPTIME |
4865				/*
4866				 * Supplied by the VFS via a call to
4867				 * vn_getxattr(XATTR_FINDERINFO_NAME).
4868				 */
4869				ATTR_CMN_FNDRINFO |
4870				ATTR_CMN_OWNERID |
4871				ATTR_CMN_GRPID |
4872				ATTR_CMN_ACCESSMASK |
4873				ATTR_CMN_FLAGS |
4874				//ATTR_CMN_NAMEDATTRCOUNT /* not implemented */ |
4875				//ATTR_CMN_NAMEDATTRLIST /* not implemented */ |
4876				/*
4877				 * Supplied by the VFS via calls to
4878				 * vnode_authorize().
4879				 */
4880				ATTR_CMN_USERACCESS |
4881				//ATTR_CMN_EXTENDED_SECURITY |
4882				//ATTR_CMN_UUID |
4883				//ATTR_CMN_GRPUUID |
4884				ATTR_CMN_FILEID |
4885				ATTR_CMN_PARENTID;
4886		aa->nativeattr.commonattr =
4887				ATTR_CMN_NAME |
4888				ATTR_CMN_DEVID |
4889				ATTR_CMN_FSID |
4890				ATTR_CMN_OBJTYPE |
4891				ATTR_CMN_OBJTAG |
4892				ATTR_CMN_OBJID |
4893				ATTR_CMN_OBJPERMANENTID |
4894				ATTR_CMN_PAROBJID |
4895				ATTR_CMN_SCRIPT |
4896				ATTR_CMN_CRTIME |
4897				ATTR_CMN_MODTIME |
4898				ATTR_CMN_CHGTIME |
4899				ATTR_CMN_ACCTIME |
4900				ATTR_CMN_BKUPTIME |
4901				ATTR_CMN_FNDRINFO |
4902				ATTR_CMN_OWNERID |
4903				ATTR_CMN_GRPID |
4904				ATTR_CMN_ACCESSMASK |
4905				ATTR_CMN_FLAGS |
4906				ATTR_CMN_NAMEDATTRCOUNT |
4907				ATTR_CMN_NAMEDATTRLIST |
4908				ATTR_CMN_USERACCESS |
4909				ATTR_CMN_EXTENDED_SECURITY |
4910				ATTR_CMN_UUID |
4911				ATTR_CMN_GRPUUID |
4912				ATTR_CMN_FILEID |
4913				ATTR_CMN_PARENTID;
4914		/* Volume attribute group. */
4915		aa->validattr.volattr =
4916				/*
4917				 * ATTR_VOL_FSTYPE, ATTR_VOL_MOUNTPOINT,
4918				 * ATTR_VOL_MOUNTFLAGS, ATTR_VOL_MOUNTEDDEVICE,
4919				 * and ATTR_VOL_ENCODINGSUSED are supplied by
4920				 * the VFS.
4921				 */
4922				ATTR_VOL_FSTYPE |
4923				ATTR_VOL_SIGNATURE |
4924				ATTR_VOL_SIZE |
4925				ATTR_VOL_SPACEFREE |
4926				ATTR_VOL_SPACEAVAIL |
4927				ATTR_VOL_MINALLOCATION |
4928				ATTR_VOL_ALLOCATIONCLUMP |
4929				ATTR_VOL_IOBLOCKSIZE |
4930				ATTR_VOL_OBJCOUNT |
4931				ATTR_VOL_FILECOUNT |
4932				ATTR_VOL_DIRCOUNT |
4933				ATTR_VOL_MAXOBJCOUNT |
4934				ATTR_VOL_MOUNTPOINT |
4935				ATTR_VOL_NAME |
4936				ATTR_VOL_MOUNTFLAGS |
4937				ATTR_VOL_MOUNTEDDEVICE |
4938				ATTR_VOL_ENCODINGSUSED |
4939				ATTR_VOL_CAPABILITIES |
4940				ATTR_VOL_ATTRIBUTES;
4941		aa->nativeattr.volattr =
4942				ATTR_VOL_FSTYPE |
4943				ATTR_VOL_SIGNATURE |
4944				ATTR_VOL_SIZE |
4945				ATTR_VOL_SPACEFREE |
4946				ATTR_VOL_SPACEAVAIL |
4947				ATTR_VOL_MINALLOCATION |
4948				ATTR_VOL_ALLOCATIONCLUMP |
4949				ATTR_VOL_IOBLOCKSIZE |
4950				ATTR_VOL_OBJCOUNT |
4951				/*
4952				 * NTFS does not provide ATTR_VOL_FILECOUNT and
4953				 * ATTR_VOL_DIRCOUNT on disk.
4954				 */
4955				//ATTR_VOL_FILECOUNT |
4956				//ATTR_VOL_DIRCOUNT |
4957				ATTR_VOL_MAXOBJCOUNT |
4958				ATTR_VOL_MOUNTPOINT |
4959				ATTR_VOL_NAME |
4960				ATTR_VOL_MOUNTFLAGS |
4961				ATTR_VOL_MOUNTEDDEVICE |
4962				ATTR_VOL_ENCODINGSUSED |
4963				ATTR_VOL_CAPABILITIES |
4964				ATTR_VOL_ATTRIBUTES;
4965		/* Directory attribute group. */
4966		aa->validattr.dirattr =
4967				/*
4968				 * ATTR_DIR_LINKCOUNT and ATTR_DIR_ENTRYCOUNT
4969				 * are hard to work out on NTFS and the
4970				 * getattrlist(2) man page states that a file
4971				 * system should not implement
4972				 * ATTR_DIR_LINKCOUNT in this case.  We choose
4973				 * not to implement ATTR_DIR_ENTRYCOUNT either.
4974				 */
4975				//ATTR_DIR_LINKCOUNT |
4976				//ATTR_DIR_ENTRYCOUNT |
4977				/* This is supplied by the VFS. */
4978				ATTR_DIR_MOUNTSTATUS;
4979		aa->nativeattr.dirattr =
4980				/*
4981				 * NTFS does not provide ATTR_DIR_LINKCOUNT and
4982				 * ATTR_DIR_ENTRYCOUNT on disk.
4983				 */
4984				//ATTR_DIR_LINKCOUNT |
4985				//ATTR_DIR_ENTRYCOUNT |
4986				ATTR_DIR_MOUNTSTATUS;
4987		/* File attribute group. */
4988		aa->validattr.fileattr =
4989				ATTR_FILE_LINKCOUNT |
4990				ATTR_FILE_TOTALSIZE |
4991				ATTR_FILE_ALLOCSIZE |
4992				ATTR_FILE_IOBLOCKSIZE |
4993				/* This is supplied by the VFS. */
4994				ATTR_FILE_CLUMPSIZE |
4995				ATTR_FILE_DEVTYPE |
4996				//ATTR_FILE_FILETYPE |
4997				//ATTR_FILE_FORKCOUNT |
4998				//ATTR_FILE_FORKLIST |
4999				ATTR_FILE_DATALENGTH |
5000				ATTR_FILE_DATAALLOCSIZE |
5001				//ATTR_FILE_DATAEXTENTS |
5002				/*
5003				 * Both ATTR_FILE_RSRCLENGTH and
5004				 * ATTR_FILE_RSRCALLOCSIZE are supplied by the
5005				 * VFS via a call to
5006				 * vn_getxattr(XATTR_RESOURCEFORK_NAME).
5007				 *
5008				 * FIXME: The VFS supplies
5009				 * ATTR_FILE_RSRCALLOCSIZE by rounding up
5010				 * ATTR_FILE_RSRCLENGTH to the the next logical
5011				 * block size boundary (for NTFS the cluster
5012				 * this is the next cluster boundary) which is
5013				 * not correct if the resource fork named
5014				 * stream is sparse which can be the case on
5015				 * NTFS.
5016				 */
5017				ATTR_FILE_RSRCLENGTH |
5018				ATTR_FILE_RSRCALLOCSIZE |
5019				//ATTR_FILE_RSRCEXTENTS |
5020				0;
5021		aa->nativeattr.fileattr =
5022				ATTR_FILE_LINKCOUNT |
5023				/*
5024				 * NTFS does not provide ATTR_FILE_TOTALSIZE
5025				 * and ATTR_FILE_ALLOCSIZE on disk or at least
5026				 * not in an easy to determine way.
5027				 */
5028				//ATTR_FILE_TOTALSIZE |
5029				//ATTR_FILE_ALLOCSIZE |
5030				ATTR_FILE_IOBLOCKSIZE |
5031				ATTR_FILE_CLUMPSIZE /* obsolete */ |
5032				ATTR_FILE_DEVTYPE |
5033				/*
5034				 * VFS does not allow setting of
5035				 * ATTR_FILE_FILETYPE, ATTR_FILE_FORKCOUNT,
5036				 * ATTR_FILE_FORKLIST, ATTR_FILE_DATAEXTENTS,
5037				 * and ATTR_FILE_RSRCEXTENTS.
5038				 */
5039				//ATTR_FILE_FILETYPE /* always zero */ |
5040				//ATTR_FILE_FORKCOUNT |
5041				//ATTR_FILE_FORKLIST |
5042				ATTR_FILE_DATALENGTH |
5043				ATTR_FILE_DATAALLOCSIZE |
5044				//ATTR_FILE_DATAEXTENTS /* obsolete, HFS-specific */ |
5045				ATTR_FILE_RSRCLENGTH |
5046				ATTR_FILE_RSRCALLOCSIZE |
5047				//ATTR_FILE_RSRCEXTENTS /* obsolete, HFS-specific */ |
5048				0;
5049		/* Fork attribute group. */
5050		aa->validattr.forkattr =
5051				/*
5052				 * getattrlist(2) man page says that we should
5053				 * not implement any fork attributes.
5054				 */
5055				//ATTR_FORK_TOTALSIZE |
5056				//ATTR_FORK_ALLOCSIZE |
5057				0;
5058		aa->nativeattr.forkattr =
5059				/* VFS does not allow setting of these. */
5060				//ATTR_FORK_TOTALSIZE |
5061				//ATTR_FORK_ALLOCSIZE |
5062				0;
5063		VFSATTR_SET_SUPPORTED(fsa, f_attributes);
5064	}
5065	ni = vol->root_ni;
5066	lck_rw_lock_shared(&ni->lock);
5067	/*
5068	 * For the volume times, we use the corresponding times from the
5069	 * standard information attribute of the root directory inode.
5070	 */
5071	/* Creation time. */
5072	VFSATTR_RETURN(fsa, f_create_time, ni->creation_time);
5073	/*
5074	 * Last modification time.  We use the last mft change time as this
5075	 * changes every time the directory is changed in any way, thus it
5076	 * reflects the volume change time the best.
5077	 */
5078	VFSATTR_RETURN(fsa, f_modify_time, ni->last_mft_change_time);
5079	/* Time of last access. */
5080	VFSATTR_RETURN(fsa, f_access_time, ni->last_access_time);
5081	/* Time of last backup. */
5082	if (VFSATTR_IS_ACTIVE(fsa, f_backup_time)) {
5083		if (NInoValidBackupTime(ni)) {
5084			VFSATTR_RETURN(fsa, f_backup_time, ni->backup_time);
5085			lck_rw_unlock_shared(&ni->lock);
5086		} else {
5087			errno_t err;
5088
5089			if (!lck_rw_lock_shared_to_exclusive(&ni->lock))
5090				lck_rw_lock_exclusive(&ni->lock);
5091			/*
5092			 * Load the AFP_AfpInfo stream and initialize the
5093			 * backup time and Finder Info (if they are not already
5094			 * valid).
5095			 */
5096			err = ntfs_inode_afpinfo_read(ni);
5097			if (err) {
5098				ntfs_error(vol->mp, "Failed to obtain AfpInfo "
5099						"for mft_no 0x%llx (error "
5100						"%d).",
5101						(unsigned long long)ni->mft_no,
5102						err);
5103				lck_rw_unlock_exclusive(&ni->lock);
5104				return err;
5105			}
5106			if (!NInoValidBackupTime(ni))
5107				panic("%s(): !NInoValidBackupTime(base_ni)\n",
5108						__FUNCTION__);
5109			VFSATTR_RETURN(fsa, f_backup_time, ni->backup_time);
5110			lck_rw_unlock_exclusive(&ni->lock);
5111		}
5112	} else
5113		lck_rw_unlock_shared(&ni->lock);
5114	/*
5115	 * File system subtype.  Set this to the ntfs version encoded into 16
5116	 * bits, the high 8 bits being the major version and the low 8 bits
5117	 * being the minor version.  This is then extended to 32 bits, thus the
5118	 * higher 16 bits are currently zero.  The latter could be used at a
5119	 * later point in time to return more information about the mount
5120	 * options of the mounted volume (e.g. enable/disable sparse creation,
5121	 * compression, encryption, quotas, acls, usnjournal, case sensitivity,
5122	 * etc).
5123	 */
5124	VFSATTR_RETURN(fsa, f_fssubtype, (u32)vol->major_ver << 8 |
5125			vol->minor_ver);
5126	/* NUL terminated volume name in decomposed UTF-8. */
5127	if (VFSATTR_IS_ACTIVE(fsa, f_vol_name)) {
5128		/* Copy the cached name from the ntfs_volume structure. */
5129		(void)strlcpy(fsa->f_vol_name, vol->name, MAXPATHLEN - 1);
5130		VFSATTR_SET_SUPPORTED(fsa, f_vol_name);
5131	}
5132	/*
5133	 * Used for ATTR_VOL_SIGNATURE, Carbon's FSVolumeInfo.signature.  The
5134	 * kernel's getvolattrlist() function will default this to 'BD' which
5135	 * is apparently the generic signature that most Carbon file systems
5136	 * should be returning.
5137	 *
5138	 * ZFS returns 'Z!' so we return 'NT'.
5139	 */
5140	VFSATTR_RETURN(fsa, f_signature, 0x4e54); /* 'NT' */
5141	/*
5142	 * Same as Carbon's FSVolumeInfo.filesystemID.  HFS and HFS Plus use a
5143	 * value of zero.  ZFS also returns zero so we do that, too.
5144	 */
5145	VFSATTR_RETURN(fsa, f_carbon_fsid, 0);
5146	/* Volume UUID (GUID).  May not exist. */
5147	if (VFSATTR_IS_ACTIVE(fsa, f_uuid) && NVolHasGUID(vol)) {
5148		bcopy(vol->uuid, fsa->f_uuid, sizeof(uuid_t));
5149		VFSATTR_SET_SUPPORTED(fsa, f_uuid);
5150	}
5151	ntfs_debug("Done.");
5152	return 0;
5153}
5154
5155/**
5156 * ntfs_volume_rename - rename an ntfs volume
5157 * @vol:	ntfs volume to rename
5158 * @name:	new name for the ntfs volume
5159 *
5160 * Rename the ntfs volume @vol to @name which is a decomposed, NUL-terminated,
5161 * UTF-8 string as used on OS X.
5162 *
5163 * Return 0 on success and errno on error.
5164 */
5165static errno_t ntfs_volume_rename(ntfs_volume *vol, char *name)
5166{
5167	ntfs_inode *ni = vol->vol_ni;
5168	MFT_RECORD *m;
5169	ntfs_attr_search_ctx *ctx;
5170	ATTR_RECORD *a;
5171	u8 *utf8_name = NULL;
5172	ntfschar *ntfs_name = NULL;
5173	size_t utf8_name_size, ntfs_name_size;
5174	signed ntfs_name_len = 0;
5175	errno_t err;
5176
5177	ntfs_debug("Entering (old name: %s, new name: %s).", vol->name, name);
5178	/*
5179	 * We do not need to do anything if the new name is the same as the old
5180	 * name.
5181	 */
5182	utf8_name_size = strlen(name) + 1;
5183	if (utf8_name_size == vol->name_size &&
5184			!strncmp(vol->name, name, vol->name_size)) {
5185		ntfs_debug("The new name is the same as the old name, "
5186				"ignoring the rename request.");
5187		return 0;
5188	}
5189	/*
5190	 * If the new name is the empty string "", no need to convert it.  We
5191	 * will simply delete the $VOLUME_NAME attribute altogether.
5192	 *
5193	 * Otherwise, convert the name from the decomposed, UTF-8 format used
5194	 * by OS X into the little endian, 2-byte, composed Unicode format used
5195	 * by NTFS.
5196	 */
5197	if (utf8_name_size > 1) {
5198		ntfs_name_len = utf8_to_ntfs(vol, (u8*)name, utf8_name_size,
5199				&ntfs_name, &ntfs_name_size);
5200		if (ntfs_name_len < 0) {
5201			err = -ntfs_name_len;
5202			ntfs_error(vol->mp, "Failed to convert volume name to "
5203					"little endian, 2-byte, composed "
5204					"Unicode (error %d).", (int)err);
5205			goto err;
5206		}
5207		/* Switch @ntfs_name_len to be the name length in bytes. */
5208		ntfs_name_len <<= NTFSCHAR_SIZE_SHIFT;
5209		/*
5210		 * Verify that the length of the new name is in the allowed
5211		 * range.
5212		 */
5213		err = ntfs_attr_size_bounds_check(vol, AT_VOLUME_NAME,
5214				ntfs_name_len);
5215		if (err) {
5216			if (err == ERANGE) {
5217				ntfs_error(vol->mp, "Specified name is too "
5218						"long (%d little endian, "
5219						"2-byte, composed Unicode "
5220						"characters).",
5221						ntfs_name_len <<
5222						NTFSCHAR_SIZE_SHIFT);
5223				err = ENAMETOOLONG;
5224			} else {
5225				ntfs_error(vol->mp, "$VOLUME_NAME attribute "
5226						"is not defined on the NTFS "
5227						"volume.  Possible "
5228						"corruption!  You should run "
5229						"chkdsk.");
5230				err = EIO;
5231			}
5232			goto err;
5233		}
5234	}
5235	/* Make a copy of the new volume name to be placed in @vol->name. */
5236	utf8_name = OSMalloc(utf8_name_size, ntfs_malloc_tag);
5237	if (!utf8_name) {
5238		ntfs_error(vol->mp, "Not enough memory to make a copy of the "
5239				"new name.");
5240		err = ENOMEM;
5241		goto err;
5242	}
5243	if (strlcpy((char*)utf8_name, name, utf8_name_size) >= utf8_name_size)
5244		panic("%s(): strlcpy() failed\n", __FUNCTION__);
5245	err = vnode_get(ni->vn);
5246	if (err) {
5247		ntfs_error(vol->mp, "Failed to get vnode for $Volume.");
5248		goto err;
5249	}
5250	err = ntfs_mft_record_map(ni, &m);
5251	if (err) {
5252		ntfs_error(vol->mp, "Failed to map mft record for $Volume "
5253				"(error %d).", err);
5254		m = NULL;
5255		ctx = NULL;
5256		goto put_err;
5257	}
5258	ctx = ntfs_attr_search_ctx_get(ni, m);
5259	if (!ctx) {
5260		ntfs_error(vol->mp, "Not enough memory to get attribute "
5261				"search context.");
5262		err = ENOMEM;
5263		goto put_err;
5264	}
5265	err = ntfs_attr_lookup(AT_VOLUME_NAME, AT_UNNAMED, 0, 0, NULL, 0, ctx);
5266	m = ctx->m;
5267	a = ctx->a;
5268	if (err || a->non_resident || a->flags) {
5269		if (err != ENOENT) {
5270			/* Real lookup error or corrupt attribute. */
5271			if (!err)
5272				goto name_err;
5273			ntfs_error(vol->mp, "Failed to lookup volume name "
5274					"attribute (error %d).", err);
5275			goto put_err;
5276		}
5277		if (!ntfs_name) {
5278			ntfs_debug("Volume has no name and new name is the "
5279					"empty string, nothing to do.");
5280			goto done;
5281		}
5282		ntfs_debug("Volume has no name.  Creating new volume name "
5283				"attribute.");
5284		err = ntfs_resident_attr_record_insert(ni, ctx, AT_VOLUME_NAME,
5285				NULL, 0, ntfs_name, ntfs_name_len);
5286		if (err || ctx->is_error) {
5287			if (!err)
5288				err = ctx->error;
5289			ntfs_error(vol->mp, "Failed to %s $Volume (error %d).",
5290					ctx->is_error ?
5291					"remap extent mft record of" :
5292					"insert volume name attribute in", err);
5293			goto put_err;
5294		}
5295	} else {
5296		u8 *val = (u8*)a + le16_to_cpu(a->value_offset);
5297		/* Some bounds checks. */
5298		if (val < (u8*)a || val + le32_to_cpu(a->value_length) >
5299				(u8*)a + le32_to_cpu(a->length) ||
5300				(u8*)a + le32_to_cpu(a->length) >
5301				(u8*)m + vol->mft_record_size)
5302			goto name_err;
5303		if (!ntfs_name) {
5304			/*
5305			 * The new name is the empty string, thus remove the
5306			 * $VOLUME_NAME attribute altogether.
5307			 */
5308			ntfs_debug("New name is the empty string.  Removing "
5309					"the existing $VOLUME_NAME attribute.");
5310			err = ntfs_attr_record_delete(ni, ctx);
5311			if (!err)
5312				goto done;
5313			ntfs_warning(vol->mp, "Failed to delete volume name "
5314					"attribute (error %d).  Truncating it "
5315					"to zero length instead.", err);
5316		}
5317		/* Resize the existing attribute to fit the new name. */
5318retry_resize:
5319		err = ntfs_resident_attr_value_resize(m, a, ntfs_name_len);
5320		if (err) {
5321			if (err != ENOSPC)
5322				panic("%s(): err != ENOSPC\n", __FUNCTION__);
5323			/*
5324			 * If the base mft record does not have an attribute
5325			 * list attribute, add it now.
5326			 */
5327			if (!NInoAttrList(ni)) {
5328				err = ntfs_attr_list_add(ni, m, ctx);
5329				if (err || ctx->is_error) {
5330					if (!err)
5331						err = ctx->error;
5332					ntfs_error(vol->mp, "Failed to %s "
5333							"$Volume (error %d).",
5334							ctx->is_error ?
5335							"remap extent mft "
5336							"record of" :
5337							"add attribute list "
5338							"attribute to", err);
5339					goto put_err;
5340				}
5341				/*
5342				 * The attribute location will have changed so
5343				 * update it from the search context.
5344				 */
5345				m = ctx->m;
5346				a = ctx->a;
5347				/*
5348				 * We now have an attribute list attribute.
5349				 * This may have caused the attribute to be
5350				 * moved out to an extent mft record in which
5351				 * case there would now be enough space to
5352				 * resize the attribute.
5353				 *
5354				 * Alternatively some other large attribute may
5355				 * have been moved out to an extent mft record
5356				 * thus generating enough space in the base mft
5357				 * record to resize the attribute.
5358				 *
5359				 * In either case we simply want to retry the
5360				 * resize.
5361				 */
5362				goto retry_resize;
5363			}
5364			/*
5365			 * If the attribute record is the only one in the mft
5366			 * record then there must have been enough space.
5367			 */
5368			if (ntfs_attr_record_is_only_one(m, a))
5369				panic("%s(): err == ENOSPC && "
5370						"ntfs_attr_record_is_only_one"
5371						"()\n", __FUNCTION__);
5372			/*
5373			 * The attribute record is not the only one in the mft
5374			 * record.  Move it out to an extent mft record which
5375			 * will cause enough space to be generated.
5376			 */
5377			lck_rw_lock_shared(&ni->attr_list_rl.lock);
5378			err = ntfs_attr_record_move(ctx);
5379			lck_rw_unlock_shared(&ni->attr_list_rl.lock);
5380			if (err) {
5381				ntfs_error(vol->mp, "Failed to move volume "
5382						"name attribute to an extent "
5383						"mft record (error %d).", err);
5384				goto put_err;
5385			}
5386			/*
5387			 * The attribute location will have changed so update
5388			 * it from the search context.
5389			 */
5390			m = ctx->m;
5391			a = ctx->a;
5392			/*
5393			 * Retry the original attribute record resize as we
5394			 * will now have enough space to do it.
5395			 */
5396			goto retry_resize;
5397		}
5398		/* Copy the new name into the resized attribute record. */
5399		if (ntfs_name)
5400			memcpy((u8*)a + le16_to_cpu(a->value_offset),
5401					ntfs_name, ntfs_name_len);
5402	}
5403	/* Free the no longer needed temporary copy of the new name. */
5404	if (ntfs_name)
5405		OSFree(ntfs_name, ntfs_name_size, ntfs_malloc_tag);
5406	/* Mark the mft record dirty to ensure it gets written out. */
5407	NInoSetMrecNeedsDirtying(ctx->ni);
5408done:
5409	/*
5410	 * Finally set the new name to be the volume name releasing the old one
5411	 * first.  Since we have no locking around accesses to the volume name,
5412	 * we have to be careful about how we update it here, i.e. we have to
5413	 * set the size to the smaller of the two, then switch the pointers,
5414	 * then set the size to the new size and only then free the old
5415	 * pointer.  This is also why we do this under the protection of the
5416	 * mapped mft record so there cannot be two concurrent
5417	 * ntfs_volume_rename()s running.
5418	 */
5419	name = vol->name;
5420	ntfs_name_size = vol->name_size;
5421	if (utf8_name_size < vol->name_size)
5422		vol->name_size = utf8_name_size;
5423	vol->name = (char*)utf8_name;
5424	vol->name_size = utf8_name_size;
5425	ntfs_attr_search_ctx_put(ctx);
5426	ntfs_mft_record_unmap(ni);
5427	(void)vnode_put(ni->vn);
5428	OSFree(name, ntfs_name_size, ntfs_malloc_tag);
5429	ntfs_debug("Done.");
5430	return 0;
5431name_err:
5432	ntfs_error(vol->mp, "Volume name attribute is corrupt.  Run chkdsk.");
5433	NVolSetErrors(vol);
5434	err = EIO;
5435put_err:
5436	if (ctx)
5437		ntfs_attr_search_ctx_put(ctx);
5438	if (m)
5439		ntfs_mft_record_unmap(ni);
5440	(void)vnode_put(ni->vn);
5441err:
5442	if (utf8_name)
5443		OSFree(utf8_name, utf8_name_size, ntfs_malloc_tag);
5444	if (ntfs_name)
5445		OSFree(ntfs_name, ntfs_name_size, ntfs_malloc_tag);
5446	return err;
5447}
5448
5449/**
5450 * ntfs_setattr - set information about a mounted ntfs volume
5451 * @mp:		mount point of ntfs file system
5452 * @fsa:	information to set
5453 * @context:	vfs context
5454 *
5455 * The VFS calls this via VFS_SETATTR() when it wants to set some information
5456 * about the mounted ntfs volume described by the mount @mp.
5457 *
5458 * Which information is to be set is described by the vfs attribute structure
5459 * pointed to by @fsa, which is also the source pointer from which the
5460 * information to be set is copied.
5461 *
5462 * At present the kernel will only ever call this function for ATTR_VOL_NAME,
5463 * i.e. to set the name of the volume.
5464 *
5465 * Return 0 on success and errno on error.
5466 *
5467 * Note: Further details are in the man pages for the getattrlist and
5468 * setattrlist functions and in the header files xnu/bsd/sys/{mount,attr}.h.
5469 *
5470 * Note this function is only called for r/w mounted volumes so no need to
5471 * check if the volume is read-only.
5472 */
5473static int ntfs_setattr(struct mount *mp, struct vfs_attr *fsa,
5474		vfs_context_t context)
5475{
5476	kauth_cred_t cred = vfs_context_ucred(context);
5477	errno_t err;
5478
5479	ntfs_debug("Entering.");
5480	/*
5481	 * Must be superuser or owner of file system to change volume
5482	 * attributes.
5483	 */
5484	if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) !=
5485			vfs_statfs(mp)->f_owner))
5486		return EACCES;
5487	/*
5488	 * Only the volume name is settable (ATTR_VOL_NAME) at present so if
5489	 * this is not requested return success.  The VFS enforces that we are
5490	 * never called with any other flags set.
5491	 */
5492	if (!VFSATTR_IS_ACTIVE(fsa, f_vol_name))
5493		return 0;
5494	if (!fsa->f_vol_name)
5495		panic("%s(): !fsa->f_vol_name\n", __FUNCTION__);
5496	err = ntfs_volume_rename(NTFS_MP(mp), fsa->f_vol_name);
5497	if (err) {
5498		ntfs_error(mp, "Failed to set the name of the volume to %s "
5499				"(error %d).", fsa->f_vol_name, err);
5500		return err;
5501	}
5502	VFSATTR_SET_SUPPORTED(fsa, f_vol_name);
5503	ntfs_debug("Done.");
5504	return 0;
5505}
5506
5507static struct vfsops ntfs_vfsops = {
5508	.vfs_mount	= ntfs_mount,
5509	.vfs_unmount	= ntfs_unmount,
5510	.vfs_root	= ntfs_root,
5511	.vfs_getattr	= ntfs_getattr,
5512	.vfs_sync	= ntfs_sync,
5513	.vfs_vget	= ntfs_vget,
5514	.vfs_setattr	= ntfs_setattr,
5515};
5516
5517static struct vnodeopv_desc *ntfs_vnodeopv_desc_list[1] = {
5518	&ntfs_vnodeopv_desc,
5519};
5520
5521/* Lock group and lock attribute for allocation and freeing of locks. */
5522static lck_grp_attr_t *ntfs_lock_grp_attr;
5523lck_grp_t *ntfs_lock_grp;
5524lck_attr_t *ntfs_lock_attr;
5525
5526/* A tag to allow allocation and freeing of memory. */
5527OSMallocTag ntfs_malloc_tag;
5528
5529static vfstable_t ntfs_vfstable;
5530
5531extern kern_return_t ntfs_module_start(kmod_info_t *ki __unused,
5532		void *data __unused);
5533kern_return_t ntfs_module_start(kmod_info_t *ki __unused, void *data __unused)
5534{
5535	errno_t err;
5536	struct vfs_fsentry vfe;
5537
5538    printf("NTFS driver " NTFS_VERSION_STRING " [Flags: R/W"
5539#ifdef DEBUG
5540			" DEBUG"
5541#endif
5542			"].\n");
5543	/* This should never happen. */
5544	if (ntfs_lock_grp_attr || ntfs_lock_grp || ntfs_lock_attr ||
5545			ntfs_malloc_tag)
5546		panic("%s(): Lock(s) and/or malloc tag already initialized.\n",
5547				__FUNCTION__);
5548	/* First initialize the lock group so we can initialize debugging. */
5549	ntfs_lock_grp_attr = lck_grp_attr_alloc_init();
5550	if (!ntfs_lock_grp_attr) {
5551lck_err:
5552		printf("NTFS: Failed to allocate a lock element.\n");
5553		goto dbg_err;
5554	}
5555#ifdef DEBUG
5556	lck_grp_attr_setstat(ntfs_lock_grp_attr);
5557#endif
5558	ntfs_lock_grp = lck_grp_alloc_init("com.apple.filesystems.ntfs",
5559			ntfs_lock_grp_attr);
5560	if (!ntfs_lock_grp)
5561		goto lck_err;
5562	ntfs_lock_attr = lck_attr_alloc_init();
5563	if (!ntfs_lock_attr)
5564		goto lck_err;
5565#ifdef DEBUG
5566	lck_attr_setdebug(ntfs_lock_attr);
5567#endif
5568	/* Allocate a tag so we can allocate memory. */
5569	ntfs_malloc_tag = OSMalloc_Tagalloc("com.apple.filesystems.ntfs",
5570			OSMT_DEFAULT);
5571	if (!ntfs_malloc_tag) {
5572		printf("NTFS: OSMalloc_Tagalloc() failed.\n");
5573		goto dbg_err;
5574	}
5575	/* Initialize the driver wide lock. */
5576	lck_mtx_init(&ntfs_lock, ntfs_lock_grp, ntfs_lock_attr);
5577	/*
5578	 * This call must happen before we can use ntfs_debug(),
5579	 * ntfs_warning(), and ntfs_error().
5580	 */
5581	ntfs_debug_init();
5582	ntfs_debug("Debug messages are enabled.");
5583	err = ntfs_default_sds_entries_init();
5584	if (err)
5585		goto sds_err;
5586	err = ntfs_inode_hash_init();
5587	if (err)
5588		goto hash_err;
5589	vfe = (struct vfs_fsentry) {
5590		.vfe_vfsops	= &ntfs_vfsops,
5591		.vfe_vopcnt	= 1,	/* For now we just use one set of vnode
5592					   operations for all file types.
5593					   Note: Current max is 5 due to (not
5594					   needed) hard-coded limit in xnu. */
5595		.vfe_opvdescs	= ntfs_vnodeopv_desc_list,
5596		.vfe_fsname	= "ntfs",
5597// TODO: Implement VFS_TBLREADDIR_EXTENDED and set it here.
5598		.vfe_flags	= VFS_TBLNATIVEXATTR | VFS_TBL64BITREADY |
5599				  VFS_TBLLOCALVOL | VFS_TBLNOTYPENUM |
5600				  VFS_TBLFSNODELOCK | VFS_TBLTHREADSAFE,
5601	};
5602	err = vfs_fsadd(&vfe, &ntfs_vfstable);
5603	if (!err) {
5604		ntfs_debug("NTFS driver registered successfully.");
5605		return KERN_SUCCESS;
5606	}
5607	ntfs_error(NULL, "vfs_fsadd() failed (error %d).", (int)err);
5608	ntfs_inode_hash_deinit();
5609hash_err:
5610	OSFree(ntfs_file_sds_entry, 0x60 * 4, ntfs_malloc_tag);
5611	ntfs_file_sds_entry = NULL;
5612sds_err:
5613	ntfs_debug_deinit();
5614	lck_mtx_destroy(&ntfs_lock, ntfs_lock_grp);
5615dbg_err:
5616	if (ntfs_malloc_tag) {
5617		OSMalloc_Tagfree(ntfs_malloc_tag);
5618		ntfs_malloc_tag = NULL;
5619	}
5620	if (ntfs_lock_attr) {
5621		lck_attr_free(ntfs_lock_attr);
5622		ntfs_lock_attr = NULL;
5623	}
5624	if (ntfs_lock_grp) {
5625		lck_grp_free(ntfs_lock_grp);
5626		ntfs_lock_grp = NULL;
5627	}
5628	if (ntfs_lock_grp_attr) {
5629		lck_grp_attr_free(ntfs_lock_grp_attr);
5630		ntfs_lock_grp_attr = NULL;
5631	}
5632	printf("NTFS: Failed to register the NTFS driver.\n");
5633	return KERN_FAILURE;
5634}
5635
5636extern kern_return_t ntfs_module_stop(kmod_info_t *ki __unused,
5637		void *data __unused);
5638kern_return_t ntfs_module_stop(kmod_info_t *ki __unused, void *data __unused)
5639{
5640	errno_t err;
5641
5642	if (!ntfs_lock_grp_attr || !ntfs_lock_grp || !ntfs_lock_attr ||
5643			!ntfs_malloc_tag)
5644		panic("%s(): Lock(s) and/or malloc tag not yet initialized.\n",
5645				__FUNCTION__);
5646	ntfs_debug("Unregistering NTFS driver.");
5647	err = vfs_fsremove(ntfs_vfstable);
5648	if (err) {
5649		if (err == EBUSY)
5650			printf("NTFS: Failed to unregister the NTFS driver "
5651					"because there are mounted NTFS "
5652					"volumes.\n");
5653		else
5654			printf("NTFS: Failed to unregister the NTFS driver "
5655					"because vfs_fsremove() failed (error "
5656					"%d).\n", err);
5657		return KERN_FAILURE;
5658	}
5659	ntfs_inode_hash_deinit();
5660	OSFree(ntfs_file_sds_entry, 0x60 * 4, ntfs_malloc_tag);
5661	ntfs_file_sds_entry = NULL;
5662	ntfs_debug("Done.");
5663	/*
5664	 * Once this completes, we cannot use ntfs_debug(), ntfs_warning(), and
5665	 * ntfs_error() any more.  Since it cannot fail we cheat and report
5666	 * "Done." before the call.
5667	 */
5668	ntfs_debug_deinit();
5669	lck_mtx_destroy(&ntfs_lock, ntfs_lock_grp);
5670	OSMalloc_Tagfree(ntfs_malloc_tag);
5671	ntfs_malloc_tag = NULL;
5672	lck_attr_free(ntfs_lock_attr);
5673	ntfs_lock_attr = NULL;
5674	lck_grp_free(ntfs_lock_grp);
5675	ntfs_lock_grp = NULL;
5676	lck_grp_attr_free(ntfs_lock_grp_attr);
5677	ntfs_lock_grp_attr = NULL;
5678	return KERN_SUCCESS;
5679}
5680