1/**
2 * mft.c - Mft record handling code. Originated from the Linux-NTFS project.
3 *
4 * Copyright (c) 2000-2004 Anton Altaparmakov
5 * Copyright (c) 2004-2005 Richard Russon
6 * Copyright (c) 2004-2008 Szabolcs Szakacsits
7 * Copyright (c)      2005 Yura Pakhuchiy
8 *
9 * This program/include file is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as published
11 * by the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program/include file is distributed in the hope that it will be
15 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program (in the main directory of the NTFS-3G
21 * distribution in the file COPYING); if not, write to the Free Software
22 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23 */
24
25#ifdef HAVE_CONFIG_H
26#include "config.h"
27#endif
28
29#ifdef HAVE_STDLIB_H
30#include <stdlib.h>
31#endif
32#ifdef HAVE_STDIO_H
33#include <stdio.h>
34#endif
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#ifdef HAVE_STRING_H
39#include <string.h>
40#endif
41#ifdef HAVE_LIMITS_H
42#include <limits.h>
43#endif
44#include <time.h>
45
46#include "compat.h"
47#include "types.h"
48#include "device.h"
49#include "debug.h"
50#include "bitmap.h"
51#include "attrib.h"
52#include "inode.h"
53#include "volume.h"
54#include "layout.h"
55#include "lcnalloc.h"
56#include "mft.h"
57#include "logging.h"
58#include "misc.h"
59
60/**
61 * ntfs_mft_records_read - read records from the mft from disk
62 * @vol:	volume to read from
63 * @mref:	starting mft record number to read
64 * @count:	number of mft records to read
65 * @b:		output data buffer
66 *
67 * Read @count mft records starting at @mref from volume @vol into buffer
68 * @b. Return 0 on success or -1 on error, with errno set to the error
69 * code.
70 *
71 * If any of the records exceed the initialized size of the $MFT/$DATA
72 * attribute, i.e. they cannot possibly be allocated mft records, assume this
73 * is a bug and return error code ESPIPE.
74 *
75 * The read mft records are mst deprotected and are hence ready to use. The
76 * caller should check each record with is_baad_record() in case mst
77 * deprotection failed.
78 *
79 * NOTE: @b has to be at least of size @count * vol->mft_record_size.
80 */
81int ntfs_mft_records_read(const ntfs_volume *vol, const MFT_REF mref,
82		const s64 count, MFT_RECORD *b)
83{
84	s64 br;
85	VCN m;
86
87	ntfs_log_trace("inode %llu\n", (unsigned long long)MREF(mref));
88
89	if (!vol || !vol->mft_na || !b || count < 0) {
90		errno = EINVAL;
91		ntfs_log_perror("%s: b=%p  count=%lld  mft=%llu", __FUNCTION__,
92			b, (long long)count, (unsigned long long)MREF(mref));
93		return -1;
94	}
95	m = MREF(mref);
96	/* Refuse to read non-allocated mft records. */
97	if (m + count > vol->mft_na->initialized_size >>
98			vol->mft_record_size_bits) {
99		errno = ESPIPE;
100		ntfs_log_perror("Trying to read non-allocated mft records "
101				"(%lld > %lld)", (long long)m + count,
102				(long long)vol->mft_na->initialized_size >>
103				vol->mft_record_size_bits);
104		return -1;
105	}
106	br = ntfs_attr_mst_pread(vol->mft_na, m << vol->mft_record_size_bits,
107			count, vol->mft_record_size, b);
108	if (br != count) {
109		if (br != -1)
110			errno = EIO;
111		ntfs_log_perror("Failed to read of MFT, mft=%llu count=%lld "
112				"br=%lld", (long long)m, (long long)count,
113				(long long)br);
114		return -1;
115	}
116	return 0;
117}
118
/**
 * ntfs_mft_records_write - write mft records to disk
 * @vol:	volume to write to
 * @mref:	starting mft record number to write
 * @count:	number of mft records to write
 * @b:		data buffer containing the mft records to write
 *
 * Write @count mft records starting at @mref from data buffer @b to volume
 * @vol. Return 0 on success or -1 on error, with errno set to the error code.
 *
 * EINVAL is returned if @vol, @vol->mft_na or @b is NULL, if @count is
 * negative, if @vol->mftmirr_size is not positive, or if the write touches
 * records below @vol->mftmirr_size while @vol->mftmirr_na is NULL.
 *
 * If any of the records exceed the initialized size of the $MFT/$DATA
 * attribute, i.e. they cannot possibly be allocated mft records, assume this
 * is a bug and return error code ESPIPE.
 *
 * Before the mft records are written, they are mst protected. After the write,
 * they are deprotected again, thus resulting in an increase in the update
 * sequence number inside the data buffer @b.
 *
 * If any mft records are written which are also represented in the mft mirror
 * $MFTMirr, we make a copy of the relevant parts of the data buffer @b into a
 * temporary buffer before we do the actual write. Then if at least one mft
 * record was successfully written, we write the appropriate mft records from
 * the copied buffer to the mft mirror, too.
 *
 * A write that transfers fewer records than requested (to $MFT or to
 * $MFTMirr) is reported as EIO.  A failed mirror write is reported even when
 * the write to $MFT itself succeeded.
 */
int ntfs_mft_records_write(const ntfs_volume *vol, const MFT_REF mref,
		const s64 count, MFT_RECORD *b)
{
	s64 bw;
	VCN m;
	/* Snapshot of the records destined for $MFTMirr, if any. */
	void *bmirr = NULL;
	/* cnt: number of mirrored records; res: errno value to report. */
	int cnt = 0, res = 0;

	if (!vol || !vol->mft_na || vol->mftmirr_size <= 0 || !b || count < 0) {
		errno = EINVAL;
		return -1;
	}
	m = MREF(mref);
	/* Refuse to write non-allocated mft records. */
	if (m + count > vol->mft_na->initialized_size >>
			vol->mft_record_size_bits) {
		errno = ESPIPE;
		ntfs_log_perror("Trying to write non-allocated mft records "
				"(%lld > %lld)", (long long)m + count,
				(long long)vol->mft_na->initialized_size >>
				vol->mft_record_size_bits);
		return -1;
	}
	/*
	 * The first @vol->mftmirr_size records are also kept in $MFTMirr.  Copy
	 * the affected leading records now, before the write to $MFT gets a
	 * chance to modify @b.
	 */
	if (m < vol->mftmirr_size) {
		if (!vol->mftmirr_na) {
			errno = EINVAL;
			return -1;
		}
		/* Mirror only the part of the range that lies inside $MFTMirr. */
		cnt = vol->mftmirr_size - m;
		if (cnt > count)
			cnt = count;
		bmirr = ntfs_malloc(cnt * vol->mft_record_size);
		/* NOTE(review): errno is presumably set by ntfs_malloc() here. */
		if (!bmirr)
			return -1;
		memcpy(bmirr, b, cnt * vol->mft_record_size);
	}
	bw = ntfs_attr_mst_pwrite(vol->mft_na, m << vol->mft_record_size_bits,
			count, vol->mft_record_size, b);
	if (bw != count) {
		/* A short write has no errno of its own; report it as EIO. */
		if (bw != -1)
			errno = EIO;
		if (bw >= 0)
			ntfs_log_debug("Error: partial write while writing $Mft "
					"record(s)!\n");
		else
			ntfs_log_perror("Error writing $Mft record(s)");
		/* Remember the error but still try to update the mirror below. */
		res = errno;
	}
	if (bmirr && bw > 0) {
		/* Never mirror more records than actually reached $MFT. */
		if (bw < cnt)
			cnt = bw;
		bw = ntfs_attr_mst_pwrite(vol->mftmirr_na,
				m << vol->mft_record_size_bits, cnt,
				vol->mft_record_size, bmirr);
		if (bw != cnt) {
			if (bw != -1)
				errno = EIO;
			ntfs_log_debug("Error: failed to sync $MFTMirr! Run "
					"chkdsk.\n");
			/* The mirror error replaces any earlier $MFT error. */
			res = errno;
		}
	}
	free(bmirr);
	if (!res)
		return res;
	errno = res;
	return -1;
}
211
212int ntfs_mft_record_check(const ntfs_volume *vol, const MFT_REF mref,
213			  MFT_RECORD *m)
214{
215	ATTR_RECORD *a;
216	int ret = -1;
217
218	if (!ntfs_is_file_record(m->magic)) {
219		if (!NVolNoFixupWarn(vol))
220			ntfs_log_error("Record %llu has no FILE magic (0x%x)\n",
221				(unsigned long long)MREF(mref),
222				(int)le32_to_cpu(*(le32*)m));
223		goto err_out;
224	}
225
226	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
227		ntfs_log_error("Record %llu has corrupt allocation size "
228			       "(%u <> %u)\n", (unsigned long long)MREF(mref),
229			       vol->mft_record_size,
230			       le32_to_cpu(m->bytes_allocated));
231		goto err_out;
232	}
233
234	a = (ATTR_RECORD *)((char *)m + le16_to_cpu(m->attrs_offset));
235	if (p2n(a) < p2n(m) || (char *)a > (char *)m + vol->mft_record_size) {
236		ntfs_log_error("Record %llu is corrupt\n",
237			       (unsigned long long)MREF(mref));
238		goto err_out;
239	}
240
241	ret = 0;
242err_out:
243	if (ret)
244		errno = EIO;
245	return ret;
246}
247
248/**
249 * ntfs_file_record_read - read a FILE record from the mft from disk
250 * @vol:	volume to read from
251 * @mref:	mft reference specifying mft record to read
252 * @mrec:	address of pointer in which to return the mft record
253 * @attr:	address of pointer in which to return the first attribute
254 *
255 * Read a FILE record from the mft of @vol from the storage medium. @mref
256 * specifies the mft record to read, including the sequence number, which can
257 * be 0 if no sequence number checking is to be performed.
258 *
259 * The function allocates a buffer large enough to hold the mft record and
260 * reads the record into the buffer (mst deprotecting it in the process).
261 * *@mrec is then set to point to the buffer.
262 *
263 * If @attr is not NULL, *@attr is set to point to the first attribute in the
264 * mft record, i.e. *@attr is a pointer into *@mrec.
265 *
266 * Return 0 on success, or -1 on error, with errno set to the error code.
267 *
268 * The read mft record is checked for having the magic FILE,
269 * and for having a matching sequence number (if MSEQNO(*@mref) != 0).
270 * If either of these fails, -1 is returned and errno is set to EIO. If you get
271 * this, but you still want to read the mft record (e.g. in order to correct
272 * it), use ntfs_mft_record_read() directly.
273 *
274 * Note: Caller has to free *@mrec when finished.
275 *
276 * Note: We do not check if the mft record is flagged in use. The caller can
277 *	 check if desired.
278 */
279int ntfs_file_record_read(const ntfs_volume *vol, const MFT_REF mref,
280		MFT_RECORD **mrec, ATTR_RECORD **attr)
281{
282	MFT_RECORD *m;
283
284	if (!vol || !mrec) {
285		errno = EINVAL;
286		ntfs_log_perror("%s: mrec=%p", __FUNCTION__, mrec);
287		return -1;
288	}
289
290	m = *mrec;
291	if (!m) {
292		m = ntfs_malloc(vol->mft_record_size);
293		if (!m)
294			return -1;
295	}
296	if (ntfs_mft_record_read(vol, mref, m))
297		goto err_out;
298
299	if (ntfs_mft_record_check(vol, mref, m))
300		goto err_out;
301
302	if (MSEQNO(mref) && MSEQNO(mref) != le16_to_cpu(m->sequence_number)) {
303		ntfs_log_error("Record %llu has wrong SeqNo (%d <> %d)\n",
304			       (unsigned long long)MREF(mref), MSEQNO(mref),
305			       le16_to_cpu(m->sequence_number));
306		errno = EIO;
307		goto err_out;
308	}
309	*mrec = m;
310	if (attr)
311		*attr = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset));
312	return 0;
313err_out:
314	if (m != *mrec)
315		free(m);
316	return -1;
317}
318
/**
 * ntfs_mft_record_layout - layout an mft record into a memory buffer
 * @vol:	volume to which the mft record will belong
 * @mref:	mft reference specifying the mft record number
 * @mrec:	destination buffer of size >= @vol->mft_record_size bytes
 *
 * Layout an empty, unused mft record with the mft reference @mref into the
 * buffer @mrec.  The volume @vol is needed because the mft record structure
 * was modified in NTFS 3.1 so we need to know which volume version this mft
 * record will be used on.
 *
 * The resulting record has sequence number 1, link count 0, no flags set and
 * contains only the AT_END terminator attribute; everything after the
 * terminator is zeroed.
 *
 * On success return 0 and on error return -1 with errno set to the error code
 * (EINVAL if @vol or @mrec is NULL, ERANGE if the volume is NTFS 3.1+ and the
 * mft record number does not fit into 32 bits).
 */
int ntfs_mft_record_layout(const ntfs_volume *vol, const MFT_REF mref,
		MFT_RECORD *mrec)
{
	ATTR_RECORD *a;

	if (!vol || !mrec) {
		errno = EINVAL;
		ntfs_log_perror("%s: mrec=%p", __FUNCTION__, mrec);
		return -1;
	}
	/*
	 * The update sequence array follows the record header, whose size
	 * depends on the volume version.  Aligned to 2-byte boundary.
	 */
	if (vol->major_ver < 3 || (vol->major_ver == 3 && !vol->minor_ver))
		mrec->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD_OLD) + 1) & ~1);
	else {
		/* Abort if mref is > 32 bits. */
		if (MREF(mref) & 0x0000ffff00000000ull) {
			errno = ERANGE;
			ntfs_log_perror("Mft reference exceeds 32 bits");
			return -1;
		}
		mrec->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1);
		/*
		 * Set the NTFS 3.1+ specific fields while we know that the
		 * volume version is 3.1+.
		 */
		mrec->reserved = cpu_to_le16(0);
		mrec->mft_record_number = cpu_to_le32(MREF(mref));
	}
	mrec->magic = magic_FILE;
	/* One usa entry per NTFS_BLOCK_SIZE block plus one more entry. */
	if (vol->mft_record_size >= NTFS_BLOCK_SIZE)
		mrec->usa_count = cpu_to_le16(vol->mft_record_size /
				NTFS_BLOCK_SIZE + 1);
	else {
		mrec->usa_count = cpu_to_le16(1);
		ntfs_log_error("Sector size is bigger than MFT record size.  "
				"Setting usa_count to 1.  If Windows chkdsk "
				"reports this as corruption, please email %s "
				"stating that you saw this message and that "
				"the file system created was corrupt.  "
				"Thank you.\n", NTFS_DEV_LIST);
	}
	/* Set the update sequence number to 1. */
	*(u16*)((u8*)mrec + le16_to_cpu(mrec->usa_ofs)) = cpu_to_le16(1);
	mrec->lsn = cpu_to_le64(0ull);
	mrec->sequence_number = cpu_to_le16(1);
	mrec->link_count = cpu_to_le16(0);
	/*
	 * The attributes start right after the update sequence array (two
	 * bytes per entry).  Aligned to 8-byte boundary.
	 */
	mrec->attrs_offset = cpu_to_le16((le16_to_cpu(mrec->usa_ofs) +
			(le16_to_cpu(mrec->usa_count) << 1) + 7) & ~7);
	mrec->flags = cpu_to_le16(0);
	/*
	 * Using attrs_offset plus eight bytes (for the termination attribute),
	 * aligned to 8-byte boundary.
	 */
	mrec->bytes_in_use = cpu_to_le32((le16_to_cpu(mrec->attrs_offset) + 8 +
			7) & ~7);
	mrec->bytes_allocated = cpu_to_le32(vol->mft_record_size);
	mrec->base_mft_record = cpu_to_le64((MFT_REF)0);
	mrec->next_attr_instance = cpu_to_le16(0);
	/* Place the terminator attribute as the first and only attribute. */
	a = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset));
	a->type = AT_END;
	a->length = cpu_to_le32(0);
	/* Finally, clear the unused part of the mft record. */
	memset((u8*)a + 8, 0, vol->mft_record_size - ((u8*)a + 8 - (u8*)mrec));
	return 0;
}
398
399/**
400 * ntfs_mft_record_format - format an mft record on an ntfs volume
401 * @vol:	volume on which to format the mft record
402 * @mref:	mft reference specifying mft record to format
403 *
404 * Format the mft record with the mft reference @mref in $MFT/$DATA, i.e. lay
405 * out an empty, unused mft record in memory and write it to the volume @vol.
406 *
407 * On success return 0 and on error return -1 with errno set to the error code.
408 */
409int ntfs_mft_record_format(const ntfs_volume *vol, const MFT_REF mref)
410{
411	MFT_RECORD *m;
412	int ret = -1;
413
414	ntfs_log_enter("Entering\n");
415
416	m = ntfs_calloc(vol->mft_record_size);
417	if (!m)
418		goto out;
419
420	if (ntfs_mft_record_layout(vol, mref, m))
421		goto free_m;
422
423	if (ntfs_mft_record_write(vol, mref, m))
424		goto free_m;
425
426	ret = 0;
427free_m:
428	free(m);
429out:
430	ntfs_log_leave("\n");
431	return ret;
432}
433
/*
 * Suffix appended to the error messages that are logged when an attempt to
 * undo a partially completed metadata change fails itself.
 */
static const char *es = "  Leaving inconsistent metadata.  Run chkdsk.";
435
/**
 * ntfs_ffz - find the first unset (zero) bit in a word
 * @word:	value to search
 *
 * Search @word for its least significant zero bit by inverting it and
 * handing the result to ffs().
 *
 * Return the zero based index of the least significant zero bit.  If @word
 * has no zero bit at all, ffs() yields 0 and the result is (unsigned int)-1.
 */
static inline unsigned int ntfs_ffz(unsigned int word)
{
	/* ffs() counts from 1 and returns 0 when no bit is set. */
	const int first_one_based = ffs(~word);

	return first_one_based - 1;
}
448
449static int ntfs_is_mft(ntfs_inode *ni)
450{
451	if (ni && ni->mft_no == FILE_MFT)
452		return 1;
453	return 0;
454}
455
/*
 * Fallback for builds in which no header defines PAGE_SIZE.  In this file it
 * is the size of the buffer used to read the mft bitmap chunk by chunk.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

/*
 * Lowest mft record number at which ntfs_mft_bitmap_find_free_rec() starts
 * its search, unless it is searching on behalf of $MFT itself.
 */
#define RESERVED_MFT_RECORDS   64
461
/**
 * ntfs_mft_bitmap_find_free_rec - find a free mft record in the mft bitmap
 * @vol:	volume on which to search for a free mft record
 * @base_ni:	open base inode if allocating an extent mft record or NULL
 *
 * Search for a free mft record in the mft bitmap attribute on the ntfs volume
 * @vol.
 *
 * If @base_ni is NULL start the search at the default allocator position
 * (@vol->mft_data_pos).
 *
 * If @base_ni is not NULL start the search at the mft record after the base
 * mft record @base_ni.
 *
 * In both cases the start position is raised to RESERVED_MFT_RECORDS if it is
 * lower.  The search is done in up to two passes: the first one runs from the
 * start position to the end of the searchable area, the second one covers the
 * part between RESERVED_MFT_RECORDS and the start position that the first
 * pass skipped.
 *
 * If @base_ni is $MFT itself, a single pass starting at record 0 is done and
 * the search is abandoned once it gets beyond bit 400 of a bitmap chunk.
 *
 * Return the free mft record on success and -1 on error with errno set to the
 * error code.  An error code of ENOSPC means that there are no free mft
 * records in the currently initialized mft bitmap.
 */
static int ntfs_mft_bitmap_find_free_rec(ntfs_volume *vol, ntfs_inode *base_ni)
{
	s64 pass_end, ll, data_pos, pass_start, ofs, bit;
	ntfs_attr *mftbmp_na;
	u8 *buf, *byte;
	unsigned int size;
	u8 pass, b;
	int ret = -1;

	ntfs_log_enter("Entering\n");

	mftbmp_na = vol->mftbmp_na;
	/*
	 * Set the end of the pass making sure we do not overflow the mft
	 * bitmap.
	 */
	size = PAGE_SIZE;
	/* Number of mft records that fit into the allocated part of $MFT. */
	pass_end = vol->mft_na->allocated_size >> vol->mft_record_size_bits;
	/* Number of bits in the initialized part of the mft bitmap. */
	ll = mftbmp_na->initialized_size << 3;
	if (pass_end > ll)
		pass_end = ll;
	pass = 1;
	if (!base_ni)
		data_pos = vol->mft_data_pos;
	else
		data_pos = base_ni->mft_no + 1;
	if (data_pos < RESERVED_MFT_RECORDS)
		data_pos = RESERVED_MFT_RECORDS;
	if (data_pos >= pass_end) {
		/* Nothing left after the start position: do pass 2 only. */
		data_pos = RESERVED_MFT_RECORDS;
		pass = 2;
		/* This happens on a freshly formatted volume. */
		if (data_pos >= pass_end) {
			errno = ENOSPC;
			goto leave;
		}
	}
	/* When searching for $MFT itself, scan from record 0 in one pass. */
	if (ntfs_is_mft(base_ni)) {
		data_pos = 0;
		pass = 2;
	}
	pass_start = data_pos;
	buf = ntfs_malloc(PAGE_SIZE);
	if (!buf)
		goto leave;

	ntfs_log_debug("Starting bitmap search: pass %u, pass_start 0x%llx, "
			"pass_end 0x%llx, data_pos 0x%llx.\n", pass,
			(long long)pass_start, (long long)pass_end,
			(long long)data_pos);
#ifdef DEBUG
	/* Only read by the debug messages below before being assigned. */
	byte = NULL;
	b = 0;
#endif
	/* Loop until a free mft record is found. */
	for (; pass <= 2; size = PAGE_SIZE) {
		/* Cap size to pass_end. */
		ofs = data_pos >> 3;
		ll = ((pass_end + 7) >> 3) - ofs;
		if (size > ll)
			size = ll;
		ll = ntfs_attr_pread(mftbmp_na, ofs, size, buf);
		if (ll < 0) {
			ntfs_log_perror("Failed to read $MFT bitmap");
			free(buf);
			goto leave;
		}
		ntfs_log_debug("Read 0x%llx bytes.\n", (long long)ll);
		/* If we read at least one byte, search @buf for a zero bit. */
		if (ll) {
			/* From here on @size counts bits instead of bytes. */
			size = ll << 3;
			/*
			 * @data_pos becomes the record number of bit 0 of @buf
			 * and @bit the offset of the search position in @buf.
			 */
			bit = data_pos & 7;
			data_pos &= ~7ull;
			ntfs_log_debug("Before inner for loop: size 0x%x, "
					"data_pos 0x%llx, bit 0x%llx, "
					"*byte 0x%hhx, b %u.\n", size,
					(long long)data_pos, (long long)bit,
					byte ? *byte : -1, b);
			/* Examine one byte per iteration. */
			for (; bit < size && data_pos + bit < pass_end;
					bit &= ~7ull, bit += 8) {
				/*
				 * If we're extending $MFT and running out of the first
				 * mft record (base record) then give up searching since
				 * no guarantee that the found record will be accessible.
				 */
				if (ntfs_is_mft(base_ni) && bit > 400)
					goto out;

				byte = buf + (bit >> 3);
				if (*byte == 0xff)
					continue;

				/* Note: ffz() result must be zero based. */
				b = ntfs_ffz((unsigned long)*byte);
				/*
				 * Accept the zero bit only if it is not below the
				 * position at which the search is to start.
				 */
				if (b < 8 && b >= (bit & 7)) {
					free(buf);
					ret = data_pos + (bit & ~7ull) + b;
					goto leave;
				}
			}
			ntfs_log_debug("After inner for loop: size 0x%x, "
					"data_pos 0x%llx, bit 0x%llx, "
					"*byte 0x%hhx, b %u.\n", size,
					(long long)data_pos, (long long)bit,
					byte ? *byte : -1, b);
			data_pos += size;
			/*
			 * If the end of the pass has not been reached yet,
			 * continue searching the mft bitmap for a zero bit.
			 */
			if (data_pos < pass_end)
				continue;
		}
		/* Do the next pass. */
		pass++;
		if (pass == 2) {
			/*
			 * Starting the second pass, in which we scan the first
			 * part of the zone which we omitted earlier.
			 */
			pass_end = pass_start;
			data_pos = pass_start = RESERVED_MFT_RECORDS;
			ntfs_log_debug("pass %i, pass_start 0x%llx, pass_end "
					"0x%llx.\n", pass, (long long)pass_start,
					(long long)pass_end);
			if (data_pos >= pass_end)
				break;
		}
	}
	/* No free mft records in currently initialized mft bitmap. */
out:
	free(buf);
	errno = ENOSPC;
leave:
	ntfs_log_leave("\n");
	return ret;
}
616
617static int ntfs_mft_attr_extend(ntfs_attr *na)
618{
619	int ret = STATUS_ERROR;
620	ntfs_log_enter("Entering\n");
621
622	if (!NInoAttrList(na->ni)) {
623		if (ntfs_inode_add_attrlist(na->ni)) {
624			ntfs_log_perror("%s: Can not add attrlist #3", __FUNCTION__);
625			goto out;
626		}
627		/* We can't sync the $MFT inode since its runlist is bogus. */
628		ret = STATUS_KEEP_SEARCHING;
629		goto out;
630	}
631
632	if (ntfs_attr_update_mapping_pairs(na, 0)) {
633		ntfs_log_perror("%s: MP update failed", __FUNCTION__);
634		goto out;
635	}
636
637	ret = STATUS_OK;
638out:
639	ntfs_log_leave("\n");
640	return ret;
641}
642
/**
 * ntfs_mft_bitmap_extend_allocation_i - see ntfs_mft_bitmap_extend_allocation
 * @vol:	volume on which to extend the mft bitmap attribute
 *
 * Allocate one cluster, append it to the runlist of the mft bitmap attribute
 * and update the mapping pairs array, highest_vcn and allocated_size of the
 * attribute accordingly.
 *
 * If the enlarged mapping pairs array does not fit into the attribute record,
 * ntfs_mft_attr_extend() is called to make room.
 *
 * Return STATUS_OK on success.  On failure the cluster allocation is undone
 * as far as possible and STATUS_ERROR is returned with errno set, or
 * STATUS_KEEP_SEARCHING is returned if ntfs_mft_attr_extend() returned it.
 */
static int ntfs_mft_bitmap_extend_allocation_i(ntfs_volume *vol)
{
	LCN lcn;
	s64 ll = 0; /* silence compiler warning */
	ntfs_attr *mftbmp_na;
	runlist_element *rl, *rl2 = NULL; /* silence compiler warning */
	ntfs_attr_search_ctx *ctx;
	MFT_RECORD *m = NULL; /* silence compiler warning */
	ATTR_RECORD *a = NULL; /* silence compiler warning */
	int err, mp_size;
	int ret = STATUS_ERROR;
	u32 old_alen = 0; /* silence compiler warning */
	/* Set once the attribute record was resized for the new array. */
	BOOL mp_rebuilt = FALSE;
	/* Set if the mapping pairs have to be rewritten in the undo path. */
	BOOL update_mp = FALSE;

	mftbmp_na = vol->mftbmp_na;
	/*
	 * Determine the last lcn of the mft bitmap.  The allocated size of the
	 * mft bitmap cannot be zero so we are ok to do this.
	 */
	rl = ntfs_attr_find_vcn(mftbmp_na, (mftbmp_na->allocated_size - 1) >>
			vol->cluster_size_bits);
	if (!rl || !rl->length || rl->lcn < 0) {
		ntfs_log_error("Failed to determine last allocated "
				"cluster of mft bitmap attribute.\n");
		/* NOTE(review): with !rl, errno is presumably set by the callee. */
		if (rl)
			errno = EIO;
		return STATUS_ERROR;
	}
	/* First cluster following the last run of the mft bitmap. */
	lcn = rl->lcn + rl->length;

	/*
	 * Request one cluster for vcn rl[1].vcn, passing @lcn as the location
	 * (presumably a preferred position; see ntfs_cluster_alloc()).
	 */
	rl2 = ntfs_cluster_alloc(vol, rl[1].vcn, 1, lcn, DATA_ZONE);
	if (!rl2) {
		ntfs_log_error("Failed to allocate a cluster for "
				"the mft bitmap.\n");
		return STATUS_ERROR;
	}
	rl = ntfs_runlists_merge(mftbmp_na->rl, rl2);
	if (!rl) {
		err = errno;
		ntfs_log_error("Failed to merge runlists for mft "
				"bitmap.\n");
		if (ntfs_cluster_free_from_rl(vol, rl2))
			ntfs_log_error("Failed to deallocate "
					"cluster.%s\n", es);
		free(rl2);
		errno = err;
		return STATUS_ERROR;
	}
	mftbmp_na->rl = rl;
	ntfs_log_debug("Adding one run to mft bitmap.\n");
	/* Find the last run in the new runlist. */
	for (; rl[1].length; rl++)
		;
	/*
	 * Update the attribute record as well.  Note: @rl is the last
	 * (non-terminator) runlist element of mft bitmap.
	 */
	ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
	if (!ctx)
		goto undo_alloc;

	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
			mftbmp_na->name_len, 0, rl[1].vcn, NULL, 0, ctx)) {
		ntfs_log_error("Failed to find last attribute extent of "
				"mft bitmap attribute.\n");
		goto undo_alloc;
	}
	m = ctx->mrec;
	a = ctx->attr;
	/* @ll is the first vcn described by the attribute extent found. */
	ll = sle64_to_cpu(a->lowest_vcn);
	rl2 = ntfs_attr_find_vcn(mftbmp_na, ll);
	if (!rl2 || !rl2->length) {
		ntfs_log_error("Failed to determine previous last "
				"allocated cluster of mft bitmap attribute.\n");
		if (rl2)
			errno = EIO;
		goto undo_alloc;
	}
	/* Get the size for the new mapping pairs array for this extent. */
	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, INT_MAX);
	if (mp_size <= 0) {
		ntfs_log_error("Get size for mapping pairs failed for "
				"mft bitmap attribute extent.\n");
		goto undo_alloc;
	}
	/* Expand the attribute record if necessary. */
	old_alen = le32_to_cpu(a->length);
	if (ntfs_attr_record_resize(m, a, mp_size +
			le16_to_cpu(a->mapping_pairs_offset))) {
		/*
		 * The record cannot take the larger mapping pairs array, so
		 * let ntfs_mft_attr_extend() deal with it.  Anything but
		 * STATUS_OK makes us undo the allocation.
		 */
		ntfs_log_info("extending $MFT bitmap\n");
		ret = ntfs_mft_attr_extend(vol->mftbmp_na);
		if (ret == STATUS_OK)
			goto ok;
		if (ret == STATUS_ERROR) {
			ntfs_log_perror("%s: ntfs_mft_attr_extend failed", __FUNCTION__);
			update_mp = TRUE;
		}
		goto undo_alloc;
	}
	mp_rebuilt = TRUE;
	/* Generate the mapping pairs array directly into the attr record. */
	if (ntfs_mapping_pairs_build(vol, (u8*)a +
			le16_to_cpu(a->mapping_pairs_offset), mp_size, rl2, ll,
			NULL)) {
		ntfs_log_error("Failed to build mapping pairs array for "
				"mft bitmap attribute.\n");
		errno = EIO;
		goto undo_alloc;
	}
	/* Update the highest_vcn. */
	a->highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
	/*
	 * We now have extended the mft bitmap allocated_size by one cluster.
	 * Reflect this in the ntfs_attr structure and the attribute record.
	 */
	if (a->lowest_vcn) {
		/*
		 * We are not in the first attribute extent, switch to it, but
		 * first ensure the changes will make it to disk later.
		 */
		ntfs_inode_mark_dirty(ctx->ntfs_ino);
		ntfs_attr_reinit_search_ctx(ctx);
		if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
				mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
			ntfs_log_error("Failed to find first attribute "
					"extent of mft bitmap attribute.\n");
			goto restore_undo_alloc;
		}
		a = ctx->attr;
	}
ok:
	mftbmp_na->allocated_size += vol->cluster_size;
	a->allocated_size = cpu_to_sle64(mftbmp_na->allocated_size);
	/* Ensure the changes make it to disk. */
	ntfs_inode_mark_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	return STATUS_OK;

restore_undo_alloc:
	/*
	 * The last extent was already updated but the first one could not be
	 * found: go back to the last extent and revert its highest_vcn before
	 * undoing the allocation.
	 */
	err = errno;
	ntfs_attr_reinit_search_ctx(ctx);
	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
			mftbmp_na->name_len, 0, rl[1].vcn, NULL, 0, ctx)) {
		ntfs_log_error("Failed to find last attribute extent of "
				"mft bitmap attribute.%s\n", es);
		ntfs_attr_put_search_ctx(ctx);
		mftbmp_na->allocated_size += vol->cluster_size;
		/*
		 * The only thing that is now wrong is ->allocated_size of the
		 * base attribute extent which chkdsk should be able to fix.
		 */
		errno = err;
		return STATUS_ERROR;
	}
	m = ctx->mrec;
	a = ctx->attr;
	a->highest_vcn = cpu_to_sle64(rl[1].vcn - 2);
	errno = err;
undo_alloc:
	/* Preserve the errno of the original failure across the cleanup. */
	err = errno;

	/* Remove the last run from the runlist. */
	lcn = rl->lcn;
	rl->lcn = rl[1].lcn;
	rl->length = 0;

	/* FIXME: use an ntfs_cluster_free_* function */
	if (ntfs_bitmap_clear_bit(vol->lcnbmp_na, lcn))
		ntfs_log_error("Failed to free cluster.%s\n", es);
	else
		vol->free_clusters++;
	/* Put back the old mapping pairs array and attribute record size. */
	if (mp_rebuilt) {
		if (ntfs_mapping_pairs_build(vol, (u8*)a +
				le16_to_cpu(a->mapping_pairs_offset),
				old_alen - le16_to_cpu(a->mapping_pairs_offset),
				rl2, ll, NULL))
			ntfs_log_error("Failed to restore mapping "
					"pairs array.%s\n", es);
		if (ntfs_attr_record_resize(m, a, old_alen))
			ntfs_log_error("Failed to restore attribute "
					"record.%s\n", es);
		ntfs_inode_mark_dirty(ctx->ntfs_ino);
	}
	/* ntfs_mft_attr_extend() failed: rewrite the mapping pairs. */
	if (update_mp) {
		if (ntfs_attr_update_mapping_pairs(vol->mftbmp_na, 0))
			ntfs_log_perror("%s: MP update failed", __FUNCTION__);
	}
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	errno = err;
	return ret;
}
839
840/**
841 * ntfs_mft_bitmap_extend_allocation - extend mft bitmap attribute by a cluster
842 * @vol:	volume on which to extend the mft bitmap attribute
843 *
844 * Extend the mft bitmap attribute on the ntfs volume @vol by one cluster.
845 *
846 * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
847 * data_size.
848 *
849 * Return 0 on success and -1 on error with errno set to the error code.
850 */
851static int ntfs_mft_bitmap_extend_allocation(ntfs_volume *vol)
852{
853	int ret;
854
855	ntfs_log_enter("Entering\n");
856	ret = ntfs_mft_bitmap_extend_allocation_i(vol);
857	ntfs_log_leave("\n");
858	return ret;
859}
/**
 * ntfs_mft_bitmap_extend_initialized - extend mft bitmap initialized data
 * @vol:	volume on which to extend the mft bitmap attribute
 *
 * Extend the initialized portion of the mft bitmap attribute on the ntfs
 * volume @vol by 8 bytes.
 *
 * Note:  Only changes initialized_size and data_size, i.e. requires that
 * allocated_size is big enough to fit the new initialized_size.
 *
 * The sizes are updated first and the eight new bytes are zeroed afterwards.
 * If zeroing them fails, an attempt is made to restore the old sizes.  On
 * success @vol->free_mft_records is increased by 64, one for every new bit.
 *
 * Return 0 on success and -1 on error with errno set to the error code.
 */
static int ntfs_mft_bitmap_extend_initialized(ntfs_volume *vol)
{
	s64 old_data_size, old_initialized_size, ll;
	ntfs_attr *mftbmp_na;
	ntfs_attr_search_ctx *ctx;
	ATTR_RECORD *a;
	int err;
	int ret = -1;

	ntfs_log_enter("Entering\n");

	mftbmp_na = vol->mftbmp_na;
	ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
	/* NOTE(review): errno is presumably set by the callee on this path. */
	if (!ctx)
		goto out;

	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
			mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
		ntfs_log_error("Failed to find first attribute extent of "
				"mft bitmap attribute.\n");
		err = errno;
		goto put_err_out;
	}
	a = ctx->attr;
	/* Remember the old sizes so that they can be restored on failure. */
	old_data_size = mftbmp_na->data_size;
	old_initialized_size = mftbmp_na->initialized_size;
	mftbmp_na->initialized_size += 8;
	a->initialized_size = cpu_to_sle64(mftbmp_na->initialized_size);
	/* data_size must never be smaller than initialized_size. */
	if (mftbmp_na->initialized_size > mftbmp_na->data_size) {
		mftbmp_na->data_size = mftbmp_na->initialized_size;
		a->data_size = cpu_to_sle64(mftbmp_na->data_size);
	}
	/* Ensure the changes make it to disk. */
	ntfs_inode_mark_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	/* Initialize the mft bitmap attribute value with zeroes. */
	/* @ll serves both as the zeroed source buffer and as the result. */
	ll = 0;
	ll = ntfs_attr_pwrite(mftbmp_na, old_initialized_size, 8, &ll);
	if (ll == 8) {
		ntfs_log_debug("Wrote eight initialized bytes to mft bitmap.\n");
		vol->free_mft_records += (8 * 8);
		ret = 0;
		goto out;
	}
	ntfs_log_error("Failed to write to mft bitmap.\n");
	err = errno;
	/* A short write does not set errno, report it as EIO. */
	if (ll >= 0)
		err = EIO;
	/* Try to recover from the error. */
	ctx = ntfs_attr_get_search_ctx(mftbmp_na->ni, NULL);
	if (!ctx)
		goto err_out;

	if (ntfs_attr_lookup(mftbmp_na->type, mftbmp_na->name,
			mftbmp_na->name_len, 0, 0, NULL, 0, ctx)) {
		ntfs_log_error("Failed to find first attribute extent of "
				"mft bitmap attribute.%s\n", es);
		/* Also the target of the first, failed lookup further up. */
put_err_out:
		ntfs_attr_put_search_ctx(ctx);
		goto err_out;
	}
	/* Restore the sizes in both the ntfs_attr and the attribute record. */
	a = ctx->attr;
	mftbmp_na->initialized_size = old_initialized_size;
	a->initialized_size = cpu_to_sle64(old_initialized_size);
	if (mftbmp_na->data_size != old_data_size) {
		mftbmp_na->data_size = old_data_size;
		a->data_size = cpu_to_sle64(old_data_size);
	}
	ntfs_inode_mark_dirty(ctx->ntfs_ino);
	ntfs_attr_put_search_ctx(ctx);
	ntfs_log_debug("Restored status of mftbmp: allocated_size 0x%llx, "
			"data_size 0x%llx, initialized_size 0x%llx.\n",
			(long long)mftbmp_na->allocated_size,
			(long long)mftbmp_na->data_size,
			(long long)mftbmp_na->initialized_size);
err_out:
	errno = err;
out:
	ntfs_log_leave("\n");
	return ret;
}
953
954/**
955 * ntfs_mft_data_extend_allocation - extend mft data attribute
956 * @vol:	volume on which to extend the mft data attribute
957 *
958 * Extend the mft data attribute on the ntfs volume @vol by 16 mft records
959 * worth of clusters or if not enough space for this by one mft record worth
960 * of clusters.
961 *
962 * Note:  Only changes allocated_size, i.e. does not touch initialized_size or
963 * data_size.
964 *
965 * Return 0 on success and -1 on error with errno set to the error code.
966 */
967static int ntfs_mft_data_extend_allocation(ntfs_volume *vol)
968{
969	LCN lcn;
970	VCN old_last_vcn;
971	s64 min_nr, nr, ll = 0; /* silence compiler warning */
972	ntfs_attr *mft_na;
973	runlist_element *rl, *rl2;
974	ntfs_attr_search_ctx *ctx;
975	MFT_RECORD *m = NULL; /* silence compiler warning */
976	ATTR_RECORD *a = NULL; /* silence compiler warning */
977	int err, mp_size;
978	int ret = STATUS_ERROR;
979	u32 old_alen = 0; /* silence compiler warning */
980	BOOL mp_rebuilt = FALSE;
981	BOOL update_mp = FALSE;
982
983	ntfs_log_enter("Extending mft data allocation.\n");
984
985	mft_na = vol->mft_na;
986	/*
987	 * Determine the preferred allocation location, i.e. the last lcn of
988	 * the mft data attribute.  The allocated size of the mft data
989	 * attribute cannot be zero so we are ok to do this.
990	 */
991	rl = ntfs_attr_find_vcn(mft_na,
992			(mft_na->allocated_size - 1) >> vol->cluster_size_bits);
993
994	if (!rl || !rl->length || rl->lcn < 0) {
995		ntfs_log_error("Failed to determine last allocated "
996				"cluster of mft data attribute.\n");
997		if (rl)
998			errno = EIO;
999		goto out;
1000	}
1001
1002	lcn = rl->lcn + rl->length;
1003	ntfs_log_debug("Last lcn of mft data attribute is 0x%llx.\n", (long long)lcn);
1004	/* Minimum allocation is one mft record worth of clusters. */
1005	min_nr = vol->mft_record_size >> vol->cluster_size_bits;
1006	if (!min_nr)
1007		min_nr = 1;
1008	/* Want to allocate 16 mft records worth of clusters. */
1009	nr = vol->mft_record_size << 4 >> vol->cluster_size_bits;
1010	if (!nr)
1011		nr = min_nr;
1012
1013	old_last_vcn = rl[1].vcn;
1014	do {
1015		rl2 = ntfs_cluster_alloc(vol, old_last_vcn, nr, lcn, MFT_ZONE);
1016		if (rl2)
1017			break;
1018		if (errno != ENOSPC || nr == min_nr) {
1019			ntfs_log_perror("Failed to allocate (%lld) clusters "
1020					"for $MFT", (long long)nr);
1021			goto out;
1022		}
1023		/*
1024		 * There is not enough space to do the allocation, but there
1025		 * might be enough space to do a minimal allocation so try that
1026		 * before failing.
1027		 */
1028		nr = min_nr;
1029		ntfs_log_debug("Retrying mft data allocation with minimal cluster "
1030				"count %lli.\n", (long long)nr);
1031	} while (1);
1032
1033	ntfs_log_debug("Allocated %lld clusters.\n", (long long)nr);
1034
1035	rl = ntfs_runlists_merge(mft_na->rl, rl2);
1036	if (!rl) {
1037		err = errno;
1038		ntfs_log_error("Failed to merge runlists for mft data "
1039				"attribute.\n");
1040		if (ntfs_cluster_free_from_rl(vol, rl2))
1041			ntfs_log_error("Failed to deallocate clusters "
1042					"from the mft data attribute.%s\n", es);
1043		free(rl2);
1044		errno = err;
1045		goto out;
1046	}
1047	mft_na->rl = rl;
1048
1049	/* Find the last run in the new runlist. */
1050	for (; rl[1].length; rl++)
1051		;
1052	/* Update the attribute record as well. */
1053	ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1054	if (!ctx)
1055		goto undo_alloc;
1056
1057	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1058			rl[1].vcn, NULL, 0, ctx)) {
1059		ntfs_log_error("Failed to find last attribute extent of "
1060				"mft data attribute.\n");
1061		goto undo_alloc;
1062	}
1063	m = ctx->mrec;
1064	a = ctx->attr;
1065	ll = sle64_to_cpu(a->lowest_vcn);
1066	rl2 = ntfs_attr_find_vcn(mft_na, ll);
1067	if (!rl2 || !rl2->length) {
1068		ntfs_log_error("Failed to determine previous last "
1069				"allocated cluster of mft data attribute.\n");
1070		if (rl2)
1071			errno = EIO;
1072		goto undo_alloc;
1073	}
1074	/* Get the size for the new mapping pairs array for this extent. */
1075	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl2, ll, INT_MAX);
1076	if (mp_size <= 0) {
1077		ntfs_log_error("Get size for mapping pairs failed for "
1078				"mft data attribute extent.\n");
1079		goto undo_alloc;
1080	}
1081	/* Expand the attribute record if necessary. */
1082	old_alen = le32_to_cpu(a->length);
1083	if (ntfs_attr_record_resize(m, a,
1084			mp_size + le16_to_cpu(a->mapping_pairs_offset))) {
1085		ret = ntfs_mft_attr_extend(vol->mft_na);
1086		if (ret == STATUS_OK)
1087			goto ok;
1088		if (ret == STATUS_ERROR) {
1089			ntfs_log_perror("%s: ntfs_mft_attr_extend failed", __FUNCTION__);
1090			update_mp = TRUE;
1091		}
1092		goto undo_alloc;
1093	}
1094	mp_rebuilt = TRUE;
1095	/*
1096	 * Generate the mapping pairs array directly into the attribute record.
1097	 */
1098	if (ntfs_mapping_pairs_build(vol,
1099			(u8*)a + le16_to_cpu(a->mapping_pairs_offset), mp_size,
1100			rl2, ll, NULL)) {
1101		ntfs_log_error("Failed to build mapping pairs array of "
1102				"mft data attribute.\n");
1103		errno = EIO;
1104		goto undo_alloc;
1105	}
1106	/* Update the highest_vcn. */
1107	a->highest_vcn = cpu_to_sle64(rl[1].vcn - 1);
1108	/*
1109	 * We now have extended the mft data allocated_size by nr clusters.
1110	 * Reflect this in the ntfs_attr structure and the attribute record.
1111	 * @rl is the last (non-terminator) runlist element of mft data
1112	 * attribute.
1113	 */
1114	if (a->lowest_vcn) {
1115		/*
1116		 * We are not in the first attribute extent, switch to it, but
1117		 * first ensure the changes will make it to disk later.
1118		 */
1119		ntfs_inode_mark_dirty(ctx->ntfs_ino);
1120		ntfs_attr_reinit_search_ctx(ctx);
1121		if (ntfs_attr_lookup(mft_na->type, mft_na->name,
1122				mft_na->name_len, 0, 0, NULL, 0, ctx)) {
1123			ntfs_log_error("Failed to find first attribute "
1124					"extent of mft data attribute.\n");
1125			goto restore_undo_alloc;
1126		}
1127		a = ctx->attr;
1128	}
1129ok:
1130	mft_na->allocated_size += nr << vol->cluster_size_bits;
1131	a->allocated_size = cpu_to_sle64(mft_na->allocated_size);
1132	/* Ensure the changes make it to disk. */
1133	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1134	ntfs_attr_put_search_ctx(ctx);
1135	ret = STATUS_OK;
1136out:
1137	ntfs_log_leave("\n");
1138	return ret;
1139
1140restore_undo_alloc:
1141	err = errno;
1142	ntfs_attr_reinit_search_ctx(ctx);
1143	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1144			rl[1].vcn, NULL, 0, ctx)) {
1145		ntfs_log_error("Failed to find last attribute extent of "
1146				"mft data attribute.%s\n", es);
1147		ntfs_attr_put_search_ctx(ctx);
1148		mft_na->allocated_size += nr << vol->cluster_size_bits;
1149		/*
1150		 * The only thing that is now wrong is ->allocated_size of the
1151		 * base attribute extent which chkdsk should be able to fix.
1152		 */
1153		errno = err;
1154		ret = STATUS_ERROR;
1155		goto out;
1156	}
1157	m = ctx->mrec;
1158	a = ctx->attr;
1159	a->highest_vcn = cpu_to_sle64(old_last_vcn - 1);
1160	errno = err;
1161undo_alloc:
1162	err = errno;
1163	if (ntfs_cluster_free(vol, mft_na, old_last_vcn, -1) < 0)
1164		ntfs_log_error("Failed to free clusters from mft data "
1165				"attribute.%s\n", es);
1166	if (ntfs_rl_truncate(&mft_na->rl, old_last_vcn))
1167		ntfs_log_error("Failed to truncate mft data attribute "
1168				"runlist.%s\n", es);
1169	if (mp_rebuilt) {
1170		if (ntfs_mapping_pairs_build(vol, (u8*)a +
1171				le16_to_cpu(a->mapping_pairs_offset),
1172				old_alen - le16_to_cpu(a->mapping_pairs_offset),
1173				rl2, ll, NULL))
1174			ntfs_log_error("Failed to restore mapping pairs "
1175					"array.%s\n", es);
1176		if (ntfs_attr_record_resize(m, a, old_alen))
1177			ntfs_log_error("Failed to restore attribute "
1178					"record.%s\n", es);
1179		ntfs_inode_mark_dirty(ctx->ntfs_ino);
1180	}
1181	if (update_mp) {
1182		if (ntfs_attr_update_mapping_pairs(vol->mft_na, 0))
1183			ntfs_log_perror("%s: MP update failed", __FUNCTION__);
1184	}
1185	if (ctx)
1186		ntfs_attr_put_search_ctx(ctx);
1187	errno = err;
1188	goto out;
1189}
1190
1191
1192static int ntfs_mft_record_init(ntfs_volume *vol, s64 size)
1193{
1194	int ret = -1;
1195	ntfs_attr *mft_na;
1196	s64 old_data_initialized, old_data_size;
1197	ntfs_attr_search_ctx *ctx;
1198
1199	ntfs_log_enter("Entering\n");
1200
1201	/* NOTE: Caller must sanity check vol, vol->mft_na and vol->mftbmp_na */
1202
1203	mft_na = vol->mft_na;
1204
1205	/*
1206	 * The mft record is outside the initialized data. Extend the mft data
1207	 * attribute until it covers the allocated record. The loop is only
1208	 * actually traversed more than once when a freshly formatted volume
1209	 * is first written to so it optimizes away nicely in the common case.
1210	 */
1211	ntfs_log_debug("Status of mft data before extension: "
1212			"allocated_size 0x%llx, data_size 0x%llx, "
1213			"initialized_size 0x%llx.\n",
1214			(long long)mft_na->allocated_size,
1215			(long long)mft_na->data_size,
1216			(long long)mft_na->initialized_size);
1217	while (size > mft_na->allocated_size) {
1218		if (ntfs_mft_data_extend_allocation(vol) == STATUS_ERROR)
1219			goto out;
1220		ntfs_log_debug("Status of mft data after allocation extension: "
1221				"allocated_size 0x%llx, data_size 0x%llx, "
1222				"initialized_size 0x%llx.\n",
1223				(long long)mft_na->allocated_size,
1224				(long long)mft_na->data_size,
1225				(long long)mft_na->initialized_size);
1226	}
1227
1228	old_data_initialized = mft_na->initialized_size;
1229	old_data_size = mft_na->data_size;
1230
1231	/*
1232	 * Extend mft data initialized size (and data size of course) to reach
1233	 * the allocated mft record, formatting the mft records along the way.
1234	 * Note: We only modify the ntfs_attr structure as that is all that is
1235	 * needed by ntfs_mft_record_format().  We will update the attribute
1236	 * record itself in one fell swoop later on.
1237	 */
1238	while (size > mft_na->initialized_size) {
1239		s64 ll2 = mft_na->initialized_size >> vol->mft_record_size_bits;
1240		mft_na->initialized_size += vol->mft_record_size;
1241		if (mft_na->initialized_size > mft_na->data_size)
1242			mft_na->data_size = mft_na->initialized_size;
1243		ntfs_log_debug("Initializing mft record 0x%llx.\n", (long long)ll2);
1244		if (ntfs_mft_record_format(vol, ll2) < 0) {
1245			ntfs_log_perror("Failed to format mft record");
1246			goto undo_data_init;
1247		}
1248	}
1249
1250	/* Update the mft data attribute record to reflect the new sizes. */
1251	ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1252	if (!ctx)
1253		goto undo_data_init;
1254
1255	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1256			0, NULL, 0, ctx)) {
1257		ntfs_log_error("Failed to find first attribute extent of "
1258				"mft data attribute.\n");
1259		ntfs_attr_put_search_ctx(ctx);
1260		goto undo_data_init;
1261	}
1262	ctx->attr->initialized_size = cpu_to_sle64(mft_na->initialized_size);
1263	ctx->attr->data_size = cpu_to_sle64(mft_na->data_size);
1264	ctx->attr->allocated_size = cpu_to_sle64(mft_na->allocated_size);
1265
1266	/* Ensure the changes make it to disk. */
1267	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1268	ntfs_attr_put_search_ctx(ctx);
1269	ntfs_log_debug("Status of mft data after mft record initialization: "
1270			"allocated_size 0x%llx, data_size 0x%llx, "
1271			"initialized_size 0x%llx.\n",
1272			(long long)mft_na->allocated_size,
1273			(long long)mft_na->data_size,
1274			(long long)mft_na->initialized_size);
1275
1276	/* Sanity checks. */
1277	if (mft_na->data_size > mft_na->allocated_size ||
1278	    mft_na->initialized_size > mft_na->data_size)
1279		NTFS_BUG("mft_na sanity checks failed");
1280
1281	/* Sync MFT to minimize data loss if there won't be clean unmount. */
1282	if (ntfs_inode_sync(mft_na->ni))
1283		goto undo_data_init;
1284
1285	ret = 0;
1286out:
1287	ntfs_log_leave("\n");
1288	return ret;
1289
1290undo_data_init:
1291	mft_na->initialized_size = old_data_initialized;
1292	mft_na->data_size = old_data_size;
1293	goto out;
1294}
1295
1296static int ntfs_mft_rec_init(ntfs_volume *vol, s64 size)
1297{
1298	int ret = -1;
1299	ntfs_attr *mft_na;
1300	s64 old_data_initialized, old_data_size;
1301	ntfs_attr_search_ctx *ctx;
1302
1303	ntfs_log_enter("Entering\n");
1304
1305	mft_na = vol->mft_na;
1306
1307	if (size > mft_na->allocated_size || size > mft_na->initialized_size) {
1308		errno = EIO;
1309		ntfs_log_perror("%s: unexpected $MFT sizes, see below", __FUNCTION__);
1310		ntfs_log_error("$MFT: size=%lld  allocated_size=%lld  "
1311			       "data_size=%lld  initialized_size=%lld\n",
1312			       (long long)size,
1313			       (long long)mft_na->allocated_size,
1314			       (long long)mft_na->data_size,
1315			       (long long)mft_na->initialized_size);
1316		goto out;
1317	}
1318
1319	old_data_initialized = mft_na->initialized_size;
1320	old_data_size = mft_na->data_size;
1321
1322	/* Update the mft data attribute record to reflect the new sizes. */
1323	ctx = ntfs_attr_get_search_ctx(mft_na->ni, NULL);
1324	if (!ctx)
1325		goto undo_data_init;
1326
1327	if (ntfs_attr_lookup(mft_na->type, mft_na->name, mft_na->name_len, 0,
1328			0, NULL, 0, ctx)) {
1329		ntfs_log_error("Failed to find first attribute extent of "
1330				"mft data attribute.\n");
1331		ntfs_attr_put_search_ctx(ctx);
1332		goto undo_data_init;
1333	}
1334	ctx->attr->initialized_size = cpu_to_sle64(mft_na->initialized_size);
1335	ctx->attr->data_size = cpu_to_sle64(mft_na->data_size);
1336
1337	/* CHECKME: ctx->attr->allocation_size is already ok? */
1338
1339	/* Ensure the changes make it to disk. */
1340	ntfs_inode_mark_dirty(ctx->ntfs_ino);
1341	ntfs_attr_put_search_ctx(ctx);
1342
1343	/* Sanity checks. */
1344	if (mft_na->data_size > mft_na->allocated_size ||
1345	    mft_na->initialized_size > mft_na->data_size)
1346		NTFS_BUG("mft_na sanity checks failed");
1347out:
1348	ntfs_log_leave("\n");
1349	return ret;
1350
1351undo_data_init:
1352	mft_na->initialized_size = old_data_initialized;
1353	mft_na->data_size = old_data_size;
1354	goto out;
1355}
1356
1357static ntfs_inode *ntfs_mft_rec_alloc(ntfs_volume *vol)
1358{
1359	s64 ll, bit;
1360	ntfs_attr *mft_na, *mftbmp_na;
1361	MFT_RECORD *m;
1362	ntfs_inode *ni = NULL;
1363	ntfs_inode *base_ni;
1364	int err;
1365	le16 seq_no, usn;
1366
1367	ntfs_log_enter("Entering\n");
1368
1369	mft_na = vol->mft_na;
1370	mftbmp_na = vol->mftbmp_na;
1371
1372	base_ni = mft_na->ni;
1373
1374	bit = ntfs_mft_bitmap_find_free_rec(vol, base_ni);
1375	if (bit >= 0)
1376		goto found_free_rec;
1377
1378	if (errno != ENOSPC)
1379		goto out;
1380
1381	errno = ENOSPC;
1382	/* strerror() is intentionally used below, we want to log this error. */
1383	ntfs_log_error("No free mft record for $MFT: %s\n", strerror(errno));
1384	goto err_out;
1385
1386found_free_rec:
1387	if (ntfs_bitmap_set_bit(mftbmp_na, bit)) {
1388		ntfs_log_error("Failed to allocate bit in mft bitmap #2\n");
1389		goto err_out;
1390	}
1391
1392	ll = (bit + 1) << vol->mft_record_size_bits;
1393	if (ll > mft_na->initialized_size)
1394		if (ntfs_mft_rec_init(vol, ll) < 0)
1395			goto undo_mftbmp_alloc;
1396	/*
1397	 * We now have allocated and initialized the mft record.  Need to read
1398	 * it from disk and re-format it, preserving the sequence number if it
1399	 * is not zero as well as the update sequence number if it is not zero
1400	 * or -1 (0xffff).
1401	 */
1402	m = ntfs_malloc(vol->mft_record_size);
1403	if (!m)
1404		goto undo_mftbmp_alloc;
1405
1406	if (ntfs_mft_record_read(vol, bit, m)) {
1407		free(m);
1408		goto undo_mftbmp_alloc;
1409	}
1410	/* Sanity check that the mft record is really not in use. */
1411	if (ntfs_is_file_record(m->magic) && (m->flags & MFT_RECORD_IN_USE)) {
1412		ntfs_log_error("Inode %lld is used but it wasn't marked in "
1413			       "$MFT bitmap. Fixed.\n", (long long)bit);
1414		free(m);
1415		goto undo_mftbmp_alloc;
1416	}
1417
1418	seq_no = m->sequence_number;
1419	usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs));
1420	if (ntfs_mft_record_layout(vol, bit, m)) {
1421		ntfs_log_error("Failed to re-format mft record.\n");
1422		free(m);
1423		goto undo_mftbmp_alloc;
1424	}
1425	if (seq_no)
1426		m->sequence_number = seq_no;
1427	seq_no = usn;
1428	if (seq_no && seq_no != const_cpu_to_le16(0xffff))
1429		*(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
1430	/* Set the mft record itself in use. */
1431	m->flags |= MFT_RECORD_IN_USE;
1432	/* Now need to open an ntfs inode for the mft record. */
1433	ni = ntfs_inode_allocate(vol);
1434	if (!ni) {
1435		ntfs_log_error("Failed to allocate buffer for inode.\n");
1436		free(m);
1437		goto undo_mftbmp_alloc;
1438	}
1439	ni->mft_no = bit;
1440	ni->mrec = m;
1441	/*
1442	 * If we are allocating an extent mft record, make the opened inode an
1443	 * extent inode and attach it to the base inode.  Also, set the base
1444	 * mft record reference in the extent inode.
1445	 */
1446	ni->nr_extents = -1;
1447	ni->base_ni = base_ni;
1448	m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
1449					le16_to_cpu(base_ni->mrec->sequence_number));
1450	/*
1451	 * Attach the extent inode to the base inode, reallocating
1452	 * memory if needed.
1453	 */
1454	if (!(base_ni->nr_extents & 3)) {
1455		ntfs_inode **extent_nis;
1456		int i;
1457
1458		i = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
1459		extent_nis = ntfs_malloc(i);
1460		if (!extent_nis) {
1461			free(m);
1462			free(ni);
1463			goto undo_mftbmp_alloc;
1464		}
1465		if (base_ni->nr_extents) {
1466			memcpy(extent_nis, base_ni->extent_nis,
1467					i - 4 * sizeof(ntfs_inode *));
1468			free(base_ni->extent_nis);
1469		}
1470		base_ni->extent_nis = extent_nis;
1471	}
1472	base_ni->extent_nis[base_ni->nr_extents++] = ni;
1473
1474	/* Make sure the allocated inode is written out to disk later. */
1475	ntfs_inode_mark_dirty(ni);
1476	/* Initialize time, allocated and data size in ntfs_inode struct. */
1477	ni->data_size = ni->allocated_size = 0;
1478	ni->flags = 0;
1479	ni->creation_time = ni->last_data_change_time =
1480			ni->last_mft_change_time =
1481			ni->last_access_time = ntfs_current_time();
1482	/* Update the default mft allocation position if it was used. */
1483	if (!base_ni)
1484		vol->mft_data_pos = bit + 1;
1485	/* Return the opened, allocated inode of the allocated mft record. */
1486	ntfs_log_error("allocated %sinode %lld\n",
1487			base_ni ? "extent " : "", (long long)bit);
1488out:
1489	ntfs_log_leave("\n");
1490	return ni;
1491
1492undo_mftbmp_alloc:
1493	err = errno;
1494	if (ntfs_bitmap_clear_bit(mftbmp_na, bit))
1495		ntfs_log_error("Failed to clear bit in mft bitmap.%s\n", es);
1496	errno = err;
1497err_out:
1498	if (!errno)
1499		errno = EIO;
1500	ni = NULL;
1501	goto out;
1502}
1503
1504/**
1505 * ntfs_mft_record_alloc - allocate an mft record on an ntfs volume
1506 * @vol:	volume on which to allocate the mft record
1507 * @base_ni:	open base inode if allocating an extent mft record or NULL
1508 *
1509 * Allocate an mft record in $MFT/$DATA of an open ntfs volume @vol.
1510 *
1511 * If @base_ni is NULL make the mft record a base mft record and allocate it at
1512 * the default allocator position.
1513 *
1514 * If @base_ni is not NULL make the allocated mft record an extent record,
1515 * allocate it starting at the mft record after the base mft record and attach
1516 * the allocated and opened ntfs inode to the base inode @base_ni.
1517 *
1518 * On success return the now opened ntfs (extent) inode of the mft record.
1519 *
1520 * On error return NULL with errno set to the error code.
1521 *
1522 * To find a free mft record, we scan the mft bitmap for a zero bit.  To
1523 * optimize this we start scanning at the place specified by @base_ni or if
1524 * @base_ni is NULL we start where we last stopped and we perform wrap around
1525 * when we reach the end.  Note, we do not try to allocate mft records below
1526 * number 24 because numbers 0 to 15 are the defined system files anyway and 16
1527 * to 24 are special in that they are used for storing extension mft records
1528 * for the $DATA attribute of $MFT.  This is required to avoid the possibility
1529 * of creating a run list with a circular dependence which once written to disk
1530 * can never be read in again.  Windows will only use records 16 to 24 for
1531 * normal files if the volume is completely out of space.  We never use them
1532 * which means that when the volume is really out of space we cannot create any
1533 * more files while Windows can still create up to 8 small files.  We can start
1534 * doing this at some later time, it does not matter much for now.
1535 *
1536 * When scanning the mft bitmap, we only search up to the last allocated mft
1537 * record.  If there are no free records left in the range 24 to number of
1538 * allocated mft records, then we extend the $MFT/$DATA attribute in order to
1539 * create free mft records.  We extend the allocated size of $MFT/$DATA by 16
1540 * records at a time or one cluster, if cluster size is above 16kiB.  If there
1541 * is not sufficient space to do this, we try to extend by a single mft record
1542 * or one cluster, if cluster size is above the mft record size, but we only do
1543 * this if there is enough free space, which we know from the values returned
1544 * by the failed cluster allocation function when we tried to do the first
1545 * allocation.
1546 *
1547 * No matter how many mft records we allocate, we initialize only the first
1548 * allocated mft record, incrementing mft data size and initialized size
1549 * accordingly, open an ntfs_inode for it and return it to the caller, unless
1550 * there are less than 24 mft records, in which case we allocate and initialize
1551 * mft records until we reach record 24 which we consider as the first free mft
1552 * record for use by normal files.
1553 *
1554 * If during any stage we overflow the initialized data in the mft bitmap, we
1555 * extend the initialized size (and data size) by 8 bytes, allocating another
1556 * cluster if required.  The bitmap data size has to be at least equal to the
1557 * number of mft records in the mft, but it can be bigger, in which case the
1558 * superfluous bits are padded with zeroes.
1559 *
1560 * Thus, when we return successfully (return value non-zero), we will have:
1561 *	- initialized / extended the mft bitmap if necessary,
1562 *	- initialized / extended the mft data if necessary,
1563 *	- set the bit corresponding to the mft record being allocated in the
1564 *	  mft bitmap,
1565 *	- open an ntfs_inode for the allocated mft record, and we will
1566 *	- return the ntfs_inode.
1567 *
1568 * On error (return value zero), nothing will have changed.  If we had changed
1569 * anything before the error occurred, we will have reverted back to the
1570 * starting state before returning to the caller.  Thus, except for bugs, we
1571 * should always leave the volume in a consistent state when returning from
1572 * this function.
1573 *
1574 * Note, this function cannot make use of most of the normal functions, like
1575 * for example for attribute resizing, etc, because when the run list overflows
1576 * the base mft record and an attribute list is used, it is very important that
1577 * the extension mft records used to store the $DATA attribute of $MFT can be
1578 * reached without having to read the information contained inside them, as
1579 * this would make it impossible to find them in the first place after the
1580 * volume is dismounted.  $MFT/$BITMAP probably does not need to follow this
1581 * rule because the bitmap is not essential for finding the mft records, but on
1582 * the other hand, handling the bitmap in this special way would make life
1583 * easier because otherwise there might be circular invocations of functions
1584 * when reading the bitmap but if we are careful, we should be able to avoid
1585 * all problems.
1586 */
1587ntfs_inode *ntfs_mft_record_alloc(ntfs_volume *vol, ntfs_inode *base_ni)
1588{
1589	s64 ll, bit;
1590	ntfs_attr *mft_na, *mftbmp_na;
1591	MFT_RECORD *m;
1592	ntfs_inode *ni = NULL;
1593	int err;
1594	le16 seq_no, usn;
1595
1596	if (base_ni)
1597		ntfs_log_enter("Entering (allocating an extent mft record for "
1598			       "base mft record %lld).\n",
1599			       (long long)base_ni->mft_no);
1600	else
1601		ntfs_log_enter("Entering (allocating a base mft record)\n");
1602	if (!vol || !vol->mft_na || !vol->mftbmp_na) {
1603		errno = EINVAL;
1604		goto out;
1605	}
1606
1607	if (ntfs_is_mft(base_ni)) {
1608		ni = ntfs_mft_rec_alloc(vol);
1609		goto out;
1610	}
1611
1612	mft_na = vol->mft_na;
1613	mftbmp_na = vol->mftbmp_na;
1614retry:
1615	bit = ntfs_mft_bitmap_find_free_rec(vol, base_ni);
1616	if (bit >= 0) {
1617		ntfs_log_debug("found free record (#1) at %lld\n",
1618				(long long)bit);
1619		goto found_free_rec;
1620	}
1621	if (errno != ENOSPC)
1622		goto out;
1623	/*
1624	 * No free mft records left.  If the mft bitmap already covers more
1625	 * than the currently used mft records, the next records are all free,
1626	 * so we can simply allocate the first unused mft record.
1627	 * Note: We also have to make sure that the mft bitmap at least covers
1628	 * the first 24 mft records as they are special and whilst they may not
1629	 * be in use, we do not allocate from them.
1630	 */
1631	ll = mft_na->initialized_size >> vol->mft_record_size_bits;
1632	if (mftbmp_na->initialized_size << 3 > ll &&
1633			mftbmp_na->initialized_size > RESERVED_MFT_RECORDS / 8) {
1634		bit = ll;
1635		if (bit < RESERVED_MFT_RECORDS)
1636			bit = RESERVED_MFT_RECORDS;
1637		ntfs_log_debug("found free record (#2) at %lld\n",
1638				(long long)bit);
1639		goto found_free_rec;
1640	}
1641	/*
1642	 * The mft bitmap needs to be expanded until it covers the first unused
1643	 * mft record that we can allocate.
1644	 * Note: The smallest mft record we allocate is mft record 24.
1645	 */
1646	ntfs_log_debug("Status of mftbmp before extension: allocated_size 0x%llx, "
1647			"data_size 0x%llx, initialized_size 0x%llx.\n",
1648			(long long)mftbmp_na->allocated_size,
1649			(long long)mftbmp_na->data_size,
1650			(long long)mftbmp_na->initialized_size);
1651	if (mftbmp_na->initialized_size + 8 > mftbmp_na->allocated_size) {
1652
1653		int ret = ntfs_mft_bitmap_extend_allocation(vol);
1654
1655		if (ret == STATUS_ERROR)
1656			goto err_out;
1657		if (ret == STATUS_KEEP_SEARCHING) {
1658			ret = ntfs_mft_bitmap_extend_allocation(vol);
1659			if (ret != STATUS_OK)
1660				goto err_out;
1661		}
1662
1663		ntfs_log_debug("Status of mftbmp after allocation extension: "
1664				"allocated_size 0x%llx, data_size 0x%llx, "
1665				"initialized_size 0x%llx.\n",
1666				(long long)mftbmp_na->allocated_size,
1667				(long long)mftbmp_na->data_size,
1668				(long long)mftbmp_na->initialized_size);
1669	}
1670	/*
1671	 * We now have sufficient allocated space, extend the initialized_size
1672	 * as well as the data_size if necessary and fill the new space with
1673	 * zeroes.
1674	 */
1675	bit = mftbmp_na->initialized_size << 3;
1676	if (ntfs_mft_bitmap_extend_initialized(vol))
1677		goto err_out;
1678	ntfs_log_debug("Status of mftbmp after initialized extension: "
1679			"allocated_size 0x%llx, data_size 0x%llx, "
1680			"initialized_size 0x%llx.\n",
1681			(long long)mftbmp_na->allocated_size,
1682			(long long)mftbmp_na->data_size,
1683			(long long)mftbmp_na->initialized_size);
1684	ntfs_log_debug("found free record (#3) at %lld\n", (long long)bit);
1685found_free_rec:
1686	/* @bit is the found free mft record, allocate it in the mft bitmap. */
1687	if (ntfs_bitmap_set_bit(mftbmp_na, bit)) {
1688		ntfs_log_error("Failed to allocate bit in mft bitmap.\n");
1689		goto err_out;
1690	}
1691
1692	/* The mft bitmap is now uptodate.  Deal with mft data attribute now. */
1693	ll = (bit + 1) << vol->mft_record_size_bits;
1694	if (ll > mft_na->initialized_size)
1695		if (ntfs_mft_record_init(vol, ll) < 0)
1696			goto undo_mftbmp_alloc;
1697
1698	/*
1699	 * We now have allocated and initialized the mft record.  Need to read
1700	 * it from disk and re-format it, preserving the sequence number if it
1701	 * is not zero as well as the update sequence number if it is not zero
1702	 * or -1 (0xffff).
1703	 */
1704	m = ntfs_malloc(vol->mft_record_size);
1705	if (!m)
1706		goto undo_mftbmp_alloc;
1707
1708	if (ntfs_mft_record_read(vol, bit, m)) {
1709		free(m);
1710		goto undo_mftbmp_alloc;
1711	}
1712	/* Sanity check that the mft record is really not in use. */
1713	if (ntfs_is_file_record(m->magic) && (m->flags & MFT_RECORD_IN_USE)) {
1714		ntfs_log_error("Inode %lld is used but it wasn't marked in "
1715			       "$MFT bitmap. Fixed.\n", (long long)bit);
1716		free(m);
1717		goto retry;
1718	}
1719	seq_no = m->sequence_number;
1720	usn = *(le16*)((u8*)m + le16_to_cpu(m->usa_ofs));
1721	if (ntfs_mft_record_layout(vol, bit, m)) {
1722		ntfs_log_error("Failed to re-format mft record.\n");
1723		free(m);
1724		goto undo_mftbmp_alloc;
1725	}
1726	if (seq_no)
1727		m->sequence_number = seq_no;
1728	seq_no = usn;
1729	if (seq_no && seq_no != const_cpu_to_le16(0xffff))
1730		*(le16*)((u8*)m + le16_to_cpu(m->usa_ofs)) = usn;
1731	/* Set the mft record itself in use. */
1732	m->flags |= MFT_RECORD_IN_USE;
1733	/* Now need to open an ntfs inode for the mft record. */
1734	ni = ntfs_inode_allocate(vol);
1735	if (!ni) {
1736		ntfs_log_error("Failed to allocate buffer for inode.\n");
1737		free(m);
1738		goto undo_mftbmp_alloc;
1739	}
1740	ni->mft_no = bit;
1741	ni->mrec = m;
1742	/*
1743	 * If we are allocating an extent mft record, make the opened inode an
1744	 * extent inode and attach it to the base inode.  Also, set the base
1745	 * mft record reference in the extent inode.
1746	 */
1747	if (base_ni) {
1748		ni->nr_extents = -1;
1749		ni->base_ni = base_ni;
1750		m->base_mft_record = MK_LE_MREF(base_ni->mft_no,
1751				le16_to_cpu(base_ni->mrec->sequence_number));
1752		/*
1753		 * Attach the extent inode to the base inode, reallocating
1754		 * memory if needed.
1755		 */
1756		if (!(base_ni->nr_extents & 3)) {
1757			ntfs_inode **extent_nis;
1758			int i;
1759
1760			i = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *);
1761			extent_nis = ntfs_malloc(i);
1762			if (!extent_nis) {
1763				free(m);
1764				free(ni);
1765				goto undo_mftbmp_alloc;
1766			}
1767			if (base_ni->nr_extents) {
1768				memcpy(extent_nis, base_ni->extent_nis,
1769						i - 4 * sizeof(ntfs_inode *));
1770				free(base_ni->extent_nis);
1771			}
1772			base_ni->extent_nis = extent_nis;
1773		}
1774		base_ni->extent_nis[base_ni->nr_extents++] = ni;
1775	}
1776	/* Make sure the allocated inode is written out to disk later. */
1777	ntfs_inode_mark_dirty(ni);
1778	/* Initialize time, allocated and data size in ntfs_inode struct. */
1779	ni->data_size = ni->allocated_size = 0;
1780	ni->flags = 0;
1781	ni->creation_time = ni->last_data_change_time =
1782			ni->last_mft_change_time =
1783			ni->last_access_time = ntfs_current_time();
1784	/* Update the default mft allocation position if it was used. */
1785	if (!base_ni)
1786		vol->mft_data_pos = bit + 1;
1787	/* Return the opened, allocated inode of the allocated mft record. */
1788	ntfs_log_debug("allocated %sinode 0x%llx.\n",
1789			base_ni ? "extent " : "", (long long)bit);
1790	vol->free_mft_records--;
1791out:
1792	ntfs_log_leave("\n");
1793	return ni;
1794
1795undo_mftbmp_alloc:
1796	err = errno;
1797	if (ntfs_bitmap_clear_bit(mftbmp_na, bit))
1798		ntfs_log_error("Failed to clear bit in mft bitmap.%s\n", es);
1799	errno = err;
1800err_out:
1801	if (!errno)
1802		errno = EIO;
1803	ni = NULL;
1804	goto out;
1805}
1806
1807/**
1808 * ntfs_mft_record_free - free an mft record on an ntfs volume
1809 * @vol:	volume on which to free the mft record
1810 * @ni:		open ntfs inode of the mft record to free
1811 *
1812 * Free the mft record of the open inode @ni on the mounted ntfs volume @vol.
1813 * Note that this function calls ntfs_inode_close() internally and hence you
1814 * cannot use the pointer @ni any more after this function returns success.
1815 *
1816 * On success return 0 and on error return -1 with errno set to the error code.
1817 */
1818int ntfs_mft_record_free(ntfs_volume *vol, ntfs_inode *ni)
1819{
1820	u64 mft_no;
1821	int err;
1822	u16 seq_no;
1823	le16 old_seq_no;
1824
1825	ntfs_log_trace("Entering for inode 0x%llx.\n", (long long) ni->mft_no);
1826
1827	if (!vol || !vol->mftbmp_na || !ni) {
1828		errno = EINVAL;
1829		return -1;
1830	}
1831
1832	/* Cache the mft reference for later. */
1833	mft_no = ni->mft_no;
1834
1835	/* Mark the mft record as not in use. */
1836	ni->mrec->flags &= ~MFT_RECORD_IN_USE;
1837
1838	/* Increment the sequence number, skipping zero, if it is not zero. */
1839	old_seq_no = ni->mrec->sequence_number;
1840	seq_no = le16_to_cpu(old_seq_no);
1841	if (seq_no == 0xffff)
1842		seq_no = 1;
1843	else if (seq_no)
1844		seq_no++;
1845	ni->mrec->sequence_number = cpu_to_le16(seq_no);
1846
1847	/* Set the inode dirty and write it out. */
1848	ntfs_inode_mark_dirty(ni);
1849	if (ntfs_inode_sync(ni)) {
1850		err = errno;
1851		goto sync_rollback;
1852	}
1853
1854	/* Clear the bit in the $MFT/$BITMAP corresponding to this record. */
1855	if (ntfs_bitmap_clear_bit(vol->mftbmp_na, mft_no)) {
1856		err = errno;
1857		// FIXME: If ntfs_bitmap_clear_run() guarantees rollback on
1858		//	  error, this could be changed to goto sync_rollback;
1859		goto bitmap_rollback;
1860	}
1861
1862	/* Throw away the now freed inode. */
1863#if CACHE_NIDATA_SIZE
1864	if (!ntfs_inode_real_close(ni)) {
1865#else
1866	if (!ntfs_inode_close(ni)) {
1867#endif
1868		vol->free_mft_records++;
1869		return 0;
1870	}
1871	err = errno;
1872
1873	/* Rollback what we did... */
1874bitmap_rollback:
1875	if (ntfs_bitmap_set_bit(vol->mftbmp_na, mft_no))
1876		ntfs_log_debug("Eeek! Rollback failed in ntfs_mft_record_free().  "
1877				"Leaving inconsistent metadata!\n");
1878sync_rollback:
1879	ni->mrec->flags |= MFT_RECORD_IN_USE;
1880	ni->mrec->sequence_number = old_seq_no;
1881	ntfs_inode_mark_dirty(ni);
1882	errno = err;
1883	return -1;
1884}
1885
1886/**
1887 * ntfs_mft_usn_dec - Decrement USN by one
1888 * @mrec:	pointer to an mft record
1889 *
1890 * On success return 0 and on error return -1 with errno set.
1891 */
1892int ntfs_mft_usn_dec(MFT_RECORD *mrec)
1893{
1894	u16 usn;
1895	le16 *usnp;
1896
1897	if (!mrec) {
1898		errno = EINVAL;
1899		return -1;
1900	}
1901	usnp = (le16*)((char*)mrec + le16_to_cpu(mrec->usa_ofs));
1902	usn = le16_to_cpup(usnp);
1903	if (usn-- <= 1)
1904		usn = 0xfffe;
1905	*usnp = cpu_to_le16(usn);
1906
1907	return 0;
1908}
1909
1910