1/**
2 * lcnalloc.c - Cluster (de)allocation code. Originated from the Linux-NTFS project.
3 *
4 * Copyright (c) 2002-2004 Anton Altaparmakov
5 * Copyright (c) 2004 Yura Pakhuchiy
6 * Copyright (c) 2004-2008 Szabolcs Szakacsits
7 *
8 * This program/include file is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as published
10 * by the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program/include file is distributed in the hope that it will be
14 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
15 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program (in the main directory of the NTFS-3G
20 * distribution in the file COPYING); if not, write to the Free Software
21 * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 */
23
24#ifdef HAVE_CONFIG_H
25#include "config.h"
26#endif
27
28#ifdef HAVE_STDLIB_H
29#include <stdlib.h>
30#endif
31#ifdef HAVE_STDIO_H
32#include <stdio.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37
38#include "types.h"
39#include "attrib.h"
40#include "bitmap.h"
41#include "debug.h"
42#include "runlist.h"
43#include "volume.h"
44#include "lcnalloc.h"
45#include "logging.h"
46#include "misc.h"
47
48/*
49 * Plenty possibilities for big optimizations all over in the cluster
50 * allocation, however at the moment the dominant bottleneck (~ 90%) is
51 * the update of the mapping pairs which converges to the cubic Faulhaber's
52 * formula as the function of the number of extents (fragments, runs).
53 */
54#define NTFS_LCNALLOC_BSIZE 4096
55#define NTFS_LCNALLOC_SKIP  NTFS_LCNALLOC_BSIZE
56
57static void ntfs_cluster_set_zone_pos(LCN start, LCN end, LCN *pos, LCN tc)
58{
59	ntfs_log_trace("pos: %lld  tc: %lld\n", (long long)*pos, (long long)tc);
60
61	if (tc >= end)
62		*pos = start;
63	else if (tc >= start)
64		*pos = tc;
65}
66
67static void ntfs_cluster_update_zone_pos(ntfs_volume *vol, u8 zone, LCN tc)
68{
69	ntfs_log_trace("tc = %lld, zone = %d\n", (long long)tc, zone);
70
71	if (zone == 1)
72		ntfs_cluster_set_zone_pos(vol->mft_lcn, vol->mft_zone_end,
73					  &vol->mft_zone_pos, tc);
74	else if (zone == 2)
75		ntfs_cluster_set_zone_pos(vol->mft_zone_end, vol->nr_clusters,
76					  &vol->data1_zone_pos, tc);
77	else /* zone == 4 */
78		ntfs_cluster_set_zone_pos(0, vol->mft_zone_start,
79					  &vol->data2_zone_pos, tc);
80}
81
82static s64 max_empty_bit_range(unsigned char *buf, int size)
83{
84	int i, j, run = 0;
85	int max_range = 0;
86	s64 start_pos = -1;
87
88	ntfs_log_trace("Entering\n");
89
90	for (i = 0; i < size; i++, buf++) {
91
92		if (*buf == 0) {
93			run += 8;
94			continue;
95		}
96
97		for (j = 0; j < 8; j++) {
98
99			int bit = *buf & (1 << j);
100
101			if (bit) {
102				if (run > max_range) {
103					max_range = run;
104					start_pos = i * 8 + j - run;
105				}
106				run = 0;
107			} else
108				run++;
109		}
110	}
111
112	if (run > max_range)
113		start_pos = i * 8 - run;
114
115	return start_pos;
116}
117
118static int bitmap_writeback(ntfs_volume *vol, s64 pos, s64 size, void *b,
119			    u8 *writeback)
120{
121	s64 written;
122
123	ntfs_log_trace("Entering\n");
124
125	if (!*writeback)
126		return 0;
127
128	*writeback = 0;
129
130	written = ntfs_attr_pwrite(vol->lcnbmp_na, pos, size, b);
131	if (written != size) {
132		if (!written)
133			errno = EIO;
134		ntfs_log_perror("Bitmap write error (%lld, %lld)",
135				(long long)pos, (long long)size);
136		return -1;
137	}
138
139	return 0;
140}
141
142/**
143 * ntfs_cluster_alloc - allocate clusters on an ntfs volume
144 * @vol:	mounted ntfs volume on which to allocate the clusters
145 * @start_vcn:	vcn to use for the first allocated cluster
146 * @count:	number of clusters to allocate
147 * @start_lcn:	starting lcn at which to allocate the clusters (or -1 if none)
148 * @zone:	zone from which to allocate the clusters
149 *
150 * Allocate @count clusters preferably starting at cluster @start_lcn or at the
151 * current allocator position if @start_lcn is -1, on the mounted ntfs volume
152 * @vol. @zone is either DATA_ZONE for allocation of normal clusters and
153 * MFT_ZONE for allocation of clusters for the master file table, i.e. the
154 * $MFT/$DATA attribute.
155 *
156 * On success return a runlist describing the allocated cluster(s).
157 *
158 * On error return NULL with errno set to the error code.
159 *
160 * Notes on the allocation algorithm
161 * =================================
162 *
163 * There are two data zones. First is the area between the end of the mft zone
164 * and the end of the volume, and second is the area between the start of the
165 * volume and the start of the mft zone. On unmodified/standard NTFS 1.x
166 * volumes, the second data zone doesn't exist due to the mft zone being
167 * expanded to cover the start of the volume in order to reserve space for the
168 * mft bitmap attribute.
169 *
170 * The complexity stems from the need of implementing the mft vs data zoned
171 * approach and from the fact that we have access to the lcn bitmap via up to
172 * NTFS_LCNALLOC_BSIZE bytes at a time, so we need to cope with crossing over
173 * boundaries of two buffers. Further, the fact that the allocator allows for
174 * caller supplied hints as to the location of where allocation should begin
175 * and the fact that the allocator keeps track of where in the data zones the
176 * next natural allocation should occur, contribute to the complexity of the
177 * function. But it should all be worthwhile, because this allocator:
178 *   1) implements MFT zone reservation
179 *   2) causes reduction in fragmentation.
180 * The code is not optimized for speed.
181 */
182runlist *ntfs_cluster_alloc(ntfs_volume *vol, VCN start_vcn, s64 count,
183		LCN start_lcn, const NTFS_CLUSTER_ALLOCATION_ZONES zone)
184{
185	LCN zone_start, zone_end;  /* current search range */
186	LCN last_read_pos, lcn;
187	LCN bmp_pos;		/* current bit position inside the bitmap */
188	LCN prev_lcn = 0, prev_run_len = 0;
189	s64 clusters, br;
190	runlist *rl = NULL, *trl;
191	u8 *buf, *byte, bit, writeback;
192	u8 pass = 1; 	/* 1: inside zone;  2: start of zone */
193	u8 search_zone; /* 4: data2 (start) 1: mft (middle) 2: data1 (end) */
194	u8 done_zones = 0;
195	u8 has_guess, used_zone_pos;
196	int err = 0, rlpos, rlsize, buf_size;
197
198	ntfs_log_enter("Entering with count = 0x%llx, start_lcn = 0x%llx, "
199		       "zone = %s_ZONE.\n", (long long)count, (long long)
200		       start_lcn, zone == MFT_ZONE ? "MFT" : "DATA");
201
202	if (!vol || count < 0 || start_lcn < -1 || !vol->lcnbmp_na ||
203			(s8)zone < FIRST_ZONE || zone > LAST_ZONE) {
204		errno = EINVAL;
205		ntfs_log_perror("%s: vcn: %lld, count: %lld, lcn: %lld",
206				__FUNCTION__, (long long)start_vcn,
207				(long long)count, (long long)start_lcn);
208		goto out;
209	}
210
211	/* Return empty runlist if @count == 0 */
212	if (!count) {
213		rl = ntfs_malloc(0x1000);
214		if (rl) {
215			rl[0].vcn = start_vcn;
216			rl[0].lcn = LCN_RL_NOT_MAPPED;
217			rl[0].length = 0;
218		}
219		goto out;
220	}
221
222	buf = ntfs_malloc(NTFS_LCNALLOC_BSIZE);
223	if (!buf)
224		goto out;
225	/*
226	 * If no @start_lcn was requested, use the current zone
227	 * position otherwise use the requested @start_lcn.
228	 */
229	has_guess = 1;
230	zone_start = start_lcn;
231
232	if (zone_start < 0) {
233		if (zone == DATA_ZONE)
234			zone_start = vol->data1_zone_pos;
235		else
236			zone_start = vol->mft_zone_pos;
237		has_guess = 0;
238	}
239
240	used_zone_pos = has_guess ? 0 : 1;
241
242	if (!zone_start || zone_start == vol->mft_zone_start ||
243			zone_start == vol->mft_zone_end)
244		pass = 2;
245
246	if (zone_start < vol->mft_zone_start) {
247		zone_end = vol->mft_zone_start;
248		search_zone = 4;
249	} else if (zone_start < vol->mft_zone_end) {
250		zone_end = vol->mft_zone_end;
251		search_zone = 1;
252	} else {
253		zone_end = vol->nr_clusters;
254		search_zone = 2;
255	}
256
257	bmp_pos = zone_start;
258
259	/* Loop until all clusters are allocated. */
260	clusters = count;
261	rlpos = rlsize = 0;
262	while (1) {
263		last_read_pos = bmp_pos >> 3;
264		br = ntfs_attr_pread(vol->lcnbmp_na, last_read_pos,
265				     NTFS_LCNALLOC_BSIZE, buf);
266		if (br <= 0) {
267			if (!br)
268				goto zone_pass_done;
269			err = errno;
270			ntfs_log_perror("Reading $BITMAP failed");
271			goto err_ret;
272		}
273		/*
274		 * We might have read less than NTFS_LCNALLOC_BSIZE bytes
275		 * if we are close to the end of the attribute.
276		 */
277		buf_size = (int)br << 3;
278		lcn = bmp_pos & 7;
279		bmp_pos &= ~7;
280		writeback = 0;
281
282		while (lcn < buf_size) {
283			byte = buf + (lcn >> 3);
284			bit = 1 << (lcn & 7);
285			if (has_guess) {
286				if (*byte & bit) {
287					has_guess = 0;
288					break;
289				}
290			} else {
291				lcn = max_empty_bit_range(buf, br);
292				if (lcn < 0)
293					break;
294				has_guess = 1;
295				continue;
296			}
297
298			/* First free bit is at lcn + bmp_pos. */
299
300			/* Reallocate memory if necessary. */
301			if ((rlpos + 2) * (int)sizeof(runlist) >= rlsize) {
302				rlsize += 4096;
303				trl = realloc(rl, rlsize);
304				if (!trl) {
305					err = ENOMEM;
306					ntfs_log_perror("realloc() failed");
307					goto wb_err_ret;
308				}
309				rl = trl;
310			}
311
312			/* Allocate the bitmap bit. */
313			*byte |= bit;
314			writeback = 1;
315			if (vol->free_clusters <= 0)
316				ntfs_log_error("Non-positive free clusters "
317					       "(%lld)!\n",
318						(long long)vol->free_clusters);
319			else
320				vol->free_clusters--;
321
322			/*
323			 * Coalesce with previous run if adjacent LCNs.
324			 * Otherwise, append a new run.
325			 */
326			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
327				ntfs_log_debug("Cluster coalesce: prev_lcn: "
328					       "%lld  lcn: %lld  bmp_pos: %lld  "
329					       "prev_run_len: %lld\n",
330					       (long long)prev_lcn,
331					       (long long)lcn, (long long)bmp_pos,
332					       (long long)prev_run_len);
333				rl[rlpos - 1].length = ++prev_run_len;
334			} else {
335				if (rlpos)
336					rl[rlpos].vcn = rl[rlpos - 1].vcn +
337							prev_run_len;
338				else {
339					rl[rlpos].vcn = start_vcn;
340					ntfs_log_debug("Start_vcn: %lld\n",
341						       (long long)start_vcn);
342				}
343
344				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
345				rl[rlpos].length = prev_run_len = 1;
346				rlpos++;
347			}
348
349			ntfs_log_debug("RUN:   %-16lld %-16lld %-16lld\n",
350				       (long long)rl[rlpos - 1].vcn,
351				       (long long)rl[rlpos - 1].lcn,
352				       (long long)rl[rlpos - 1].length);
353			/* Done? */
354			if (!--clusters) {
355				if (used_zone_pos)
356					ntfs_cluster_update_zone_pos(vol,
357						search_zone, lcn + bmp_pos + 1 +
358							NTFS_LCNALLOC_SKIP);
359				goto done_ret;
360			}
361
362			lcn++;
363		}
364
365		if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback)) {
366			err = errno;
367			goto err_ret;
368		}
369
370		if (!used_zone_pos) {
371
372			used_zone_pos = 1;
373
374			if (search_zone == 1)
375				zone_start = vol->mft_zone_pos;
376			else if (search_zone == 2)
377				zone_start = vol->data1_zone_pos;
378			else
379				zone_start = vol->data2_zone_pos;
380
381			if (!zone_start || zone_start == vol->mft_zone_start ||
382					zone_start == vol->mft_zone_end)
383				pass = 2;
384			bmp_pos = zone_start;
385		} else
386			bmp_pos += buf_size;
387
388		if (bmp_pos < zone_end)
389			continue;
390
391zone_pass_done:
392		ntfs_log_trace("Finished current zone pass(%i).\n", pass);
393		if (pass == 1) {
394
395			pass = 2;
396			zone_end = zone_start;
397
398			if (search_zone == 1)
399				zone_start = vol->mft_zone_start;
400			else if (search_zone == 2)
401				zone_start = vol->mft_zone_end;
402			else
403				zone_start = 0;
404
405			/* Sanity check. */
406			if (zone_end < zone_start)
407				zone_end = zone_start;
408
409			bmp_pos = zone_start;
410
411			continue;
412		}
413		/* pass == 2 */
414done_zones_check:
415		done_zones |= search_zone;
416		if (done_zones < 7) {
417			ntfs_log_trace("Switching zone.\n");
418			pass = 1;
419			if (rlpos) {
420				LCN tc = tc = rl[rlpos - 1].lcn +
421				      rl[rlpos - 1].length + NTFS_LCNALLOC_SKIP;
422
423				if (used_zone_pos)
424					ntfs_cluster_update_zone_pos(vol,
425						search_zone, tc);
426			}
427
428			switch (search_zone) {
429			case 1:
430				ntfs_log_trace("Zone switch: mft -> data1\n");
431switch_to_data1_zone:		search_zone = 2;
432				zone_start = vol->data1_zone_pos;
433				zone_end = vol->nr_clusters;
434				if (zone_start == vol->mft_zone_end)
435					pass = 2;
436				break;
437			case 2:
438				ntfs_log_trace("Zone switch: data1 -> data2\n");
439				search_zone = 4;
440				zone_start = vol->data2_zone_pos;
441				zone_end = vol->mft_zone_start;
442				if (!zone_start)
443					pass = 2;
444				break;
445			case 4:
446				if (!(done_zones & 2)) {
447					ntfs_log_trace("data2 -> data1\n");
448					goto switch_to_data1_zone;
449				}
450				ntfs_log_trace("Zone switch: data2 -> mft\n");
451				search_zone = 1;
452				zone_start = vol->mft_zone_pos;
453				zone_end = vol->mft_zone_end;
454				if (zone_start == vol->mft_zone_start)
455					pass = 2;
456				break;
457			}
458
459			bmp_pos = zone_start;
460
461			if (zone_start == zone_end) {
462				ntfs_log_trace("Empty zone, skipped.\n");
463				goto done_zones_check;
464			}
465
466			continue;
467		}
468
469		ntfs_log_trace("All zones are finished, no space on device.\n");
470		err = ENOSPC;
471		goto err_ret;
472	}
473done_ret:
474	ntfs_log_debug("At done_ret.\n");
475	/* Add runlist terminator element. */
476	rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
477	rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
478	rl[rlpos].length = 0;
479	if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback)) {
480		err = errno;
481		goto err_ret;
482	}
483done_err_ret:
484	free(buf);
485	if (err) {
486		errno = err;
487		ntfs_log_perror("Failed to allocate clusters");
488		rl = NULL;
489	}
490out:
491	ntfs_log_leave("\n");
492	return rl;
493
494wb_err_ret:
495	ntfs_log_trace("At wb_err_ret.\n");
496	if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback))
497		err = errno;
498err_ret:
499	ntfs_log_trace("At err_ret.\n");
500	if (rl) {
501		/* Add runlist terminator element. */
502		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
503		rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
504		rl[rlpos].length = 0;
505		ntfs_debug_runlist_dump(rl);
506		ntfs_cluster_free_from_rl(vol, rl);
507		free(rl);
508		rl = NULL;
509	}
510	goto done_err_ret;
511}
512
513/**
514 * ntfs_cluster_free_from_rl - free clusters from runlist
515 * @vol:	mounted ntfs volume on which to free the clusters
516 * @rl:		runlist from which deallocate clusters
517 *
518 * On success return 0 and on error return -1 with errno set to the error code.
519 */
520int ntfs_cluster_free_from_rl(ntfs_volume *vol, runlist *rl)
521{
522	s64 nr_freed = 0;
523	int ret = -1;
524
525	ntfs_log_trace("Entering.\n");
526
527	for (; rl->length; rl++) {
528
529		ntfs_log_trace("Dealloc lcn 0x%llx, len 0x%llx.\n",
530			       (long long)rl->lcn, (long long)rl->length);
531
532		if (rl->lcn >= 0) {
533			if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn,
534						  rl->length)) {
535				ntfs_log_perror("Cluster deallocation failed "
536					       "(%lld, %lld)",
537						(long long)rl->lcn,
538						(long long)rl->length);
539				goto out;
540			}
541			nr_freed += rl->length ;
542		}
543	}
544
545	ret = 0;
546out:
547	vol->free_clusters += nr_freed;
548	if (vol->free_clusters > vol->nr_clusters)
549		ntfs_log_error("Too many free clusters (%lld > %lld)!",
550			       (long long)vol->free_clusters,
551			       (long long)vol->nr_clusters);
552	return ret;
553}
554
555/**
556 * ntfs_cluster_free - free clusters on an ntfs volume
557 * @vol:	mounted ntfs volume on which to free the clusters
558 * @na:		attribute whose runlist describes the clusters to free
559 * @start_vcn:	vcn in @rl at which to start freeing clusters
560 * @count:	number of clusters to free or -1 for all clusters
561 *
562 * Free @count clusters starting at the cluster @start_vcn in the runlist
563 * described by the attribute @na from the mounted ntfs volume @vol.
564 *
565 * If @count is -1, all clusters from @start_vcn to the end of the runlist
566 * are deallocated.
567 *
568 * On success return the number of deallocated clusters (not counting sparse
569 * clusters) and on error return -1 with errno set to the error code.
570 */
571int ntfs_cluster_free(ntfs_volume *vol, ntfs_attr *na, VCN start_vcn, s64 count)
572{
573	runlist *rl;
574	s64 delta, to_free, nr_freed = 0;
575	int ret = -1;
576
577	if (!vol || !vol->lcnbmp_na || !na || start_vcn < 0 ||
578			(count < 0 && count != -1)) {
579		ntfs_log_trace("Invalid arguments!\n");
580		errno = EINVAL;
581		return -1;
582	}
583
584	ntfs_log_enter("Entering for inode 0x%llx, attr 0x%x, count 0x%llx, "
585		       "vcn 0x%llx.\n", (unsigned long long)na->ni->mft_no,
586		       na->type, (long long)count, (long long)start_vcn);
587
588	rl = ntfs_attr_find_vcn(na, start_vcn);
589	if (!rl) {
590		if (errno == ENOENT)
591			ret = 0;
592		goto leave;
593	}
594
595	if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
596		errno = EIO;
597		ntfs_log_perror("%s: Unexpected lcn (%lld)", __FUNCTION__,
598				(long long)rl->lcn);
599		goto leave;
600	}
601
602	/* Find the starting cluster inside the run that needs freeing. */
603	delta = start_vcn - rl->vcn;
604
605	/* The number of clusters in this run that need freeing. */
606	to_free = rl->length - delta;
607	if (count >= 0 && to_free > count)
608		to_free = count;
609
610	if (rl->lcn != LCN_HOLE) {
611		/* Do the actual freeing of the clusters in this run. */
612		if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn + delta,
613					  to_free))
614			goto leave;
615		nr_freed = to_free;
616	}
617
618	/* Go to the next run and adjust the number of clusters left to free. */
619	++rl;
620	if (count >= 0)
621		count -= to_free;
622
623	/*
624	 * Loop over the remaining runs, using @count as a capping value, and
625	 * free them.
626	 */
627	for (; rl->length && count != 0; ++rl) {
628		// FIXME: Need to try ntfs_attr_map_runlist() for attribute
629		//	  list support! (AIA)
630		if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
631			// FIXME: Eeek! We need rollback! (AIA)
632			errno = EIO;
633			ntfs_log_perror("%s: Invalid lcn (%lli)",
634					__FUNCTION__, (long long)rl->lcn);
635			goto out;
636		}
637
638		/* The number of clusters in this run that need freeing. */
639		to_free = rl->length;
640		if (count >= 0 && to_free > count)
641			to_free = count;
642
643		if (rl->lcn != LCN_HOLE) {
644			if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn,
645					to_free)) {
646				// FIXME: Eeek! We need rollback! (AIA)
647				ntfs_log_perror("%s: Clearing bitmap run failed",
648						__FUNCTION__);
649				goto out;
650			}
651			nr_freed += to_free;
652		}
653
654		if (count >= 0)
655			count -= to_free;
656	}
657
658	if (count != -1 && count != 0) {
659		// FIXME: Eeek! BUG()
660		errno = EIO;
661		ntfs_log_perror("%s: count still not zero (%lld)", __FUNCTION__,
662			       (long long)count);
663		goto out;
664	}
665
666	ret = nr_freed;
667out:
668	vol->free_clusters += nr_freed ;
669	if (vol->free_clusters > vol->nr_clusters)
670		ntfs_log_error("Too many free clusters (%lld > %lld)!",
671			       (long long)vol->free_clusters,
672			       (long long)vol->nr_clusters);
673leave:
674	ntfs_log_leave("\n");
675	return ret;
676}
677