fslog.c revision fa3cacf5
1// SPDX-License-Identifier: GPL-2.0
2/*
3 *
4 * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved.
5 *
6 */
7
8#include <linux/blkdev.h>
9#include <linux/buffer_head.h>
10#include <linux/fs.h>
11#include <linux/hash.h>
12#include <linux/nls.h>
13#include <linux/random.h>
14#include <linux/ratelimit.h>
15#include <linux/slab.h>
16
17#include "debug.h"
18#include "ntfs.h"
19#include "ntfs_fs.h"
20
21/*
22 * LOG FILE structs
23 */
24
25// clang-format off
26
/* 0x100000000 bytes (4 GiB); presumably the largest supported log file size - TODO confirm at the use site */
#define MaxLogFileSize     0x100000000ull
/* Log page size substituted by norm_file_page() when the default is requested */
#define DefaultLogPageSize 4096
/* norm_file_page() requires room for this many log record pages plus 2 restart pages */
#define MinLogRecordPages  0x30
30
/*
 * Header of a restart page.
 * The restart area (struct RESTART_AREA) is located 'ra_off' bytes
 * from the start of this header.
 */
struct RESTART_HDR {
	struct NTFS_RECORD_HEADER rhdr; // 'RSTR'
	__le32 sys_page_size; // 0x10: Page size of the system which initialized the log
	__le32 page_size;     // 0x14: Log page size used for this log file
	__le16 ra_off;        // 0x18: Byte offset of the restart area from this header
	__le16 minor_ver;     // 0x1A: Minor version (checked by is_rst_page_hdr_valid)
	__le16 major_ver;     // 0x1C: Major version (checked by is_rst_page_hdr_valid)
	__le16 fixups[];
};
40
/* Client index meaning "no client"; it terminates the client lists */
#define LFS_NO_CLIENT 0xffff
#define LFS_NO_CLIENT_LE cpu_to_le16(0xffff)

/*
 * One record of the client array stored in the restart area.
 * Records are linked into lists through prev_client/next_client,
 * which are indexes into the same array.
 */
struct CLIENT_REC {
	__le64 oldest_lsn;  // 0x00: 0 means "ignore" (see oldest_client_lsn)
	__le64 restart_lsn; // 0x08:
	__le16 prev_client; // 0x10: index of previous record or LFS_NO_CLIENT
	__le16 next_client; // 0x12: index of next record or LFS_NO_CLIENT
	__le16 seq_num;     // 0x14:
	u8 align[6];        // 0x16
	__le32 name_bytes;  // 0x1C: in bytes
	__le16 name[32];    // 0x20: name of client
};

static_assert(sizeof(struct CLIENT_REC) == 0x60);
56
/* Two copies of these will exist at the beginning of the log file */
struct RESTART_AREA {
	__le64 current_lsn;    // 0x00: Current logical end of log file
	__le16 log_clients;    // 0x08: Maximum number of clients
	__le16 client_idx[2];  // 0x0A: [0] head of the free list, [1] head of the in-use list (indexes into the client array)
	__le16 flags;          // 0x0E: See RESTART_SINGLE_PAGE_IO
	__le32 seq_num_bits;   // 0x10: the number of bits in sequence number.
	__le16 ra_len;         // 0x14: Length of the restart area in bytes
	__le16 client_off;     // 0x16: Offset of the client array from the start of this structure
	__le64 l_size;         // 0x18: Usable log file size.
	__le32 last_lsn_data_len; // 0x20:
	__le16 rec_hdr_len;    // 0x24: log record header length (must be 8-byte aligned)
	__le16 data_off;       // 0x26: log page data offset (must be 8-byte aligned)
	__le32 open_log_count; // 0x28:
	__le32 align[5];       // 0x2C:
	struct CLIENT_REC clients[]; // 0x40:
};
74
/*
 * Client log record header.
 * The record is followed by 'lcns_follow' entries of page_lcns[];
 * lrh_length() always accounts for at least one entry.
 */
struct LOG_REC_HDR {
	__le16 redo_op;      // 0x00:  NTFS_LOG_OPERATION
	__le16 undo_op;      // 0x02:  NTFS_LOG_OPERATION
	__le16 redo_off;     // 0x04:  Offset to Redo record
	__le16 redo_len;     // 0x06:  Redo length
	__le16 undo_off;     // 0x08:  Offset to Undo record
	__le16 undo_len;     // 0x0A:  Undo length
	__le16 target_attr;  // 0x0C:  0 when the record has no target attribute
	__le16 lcns_follow;  // 0x0E:  Number of entries in page_lcns[]
	__le16 record_off;   // 0x10:
	__le16 attr_off;     // 0x12:
	__le16 cluster_off;  // 0x14:
	__le16 reserved;     // 0x16:
	__le64 target_vcn;   // 0x18:
	__le64 page_lcns[];  // 0x20:
};

static_assert(sizeof(struct LOG_REC_HDR) == 0x20);
93
/* Value stored in the first __le32 of a restart table entry that is allocated */
#define RESTART_ENTRY_ALLOCATED    0xFFFFFFFF
#define RESTART_ENTRY_ALLOCATED_LE cpu_to_le32(0xFFFFFFFF)

/*
 * Header of a restart table. 'used' entries of 'size' bytes each follow it.
 * The first __le32 of every entry is either RESTART_ENTRY_ALLOCATED or the
 * byte offset (from the table start) of the next free entry, 0 ending the list.
 */
struct RESTART_TABLE {
	__le16 size;       // 0x00:  Size of one entry, in bytes
	__le16 used;       // 0x02: Number of entries in the table
	__le16 total;      // 0x04: Number of allocated entries
	__le16 res[3];     // 0x06:
	__le32 free_goal;  // 0x0C: Entries freed below this offset go to the head of the free list
	__le32 first_free; // 0x10: Offset of the first free entry, 0 if none
	__le32 last_free;  // 0x14: Offset of the last free entry, 0 if none

};

static_assert(sizeof(struct RESTART_TABLE) == 0x18);
109
/* Variable-length record that names an entry of the Open attribute Table */
struct ATTR_NAME_ENTRY {
	__le16 off; // offset in the Open attribute Table
	__le16 name_bytes; // presumably the size of 'name' in bytes - TODO confirm
	__le16 name[];
};
115
/* Entry of the Open attribute Table (layout with a native pointer in 'ptr') */
struct OPEN_ATTR_ENRTY {
	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
	__le32 bytes_per_index; // 0x04:
	enum ATTR_TYPE type;    // 0x08:
	u8 is_dirty_pages;      // 0x0C:
	u8 is_attr_name;        // 0x0D: Faked field to manage 'ptr'
	u8 name_len;            // 0x0E: Faked field to manage 'ptr'
	u8 res;                 // 0x0F:
	struct MFT_REF ref; // 0x10: File Reference of file containing attribute
	__le64 open_record_lsn; // 0x18:
	void *ptr;              // 0x20:
};
128
/* 32 bit version of 'struct OPEN_ATTR_ENRTY' */
struct OPEN_ATTR_ENRTY_32 {
	__le32 next;            // 0x00: RESTART_ENTRY_ALLOCATED if allocated
	__le32 ptr;             // 0x04:
	struct MFT_REF ref;     // 0x08:
	__le64 open_record_lsn; // 0x10:
	u8 is_dirty_pages;      // 0x18:
	u8 is_attr_name;        // 0x19
	u8 res1[2];             // 0x1A:
	enum ATTR_TYPE type;    // 0x1C:
	u8 name_len;            // 0x20:  in wchar
	u8 res2[3];             // 0x21:
	__le32 AttributeName;   // 0x24:
	__le32 bytes_per_index; // 0x28:
};

/* Size in bytes of the 32 bit entry layout above */
#define SIZEOF_OPENATTRIBUTEENTRY0 0x2c
// static_assert( 0x2C == sizeof(struct OPEN_ATTR_ENRTY_32) );
/* The native entry must be smaller than the 32 bit entry layout */
static_assert(sizeof(struct OPEN_ATTR_ENRTY) < SIZEOF_OPENATTRIBUTEENTRY0);
148
/*
 * One entry exists in the Dirty Pages Table for each page which is dirty at the
 * time the Restart Area is written.
 * find_dp() treats an entry as covering the range [vcn, vcn + lcns_follow).
 */
struct DIR_PAGE_ENTRY {
	__le32 next;         // 0x00:  RESTART_ENTRY_ALLOCATED if allocated
	__le32 target_attr;  // 0x04:  Index into the Open attribute Table
	__le32 transfer_len; // 0x08:
	__le32 lcns_follow;  // 0x0C:  Number of clusters covered, starting at 'vcn'
	__le64 vcn;          // 0x10:  Vcn of dirty page
	__le64 oldest_lsn;   // 0x18:
	__le64 page_lcns[];  // 0x20:
};

static_assert(sizeof(struct DIR_PAGE_ENTRY) == 0x20);
164
/*
 * 32 bit version of 'struct DIR_PAGE_ENTRY'.
 * The 64 bit values are split into low/high halves, so they start at
 * offset 0x14 (after 'reserved') rather than on an 8 byte boundary.
 */
struct DIR_PAGE_ENTRY_32 {
	__le32 next;         // 0x00:  RESTART_ENTRY_ALLOCATED if allocated
	__le32 target_attr;  // 0x04:  Index into the Open attribute Table
	__le32 transfer_len; // 0x08:
	__le32 lcns_follow;  // 0x0C:
	__le32 reserved;     // 0x10:
	__le32 vcn_low;      // 0x14:  Vcn of dirty page
	__le32 vcn_hi;       // 0x18:  Vcn of dirty page
	__le32 oldest_lsn_low; // 0x1C:
	__le32 oldest_lsn_hi; // 0x20:
	__le32 page_lcns_low; // 0x24:
	__le32 page_lcns_hi; // 0x28:
};

static_assert(offsetof(struct DIR_PAGE_ENTRY_32, vcn_low) == 0x14);
static_assert(sizeof(struct DIR_PAGE_ENTRY_32) == 0x2c);
182
/* Transaction states; presumably the values of TRANSACTION_ENTRY.transact_state */
enum transact_state {
	TransactionUninitialized = 0,
	TransactionActive,
	TransactionPrepared,
	TransactionCommitted
};
189
/* Restart table entry describing one transaction */
struct TRANSACTION_ENTRY {
	__le32 next;          // 0x00: RESTART_ENTRY_ALLOCATED if allocated
	u8 transact_state;    // 0x04: presumably an enum transact_state value
	u8 reserved[3];       // 0x05:
	__le64 first_lsn;     // 0x08:
	__le64 prev_lsn;      // 0x10:
	__le64 undo_next_lsn; // 0x18:
	__le32 undo_records;  // 0x20: Number of undo log records pending abort
	__le32 undo_len;      // 0x24: Total undo size
};

static_assert(sizeof(struct TRANSACTION_ENTRY) == 0x28);
202
/*
 * Restart record: holds, for each of the four dumped tables, the lsn of the
 * dump and its length in bytes.
 */
struct NTFS_RESTART {
	__le32 major_ver;             // 0x00:
	__le32 minor_ver;             // 0x04:
	__le64 check_point_start;     // 0x08:
	__le64 open_attr_table_lsn;   // 0x10:
	__le64 attr_names_lsn;        // 0x18:
	__le64 dirty_pages_table_lsn; // 0x20:
	__le64 transact_table_lsn;    // 0x28:
	__le32 open_attr_len;         // 0x30: In bytes
	__le32 attr_names_len;        // 0x34: In bytes
	__le32 dirty_pages_len;       // 0x38: In bytes
	__le32 transact_table_len;    // 0x3C: In bytes
};

static_assert(sizeof(struct NTFS_RESTART) == 0x40);
218
/* Four attribute sizes; presumably the payload of SetNewAttributeSizes - TODO confirm */
struct NEW_ATTRIBUTE_SIZES {
	__le64 alloc_size;
	__le64 valid_size;
	__le64 data_size;
	__le64 total_size;
};

/* A run of bits inside a bitmap: starting bit offset and bit count (as named) */
struct BITMAP_RANGE {
	__le32 bitmap_off;
	__le32 bits;
};

/* A run of clusters: starting lcn and length (as named) */
struct LCN_RANGE {
	__le64 lcn;
	__le64 len;
};
235
/* The following type defines the different log record types (LFS_RECORD_HDR.record_type) */
#define LfsClientRecord  cpu_to_le32(1)
#define LfsClientRestart cpu_to_le32(2)

/* This is used to uniquely identify a client for a particular log file */
struct CLIENT_ID {
	__le16 seq_num;
	__le16 client_idx; // presumably an index into the client array - TODO confirm
};
245
/* This is the header that begins every Log Record in the log file */
struct LFS_RECORD_HDR {
	__le64 this_lsn;    // 0x00:
	__le64 client_prev_lsn;  // 0x08:
	__le64 client_undo_next_lsn; // 0x10:
	__le32 client_data_len;  // 0x18:
	struct CLIENT_ID client; // 0x1C: Owner of this log record
	__le32 record_type; // 0x20: LfsClientRecord or LfsClientRestart
	__le32 transact_id; // 0x24:
	__le16 flags;       // 0x28:	LOG_RECORD_MULTI_PAGE
	u8 align[6];        // 0x2A:
};

/* Bit in LFS_RECORD_HDR.flags */
#define LOG_RECORD_MULTI_PAGE cpu_to_le16(1)

static_assert(sizeof(struct LFS_RECORD_HDR) == 0x30);
262
/* Per-page record bookkeeping embedded in RECORD_PAGE_HDR as 'record_hdr' */
struct LFS_RECORD {
	__le16 next_record_off; // 0x00: Offset of the free space in the page
	u8 align[6];         // 0x02:
	__le64 last_end_lsn; // 0x08: lsn for the last log record which ends on the page
};

static_assert(sizeof(struct LFS_RECORD) == 0x10);
270
/* Header of a log record page */
struct RECORD_PAGE_HDR {
	struct NTFS_RECORD_HEADER rhdr; // 'RCRD'
	__le32 rflags;     // 0x10:  See LOG_PAGE_LOG_RECORD_END
	__le16 page_count; // 0x14:
	__le16 page_pos;   // 0x16:
	struct LFS_RECORD record_hdr; // 0x18
	__le16 fixups[10]; // 0x28
	__le32 file_off;   // 0x3c: used when major version >= 2 (see hdr_file_off)
};
280
281// clang-format on
282
283// Page contains the end of a log record
284#define LOG_PAGE_LOG_RECORD_END cpu_to_le32(0x00000001)
285
286static inline bool is_log_record_end(const struct RECORD_PAGE_HDR *hdr)
287{
288	return hdr->rflags & LOG_PAGE_LOG_RECORD_END;
289}
290
291static_assert(offsetof(struct RECORD_PAGE_HDR, file_off) == 0x3c);
292
293/*
294 * END of NTFS LOG structures
295 */
296
/*
 * Define some tuning parameters to keep the restart tables a reasonable size.
 * Presumably the initial entry count of the transaction table - the use site
 * is outside this part of the file.
 */
#define INITIAL_NUMBER_TRANSACTIONS 5
299
/*
 * Operation codes stored in LOG_REC_HDR.redo_op / LOG_REC_HDR.undo_op.
 * The numeric values index the AttributeRequired bitmap (one bit per code),
 * so they must not be renumbered.
 */
enum NTFS_LOG_OPERATION {

	Noop = 0x00,
	CompensationLogRecord = 0x01,
	InitializeFileRecordSegment = 0x02,
	DeallocateFileRecordSegment = 0x03,
	WriteEndOfFileRecordSegment = 0x04,
	CreateAttribute = 0x05,
	DeleteAttribute = 0x06,
	UpdateResidentValue = 0x07,
	UpdateNonresidentValue = 0x08,
	UpdateMappingPairs = 0x09,
	DeleteDirtyClusters = 0x0A,
	SetNewAttributeSizes = 0x0B,
	AddIndexEntryRoot = 0x0C,
	DeleteIndexEntryRoot = 0x0D,
	AddIndexEntryAllocation = 0x0E,
	DeleteIndexEntryAllocation = 0x0F,
	WriteEndOfIndexBuffer = 0x10,
	SetIndexEntryVcnRoot = 0x11,
	SetIndexEntryVcnAllocation = 0x12,
	UpdateFileNameRoot = 0x13,
	UpdateFileNameAllocation = 0x14,
	SetBitsInNonresidentBitMap = 0x15,
	ClearBitsInNonresidentBitMap = 0x16,
	HotFix = 0x17,
	EndTopLevelAction = 0x18,
	PrepareTransaction = 0x19,
	CommitTransaction = 0x1A,
	ForgetTransaction = 0x1B,
	OpenNonresidentAttribute = 0x1C,
	OpenAttributeTableDump = 0x1D,
	AttributeNamesDump = 0x1E,
	DirtyPageTableDump = 0x1F,
	TransactionTableDump = 0x20,
	UpdateRecordDataRoot = 0x21,
	UpdateRecordDataAllocation = 0x22,

	UpdateRelativeDataInIndex =
		0x23, // NtOfsRestartUpdateRelativeDataInIndex
	UpdateRelativeDataInIndex2 = 0x24,
	ZeroEndOfFileRecord = 0x25,
};
343
344/*
345 * Array for log records which require a target attribute
346 * A true indicates that the corresponding restart operation requires a target attribute
347 */
348static const u8 AttributeRequired[] = {
349	0xFC, 0xFB, 0xFF, 0x10, 0x06,
350};
351
352static inline bool is_target_required(u16 op)
353{
354	bool ret = op <= UpdateRecordDataAllocation &&
355		   (AttributeRequired[op >> 3] >> (op & 7) & 1);
356	return ret;
357}
358
359static inline bool can_skip_action(enum NTFS_LOG_OPERATION op)
360{
361	switch (op) {
362	case Noop:
363	case DeleteDirtyClusters:
364	case HotFix:
365	case EndTopLevelAction:
366	case PrepareTransaction:
367	case CommitTransaction:
368	case ForgetTransaction:
369	case CompensationLogRecord:
370	case OpenNonresidentAttribute:
371	case OpenAttributeTableDump:
372	case AttributeNamesDump:
373	case DirtyPageTableDump:
374	case TransactionTableDump:
375		return true;
376	default:
377		return false;
378	}
379}
380
/* Values for lcb.ctx_mode */
enum { lcb_ctx_undo_next, lcb_ctx_prev, lcb_ctx_next };
382
383/* bytes per restart table */
384static inline u32 bytes_per_rt(const struct RESTART_TABLE *rt)
385{
386	return le16_to_cpu(rt->used) * le16_to_cpu(rt->size) +
387	       sizeof(struct RESTART_TABLE);
388}
389
390/* log record length */
391static inline u32 lrh_length(const struct LOG_REC_HDR *lr)
392{
393	u16 t16 = le16_to_cpu(lr->lcns_follow);
394
395	return struct_size(lr, page_lcns, max_t(u16, 1, t16));
396}
397
/*
 * Log context block: state kept while reading log records.
 * Released with lcb_put(), which frees 'lrh' and, when 'alloc' is set,
 * 'log_rec' as well.
 */
struct lcb {
	struct LFS_RECORD_HDR *lrh; // Log record header of the current lsn
	struct LOG_REC_HDR *log_rec;
	u32 ctx_mode; // lcb_ctx_undo_next/lcb_ctx_prev/lcb_ctx_next
	struct CLIENT_ID client;
	bool alloc; // if true then we should deallocate 'log_rec'
};
405
406static void lcb_put(struct lcb *lcb)
407{
408	if (lcb->alloc)
409		ntfs_free(lcb->log_rec);
410	ntfs_free(lcb->lrh);
411	ntfs_free(lcb);
412}
413
414/*
415 * oldest_client_lsn
416 *
417 * find the oldest lsn from active clients.
418 */
419static inline void oldest_client_lsn(const struct CLIENT_REC *ca,
420				     __le16 next_client, u64 *oldest_lsn)
421{
422	while (next_client != LFS_NO_CLIENT_LE) {
423		const struct CLIENT_REC *cr = ca + le16_to_cpu(next_client);
424		u64 lsn = le64_to_cpu(cr->oldest_lsn);
425
426		/* ignore this block if it's oldest lsn is 0 */
427		if (lsn && lsn < *oldest_lsn)
428			*oldest_lsn = lsn;
429
430		next_client = cr->next_client;
431	}
432}
433
434static inline bool is_rst_page_hdr_valid(u32 file_off,
435					 const struct RESTART_HDR *rhdr)
436{
437	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
438	u32 page_size = le32_to_cpu(rhdr->page_size);
439	u32 end_usa;
440	u16 ro;
441
442	if (sys_page < SECTOR_SIZE || page_size < SECTOR_SIZE ||
443	    sys_page & (sys_page - 1) || page_size & (page_size - 1)) {
444		return false;
445	}
446
447	/* Check that if the file offset isn't 0, it is the system page size */
448	if (file_off && file_off != sys_page)
449		return false;
450
451	/* Check support version 1.1+ */
452	if (le16_to_cpu(rhdr->major_ver) <= 1 && !rhdr->minor_ver)
453		return false;
454
455	if (le16_to_cpu(rhdr->major_ver) > 2)
456		return false;
457
458	ro = le16_to_cpu(rhdr->ra_off);
459	if (!IS_ALIGNED(ro, 8) || ro > sys_page)
460		return false;
461
462	end_usa = ((sys_page >> SECTOR_SHIFT) + 1) * sizeof(short);
463	end_usa += le16_to_cpu(rhdr->rhdr.fix_off);
464
465	if (ro < end_usa)
466		return false;
467
468	return true;
469}
470
471static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr)
472{
473	const struct RESTART_AREA *ra;
474	u16 cl, fl, ul;
475	u32 off, l_size, file_dat_bits, file_size_round;
476	u16 ro = le16_to_cpu(rhdr->ra_off);
477	u32 sys_page = le32_to_cpu(rhdr->sys_page_size);
478
479	if (ro + offsetof(struct RESTART_AREA, l_size) >
480	    SECTOR_SIZE - sizeof(short))
481		return false;
482
483	ra = Add2Ptr(rhdr, ro);
484	cl = le16_to_cpu(ra->log_clients);
485
486	if (cl > 1)
487		return false;
488
489	off = le16_to_cpu(ra->client_off);
490
491	if (!IS_ALIGNED(off, 8) || ro + off > SECTOR_SIZE - sizeof(short))
492		return false;
493
494	off += cl * sizeof(struct CLIENT_REC);
495
496	if (off > sys_page)
497		return false;
498
499	/*
500	 * Check the restart length field and whether the entire
501	 * restart area is contained that length
502	 */
503	if (le16_to_cpu(rhdr->ra_off) + le16_to_cpu(ra->ra_len) > sys_page ||
504	    off > le16_to_cpu(ra->ra_len)) {
505		return false;
506	}
507
508	/*
509	 * As a final check make sure that the use list and the free list
510	 * are either empty or point to a valid client
511	 */
512	fl = le16_to_cpu(ra->client_idx[0]);
513	ul = le16_to_cpu(ra->client_idx[1]);
514	if ((fl != LFS_NO_CLIENT && fl >= cl) ||
515	    (ul != LFS_NO_CLIENT && ul >= cl))
516		return false;
517
518	/* Make sure the sequence number bits match the log file size */
519	l_size = le64_to_cpu(ra->l_size);
520
521	file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits);
522	file_size_round = 1u << (file_dat_bits + 3);
523	if (file_size_round != l_size &&
524	    (file_size_round < l_size || (file_size_round / 2) > l_size)) {
525		return false;
526	}
527
528	/* The log page data offset and record header length must be quad-aligned */
529	if (!IS_ALIGNED(le16_to_cpu(ra->data_off), 8) ||
530	    !IS_ALIGNED(le16_to_cpu(ra->rec_hdr_len), 8))
531		return false;
532
533	return true;
534}
535
536static inline bool is_client_area_valid(const struct RESTART_HDR *rhdr,
537					bool usa_error)
538{
539	u16 ro = le16_to_cpu(rhdr->ra_off);
540	const struct RESTART_AREA *ra = Add2Ptr(rhdr, ro);
541	u16 ra_len = le16_to_cpu(ra->ra_len);
542	const struct CLIENT_REC *ca;
543	u32 i;
544
545	if (usa_error && ra_len + ro > SECTOR_SIZE - sizeof(short))
546		return false;
547
548	/* Find the start of the client array */
549	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
550
551	/*
552	 * Start with the free list
553	 * Check that all the clients are valid and that there isn't a cycle
554	 * Do the in-use list on the second pass
555	 */
556	for (i = 0; i < 2; i++) {
557		u16 client_idx = le16_to_cpu(ra->client_idx[i]);
558		bool first_client = true;
559		u16 clients = le16_to_cpu(ra->log_clients);
560
561		while (client_idx != LFS_NO_CLIENT) {
562			const struct CLIENT_REC *cr;
563
564			if (!clients ||
565			    client_idx >= le16_to_cpu(ra->log_clients))
566				return false;
567
568			clients -= 1;
569			cr = ca + client_idx;
570
571			client_idx = le16_to_cpu(cr->next_client);
572
573			if (first_client) {
574				first_client = false;
575				if (cr->prev_client != LFS_NO_CLIENT_LE)
576					return false;
577			}
578		}
579	}
580
581	return true;
582}
583
584/*
585 * remove_client
586 *
587 * remove a client record from a client record list an restart area
588 */
589static inline void remove_client(struct CLIENT_REC *ca,
590				 const struct CLIENT_REC *cr, __le16 *head)
591{
592	if (cr->prev_client == LFS_NO_CLIENT_LE)
593		*head = cr->next_client;
594	else
595		ca[le16_to_cpu(cr->prev_client)].next_client = cr->next_client;
596
597	if (cr->next_client != LFS_NO_CLIENT_LE)
598		ca[le16_to_cpu(cr->next_client)].prev_client = cr->prev_client;
599}
600
601/*
602 * add_client
603 *
604 * add a client record to the start of a list
605 */
606static inline void add_client(struct CLIENT_REC *ca, u16 index, __le16 *head)
607{
608	struct CLIENT_REC *cr = ca + index;
609
610	cr->prev_client = LFS_NO_CLIENT_LE;
611	cr->next_client = *head;
612
613	if (*head != LFS_NO_CLIENT_LE)
614		ca[le16_to_cpu(*head)].prev_client = cpu_to_le16(index);
615
616	*head = cpu_to_le16(index);
617}
618
619/*
620 * enum_rstbl
621 *
622 */
623static inline void *enum_rstbl(struct RESTART_TABLE *t, void *c)
624{
625	__le32 *e;
626	u32 bprt;
627	u16 rsize = t ? le16_to_cpu(t->size) : 0;
628
629	if (!c) {
630		if (!t || !t->total)
631			return NULL;
632		e = Add2Ptr(t, sizeof(struct RESTART_TABLE));
633	} else {
634		e = Add2Ptr(c, rsize);
635	}
636
637	/* Loop until we hit the first one allocated, or the end of the list */
638	for (bprt = bytes_per_rt(t); PtrOffset(t, e) < bprt;
639	     e = Add2Ptr(e, rsize)) {
640		if (*e == RESTART_ENTRY_ALLOCATED_LE)
641			return e;
642	}
643	return NULL;
644}
645
646/*
647 * find_dp
648 *
649 * searches for a 'vcn' in Dirty Page Table,
650 */
651static inline struct DIR_PAGE_ENTRY *find_dp(struct RESTART_TABLE *dptbl,
652					     u32 target_attr, u64 vcn)
653{
654	__le32 ta = cpu_to_le32(target_attr);
655	struct DIR_PAGE_ENTRY *dp = NULL;
656
657	while ((dp = enum_rstbl(dptbl, dp))) {
658		u64 dp_vcn = le64_to_cpu(dp->vcn);
659
660		if (dp->target_attr == ta && vcn >= dp_vcn &&
661		    vcn < dp_vcn + le32_to_cpu(dp->lcns_follow)) {
662			return dp;
663		}
664	}
665	return NULL;
666}
667
668static inline u32 norm_file_page(u32 page_size, u32 *l_size, bool use_default)
669{
670	if (use_default)
671		page_size = DefaultLogPageSize;
672
673	/* Round the file size down to a system page boundary */
674	*l_size &= ~(page_size - 1);
675
676	/* File should contain at least 2 restart pages and MinLogRecordPages pages */
677	if (*l_size < (MinLogRecordPages + 2) * page_size)
678		return 0;
679
680	return page_size;
681}
682
683static bool check_log_rec(const struct LOG_REC_HDR *lr, u32 bytes, u32 tr,
684			  u32 bytes_per_attr_entry)
685{
686	u16 t16;
687
688	if (bytes < sizeof(struct LOG_REC_HDR))
689		return false;
690	if (!tr)
691		return false;
692
693	if ((tr - sizeof(struct RESTART_TABLE)) %
694	    sizeof(struct TRANSACTION_ENTRY))
695		return false;
696
697	if (le16_to_cpu(lr->redo_off) & 7)
698		return false;
699
700	if (le16_to_cpu(lr->undo_off) & 7)
701		return false;
702
703	if (lr->target_attr)
704		goto check_lcns;
705
706	if (is_target_required(le16_to_cpu(lr->redo_op)))
707		return false;
708
709	if (is_target_required(le16_to_cpu(lr->undo_op)))
710		return false;
711
712check_lcns:
713	if (!lr->lcns_follow)
714		goto check_length;
715
716	t16 = le16_to_cpu(lr->target_attr);
717	if ((t16 - sizeof(struct RESTART_TABLE)) % bytes_per_attr_entry)
718		return false;
719
720check_length:
721	if (bytes < lrh_length(lr))
722		return false;
723
724	return true;
725}
726
727static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes)
728{
729	u32 ts;
730	u32 i, off;
731	u16 rsize = le16_to_cpu(rt->size);
732	u16 ne = le16_to_cpu(rt->used);
733	u32 ff = le32_to_cpu(rt->first_free);
734	u32 lf = le32_to_cpu(rt->last_free);
735
736	ts = rsize * ne + sizeof(struct RESTART_TABLE);
737
738	if (!rsize || rsize > bytes ||
739	    rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts ||
740	    le16_to_cpu(rt->total) > ne || ff > ts || lf > ts ||
741	    (ff && ff < sizeof(struct RESTART_TABLE)) ||
742	    (lf && lf < sizeof(struct RESTART_TABLE))) {
743		return false;
744	}
745
746	/* Verify each entry is either allocated or points
747	 * to a valid offset the table
748	 */
749	for (i = 0; i < ne; i++) {
750		off = le32_to_cpu(*(__le32 *)Add2Ptr(
751			rt, i * rsize + sizeof(struct RESTART_TABLE)));
752
753		if (off != RESTART_ENTRY_ALLOCATED && off &&
754		    (off < sizeof(struct RESTART_TABLE) ||
755		     ((off - sizeof(struct RESTART_TABLE)) % rsize))) {
756			return false;
757		}
758	}
759
760	/* Walk through the list headed by the first entry to make
761	 * sure none of the entries are currently being used
762	 */
763	for (off = ff; off;) {
764		if (off == RESTART_ENTRY_ALLOCATED)
765			return false;
766
767		off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off));
768	}
769
770	return true;
771}
772
773/*
774 * free_rsttbl_idx
775 *
776 * frees a previously allocated index a Restart Table.
777 */
778static inline void free_rsttbl_idx(struct RESTART_TABLE *rt, u32 off)
779{
780	__le32 *e;
781	u32 lf = le32_to_cpu(rt->last_free);
782	__le32 off_le = cpu_to_le32(off);
783
784	e = Add2Ptr(rt, off);
785
786	if (off < le32_to_cpu(rt->free_goal)) {
787		*e = rt->first_free;
788		rt->first_free = off_le;
789		if (!lf)
790			rt->last_free = off_le;
791	} else {
792		if (lf)
793			*(__le32 *)Add2Ptr(rt, lf) = off_le;
794		else
795			rt->first_free = off_le;
796
797		rt->last_free = off_le;
798		*e = 0;
799	}
800
801	le16_sub_cpu(&rt->total, 1);
802}
803
804static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used)
805{
806	__le32 *e, *last_free;
807	u32 off;
808	u32 bytes = esize * used + sizeof(struct RESTART_TABLE);
809	u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize;
810	struct RESTART_TABLE *t = ntfs_zalloc(bytes);
811
812	t->size = cpu_to_le16(esize);
813	t->used = cpu_to_le16(used);
814	t->free_goal = cpu_to_le32(~0u);
815	t->first_free = cpu_to_le32(sizeof(struct RESTART_TABLE));
816	t->last_free = cpu_to_le32(lf);
817
818	e = (__le32 *)(t + 1);
819	last_free = Add2Ptr(t, lf);
820
821	for (off = sizeof(struct RESTART_TABLE) + esize; e < last_free;
822	     e = Add2Ptr(e, esize), off += esize) {
823		*e = cpu_to_le32(off);
824	}
825	return t;
826}
827
828static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl,
829						  u32 add, u32 free_goal)
830{
831	u16 esize = le16_to_cpu(tbl->size);
832	__le32 osize = cpu_to_le32(bytes_per_rt(tbl));
833	u32 used = le16_to_cpu(tbl->used);
834	struct RESTART_TABLE *rt = init_rsttbl(esize, used + add);
835
836	memcpy(rt + 1, tbl + 1, esize * used);
837
838	rt->free_goal = free_goal == ~0u
839				? cpu_to_le32(~0u)
840				: cpu_to_le32(sizeof(struct RESTART_TABLE) +
841					      free_goal * esize);
842
843	if (tbl->first_free) {
844		rt->first_free = tbl->first_free;
845		*(__le32 *)Add2Ptr(rt, le32_to_cpu(tbl->last_free)) = osize;
846	} else {
847		rt->first_free = osize;
848	}
849
850	rt->total = tbl->total;
851
852	ntfs_free(tbl);
853	return rt;
854}
855
856/*
857 * alloc_rsttbl_idx
858 *
859 * allocates an index from within a previously initialized Restart Table
860 */
861static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl)
862{
863	u32 off;
864	__le32 *e;
865	struct RESTART_TABLE *t = *tbl;
866
867	if (!t->first_free)
868		*tbl = t = extend_rsttbl(t, 16, ~0u);
869
870	off = le32_to_cpu(t->first_free);
871
872	/* Dequeue this entry and zero it. */
873	e = Add2Ptr(t, off);
874
875	t->first_free = *e;
876
877	memset(e, 0, le16_to_cpu(t->size));
878
879	*e = RESTART_ENTRY_ALLOCATED_LE;
880
881	/* If list is going empty, then we fix the last_free as well. */
882	if (!t->first_free)
883		t->last_free = 0;
884
885	le16_add_cpu(&t->total, 1);
886
887	return Add2Ptr(t, off);
888}
889
890/*
891 * alloc_rsttbl_from_idx
892 *
893 * allocates a specific index from within a previously initialized Restart Table
894 */
895static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo)
896{
897	u32 off;
898	__le32 *e;
899	struct RESTART_TABLE *rt = *tbl;
900	u32 bytes = bytes_per_rt(rt);
901	u16 esize = le16_to_cpu(rt->size);
902
903	/* If the entry is not the table, we will have to extend the table */
904	if (vbo >= bytes) {
905		/*
906		 * extend the size by computing the number of entries between
907		 * the existing size and the desired index and adding
908		 * 1 to that
909		 */
910		u32 bytes2idx = vbo - bytes;
911
912		/* There should always be an integral number of entries being added */
913		/* Now extend the table */
914		*tbl = rt = extend_rsttbl(rt, bytes2idx / esize + 1, bytes);
915		if (!rt)
916			return NULL;
917	}
918
919	/* see if the entry is already allocated, and just return if it is. */
920	e = Add2Ptr(rt, vbo);
921
922	if (*e == RESTART_ENTRY_ALLOCATED_LE)
923		return e;
924
925	/*
926	 * Walk through the table, looking for the entry we're
927	 * interested and the previous entry
928	 */
929	off = le32_to_cpu(rt->first_free);
930	e = Add2Ptr(rt, off);
931
932	if (off == vbo) {
933		/* this is a match */
934		rt->first_free = *e;
935		goto skip_looking;
936	}
937
938	/*
939	 * need to walk through the list looking for the predecessor of our entry
940	 */
941	for (;;) {
942		/* Remember the entry just found */
943		u32 last_off = off;
944		__le32 *last_e = e;
945
946		/* should never run of entries. */
947
948		/* Lookup up the next entry the list */
949		off = le32_to_cpu(*last_e);
950		e = Add2Ptr(rt, off);
951
952		/* If this is our match we are done */
953		if (off == vbo) {
954			*last_e = *e;
955
956			/* If this was the last entry, we update that the table as well */
957			if (le32_to_cpu(rt->last_free) == off)
958				rt->last_free = cpu_to_le32(last_off);
959			break;
960		}
961	}
962
963skip_looking:
964	/* If the list is now empty, we fix the last_free as well */
965	if (!rt->first_free)
966		rt->last_free = 0;
967
968	/* Zero this entry */
969	memset(e, 0, esize);
970	*e = RESTART_ENTRY_ALLOCATED_LE;
971
972	le16_add_cpu(&rt->total, 1);
973
974	return e;
975}
976
/* Bit in RESTART_AREA.flags */
#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001)

/* Bits for ntfs_log.l_flags */
#define NTFSLOG_WRAPPED 0x00000001
#define NTFSLOG_MULTIPLE_PAGE_IO 0x00000002
#define NTFSLOG_NO_LAST_LSN 0x00000004
#define NTFSLOG_REUSE_TAIL 0x00000010
#define NTFSLOG_NO_OLDEST_LSN 0x00000020
984
/*
 * Helper struct to work with NTFS LogFile.
 * It is the in-memory state shared by the lsn/offset helpers and the
 * page readers below.
 */
struct ntfs_log {
	struct ntfs_inode *ni; /* inode the log pages are read from */

	u32 l_size; /* log size in bytes; offsets at or beyond it are rejected */
	u32 sys_page_size;
	u32 sys_page_mask;
	u32 page_size;
	u32 page_mask; // page_size - 1
	u8 page_bits;
	struct RECORD_PAGE_HDR *one_page_buf; /* bounce buffer for reads that start inside a page */

	struct RESTART_TABLE *open_attr_tbl;
	u32 transaction_id;
	u32 clst_per_page;

	u32 first_page; /* offset next_page_off() wraps around to */
	u32 next_page;
	u32 ra_off;
	u32 data_off;
	u32 restart_size;
	u32 data_size;
	u16 record_header_len;
	u64 seq_num;
	u32 seq_num_bits;
	u32 file_data_bits;
	u32 seq_num_mask; /* (1 << file_data_bits) - 1 */

	struct RESTART_AREA *ra; /* in-memory image of the next restart area */
	u32 ra_size; /* the usable size of the restart area */

	/*
	 * If true, then the in-memory restart area is to be written
	 * to the first position on the disk
	 */
	bool init_ra;
	bool set_dirty; /* true if we need to set dirty flag */

	u64 oldest_lsn;

	u32 oldest_lsn_off;
	u64 last_lsn;

	u32 total_avail;
	u32 total_avail_pages;
	u32 total_undo_commit;
	u32 max_current_avail;
	u32 current_avail;
	u32 reserved;

	short major_ver; /* below 2, hdr_file_off() takes the offset from rhdr.lsn */
	short minor_ver;

	u32 l_flags; /* See NTFSLOG_XXX */
	u32 current_openlog_count; /* On-disk value for open_log_count */

	struct CLIENT_ID client_id;
	u32 client_undo_commit;
};
1046
1047static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn)
1048{
1049	u32 vbo = (lsn << log->seq_num_bits) >> (log->seq_num_bits - 3);
1050
1051	return vbo;
1052}
1053
1054/* compute the offset in the log file of the next log page */
1055static inline u32 next_page_off(struct ntfs_log *log, u32 off)
1056{
1057	off = (off & ~log->sys_page_mask) + log->page_size;
1058	return off >= log->l_size ? log->first_page : off;
1059}
1060
1061static inline u32 lsn_to_page_off(struct ntfs_log *log, u64 lsn)
1062{
1063	return (((u32)lsn) << 3) & log->page_mask;
1064}
1065
1066static inline u64 vbo_to_lsn(struct ntfs_log *log, u32 off, u64 Seq)
1067{
1068	return (off >> 3) + (Seq << log->file_data_bits);
1069}
1070
1071static inline bool is_lsn_in_file(struct ntfs_log *log, u64 lsn)
1072{
1073	return lsn >= log->oldest_lsn &&
1074	       lsn <= le64_to_cpu(log->ra->current_lsn);
1075}
1076
1077static inline u32 hdr_file_off(struct ntfs_log *log,
1078			       struct RECORD_PAGE_HDR *hdr)
1079{
1080	if (log->major_ver < 2)
1081		return le64_to_cpu(hdr->rhdr.lsn);
1082
1083	return le32_to_cpu(hdr->file_off);
1084}
1085
1086static inline u64 base_lsn(struct ntfs_log *log,
1087			   const struct RECORD_PAGE_HDR *hdr, u64 lsn)
1088{
1089	u64 h_lsn = le64_to_cpu(hdr->rhdr.lsn);
1090	u64 ret = (((h_lsn >> log->file_data_bits) +
1091		    (lsn < (lsn_to_vbo(log, h_lsn) & ~log->page_mask) ? 1 : 0))
1092		   << log->file_data_bits) +
1093		  ((((is_log_record_end(hdr) &&
1094		      h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn))
1095			     ? le16_to_cpu(hdr->record_hdr.next_record_off)
1096			     : log->page_size) +
1097		    lsn) >>
1098		   3);
1099
1100	return ret;
1101}
1102
1103static inline bool verify_client_lsn(struct ntfs_log *log,
1104				     const struct CLIENT_REC *client, u64 lsn)
1105{
1106	return lsn >= le64_to_cpu(client->oldest_lsn) &&
1107	       lsn <= le64_to_cpu(log->ra->current_lsn) && lsn;
1108}
1109
/*
 * Result of looking for a restart page (zeroed and then filled by
 * log_read_rst). Field meanings below are inferred from the names only;
 * confirm against log_read_rst before relying on them.
 */
struct restart_info {
	u64 last_lsn;
	struct RESTART_HDR *r_page; /* presumably the restart page that was read */
	u32 vbo;                    /* presumably the file offset of that page */
	bool chkdsk_was_run;
	bool valid_page;
	bool initialized;
	bool restart;
};
1119
1120static int read_log_page(struct ntfs_log *log, u32 vbo,
1121			 struct RECORD_PAGE_HDR **buffer, bool *usa_error)
1122{
1123	int err = 0;
1124	u32 page_idx = vbo >> log->page_bits;
1125	u32 page_off = vbo & log->page_mask;
1126	u32 bytes = log->page_size - page_off;
1127	void *to_free = NULL;
1128	u32 page_vbo = page_idx << log->page_bits;
1129	struct RECORD_PAGE_HDR *page_buf;
1130	struct ntfs_inode *ni = log->ni;
1131	bool bBAAD;
1132
1133	if (vbo >= log->l_size)
1134		return -EINVAL;
1135
1136	if (!*buffer) {
1137		to_free = ntfs_malloc(bytes);
1138		if (!to_free)
1139			return -ENOMEM;
1140		*buffer = to_free;
1141	}
1142
1143	page_buf = page_off ? log->one_page_buf : *buffer;
1144
1145	err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf,
1146			       log->page_size, NULL);
1147	if (err)
1148		goto out;
1149
1150	if (page_buf->rhdr.sign != NTFS_FFFF_SIGNATURE)
1151		ntfs_fix_post_read(&page_buf->rhdr, PAGE_SIZE, false);
1152
1153	if (page_buf != *buffer)
1154		memcpy(*buffer, Add2Ptr(page_buf, page_off), bytes);
1155
1156	bBAAD = page_buf->rhdr.sign == NTFS_BAAD_SIGNATURE;
1157
1158	if (usa_error)
1159		*usa_error = bBAAD;
1160	/* Check that the update sequence array for this page is valid */
1161	/* If we don't allow errors, raise an error status */
1162	else if (bBAAD)
1163		err = -EINVAL;
1164
1165out:
1166	if (err && to_free) {
1167		ntfs_free(to_free);
1168		*buffer = NULL;
1169	}
1170
1171	return err;
1172}
1173
1174/*
1175 * log_read_rst
1176 *
1177 * it walks through 512 blocks of the file looking for a valid restart page header
1178 * It will stop the first time we find a valid page header
1179 */
1180static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first,
1181			struct restart_info *info)
1182{
1183	u32 skip, vbo;
1184	struct RESTART_HDR *r_page = ntfs_malloc(DefaultLogPageSize);
1185
1186	if (!r_page)
1187		return -ENOMEM;
1188
1189	memset(info, 0, sizeof(struct restart_info));
1190
1191	/* Determine which restart area we are looking for */
1192	if (first) {
1193		vbo = 0;
1194		skip = 512;
1195	} else {
1196		vbo = 512;
1197		skip = 0;
1198	}
1199
1200	/* loop continuously until we succeed */
1201	for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) {
1202		bool usa_error;
1203		u32 sys_page_size;
1204		bool brst, bchk;
1205		struct RESTART_AREA *ra;
1206
1207		/* Read a page header at the current offset */
1208		if (read_log_page(log, vbo, (struct RECORD_PAGE_HDR **)&r_page,
1209				  &usa_error)) {
1210			/* ignore any errors */
1211			continue;
1212		}
1213
1214		/* exit if the signature is a log record page */
1215		if (r_page->rhdr.sign == NTFS_RCRD_SIGNATURE) {
1216			info->initialized = true;
1217			break;
1218		}
1219
1220		brst = r_page->rhdr.sign == NTFS_RSTR_SIGNATURE;
1221		bchk = r_page->rhdr.sign == NTFS_CHKD_SIGNATURE;
1222
1223		if (!bchk && !brst) {
1224			if (r_page->rhdr.sign != NTFS_FFFF_SIGNATURE) {
1225				/*
1226				 * Remember if the signature does not
1227				 * indicate uninitialized file
1228				 */
1229				info->initialized = true;
1230			}
1231			continue;
1232		}
1233
1234		ra = NULL;
1235		info->valid_page = false;
1236		info->initialized = true;
1237		info->vbo = vbo;
1238
1239		/* Let's check the restart area if this is a valid page */
1240		if (!is_rst_page_hdr_valid(vbo, r_page))
1241			goto check_result;
1242		ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
1243
1244		if (!is_rst_area_valid(r_page))
1245			goto check_result;
1246
1247		/*
1248		 * We have a valid restart page header and restart area.
1249		 * If chkdsk was run or we have no clients then we have
1250		 * no more checking to do
1251		 */
1252		if (bchk || ra->client_idx[1] == LFS_NO_CLIENT_LE) {
1253			info->valid_page = true;
1254			goto check_result;
1255		}
1256
1257		/* Read the entire restart area */
1258		sys_page_size = le32_to_cpu(r_page->sys_page_size);
1259		if (DefaultLogPageSize != sys_page_size) {
1260			ntfs_free(r_page);
1261			r_page = ntfs_zalloc(sys_page_size);
1262			if (!r_page)
1263				return -ENOMEM;
1264
1265			if (read_log_page(log, vbo,
1266					  (struct RECORD_PAGE_HDR **)&r_page,
1267					  &usa_error)) {
1268				/* ignore any errors */
1269				ntfs_free(r_page);
1270				r_page = NULL;
1271				continue;
1272			}
1273		}
1274
1275		if (is_client_area_valid(r_page, usa_error)) {
1276			info->valid_page = true;
1277			ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off));
1278		}
1279
1280check_result:
1281		/* If chkdsk was run then update the caller's values and return */
1282		if (r_page->rhdr.sign == NTFS_CHKD_SIGNATURE) {
1283			info->chkdsk_was_run = true;
1284			info->last_lsn = le64_to_cpu(r_page->rhdr.lsn);
1285			info->restart = true;
1286			info->r_page = r_page;
1287			return 0;
1288		}
1289
1290		/* If we have a valid page then copy the values we need from it */
1291		if (info->valid_page) {
1292			info->last_lsn = le64_to_cpu(ra->current_lsn);
1293			info->restart = true;
1294			info->r_page = r_page;
1295			return 0;
1296		}
1297	}
1298
1299	ntfs_free(r_page);
1300
1301	return 0;
1302}
1303
1304/*
1305 * log_init_pg_hdr
1306 *
1307 * init "log' from restart page header
1308 */
1309static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size,
1310			    u32 page_size, u16 major_ver, u16 minor_ver)
1311{
1312	log->sys_page_size = sys_page_size;
1313	log->sys_page_mask = sys_page_size - 1;
1314	log->page_size = page_size;
1315	log->page_mask = page_size - 1;
1316	log->page_bits = blksize_bits(page_size);
1317
1318	log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits;
1319	if (!log->clst_per_page)
1320		log->clst_per_page = 1;
1321
1322	log->first_page = major_ver >= 2
1323				  ? 0x22 * page_size
1324				  : ((sys_page_size << 1) + (page_size << 1));
1325	log->major_ver = major_ver;
1326	log->minor_ver = minor_ver;
1327}
1328
1329/*
1330 * log_create
1331 *
1332 * init "log" in cases when we don't have a restart area to use
1333 */
1334static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn,
1335		       u32 open_log_count, bool wrapped, bool use_multi_page)
1336{
1337	log->l_size = l_size;
1338	/* All file offsets must be quadword aligned */
1339	log->file_data_bits = blksize_bits(l_size) - 3;
1340	log->seq_num_mask = (8 << log->file_data_bits) - 1;
1341	log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits;
1342	log->seq_num = (last_lsn >> log->file_data_bits) + 2;
1343	log->next_page = log->first_page;
1344	log->oldest_lsn = log->seq_num << log->file_data_bits;
1345	log->oldest_lsn_off = 0;
1346	log->last_lsn = log->oldest_lsn;
1347
1348	log->l_flags |= NTFSLOG_NO_LAST_LSN | NTFSLOG_NO_OLDEST_LSN;
1349
1350	/* Set the correct flags for the I/O and indicate if we have wrapped */
1351	if (wrapped)
1352		log->l_flags |= NTFSLOG_WRAPPED;
1353
1354	if (use_multi_page)
1355		log->l_flags |= NTFSLOG_MULTIPLE_PAGE_IO;
1356
1357	/* Compute the log page values */
1358	log->data_off = ALIGN(
1359		offsetof(struct RECORD_PAGE_HDR, fixups) +
1360		sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1), 8);
1361	log->data_size = log->page_size - log->data_off;
1362	log->record_header_len = sizeof(struct LFS_RECORD_HDR);
1363
1364	/* Remember the different page sizes for reservation */
1365	log->reserved = log->data_size - log->record_header_len;
1366
1367	/* Compute the restart page values. */
1368	log->ra_off = ALIGN(
1369		offsetof(struct RESTART_HDR, fixups) +
1370		sizeof(short) * ((log->sys_page_size >> SECTOR_SHIFT) + 1), 8);
1371	log->restart_size = log->sys_page_size - log->ra_off;
1372	log->ra_size = struct_size(log->ra, clients, 1);
1373	log->current_openlog_count = open_log_count;
1374
1375	/*
1376	 * The total available log file space is the number of
1377	 * log file pages times the space available on each page
1378	 */
1379	log->total_avail_pages = log->l_size - log->first_page;
1380	log->total_avail = log->total_avail_pages >> log->page_bits;
1381
1382	/*
1383	 * We assume that we can't use the end of the page less than
1384	 * the file record size
1385	 * Then we won't need to reserve more than the caller asks for
1386	 */
1387	log->max_current_avail = log->total_avail * log->reserved;
1388	log->total_avail = log->total_avail * log->data_size;
1389	log->current_avail = log->max_current_avail;
1390}
1391
1392/*
1393 * log_create_ra
1394 *
1395 * This routine is called to fill a restart area from the values stored in 'log'
1396 */
1397static struct RESTART_AREA *log_create_ra(struct ntfs_log *log)
1398{
1399	struct CLIENT_REC *cr;
1400	struct RESTART_AREA *ra = ntfs_zalloc(log->restart_size);
1401
1402	if (!ra)
1403		return NULL;
1404
1405	ra->current_lsn = cpu_to_le64(log->last_lsn);
1406	ra->log_clients = cpu_to_le16(1);
1407	ra->client_idx[1] = LFS_NO_CLIENT_LE;
1408	if (log->l_flags & NTFSLOG_MULTIPLE_PAGE_IO)
1409		ra->flags = RESTART_SINGLE_PAGE_IO;
1410	ra->seq_num_bits = cpu_to_le32(log->seq_num_bits);
1411	ra->ra_len = cpu_to_le16(log->ra_size);
1412	ra->client_off = cpu_to_le16(offsetof(struct RESTART_AREA, clients));
1413	ra->l_size = cpu_to_le64(log->l_size);
1414	ra->rec_hdr_len = cpu_to_le16(log->record_header_len);
1415	ra->data_off = cpu_to_le16(log->data_off);
1416	ra->open_log_count = cpu_to_le32(log->current_openlog_count + 1);
1417
1418	cr = ra->clients;
1419
1420	cr->prev_client = LFS_NO_CLIENT_LE;
1421	cr->next_client = LFS_NO_CLIENT_LE;
1422
1423	return ra;
1424}
1425
1426static u32 final_log_off(struct ntfs_log *log, u64 lsn, u32 data_len)
1427{
1428	u32 base_vbo = lsn << 3;
1429	u32 final_log_off = (base_vbo & log->seq_num_mask) & ~log->page_mask;
1430	u32 page_off = base_vbo & log->page_mask;
1431	u32 tail = log->page_size - page_off;
1432
1433	page_off -= 1;
1434
1435	/* Add the length of the header */
1436	data_len += log->record_header_len;
1437
1438	/*
1439	 * If this lsn is contained this log page we are done
1440	 * Otherwise we need to walk through several log pages
1441	 */
1442	if (data_len > tail) {
1443		data_len -= tail;
1444		tail = log->data_size;
1445		page_off = log->data_off - 1;
1446
1447		for (;;) {
1448			final_log_off = next_page_off(log, final_log_off);
1449
1450			/* We are done if the remaining bytes fit on this page */
1451			if (data_len <= tail)
1452				break;
1453			data_len -= tail;
1454		}
1455	}
1456
1457	/*
1458	 * We add the remaining bytes to our starting position on this page
1459	 * and then add that value to the file offset of this log page
1460	 */
1461	return final_log_off + data_len + page_off;
1462}
1463
1464static int next_log_lsn(struct ntfs_log *log, const struct LFS_RECORD_HDR *rh,
1465			u64 *lsn)
1466{
1467	int err;
1468	u64 this_lsn = le64_to_cpu(rh->this_lsn);
1469	u32 vbo = lsn_to_vbo(log, this_lsn);
1470	u32 end =
1471		final_log_off(log, this_lsn, le32_to_cpu(rh->client_data_len));
1472	u32 hdr_off = end & ~log->sys_page_mask;
1473	u64 seq = this_lsn >> log->file_data_bits;
1474	struct RECORD_PAGE_HDR *page = NULL;
1475
1476	/* Remember if we wrapped */
1477	if (end <= vbo)
1478		seq += 1;
1479
1480	/* log page header for this page */
1481	err = read_log_page(log, hdr_off, &page, NULL);
1482	if (err)
1483		return err;
1484
1485	/*
1486	 * If the lsn we were given was not the last lsn on this page,
1487	 * then the starting offset for the next lsn is on a quad word
1488	 * boundary following the last file offset for the current lsn
1489	 * Otherwise the file offset is the start of the data on the next page
1490	 */
1491	if (this_lsn == le64_to_cpu(page->rhdr.lsn)) {
1492		/* If we wrapped, we need to increment the sequence number */
1493		hdr_off = next_page_off(log, hdr_off);
1494		if (hdr_off == log->first_page)
1495			seq += 1;
1496
1497		vbo = hdr_off + log->data_off;
1498	} else {
1499		vbo = ALIGN(end, 8);
1500	}
1501
1502	/* Compute the lsn based on the file offset and the sequence count */
1503	*lsn = vbo_to_lsn(log, vbo, seq);
1504
1505	/*
1506	 * If this lsn is within the legal range for the file, we return true
1507	 * Otherwise false indicates that there are no more lsn's
1508	 */
1509	if (!is_lsn_in_file(log, *lsn))
1510		*lsn = 0;
1511
1512	ntfs_free(page);
1513
1514	return 0;
1515}
1516
1517/*
1518 * current_log_avail
1519 *
1520 * calculate the number of bytes available for log records
1521 */
1522static u32 current_log_avail(struct ntfs_log *log)
1523{
1524	u32 oldest_off, next_free_off, free_bytes;
1525
1526	if (log->l_flags & NTFSLOG_NO_LAST_LSN) {
1527		/* The entire file is available */
1528		return log->max_current_avail;
1529	}
1530
1531	/*
1532	 * If there is a last lsn the restart area then we know that we will
1533	 * have to compute the free range
1534	 * If there is no oldest lsn then start at the first page of the file
1535	 */
1536	oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN)
1537			     ? log->first_page
1538			     : (log->oldest_lsn_off & ~log->sys_page_mask);
1539
1540	/*
1541	 * We will use the next log page offset to compute the next free page\
1542	 * If we are going to reuse this page go to the next page
1543	 * If we are at the first page then use the end of the file
1544	 */
1545	next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL)
1546				? log->next_page + log->page_size
1547				: log->next_page == log->first_page
1548					  ? log->l_size
1549					  : log->next_page;
1550
1551	/* If the two offsets are the same then there is no available space */
1552	if (oldest_off == next_free_off)
1553		return 0;
1554	/*
1555	 * If the free offset follows the oldest offset then subtract
1556	 * this range from the total available pages
1557	 */
1558	free_bytes =
1559		oldest_off < next_free_off
1560			? log->total_avail_pages - (next_free_off - oldest_off)
1561			: oldest_off - next_free_off;
1562
1563	free_bytes >>= log->page_bits;
1564	return free_bytes * log->reserved;
1565}
1566
1567static bool check_subseq_log_page(struct ntfs_log *log,
1568				  const struct RECORD_PAGE_HDR *rp, u32 vbo,
1569				  u64 seq)
1570{
1571	u64 lsn_seq;
1572	const struct NTFS_RECORD_HEADER *rhdr = &rp->rhdr;
1573	u64 lsn = le64_to_cpu(rhdr->lsn);
1574
1575	if (rhdr->sign == NTFS_FFFF_SIGNATURE || !rhdr->sign)
1576		return false;
1577
1578	/*
1579	 * If the last lsn on the page occurs was written after the page
1580	 * that caused the original error then we have a fatal error
1581	 */
1582	lsn_seq = lsn >> log->file_data_bits;
1583
1584	/*
1585	 * If the sequence number for the lsn the page is equal or greater
1586	 * than lsn we expect, then this is a subsequent write
1587	 */
1588	return lsn_seq >= seq ||
1589	       (lsn_seq == seq - 1 && log->first_page == vbo &&
1590		vbo != (lsn_to_vbo(log, lsn) & ~log->page_mask));
1591}
1592
1593/*
1594 * last_log_lsn
1595 *
1596 * This routine walks through the log pages for a file, searching for the
1597 * last log page written to the file
1598 */
1599static int last_log_lsn(struct ntfs_log *log)
1600{
1601	int err;
1602	bool usa_error = false;
1603	bool replace_page = false;
1604	bool reuse_page = log->l_flags & NTFSLOG_REUSE_TAIL;
1605	bool wrapped_file, wrapped;
1606
1607	u32 page_cnt = 1, page_pos = 1;
1608	u32 page_off = 0, page_off1 = 0, saved_off = 0;
1609	u32 final_off, second_off, final_off_prev = 0, second_off_prev = 0;
1610	u32 first_file_off = 0, second_file_off = 0;
1611	u32 part_io_count = 0;
1612	u32 tails = 0;
1613	u32 this_off, curpage_off, nextpage_off, remain_pages;
1614
1615	u64 expected_seq, seq_base = 0, lsn_base = 0;
1616	u64 best_lsn, best_lsn1, best_lsn2;
1617	u64 lsn_cur, lsn1, lsn2;
1618	u64 last_ok_lsn = reuse_page ? log->last_lsn : 0;
1619
1620	u16 cur_pos, best_page_pos;
1621
1622	struct RECORD_PAGE_HDR *page = NULL;
1623	struct RECORD_PAGE_HDR *tst_page = NULL;
1624	struct RECORD_PAGE_HDR *first_tail = NULL;
1625	struct RECORD_PAGE_HDR *second_tail = NULL;
1626	struct RECORD_PAGE_HDR *tail_page = NULL;
1627	struct RECORD_PAGE_HDR *second_tail_prev = NULL;
1628	struct RECORD_PAGE_HDR *first_tail_prev = NULL;
1629	struct RECORD_PAGE_HDR *page_bufs = NULL;
1630	struct RECORD_PAGE_HDR *best_page;
1631
1632	if (log->major_ver >= 2) {
1633		final_off = 0x02 * log->page_size;
1634		second_off = 0x12 * log->page_size;
1635
1636		// 0x10 == 0x12 - 0x2
1637		page_bufs = ntfs_malloc(log->page_size * 0x10);
1638		if (!page_bufs)
1639			return -ENOMEM;
1640	} else {
1641		second_off = log->first_page - log->page_size;
1642		final_off = second_off - log->page_size;
1643	}
1644
1645next_tail:
1646	/* Read second tail page (at pos 3/0x12000) */
1647	if (read_log_page(log, second_off, &second_tail, &usa_error) ||
1648	    usa_error || second_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
1649		ntfs_free(second_tail);
1650		second_tail = NULL;
1651		second_file_off = 0;
1652		lsn2 = 0;
1653	} else {
1654		second_file_off = hdr_file_off(log, second_tail);
1655		lsn2 = le64_to_cpu(second_tail->record_hdr.last_end_lsn);
1656	}
1657
1658	/* Read first tail page (at pos 2/0x2000 ) */
1659	if (read_log_page(log, final_off, &first_tail, &usa_error) ||
1660	    usa_error || first_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) {
1661		ntfs_free(first_tail);
1662		first_tail = NULL;
1663		first_file_off = 0;
1664		lsn1 = 0;
1665	} else {
1666		first_file_off = hdr_file_off(log, first_tail);
1667		lsn1 = le64_to_cpu(first_tail->record_hdr.last_end_lsn);
1668	}
1669
1670	if (log->major_ver < 2) {
1671		int best_page;
1672
1673		first_tail_prev = first_tail;
1674		final_off_prev = first_file_off;
1675		second_tail_prev = second_tail;
1676		second_off_prev = second_file_off;
1677		tails = 1;
1678
1679		if (!first_tail && !second_tail)
1680			goto tail_read;
1681
1682		if (first_tail && second_tail)
1683			best_page = lsn1 < lsn2 ? 1 : 0;
1684		else if (first_tail)
1685			best_page = 0;
1686		else
1687			best_page = 1;
1688
1689		page_off = best_page ? second_file_off : first_file_off;
1690		seq_base = (best_page ? lsn2 : lsn1) >> log->file_data_bits;
1691		goto tail_read;
1692	}
1693
1694	best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0;
1695	best_lsn2 =
1696		second_tail ? base_lsn(log, second_tail, second_file_off) : 0;
1697
1698	if (first_tail && second_tail) {
1699		if (best_lsn1 > best_lsn2) {
1700			best_lsn = best_lsn1;
1701			best_page = first_tail;
1702			this_off = first_file_off;
1703		} else {
1704			best_lsn = best_lsn2;
1705			best_page = second_tail;
1706			this_off = second_file_off;
1707		}
1708	} else if (first_tail) {
1709		best_lsn = best_lsn1;
1710		best_page = first_tail;
1711		this_off = first_file_off;
1712	} else if (second_tail) {
1713		best_lsn = best_lsn2;
1714		best_page = second_tail;
1715		this_off = second_file_off;
1716	} else {
1717		goto tail_read;
1718	}
1719
1720	best_page_pos = le16_to_cpu(best_page->page_pos);
1721
1722	if (!tails) {
1723		if (best_page_pos == page_pos) {
1724			seq_base = best_lsn >> log->file_data_bits;
1725			saved_off = page_off = le32_to_cpu(best_page->file_off);
1726			lsn_base = best_lsn;
1727
1728			memmove(page_bufs, best_page, log->page_size);
1729
1730			page_cnt = le16_to_cpu(best_page->page_count);
1731			if (page_cnt > 1)
1732				page_pos += 1;
1733
1734			tails = 1;
1735		}
1736	} else if (seq_base == (best_lsn >> log->file_data_bits) &&
1737		   saved_off + log->page_size == this_off &&
1738		   lsn_base < best_lsn &&
1739		   (page_pos != page_cnt || best_page_pos == page_pos ||
1740		    best_page_pos == 1) &&
1741		   (page_pos >= page_cnt || best_page_pos == page_pos)) {
1742		u16 bppc = le16_to_cpu(best_page->page_count);
1743
1744		saved_off += log->page_size;
1745		lsn_base = best_lsn;
1746
1747		memmove(Add2Ptr(page_bufs, tails * log->page_size), best_page,
1748			log->page_size);
1749
1750		tails += 1;
1751
1752		if (best_page_pos != bppc) {
1753			page_cnt = bppc;
1754			page_pos = best_page_pos;
1755
1756			if (page_cnt > 1)
1757				page_pos += 1;
1758		} else {
1759			page_pos = page_cnt = 1;
1760		}
1761	} else {
1762		ntfs_free(first_tail);
1763		ntfs_free(second_tail);
1764		goto tail_read;
1765	}
1766
1767	ntfs_free(first_tail_prev);
1768	first_tail_prev = first_tail;
1769	final_off_prev = first_file_off;
1770	first_tail = NULL;
1771
1772	ntfs_free(second_tail_prev);
1773	second_tail_prev = second_tail;
1774	second_off_prev = second_file_off;
1775	second_tail = NULL;
1776
1777	final_off += log->page_size;
1778	second_off += log->page_size;
1779
1780	if (tails < 0x10)
1781		goto next_tail;
1782tail_read:
1783	first_tail = first_tail_prev;
1784	final_off = final_off_prev;
1785
1786	second_tail = second_tail_prev;
1787	second_off = second_off_prev;
1788
1789	page_cnt = page_pos = 1;
1790
1791	curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off)
1792					       : log->next_page;
1793
1794	wrapped_file =
1795		curpage_off == log->first_page &&
1796		!(log->l_flags & (NTFSLOG_NO_LAST_LSN | NTFSLOG_REUSE_TAIL));
1797
1798	expected_seq = wrapped_file ? (log->seq_num + 1) : log->seq_num;
1799
1800	nextpage_off = curpage_off;
1801
1802next_page:
1803	tail_page = NULL;
1804	/* Read the next log page */
1805	err = read_log_page(log, curpage_off, &page, &usa_error);
1806
1807	/* Compute the next log page offset the file */
1808	nextpage_off = next_page_off(log, curpage_off);
1809	wrapped = nextpage_off == log->first_page;
1810
1811	if (tails > 1) {
1812		struct RECORD_PAGE_HDR *cur_page =
1813			Add2Ptr(page_bufs, curpage_off - page_off);
1814
1815		if (curpage_off == saved_off) {
1816			tail_page = cur_page;
1817			goto use_tail_page;
1818		}
1819
1820		if (page_off > curpage_off || curpage_off >= saved_off)
1821			goto use_tail_page;
1822
1823		if (page_off1)
1824			goto use_cur_page;
1825
1826		if (!err && !usa_error &&
1827		    page->rhdr.sign == NTFS_RCRD_SIGNATURE &&
1828		    cur_page->rhdr.lsn == page->rhdr.lsn &&
1829		    cur_page->record_hdr.next_record_off ==
1830			    page->record_hdr.next_record_off &&
1831		    ((page_pos == page_cnt &&
1832		      le16_to_cpu(page->page_pos) == 1) ||
1833		     (page_pos != page_cnt &&
1834		      le16_to_cpu(page->page_pos) == page_pos + 1 &&
1835		      le16_to_cpu(page->page_count) == page_cnt))) {
1836			cur_page = NULL;
1837			goto use_tail_page;
1838		}
1839
1840		page_off1 = page_off;
1841
1842use_cur_page:
1843
1844		lsn_cur = le64_to_cpu(cur_page->rhdr.lsn);
1845
1846		if (last_ok_lsn !=
1847			    le64_to_cpu(cur_page->record_hdr.last_end_lsn) &&
1848		    ((lsn_cur >> log->file_data_bits) +
1849		     ((curpage_off <
1850		       (lsn_to_vbo(log, lsn_cur) & ~log->page_mask))
1851			      ? 1
1852			      : 0)) != expected_seq) {
1853			goto check_tail;
1854		}
1855
1856		if (!is_log_record_end(cur_page)) {
1857			tail_page = NULL;
1858			last_ok_lsn = lsn_cur;
1859			goto next_page_1;
1860		}
1861
1862		log->seq_num = expected_seq;
1863		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
1864		log->last_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
1865		log->ra->current_lsn = cur_page->record_hdr.last_end_lsn;
1866
1867		if (log->record_header_len <=
1868		    log->page_size -
1869			    le16_to_cpu(cur_page->record_hdr.next_record_off)) {
1870			log->l_flags |= NTFSLOG_REUSE_TAIL;
1871			log->next_page = curpage_off;
1872		} else {
1873			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
1874			log->next_page = nextpage_off;
1875		}
1876
1877		if (wrapped_file)
1878			log->l_flags |= NTFSLOG_WRAPPED;
1879
1880		last_ok_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn);
1881		goto next_page_1;
1882	}
1883
1884	/*
1885	 * If we are at the expected first page of a transfer check to see
1886	 * if either tail copy is at this offset
1887	 * If this page is the last page of a transfer, check if we wrote
1888	 * a subsequent tail copy
1889	 */
1890	if (page_cnt == page_pos || page_cnt == page_pos + 1) {
1891		/*
1892		 * Check if the offset matches either the first or second
1893		 * tail copy. It is possible it will match both
1894		 */
1895		if (curpage_off == final_off)
1896			tail_page = first_tail;
1897
1898		/*
1899		 * If we already matched on the first page then
1900		 * check the ending lsn's.
1901		 */
1902		if (curpage_off == second_off) {
1903			if (!tail_page ||
1904			    (second_tail &&
1905			     le64_to_cpu(second_tail->record_hdr.last_end_lsn) >
1906				     le64_to_cpu(first_tail->record_hdr
1907							 .last_end_lsn))) {
1908				tail_page = second_tail;
1909			}
1910		}
1911	}
1912
1913use_tail_page:
1914	if (tail_page) {
1915		/* we have a candidate for a tail copy */
1916		lsn_cur = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
1917
1918		if (last_ok_lsn < lsn_cur) {
1919			/*
1920			 * If the sequence number is not expected,
1921			 * then don't use the tail copy
1922			 */
1923			if (expected_seq != (lsn_cur >> log->file_data_bits))
1924				tail_page = NULL;
1925		} else if (last_ok_lsn > lsn_cur) {
1926			/*
1927			 * If the last lsn is greater than the one on
1928			 * this page then forget this tail
1929			 */
1930			tail_page = NULL;
1931		}
1932	}
1933
1934	/* If we have an error on the current page, we will break of this loop */
1935	if (err || usa_error)
1936		goto check_tail;
1937
1938	/*
1939	 * Done if the last lsn on this page doesn't match the previous known
1940	 * last lsn or the sequence number is not expected
1941	 */
1942	lsn_cur = le64_to_cpu(page->rhdr.lsn);
1943	if (last_ok_lsn != lsn_cur &&
1944	    expected_seq != (lsn_cur >> log->file_data_bits)) {
1945		goto check_tail;
1946	}
1947
1948	/*
1949	 * Check that the page position and page count values are correct
1950	 * If this is the first page of a transfer the position must be 1
1951	 * and the count will be unknown
1952	 */
1953	if (page_cnt == page_pos) {
1954		if (page->page_pos != cpu_to_le16(1) &&
1955		    (!reuse_page || page->page_pos != page->page_count)) {
1956			/*
1957			 * If the current page is the first page we are
1958			 * looking at and we are reusing this page then
1959			 * it can be either the first or last page of a
1960			 * transfer. Otherwise it can only be the first.
1961			 */
1962			goto check_tail;
1963		}
1964	} else if (le16_to_cpu(page->page_count) != page_cnt ||
1965		   le16_to_cpu(page->page_pos) != page_pos + 1) {
1966		/*
1967		 * The page position better be 1 more than the last page
1968		 * position and the page count better match
1969		 */
1970		goto check_tail;
1971	}
1972
1973	/*
1974	 * We have a valid page the file and may have a valid page
1975	 * the tail copy area
1976	 * If the tail page was written after the page the file then
1977	 * break of the loop
1978	 */
1979	if (tail_page &&
1980	    le64_to_cpu(tail_page->record_hdr.last_end_lsn) > lsn_cur) {
1981		/* Remember if we will replace the page */
1982		replace_page = true;
1983		goto check_tail;
1984	}
1985
1986	tail_page = NULL;
1987
1988	if (is_log_record_end(page)) {
1989		/*
1990		 * Since we have read this page we know the sequence number
1991		 * is the same as our expected value
1992		 */
1993		log->seq_num = expected_seq;
1994		log->last_lsn = le64_to_cpu(page->record_hdr.last_end_lsn);
1995		log->ra->current_lsn = page->record_hdr.last_end_lsn;
1996		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
1997
1998		/*
1999		 * If there is room on this page for another header then
2000		 * remember we want to reuse the page
2001		 */
2002		if (log->record_header_len <=
2003		    log->page_size -
2004			    le16_to_cpu(page->record_hdr.next_record_off)) {
2005			log->l_flags |= NTFSLOG_REUSE_TAIL;
2006			log->next_page = curpage_off;
2007		} else {
2008			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
2009			log->next_page = nextpage_off;
2010		}
2011
2012		/* Remember if we wrapped the log file */
2013		if (wrapped_file)
2014			log->l_flags |= NTFSLOG_WRAPPED;
2015	}
2016
2017	/*
2018	 * Remember the last page count and position.
2019	 * Also remember the last known lsn
2020	 */
2021	page_cnt = le16_to_cpu(page->page_count);
2022	page_pos = le16_to_cpu(page->page_pos);
2023	last_ok_lsn = le64_to_cpu(page->rhdr.lsn);
2024
2025next_page_1:
2026
2027	if (wrapped) {
2028		expected_seq += 1;
2029		wrapped_file = 1;
2030	}
2031
2032	curpage_off = nextpage_off;
2033	ntfs_free(page);
2034	page = NULL;
2035	reuse_page = 0;
2036	goto next_page;
2037
2038check_tail:
2039	if (tail_page) {
2040		log->seq_num = expected_seq;
2041		log->last_lsn = le64_to_cpu(tail_page->record_hdr.last_end_lsn);
2042		log->ra->current_lsn = tail_page->record_hdr.last_end_lsn;
2043		log->l_flags &= ~NTFSLOG_NO_LAST_LSN;
2044
2045		if (log->page_size -
2046			    le16_to_cpu(
2047				    tail_page->record_hdr.next_record_off) >=
2048		    log->record_header_len) {
2049			log->l_flags |= NTFSLOG_REUSE_TAIL;
2050			log->next_page = curpage_off;
2051		} else {
2052			log->l_flags &= ~NTFSLOG_REUSE_TAIL;
2053			log->next_page = nextpage_off;
2054		}
2055
2056		if (wrapped)
2057			log->l_flags |= NTFSLOG_WRAPPED;
2058	}
2059
2060	/* Remember that the partial IO will start at the next page */
2061	second_off = nextpage_off;
2062
2063	/*
2064	 * If the next page is the first page of the file then update
2065	 * the sequence number for log records which begon the next page
2066	 */
2067	if (wrapped)
2068		expected_seq += 1;
2069
2070	/*
2071	 * If we have a tail copy or are performing single page I/O we can
2072	 * immediately look at the next page
2073	 */
2074	if (replace_page || (log->ra->flags & RESTART_SINGLE_PAGE_IO)) {
2075		page_cnt = 2;
2076		page_pos = 1;
2077		goto check_valid;
2078	}
2079
2080	if (page_pos != page_cnt)
2081		goto check_valid;
2082	/*
2083	 * If the next page causes us to wrap to the beginning of the log
2084	 * file then we know which page to check next.
2085	 */
2086	if (wrapped) {
2087		page_cnt = 2;
2088		page_pos = 1;
2089		goto check_valid;
2090	}
2091
2092	cur_pos = 2;
2093
2094next_test_page:
2095	ntfs_free(tst_page);
2096	tst_page = NULL;
2097
2098	/* Walk through the file, reading log pages */
2099	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
2100
2101	/*
2102	 * If we get a USA error then assume that we correctly found
2103	 * the end of the original transfer
2104	 */
2105	if (usa_error)
2106		goto file_is_valid;
2107
2108	/*
2109	 * If we were able to read the page, we examine it to see if it
2110	 * is the same or different Io block
2111	 */
2112	if (err)
2113		goto next_test_page_1;
2114
2115	if (le16_to_cpu(tst_page->page_pos) == cur_pos &&
2116	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
2117		page_cnt = le16_to_cpu(tst_page->page_count) + 1;
2118		page_pos = le16_to_cpu(tst_page->page_pos);
2119		goto check_valid;
2120	} else {
2121		goto file_is_valid;
2122	}
2123
2124next_test_page_1:
2125
2126	nextpage_off = next_page_off(log, curpage_off);
2127	wrapped = nextpage_off == log->first_page;
2128
2129	if (wrapped) {
2130		expected_seq += 1;
2131		page_cnt = 2;
2132		page_pos = 1;
2133	}
2134
2135	cur_pos += 1;
2136	part_io_count += 1;
2137	if (!wrapped)
2138		goto next_test_page;
2139
2140check_valid:
2141	/* Skip over the remaining pages this transfer */
2142	remain_pages = page_cnt - page_pos - 1;
2143	part_io_count += remain_pages;
2144
2145	while (remain_pages--) {
2146		nextpage_off = next_page_off(log, curpage_off);
2147		wrapped = nextpage_off == log->first_page;
2148
2149		if (wrapped)
2150			expected_seq += 1;
2151	}
2152
2153	/* Call our routine to check this log page */
2154	ntfs_free(tst_page);
2155	tst_page = NULL;
2156
2157	err = read_log_page(log, nextpage_off, &tst_page, &usa_error);
2158	if (!err && !usa_error &&
2159	    check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) {
2160		err = -EINVAL;
2161		goto out;
2162	}
2163
2164file_is_valid:
2165
2166	/* We have a valid file */
2167	if (page_off1 || tail_page) {
2168		struct RECORD_PAGE_HDR *tmp_page;
2169
2170		if (sb_rdonly(log->ni->mi.sbi->sb)) {
2171			err = -EROFS;
2172			goto out;
2173		}
2174
2175		if (page_off1) {
2176			tmp_page = Add2Ptr(page_bufs, page_off1 - page_off);
2177			tails -= (page_off1 - page_off) / log->page_size;
2178			if (!tail_page)
2179				tails -= 1;
2180		} else {
2181			tmp_page = tail_page;
2182			tails = 1;
2183		}
2184
2185		while (tails--) {
2186			u64 off = hdr_file_off(log, tmp_page);
2187
2188			if (!page) {
2189				page = ntfs_malloc(log->page_size);
2190				if (!page)
2191					return -ENOMEM;
2192			}
2193
2194			/*
2195			 * Correct page and copy the data from this page
2196			 * into it and flush it to disk
2197			 */
2198			memcpy(page, tmp_page, log->page_size);
2199
2200			/* Fill last flushed lsn value flush the page */
2201			if (log->major_ver < 2)
2202				page->rhdr.lsn = page->record_hdr.last_end_lsn;
2203			else
2204				page->file_off = 0;
2205
2206			page->page_pos = page->page_count = cpu_to_le16(1);
2207
2208			ntfs_fix_pre_write(&page->rhdr, log->page_size);
2209
2210			err = ntfs_sb_write_run(log->ni->mi.sbi,
2211						&log->ni->file.run, off, page,
2212						log->page_size);
2213
2214			if (err)
2215				goto out;
2216
2217			if (part_io_count && second_off == off) {
2218				second_off += log->page_size;
2219				part_io_count -= 1;
2220			}
2221
2222			tmp_page = Add2Ptr(tmp_page, log->page_size);
2223		}
2224	}
2225
2226	if (part_io_count) {
2227		if (sb_rdonly(log->ni->mi.sbi->sb)) {
2228			err = -EROFS;
2229			goto out;
2230		}
2231	}
2232
2233out:
2234	ntfs_free(second_tail);
2235	ntfs_free(first_tail);
2236	ntfs_free(page);
2237	ntfs_free(tst_page);
2238	ntfs_free(page_bufs);
2239
2240	return err;
2241}
2242
2243/*
2244 * read_log_rec_buf
2245 *
2246 * copies a log record from the file to a buffer
2247 * The log record may span several log pages and may even wrap the file
2248 */
2249static int read_log_rec_buf(struct ntfs_log *log,
2250			    const struct LFS_RECORD_HDR *rh, void *buffer)
2251{
2252	int err;
2253	struct RECORD_PAGE_HDR *ph = NULL;
2254	u64 lsn = le64_to_cpu(rh->this_lsn);
2255	u32 vbo = lsn_to_vbo(log, lsn) & ~log->page_mask;
2256	u32 off = lsn_to_page_off(log, lsn) + log->record_header_len;
2257	u32 data_len = le32_to_cpu(rh->client_data_len);
2258
2259	/*
2260	 * While there are more bytes to transfer,
2261	 * we continue to attempt to perform the read
2262	 */
2263	for (;;) {
2264		bool usa_error;
2265		u32 tail = log->page_size - off;
2266
2267		if (tail >= data_len)
2268			tail = data_len;
2269
2270		data_len -= tail;
2271
2272		err = read_log_page(log, vbo, &ph, &usa_error);
2273		if (err)
2274			goto out;
2275
2276		/*
2277		 * The last lsn on this page better be greater or equal
2278		 * to the lsn we are copying
2279		 */
2280		if (lsn > le64_to_cpu(ph->rhdr.lsn)) {
2281			err = -EINVAL;
2282			goto out;
2283		}
2284
2285		memcpy(buffer, Add2Ptr(ph, off), tail);
2286
2287		/* If there are no more bytes to transfer, we exit the loop */
2288		if (!data_len) {
2289			if (!is_log_record_end(ph) ||
2290			    lsn > le64_to_cpu(ph->record_hdr.last_end_lsn)) {
2291				err = -EINVAL;
2292				goto out;
2293			}
2294			break;
2295		}
2296
2297		if (ph->rhdr.lsn == ph->record_hdr.last_end_lsn ||
2298		    lsn > le64_to_cpu(ph->rhdr.lsn)) {
2299			err = -EINVAL;
2300			goto out;
2301		}
2302
2303		vbo = next_page_off(log, vbo);
2304		off = log->data_off;
2305
2306		/*
2307		 * adjust our pointer the user's buffer to transfer
2308		 * the next block to
2309		 */
2310		buffer = Add2Ptr(buffer, tail);
2311	}
2312
2313out:
2314	ntfs_free(ph);
2315	return err;
2316}
2317
2318static int read_rst_area(struct ntfs_log *log, struct NTFS_RESTART **rst_,
2319			 u64 *lsn)
2320{
2321	int err;
2322	struct LFS_RECORD_HDR *rh = NULL;
2323	const struct CLIENT_REC *cr =
2324		Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
2325	u64 lsnr, lsnc = le64_to_cpu(cr->restart_lsn);
2326	u32 len;
2327	struct NTFS_RESTART *rst;
2328
2329	*lsn = 0;
2330	*rst_ = NULL;
2331
2332	/* If the client doesn't have a restart area, go ahead and exit now */
2333	if (!lsnc)
2334		return 0;
2335
2336	err = read_log_page(log, lsn_to_vbo(log, lsnc),
2337			    (struct RECORD_PAGE_HDR **)&rh, NULL);
2338	if (err)
2339		return err;
2340
2341	rst = NULL;
2342	lsnr = le64_to_cpu(rh->this_lsn);
2343
2344	if (lsnc != lsnr) {
2345		/* If the lsn values don't match, then the disk is corrupt */
2346		err = -EINVAL;
2347		goto out;
2348	}
2349
2350	*lsn = lsnr;
2351	len = le32_to_cpu(rh->client_data_len);
2352
2353	if (!len) {
2354		err = 0;
2355		goto out;
2356	}
2357
2358	if (len < sizeof(struct NTFS_RESTART)) {
2359		err = -EINVAL;
2360		goto out;
2361	}
2362
2363	rst = ntfs_malloc(len);
2364	if (!rst) {
2365		err = -ENOMEM;
2366		goto out;
2367	}
2368
2369	/* Copy the data into the 'rst' buffer */
2370	err = read_log_rec_buf(log, rh, rst);
2371	if (err)
2372		goto out;
2373
2374	*rst_ = rst;
2375	rst = NULL;
2376
2377out:
2378	ntfs_free(rh);
2379	ntfs_free(rst);
2380
2381	return err;
2382}
2383
2384static int find_log_rec(struct ntfs_log *log, u64 lsn, struct lcb *lcb)
2385{
2386	int err;
2387	struct LFS_RECORD_HDR *rh = lcb->lrh;
2388	u32 rec_len, len;
2389
2390	/* Read the record header for this lsn */
2391	if (!rh) {
2392		err = read_log_page(log, lsn_to_vbo(log, lsn),
2393				    (struct RECORD_PAGE_HDR **)&rh, NULL);
2394
2395		lcb->lrh = rh;
2396		if (err)
2397			return err;
2398	}
2399
2400	/*
2401	 * If the lsn the log record doesn't match the desired
2402	 * lsn then the disk is corrupt
2403	 */
2404	if (lsn != le64_to_cpu(rh->this_lsn))
2405		return -EINVAL;
2406
2407	len = le32_to_cpu(rh->client_data_len);
2408
2409	/*
2410	 * check that the length field isn't greater than the total
2411	 * available space the log file
2412	 */
2413	rec_len = len + log->record_header_len;
2414	if (rec_len >= log->total_avail)
2415		return -EINVAL;
2416
2417	/*
2418	 * If the entire log record is on this log page,
2419	 * put a pointer to the log record the context block
2420	 */
2421	if (rh->flags & LOG_RECORD_MULTI_PAGE) {
2422		void *lr = ntfs_malloc(len);
2423
2424		if (!lr)
2425			return -ENOMEM;
2426
2427		lcb->log_rec = lr;
2428		lcb->alloc = true;
2429
2430		/* Copy the data into the buffer returned */
2431		err = read_log_rec_buf(log, rh, lr);
2432		if (err)
2433			return err;
2434	} else {
2435		/* If beyond the end of the current page -> an error */
2436		u32 page_off = lsn_to_page_off(log, lsn);
2437
2438		if (page_off + len + log->record_header_len > log->page_size)
2439			return -EINVAL;
2440
2441		lcb->log_rec = Add2Ptr(rh, sizeof(struct LFS_RECORD_HDR));
2442		lcb->alloc = false;
2443	}
2444
2445	return 0;
2446}
2447
2448/*
2449 * read_log_rec_lcb
2450 *
2451 * initiates the query operation.
2452 */
2453static int read_log_rec_lcb(struct ntfs_log *log, u64 lsn, u32 ctx_mode,
2454			    struct lcb **lcb_)
2455{
2456	int err;
2457	const struct CLIENT_REC *cr;
2458	struct lcb *lcb;
2459
2460	switch (ctx_mode) {
2461	case lcb_ctx_undo_next:
2462	case lcb_ctx_prev:
2463	case lcb_ctx_next:
2464		break;
2465	default:
2466		return -EINVAL;
2467	}
2468
2469	/* check that the given lsn is the legal range for this client */
2470	cr = Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off));
2471
2472	if (!verify_client_lsn(log, cr, lsn))
2473		return -EINVAL;
2474
2475	lcb = ntfs_zalloc(sizeof(struct lcb));
2476	if (!lcb)
2477		return -ENOMEM;
2478	lcb->client = log->client_id;
2479	lcb->ctx_mode = ctx_mode;
2480
2481	/* Find the log record indicated by the given lsn */
2482	err = find_log_rec(log, lsn, lcb);
2483	if (err)
2484		goto out;
2485
2486	*lcb_ = lcb;
2487	return 0;
2488
2489out:
2490	lcb_put(lcb);
2491	*lcb_ = NULL;
2492	return err;
2493}
2494
2495/*
2496 * find_client_next_lsn
2497 *
2498 * attempt to find the next lsn to return to a client based on the context mode.
2499 */
2500static int find_client_next_lsn(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
2501{
2502	int err;
2503	u64 next_lsn;
2504	struct LFS_RECORD_HDR *hdr;
2505
2506	hdr = lcb->lrh;
2507	*lsn = 0;
2508
2509	if (lcb_ctx_next != lcb->ctx_mode)
2510		goto check_undo_next;
2511
2512	/* Loop as long as another lsn can be found */
2513	for (;;) {
2514		u64 current_lsn;
2515
2516		err = next_log_lsn(log, hdr, &current_lsn);
2517		if (err)
2518			goto out;
2519
2520		if (!current_lsn)
2521			break;
2522
2523		if (hdr != lcb->lrh)
2524			ntfs_free(hdr);
2525
2526		hdr = NULL;
2527		err = read_log_page(log, lsn_to_vbo(log, current_lsn),
2528				    (struct RECORD_PAGE_HDR **)&hdr, NULL);
2529		if (err)
2530			goto out;
2531
2532		if (memcmp(&hdr->client, &lcb->client,
2533			   sizeof(struct CLIENT_ID))) {
2534			/*err = -EINVAL; */
2535		} else if (LfsClientRecord == hdr->record_type) {
2536			ntfs_free(lcb->lrh);
2537			lcb->lrh = hdr;
2538			*lsn = current_lsn;
2539			return 0;
2540		}
2541	}
2542
2543out:
2544	if (hdr != lcb->lrh)
2545		ntfs_free(hdr);
2546	return err;
2547
2548check_undo_next:
2549	if (lcb_ctx_undo_next == lcb->ctx_mode)
2550		next_lsn = le64_to_cpu(hdr->client_undo_next_lsn);
2551	else if (lcb_ctx_prev == lcb->ctx_mode)
2552		next_lsn = le64_to_cpu(hdr->client_prev_lsn);
2553	else
2554		return 0;
2555
2556	if (!next_lsn)
2557		return 0;
2558
2559	if (!verify_client_lsn(
2560		    log, Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)),
2561		    next_lsn))
2562		return 0;
2563
2564	hdr = NULL;
2565	err = read_log_page(log, lsn_to_vbo(log, next_lsn),
2566			    (struct RECORD_PAGE_HDR **)&hdr, NULL);
2567	if (err)
2568		return err;
2569	ntfs_free(lcb->lrh);
2570	lcb->lrh = hdr;
2571
2572	*lsn = next_lsn;
2573
2574	return 0;
2575}
2576
2577static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn)
2578{
2579	int err;
2580
2581	err = find_client_next_lsn(log, lcb, lsn);
2582	if (err)
2583		return err;
2584
2585	if (!*lsn)
2586		return 0;
2587
2588	if (lcb->alloc)
2589		ntfs_free(lcb->log_rec);
2590
2591	lcb->log_rec = NULL;
2592	lcb->alloc = false;
2593	ntfs_free(lcb->lrh);
2594	lcb->lrh = NULL;
2595
2596	return find_log_rec(log, *lsn, lcb);
2597}
2598
2599static inline bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes)
2600{
2601	__le16 mask;
2602	u32 min_de, de_off, used, total;
2603	const struct NTFS_DE *e;
2604
2605	if (hdr_has_subnode(hdr)) {
2606		min_de = sizeof(struct NTFS_DE) + sizeof(u64);
2607		mask = NTFS_IE_HAS_SUBNODES;
2608	} else {
2609		min_de = sizeof(struct NTFS_DE);
2610		mask = 0;
2611	}
2612
2613	de_off = le32_to_cpu(hdr->de_off);
2614	used = le32_to_cpu(hdr->used);
2615	total = le32_to_cpu(hdr->total);
2616
2617	if (de_off > bytes - min_de || used > bytes || total > bytes ||
2618	    de_off + min_de > used || used > total) {
2619		return false;
2620	}
2621
2622	e = Add2Ptr(hdr, de_off);
2623	for (;;) {
2624		u16 esize = le16_to_cpu(e->size);
2625		struct NTFS_DE *next = Add2Ptr(e, esize);
2626
2627		if (esize < min_de || PtrOffset(hdr, next) > used ||
2628		    (e->flags & NTFS_IE_HAS_SUBNODES) != mask) {
2629			return false;
2630		}
2631
2632		if (de_is_last(e))
2633			break;
2634
2635		e = next;
2636	}
2637
2638	return true;
2639}
2640
2641static inline bool check_index_buffer(const struct INDEX_BUFFER *ib, u32 bytes)
2642{
2643	u16 fo;
2644	const struct NTFS_RECORD_HEADER *r = &ib->rhdr;
2645
2646	if (r->sign != NTFS_INDX_SIGNATURE)
2647		return false;
2648
2649	fo = (SECTOR_SIZE - ((bytes >> SECTOR_SHIFT) + 1) * sizeof(short));
2650
2651	if (le16_to_cpu(r->fix_off) > fo)
2652		return false;
2653
2654	if ((le16_to_cpu(r->fix_num) - 1) * SECTOR_SIZE != bytes)
2655		return false;
2656
2657	return check_index_header(&ib->ihdr,
2658				  bytes - offsetof(struct INDEX_BUFFER, ihdr));
2659}
2660
2661static inline bool check_index_root(const struct ATTRIB *attr,
2662				    struct ntfs_sb_info *sbi)
2663{
2664	bool ret;
2665	const struct INDEX_ROOT *root = resident_data(attr);
2666	u8 index_bits = le32_to_cpu(root->index_block_size) >= sbi->cluster_size
2667				? sbi->cluster_bits
2668				: SECTOR_SHIFT;
2669	u8 block_clst = root->index_block_clst;
2670
2671	if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) ||
2672	    (root->type != ATTR_NAME && root->type != ATTR_ZERO) ||
2673	    (root->type == ATTR_NAME &&
2674	     root->rule != NTFS_COLLATION_TYPE_FILENAME) ||
2675	    (le32_to_cpu(root->index_block_size) !=
2676	     (block_clst << index_bits)) ||
2677	    (block_clst != 1 && block_clst != 2 && block_clst != 4 &&
2678	     block_clst != 8 && block_clst != 0x10 && block_clst != 0x20 &&
2679	     block_clst != 0x40 && block_clst != 0x80)) {
2680		return false;
2681	}
2682
2683	ret = check_index_header(&root->ihdr,
2684				 le32_to_cpu(attr->res.data_size) -
2685					 offsetof(struct INDEX_ROOT, ihdr));
2686	return ret;
2687}
2688
2689static inline bool check_attr(const struct MFT_REC *rec,
2690			      const struct ATTRIB *attr,
2691			      struct ntfs_sb_info *sbi)
2692{
2693	u32 asize = le32_to_cpu(attr->size);
2694	u32 rsize = 0;
2695	u64 dsize, svcn, evcn;
2696	u16 run_off;
2697
2698	/* Check the fixed part of the attribute record header */
2699	if (asize >= sbi->record_size ||
2700	    asize + PtrOffset(rec, attr) >= sbi->record_size ||
2701	    (attr->name_len &&
2702	     le16_to_cpu(attr->name_off) + attr->name_len * sizeof(short) >
2703		     asize)) {
2704		return false;
2705	}
2706
2707	/* Check the attribute fields */
2708	switch (attr->non_res) {
2709	case 0:
2710		rsize = le32_to_cpu(attr->res.data_size);
2711		if (rsize >= asize ||
2712		    le16_to_cpu(attr->res.data_off) + rsize > asize) {
2713			return false;
2714		}
2715		break;
2716
2717	case 1:
2718		dsize = le64_to_cpu(attr->nres.data_size);
2719		svcn = le64_to_cpu(attr->nres.svcn);
2720		evcn = le64_to_cpu(attr->nres.evcn);
2721		run_off = le16_to_cpu(attr->nres.run_off);
2722
2723		if (svcn > evcn + 1 || run_off >= asize ||
2724		    le64_to_cpu(attr->nres.valid_size) > dsize ||
2725		    dsize > le64_to_cpu(attr->nres.alloc_size)) {
2726			return false;
2727		}
2728
2729		if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn,
2730			       Add2Ptr(attr, run_off), asize - run_off) < 0) {
2731			return false;
2732		}
2733
2734		return true;
2735
2736	default:
2737		return false;
2738	}
2739
2740	switch (attr->type) {
2741	case ATTR_NAME:
2742		if (fname_full_size(Add2Ptr(
2743			    attr, le16_to_cpu(attr->res.data_off))) > asize) {
2744			return false;
2745		}
2746		break;
2747
2748	case ATTR_ROOT:
2749		return check_index_root(attr, sbi);
2750
2751	case ATTR_STD:
2752		if (rsize < sizeof(struct ATTR_STD_INFO5) &&
2753		    rsize != sizeof(struct ATTR_STD_INFO)) {
2754			return false;
2755		}
2756		break;
2757
2758	case ATTR_LIST:
2759	case ATTR_ID:
2760	case ATTR_SECURE:
2761	case ATTR_LABEL:
2762	case ATTR_VOL_INFO:
2763	case ATTR_DATA:
2764	case ATTR_ALLOC:
2765	case ATTR_BITMAP:
2766	case ATTR_REPARSE:
2767	case ATTR_EA_INFO:
2768	case ATTR_EA:
2769	case ATTR_PROPERTYSET:
2770	case ATTR_LOGGED_UTILITY_STREAM:
2771		break;
2772
2773	default:
2774		return false;
2775	}
2776
2777	return true;
2778}
2779
2780static inline bool check_file_record(const struct MFT_REC *rec,
2781				     const struct MFT_REC *rec2,
2782				     struct ntfs_sb_info *sbi)
2783{
2784	const struct ATTRIB *attr;
2785	u16 fo = le16_to_cpu(rec->rhdr.fix_off);
2786	u16 fn = le16_to_cpu(rec->rhdr.fix_num);
2787	u16 ao = le16_to_cpu(rec->attr_off);
2788	u32 rs = sbi->record_size;
2789
2790	/* check the file record header for consistency */
2791	if (rec->rhdr.sign != NTFS_FILE_SIGNATURE ||
2792	    fo > (SECTOR_SIZE - ((rs >> SECTOR_SHIFT) + 1) * sizeof(short)) ||
2793	    (fn - 1) * SECTOR_SIZE != rs || ao < MFTRECORD_FIXUP_OFFSET_1 ||
2794	    ao > sbi->record_size - SIZEOF_RESIDENT || !is_rec_inuse(rec) ||
2795	    le32_to_cpu(rec->total) != rs) {
2796		return false;
2797	}
2798
2799	/* Loop to check all of the attributes */
2800	for (attr = Add2Ptr(rec, ao); attr->type != ATTR_END;
2801	     attr = Add2Ptr(attr, le32_to_cpu(attr->size))) {
2802		if (check_attr(rec, attr, sbi))
2803			continue;
2804		return false;
2805	}
2806
2807	return true;
2808}
2809
2810static inline int check_lsn(const struct NTFS_RECORD_HEADER *hdr,
2811			    const u64 *rlsn)
2812{
2813	u64 lsn;
2814
2815	if (!rlsn)
2816		return true;
2817
2818	lsn = le64_to_cpu(hdr->lsn);
2819
2820	if (hdr->sign == NTFS_HOLE_SIGNATURE)
2821		return false;
2822
2823	if (*rlsn > lsn)
2824		return true;
2825
2826	return false;
2827}
2828
2829static inline bool check_if_attr(const struct MFT_REC *rec,
2830				 const struct LOG_REC_HDR *lrh)
2831{
2832	u16 ro = le16_to_cpu(lrh->record_off);
2833	u16 o = le16_to_cpu(rec->attr_off);
2834	const struct ATTRIB *attr = Add2Ptr(rec, o);
2835
2836	while (o < ro) {
2837		u32 asize;
2838
2839		if (attr->type == ATTR_END)
2840			break;
2841
2842		asize = le32_to_cpu(attr->size);
2843		if (!asize)
2844			break;
2845
2846		o += asize;
2847		attr = Add2Ptr(attr, asize);
2848	}
2849
2850	return o == ro;
2851}
2852
2853static inline bool check_if_index_root(const struct MFT_REC *rec,
2854				       const struct LOG_REC_HDR *lrh)
2855{
2856	u16 ro = le16_to_cpu(lrh->record_off);
2857	u16 o = le16_to_cpu(rec->attr_off);
2858	const struct ATTRIB *attr = Add2Ptr(rec, o);
2859
2860	while (o < ro) {
2861		u32 asize;
2862
2863		if (attr->type == ATTR_END)
2864			break;
2865
2866		asize = le32_to_cpu(attr->size);
2867		if (!asize)
2868			break;
2869
2870		o += asize;
2871		attr = Add2Ptr(attr, asize);
2872	}
2873
2874	return o == ro && attr->type == ATTR_ROOT;
2875}
2876
2877static inline bool check_if_root_index(const struct ATTRIB *attr,
2878				       const struct INDEX_HDR *hdr,
2879				       const struct LOG_REC_HDR *lrh)
2880{
2881	u16 ao = le16_to_cpu(lrh->attr_off);
2882	u32 de_off = le32_to_cpu(hdr->de_off);
2883	u32 o = PtrOffset(attr, hdr) + de_off;
2884	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
2885	u32 asize = le32_to_cpu(attr->size);
2886
2887	while (o < ao) {
2888		u16 esize;
2889
2890		if (o >= asize)
2891			break;
2892
2893		esize = le16_to_cpu(e->size);
2894		if (!esize)
2895			break;
2896
2897		o += esize;
2898		e = Add2Ptr(e, esize);
2899	}
2900
2901	return o == ao;
2902}
2903
2904static inline bool check_if_alloc_index(const struct INDEX_HDR *hdr,
2905					u32 attr_off)
2906{
2907	u32 de_off = le32_to_cpu(hdr->de_off);
2908	u32 o = offsetof(struct INDEX_BUFFER, ihdr) + de_off;
2909	const struct NTFS_DE *e = Add2Ptr(hdr, de_off);
2910	u32 used = le32_to_cpu(hdr->used);
2911
2912	while (o < attr_off) {
2913		u16 esize;
2914
2915		if (de_off >= used)
2916			break;
2917
2918		esize = le16_to_cpu(e->size);
2919		if (!esize)
2920			break;
2921
2922		o += esize;
2923		de_off += esize;
2924		e = Add2Ptr(e, esize);
2925	}
2926
2927	return o == attr_off;
2928}
2929
2930static inline void change_attr_size(struct MFT_REC *rec, struct ATTRIB *attr,
2931				    u32 nsize)
2932{
2933	u32 asize = le32_to_cpu(attr->size);
2934	int dsize = nsize - asize;
2935	u8 *next = Add2Ptr(attr, asize);
2936	u32 used = le32_to_cpu(rec->used);
2937
2938	memmove(Add2Ptr(attr, nsize), next, used - PtrOffset(rec, next));
2939
2940	rec->used = cpu_to_le32(used + dsize);
2941	attr->size = cpu_to_le32(nsize);
2942}
2943
2944struct OpenAttr {
2945	struct ATTRIB *attr;
2946	struct runs_tree *run1;
2947	struct runs_tree run0;
2948	struct ntfs_inode *ni;
2949	// CLST rno;
2950};
2951
2952/* Returns 0 if 'attr' has the same type and name */
2953static inline int cmp_type_and_name(const struct ATTRIB *a1,
2954				    const struct ATTRIB *a2)
2955{
2956	return a1->type != a2->type || a1->name_len != a2->name_len ||
2957	       (a1->name_len && memcmp(attr_name(a1), attr_name(a2),
2958				       a1->name_len * sizeof(short)));
2959}
2960
2961static struct OpenAttr *find_loaded_attr(struct ntfs_log *log,
2962					 const struct ATTRIB *attr, CLST rno)
2963{
2964	struct OPEN_ATTR_ENRTY *oe = NULL;
2965
2966	while ((oe = enum_rstbl(log->open_attr_tbl, oe))) {
2967		struct OpenAttr *op_attr;
2968
2969		if (ino_get(&oe->ref) != rno)
2970			continue;
2971
2972		op_attr = (struct OpenAttr *)oe->ptr;
2973		if (!cmp_type_and_name(op_attr->attr, attr))
2974			return op_attr;
2975	}
2976	return NULL;
2977}
2978
2979static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi,
2980					     enum ATTR_TYPE type, u64 size,
2981					     const u16 *name, size_t name_len,
2982					     __le16 flags)
2983{
2984	struct ATTRIB *attr;
2985	u32 name_size = ALIGN(name_len * sizeof(short), 8);
2986	bool is_ext = flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED);
2987	u32 asize = name_size +
2988		    (is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT);
2989
2990	attr = ntfs_zalloc(asize);
2991	if (!attr)
2992		return NULL;
2993
2994	attr->type = type;
2995	attr->size = cpu_to_le32(asize);
2996	attr->flags = flags;
2997	attr->non_res = 1;
2998	attr->name_len = name_len;
2999
3000	attr->nres.evcn = cpu_to_le64((u64)bytes_to_cluster(sbi, size) - 1);
3001	attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, size));
3002	attr->nres.data_size = cpu_to_le64(size);
3003	attr->nres.valid_size = attr->nres.data_size;
3004	if (is_ext) {
3005		attr->name_off = SIZEOF_NONRESIDENT_EX_LE;
3006		if (is_attr_compressed(attr))
3007			attr->nres.c_unit = COMPRESSION_UNIT;
3008
3009		attr->nres.run_off =
3010			cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size);
3011		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT_EX), name,
3012		       name_len * sizeof(short));
3013	} else {
3014		attr->name_off = SIZEOF_NONRESIDENT_LE;
3015		attr->nres.run_off =
3016			cpu_to_le16(SIZEOF_NONRESIDENT + name_size);
3017		memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT), name,
3018		       name_len * sizeof(short));
3019	}
3020
3021	return attr;
3022}
3023
3024/*
3025 * do_action
3026 *
3027 * common routine for the Redo and Undo Passes
3028 * If rlsn is NULL then undo
3029 */
3030static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe,
3031		     const struct LOG_REC_HDR *lrh, u32 op, void *data,
3032		     u32 dlen, u32 rec_len, const u64 *rlsn)
3033{
3034	int err = 0;
3035	struct ntfs_sb_info *sbi = log->ni->mi.sbi;
3036	struct inode *inode = NULL, *inode_parent;
3037	struct mft_inode *mi = NULL, *mi2_child = NULL;
3038	CLST rno = 0, rno_base = 0;
3039	struct INDEX_BUFFER *ib = NULL;
3040	struct MFT_REC *rec = NULL;
3041	struct ATTRIB *attr = NULL, *attr2;
3042	struct INDEX_HDR *hdr;
3043	struct INDEX_ROOT *root;
3044	struct NTFS_DE *e, *e1, *e2;
3045	struct NEW_ATTRIBUTE_SIZES *new_sz;
3046	struct ATTR_FILE_NAME *fname;
3047	struct OpenAttr *oa, *oa2;
3048	u32 nsize, t32, asize, used, esize, bmp_off, bmp_bits;
3049	u16 id, id2;
3050	u32 record_size = sbi->record_size;
3051	u64 t64;
3052	u16 roff = le16_to_cpu(lrh->record_off);
3053	u16 aoff = le16_to_cpu(lrh->attr_off);
3054	u64 lco = 0;
3055	u64 cbo = (u64)le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
3056	u64 tvo = le64_to_cpu(lrh->target_vcn) << sbi->cluster_bits;
3057	u64 vbo = cbo + tvo;
3058	void *buffer_le = NULL;
3059	u32 bytes = 0;
3060	bool a_dirty = false;
3061	u16 data_off;
3062
3063	oa = oe->ptr;
3064
3065	/* Big switch to prepare */
3066	switch (op) {
3067	/* ============================================================
3068	 * Process MFT records, as described by the current log record
3069	 * ============================================================
3070	 */
3071	case InitializeFileRecordSegment:
3072	case DeallocateFileRecordSegment:
3073	case WriteEndOfFileRecordSegment:
3074	case CreateAttribute:
3075	case DeleteAttribute:
3076	case UpdateResidentValue:
3077	case UpdateMappingPairs:
3078	case SetNewAttributeSizes:
3079	case AddIndexEntryRoot:
3080	case DeleteIndexEntryRoot:
3081	case SetIndexEntryVcnRoot:
3082	case UpdateFileNameRoot:
3083	case UpdateRecordDataRoot:
3084	case ZeroEndOfFileRecord:
3085		rno = vbo >> sbi->record_bits;
3086		inode = ilookup(sbi->sb, rno);
3087		if (inode) {
3088			mi = &ntfs_i(inode)->mi;
3089		} else if (op == InitializeFileRecordSegment) {
3090			mi = ntfs_zalloc(sizeof(struct mft_inode));
3091			if (!mi)
3092				return -ENOMEM;
3093			err = mi_format_new(mi, sbi, rno, 0, false);
3094			if (err)
3095				goto out;
3096		} else {
3097			/* read from disk */
3098			err = mi_get(sbi, rno, &mi);
3099			if (err)
3100				return err;
3101		}
3102		rec = mi->mrec;
3103
3104		if (op == DeallocateFileRecordSegment)
3105			goto skip_load_parent;
3106
3107		if (InitializeFileRecordSegment != op) {
3108			if (rec->rhdr.sign == NTFS_BAAD_SIGNATURE)
3109				goto dirty_vol;
3110			if (!check_lsn(&rec->rhdr, rlsn))
3111				goto out;
3112			if (!check_file_record(rec, NULL, sbi))
3113				goto dirty_vol;
3114			attr = Add2Ptr(rec, roff);
3115		}
3116
3117		if (is_rec_base(rec) || InitializeFileRecordSegment == op) {
3118			rno_base = rno;
3119			goto skip_load_parent;
3120		}
3121
3122		rno_base = ino_get(&rec->parent_ref);
3123		inode_parent = ntfs_iget5(sbi->sb, &rec->parent_ref, NULL);
3124		if (IS_ERR(inode_parent))
3125			goto skip_load_parent;
3126
3127		if (is_bad_inode(inode_parent)) {
3128			iput(inode_parent);
3129			goto skip_load_parent;
3130		}
3131
3132		if (ni_load_mi_ex(ntfs_i(inode_parent), rno, &mi2_child)) {
3133			iput(inode_parent);
3134		} else {
3135			if (mi2_child->mrec != mi->mrec)
3136				memcpy(mi2_child->mrec, mi->mrec,
3137				       sbi->record_size);
3138
3139			if (inode)
3140				iput(inode);
3141			else if (mi)
3142				mi_put(mi);
3143
3144			inode = inode_parent;
3145			mi = mi2_child;
3146			rec = mi2_child->mrec;
3147			attr = Add2Ptr(rec, roff);
3148		}
3149
3150skip_load_parent:
3151		inode_parent = NULL;
3152		break;
3153
3154	/* ============================================================
3155	 * Process attributes, as described by the current log record
3156	 * ============================================================
3157	 */
3158	case UpdateNonresidentValue:
3159	case AddIndexEntryAllocation:
3160	case DeleteIndexEntryAllocation:
3161	case WriteEndOfIndexBuffer:
3162	case SetIndexEntryVcnAllocation:
3163	case UpdateFileNameAllocation:
3164	case SetBitsInNonresidentBitMap:
3165	case ClearBitsInNonresidentBitMap:
3166	case UpdateRecordDataAllocation:
3167		attr = oa->attr;
3168		bytes = UpdateNonresidentValue == op ? dlen : 0;
3169		lco = (u64)le16_to_cpu(lrh->lcns_follow) << sbi->cluster_bits;
3170
3171		if (attr->type == ATTR_ALLOC) {
3172			t32 = le32_to_cpu(oe->bytes_per_index);
3173			if (bytes < t32)
3174				bytes = t32;
3175		}
3176
3177		if (!bytes)
3178			bytes = lco - cbo;
3179
3180		bytes += roff;
3181		if (attr->type == ATTR_ALLOC)
3182			bytes = (bytes + 511) & ~511; // align
3183
3184		buffer_le = ntfs_malloc(bytes);
3185		if (!buffer_le)
3186			return -ENOMEM;
3187
3188		err = ntfs_read_run_nb(sbi, oa->run1, vbo, buffer_le, bytes,
3189				       NULL);
3190		if (err)
3191			goto out;
3192
3193		if (attr->type == ATTR_ALLOC && *(int *)buffer_le)
3194			ntfs_fix_post_read(buffer_le, bytes, false);
3195		break;
3196
3197	default:
3198		WARN_ON(1);
3199	}
3200
3201	/* Big switch to do operation */
3202	switch (op) {
3203	case InitializeFileRecordSegment:
3204		if (roff + dlen > record_size)
3205			goto dirty_vol;
3206
3207		memcpy(Add2Ptr(rec, roff), data, dlen);
3208		mi->dirty = true;
3209		break;
3210
3211	case DeallocateFileRecordSegment:
3212		clear_rec_inuse(rec);
3213		le16_add_cpu(&rec->seq, 1);
3214		mi->dirty = true;
3215		break;
3216
3217	case WriteEndOfFileRecordSegment:
3218		attr2 = (struct ATTRIB *)data;
3219		if (!check_if_attr(rec, lrh) || roff + dlen > record_size)
3220			goto dirty_vol;
3221
3222		memmove(attr, attr2, dlen);
3223		rec->used = cpu_to_le32(ALIGN(roff + dlen, 8));
3224
3225		mi->dirty = true;
3226		break;
3227
3228	case CreateAttribute:
3229		attr2 = (struct ATTRIB *)data;
3230		asize = le32_to_cpu(attr2->size);
3231		used = le32_to_cpu(rec->used);
3232
3233		if (!check_if_attr(rec, lrh) || dlen < SIZEOF_RESIDENT ||
3234		    !IS_ALIGNED(asize, 8) ||
3235		    Add2Ptr(attr2, asize) > Add2Ptr(lrh, rec_len) ||
3236		    dlen > record_size - used) {
3237			goto dirty_vol;
3238		}
3239
3240		memmove(Add2Ptr(attr, asize), attr, used - roff);
3241		memcpy(attr, attr2, asize);
3242
3243		rec->used = cpu_to_le32(used + asize);
3244		id = le16_to_cpu(rec->next_attr_id);
3245		id2 = le16_to_cpu(attr2->id);
3246		if (id <= id2)
3247			rec->next_attr_id = cpu_to_le16(id2 + 1);
3248		if (is_attr_indexed(attr))
3249			le16_add_cpu(&rec->hard_links, 1);
3250
3251		oa2 = find_loaded_attr(log, attr, rno_base);
3252		if (oa2) {
3253			void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size));
3254
3255			if (p2) {
3256				// run_close(oa2->run1);
3257				ntfs_free(oa2->attr);
3258				oa2->attr = p2;
3259			}
3260		}
3261
3262		mi->dirty = true;
3263		break;
3264
3265	case DeleteAttribute:
3266		asize = le32_to_cpu(attr->size);
3267		used = le32_to_cpu(rec->used);
3268
3269		if (!check_if_attr(rec, lrh))
3270			goto dirty_vol;
3271
3272		rec->used = cpu_to_le32(used - asize);
3273		if (is_attr_indexed(attr))
3274			le16_add_cpu(&rec->hard_links, -1);
3275
3276		memmove(attr, Add2Ptr(attr, asize), used - asize - roff);
3277
3278		mi->dirty = true;
3279		break;
3280
3281	case UpdateResidentValue:
3282		nsize = aoff + dlen;
3283
3284		if (!check_if_attr(rec, lrh))
3285			goto dirty_vol;
3286
3287		asize = le32_to_cpu(attr->size);
3288		used = le32_to_cpu(rec->used);
3289
3290		if (lrh->redo_len == lrh->undo_len) {
3291			if (nsize > asize)
3292				goto dirty_vol;
3293			goto move_data;
3294		}
3295
3296		if (nsize > asize && nsize - asize > record_size - used)
3297			goto dirty_vol;
3298
3299		nsize = ALIGN(nsize, 8);
3300		data_off = le16_to_cpu(attr->res.data_off);
3301
3302		if (nsize < asize) {
3303			memmove(Add2Ptr(attr, aoff), data, dlen);
3304			data = NULL; // To skip below memmove
3305		}
3306
3307		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
3308			used - le16_to_cpu(lrh->record_off) - asize);
3309
3310		rec->used = cpu_to_le32(used + nsize - asize);
3311		attr->size = cpu_to_le32(nsize);
3312		attr->res.data_size = cpu_to_le32(aoff + dlen - data_off);
3313
3314move_data:
3315		if (data)
3316			memmove(Add2Ptr(attr, aoff), data, dlen);
3317
3318		oa2 = find_loaded_attr(log, attr, rno_base);
3319		if (oa2) {
3320			void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size));
3321
3322			if (p2) {
3323				// run_close(&oa2->run0);
3324				oa2->run1 = &oa2->run0;
3325				ntfs_free(oa2->attr);
3326				oa2->attr = p2;
3327			}
3328		}
3329
3330		mi->dirty = true;
3331		break;
3332
3333	case UpdateMappingPairs:
3334		nsize = aoff + dlen;
3335		asize = le32_to_cpu(attr->size);
3336		used = le32_to_cpu(rec->used);
3337
3338		if (!check_if_attr(rec, lrh) || !attr->non_res ||
3339		    aoff < le16_to_cpu(attr->nres.run_off) || aoff > asize ||
3340		    (nsize > asize && nsize - asize > record_size - used)) {
3341			goto dirty_vol;
3342		}
3343
3344		nsize = ALIGN(nsize, 8);
3345
3346		memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize),
3347			used - le16_to_cpu(lrh->record_off) - asize);
3348		rec->used = cpu_to_le32(used + nsize - asize);
3349		attr->size = cpu_to_le32(nsize);
3350		memmove(Add2Ptr(attr, aoff), data, dlen);
3351
3352		if (run_get_highest_vcn(le64_to_cpu(attr->nres.svcn),
3353					attr_run(attr), &t64)) {
3354			goto dirty_vol;
3355		}
3356
3357		attr->nres.evcn = cpu_to_le64(t64);
3358		oa2 = find_loaded_attr(log, attr, rno_base);
3359		if (oa2 && oa2->attr->non_res)
3360			oa2->attr->nres.evcn = attr->nres.evcn;
3361
3362		mi->dirty = true;
3363		break;
3364
3365	case SetNewAttributeSizes:
3366		new_sz = data;
3367		if (!check_if_attr(rec, lrh) || !attr->non_res)
3368			goto dirty_vol;
3369
3370		attr->nres.alloc_size = new_sz->alloc_size;
3371		attr->nres.data_size = new_sz->data_size;
3372		attr->nres.valid_size = new_sz->valid_size;
3373
3374		if (dlen >= sizeof(struct NEW_ATTRIBUTE_SIZES))
3375			attr->nres.total_size = new_sz->total_size;
3376
3377		oa2 = find_loaded_attr(log, attr, rno_base);
3378		if (oa2) {
3379			void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size));
3380
3381			if (p2) {
3382				ntfs_free(oa2->attr);
3383				oa2->attr = p2;
3384			}
3385		}
3386		mi->dirty = true;
3387		break;
3388
3389	case AddIndexEntryRoot:
3390		e = (struct NTFS_DE *)data;
3391		esize = le16_to_cpu(e->size);
3392		root = resident_data(attr);
3393		hdr = &root->ihdr;
3394		used = le32_to_cpu(hdr->used);
3395
3396		if (!check_if_index_root(rec, lrh) ||
3397		    !check_if_root_index(attr, hdr, lrh) ||
3398		    Add2Ptr(data, esize) > Add2Ptr(lrh, rec_len) ||
3399		    esize > le32_to_cpu(rec->total) - le32_to_cpu(rec->used)) {
3400			goto dirty_vol;
3401		}
3402
3403		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3404
3405		change_attr_size(rec, attr, le32_to_cpu(attr->size) + esize);
3406
3407		memmove(Add2Ptr(e1, esize), e1,
3408			PtrOffset(e1, Add2Ptr(hdr, used)));
3409		memmove(e1, e, esize);
3410
3411		le32_add_cpu(&attr->res.data_size, esize);
3412		hdr->used = cpu_to_le32(used + esize);
3413		le32_add_cpu(&hdr->total, esize);
3414
3415		mi->dirty = true;
3416		break;
3417
3418	case DeleteIndexEntryRoot:
3419		root = resident_data(attr);
3420		hdr = &root->ihdr;
3421		used = le32_to_cpu(hdr->used);
3422
3423		if (!check_if_index_root(rec, lrh) ||
3424		    !check_if_root_index(attr, hdr, lrh)) {
3425			goto dirty_vol;
3426		}
3427
3428		e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3429		esize = le16_to_cpu(e1->size);
3430		e2 = Add2Ptr(e1, esize);
3431
3432		memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used)));
3433
3434		le32_sub_cpu(&attr->res.data_size, esize);
3435		hdr->used = cpu_to_le32(used - esize);
3436		le32_sub_cpu(&hdr->total, esize);
3437
3438		change_attr_size(rec, attr, le32_to_cpu(attr->size) - esize);
3439
3440		mi->dirty = true;
3441		break;
3442
3443	case SetIndexEntryVcnRoot:
3444		root = resident_data(attr);
3445		hdr = &root->ihdr;
3446
3447		if (!check_if_index_root(rec, lrh) ||
3448		    !check_if_root_index(attr, hdr, lrh)) {
3449			goto dirty_vol;
3450		}
3451
3452		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3453
3454		de_set_vbn_le(e, *(__le64 *)data);
3455		mi->dirty = true;
3456		break;
3457
3458	case UpdateFileNameRoot:
3459		root = resident_data(attr);
3460		hdr = &root->ihdr;
3461
3462		if (!check_if_index_root(rec, lrh) ||
3463		    !check_if_root_index(attr, hdr, lrh)) {
3464			goto dirty_vol;
3465		}
3466
3467		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3468		fname = (struct ATTR_FILE_NAME *)(e + 1);
3469		memmove(&fname->dup, data, sizeof(fname->dup)); //
3470		mi->dirty = true;
3471		break;
3472
3473	case UpdateRecordDataRoot:
3474		root = resident_data(attr);
3475		hdr = &root->ihdr;
3476
3477		if (!check_if_index_root(rec, lrh) ||
3478		    !check_if_root_index(attr, hdr, lrh)) {
3479			goto dirty_vol;
3480		}
3481
3482		e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off));
3483
3484		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
3485
3486		mi->dirty = true;
3487		break;
3488
3489	case ZeroEndOfFileRecord:
3490		if (roff + dlen > record_size)
3491			goto dirty_vol;
3492
3493		memset(attr, 0, dlen);
3494		mi->dirty = true;
3495		break;
3496
3497	case UpdateNonresidentValue:
3498		if (lco < cbo + roff + dlen)
3499			goto dirty_vol;
3500
3501		memcpy(Add2Ptr(buffer_le, roff), data, dlen);
3502
3503		a_dirty = true;
3504		if (attr->type == ATTR_ALLOC)
3505			ntfs_fix_pre_write(buffer_le, bytes);
3506		break;
3507
3508	case AddIndexEntryAllocation:
3509		ib = Add2Ptr(buffer_le, roff);
3510		hdr = &ib->ihdr;
3511		e = data;
3512		esize = le16_to_cpu(e->size);
3513		e1 = Add2Ptr(ib, aoff);
3514
3515		if (is_baad(&ib->rhdr))
3516			goto dirty_vol;
3517		if (!check_lsn(&ib->rhdr, rlsn))
3518			goto out;
3519
3520		used = le32_to_cpu(hdr->used);
3521
3522		if (!check_index_buffer(ib, bytes) ||
3523		    !check_if_alloc_index(hdr, aoff) ||
3524		    Add2Ptr(e, esize) > Add2Ptr(lrh, rec_len) ||
3525		    used + esize > le32_to_cpu(hdr->total)) {
3526			goto dirty_vol;
3527		}
3528
3529		memmove(Add2Ptr(e1, esize), e1,
3530			PtrOffset(e1, Add2Ptr(hdr, used)));
3531		memcpy(e1, e, esize);
3532
3533		hdr->used = cpu_to_le32(used + esize);
3534
3535		a_dirty = true;
3536
3537		ntfs_fix_pre_write(&ib->rhdr, bytes);
3538		break;
3539
3540	case DeleteIndexEntryAllocation:
3541		ib = Add2Ptr(buffer_le, roff);
3542		hdr = &ib->ihdr;
3543		e = Add2Ptr(ib, aoff);
3544		esize = le16_to_cpu(e->size);
3545
3546		if (is_baad(&ib->rhdr))
3547			goto dirty_vol;
3548		if (!check_lsn(&ib->rhdr, rlsn))
3549			goto out;
3550
3551		if (!check_index_buffer(ib, bytes) ||
3552		    !check_if_alloc_index(hdr, aoff)) {
3553			goto dirty_vol;
3554		}
3555
3556		e1 = Add2Ptr(e, esize);
3557		nsize = esize;
3558		used = le32_to_cpu(hdr->used);
3559
3560		memmove(e, e1, PtrOffset(e1, Add2Ptr(hdr, used)));
3561
3562		hdr->used = cpu_to_le32(used - nsize);
3563
3564		a_dirty = true;
3565
3566		ntfs_fix_pre_write(&ib->rhdr, bytes);
3567		break;
3568
3569	case WriteEndOfIndexBuffer:
3570		ib = Add2Ptr(buffer_le, roff);
3571		hdr = &ib->ihdr;
3572		e = Add2Ptr(ib, aoff);
3573
3574		if (is_baad(&ib->rhdr))
3575			goto dirty_vol;
3576		if (!check_lsn(&ib->rhdr, rlsn))
3577			goto out;
3578		if (!check_index_buffer(ib, bytes) ||
3579		    !check_if_alloc_index(hdr, aoff) ||
3580		    aoff + dlen > offsetof(struct INDEX_BUFFER, ihdr) +
3581					  le32_to_cpu(hdr->total)) {
3582			goto dirty_vol;
3583		}
3584
3585		hdr->used = cpu_to_le32(dlen + PtrOffset(hdr, e));
3586		memmove(e, data, dlen);
3587
3588		a_dirty = true;
3589		ntfs_fix_pre_write(&ib->rhdr, bytes);
3590		break;
3591
3592	case SetIndexEntryVcnAllocation:
3593		ib = Add2Ptr(buffer_le, roff);
3594		hdr = &ib->ihdr;
3595		e = Add2Ptr(ib, aoff);
3596
3597		if (is_baad(&ib->rhdr))
3598			goto dirty_vol;
3599
3600		if (!check_lsn(&ib->rhdr, rlsn))
3601			goto out;
3602		if (!check_index_buffer(ib, bytes) ||
3603		    !check_if_alloc_index(hdr, aoff)) {
3604			goto dirty_vol;
3605		}
3606
3607		de_set_vbn_le(e, *(__le64 *)data);
3608
3609		a_dirty = true;
3610		ntfs_fix_pre_write(&ib->rhdr, bytes);
3611		break;
3612
3613	case UpdateFileNameAllocation:
3614		ib = Add2Ptr(buffer_le, roff);
3615		hdr = &ib->ihdr;
3616		e = Add2Ptr(ib, aoff);
3617
3618		if (is_baad(&ib->rhdr))
3619			goto dirty_vol;
3620
3621		if (!check_lsn(&ib->rhdr, rlsn))
3622			goto out;
3623		if (!check_index_buffer(ib, bytes) ||
3624		    !check_if_alloc_index(hdr, aoff)) {
3625			goto dirty_vol;
3626		}
3627
3628		fname = (struct ATTR_FILE_NAME *)(e + 1);
3629		memmove(&fname->dup, data, sizeof(fname->dup));
3630
3631		a_dirty = true;
3632		ntfs_fix_pre_write(&ib->rhdr, bytes);
3633		break;
3634
3635	case SetBitsInNonresidentBitMap:
3636		bmp_off =
3637			le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
3638		bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
3639
3640		if (cbo + (bmp_off + 7) / 8 > lco ||
3641		    cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) {
3642			goto dirty_vol;
3643		}
3644
3645		__bitmap_set(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits);
3646		a_dirty = true;
3647		break;
3648
3649	case ClearBitsInNonresidentBitMap:
3650		bmp_off =
3651			le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off);
3652		bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits);
3653
3654		if (cbo + (bmp_off + 7) / 8 > lco ||
3655		    cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) {
3656			goto dirty_vol;
3657		}
3658
3659		__bitmap_clear(Add2Ptr(buffer_le, roff), bmp_off, bmp_bits);
3660		a_dirty = true;
3661		break;
3662
3663	case UpdateRecordDataAllocation:
3664		ib = Add2Ptr(buffer_le, roff);
3665		hdr = &ib->ihdr;
3666		e = Add2Ptr(ib, aoff);
3667
3668		if (is_baad(&ib->rhdr))
3669			goto dirty_vol;
3670
3671		if (!check_lsn(&ib->rhdr, rlsn))
3672			goto out;
3673		if (!check_index_buffer(ib, bytes) ||
3674		    !check_if_alloc_index(hdr, aoff)) {
3675			goto dirty_vol;
3676		}
3677
3678		memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen);
3679
3680		a_dirty = true;
3681		ntfs_fix_pre_write(&ib->rhdr, bytes);
3682		break;
3683
3684	default:
3685		WARN_ON(1);
3686	}
3687
3688	if (rlsn) {
3689		__le64 t64 = cpu_to_le64(*rlsn);
3690
3691		if (rec)
3692			rec->rhdr.lsn = t64;
3693		if (ib)
3694			ib->rhdr.lsn = t64;
3695	}
3696
3697	if (mi && mi->dirty) {
3698		err = mi_write(mi, 0);
3699		if (err)
3700			goto out;
3701	}
3702
3703	if (a_dirty) {
3704		attr = oa->attr;
3705		err = ntfs_sb_write_run(sbi, oa->run1, vbo, buffer_le, bytes);
3706		if (err)
3707			goto out;
3708	}
3709
3710out:
3711
3712	if (inode)
3713		iput(inode);
3714	else if (mi != mi2_child)
3715		mi_put(mi);
3716
3717	ntfs_free(buffer_le);
3718
3719	return err;
3720
3721dirty_vol:
3722	log->set_dirty = true;
3723	goto out;
3724}
3725
3726/*
3727 * log_replay
3728 *
3729 * This function is called during the mount operation.
3730 * It replays the log and then empties it.
3731 * 'initialized' is set to false if the log file contains '-1'.
3732 */
3733int log_replay(struct ntfs_inode *ni, bool *initialized)
3734{
3735	int err;
3736	struct ntfs_sb_info *sbi = ni->mi.sbi;
3737	struct ntfs_log *log;
3738
3739	struct restart_info rst_info, rst_info2;
3740	u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0;
3741	struct ATTR_NAME_ENTRY *attr_names = NULL;
3742	struct ATTR_NAME_ENTRY *ane;
3743	struct RESTART_TABLE *dptbl = NULL;
3744	struct RESTART_TABLE *trtbl = NULL;
3745	const struct RESTART_TABLE *rt;
3746	struct RESTART_TABLE *oatbl = NULL;
3747	struct inode *inode;
3748	struct OpenAttr *oa;
3749	struct ntfs_inode *ni_oe;
3750	struct ATTRIB *attr = NULL;
3751	u64 size, vcn, undo_next_lsn;
3752	CLST rno, lcn, lcn0, len0, clen;
3753	void *data;
3754	struct NTFS_RESTART *rst = NULL;
3755	struct lcb *lcb = NULL;
3756	struct OPEN_ATTR_ENRTY *oe;
3757	struct TRANSACTION_ENTRY *tr;
3758	struct DIR_PAGE_ENTRY *dp;
3759	u32 i, bytes_per_attr_entry;
3760	u32 l_size = ni->vfs_inode.i_size;
3761	u32 orig_file_size = l_size;
3762	u32 page_size, vbo, tail, off, dlen;
3763	u32 saved_len, rec_len, transact_id;
3764	bool use_second_page;
3765	struct RESTART_AREA *ra2, *ra = NULL;
3766	struct CLIENT_REC *ca, *cr;
3767	__le16 client;
3768	struct RESTART_HDR *rh;
3769	const struct LFS_RECORD_HDR *frh;
3770	const struct LOG_REC_HDR *lrh;
3771	bool is_mapped;
3772	bool is_ro = sb_rdonly(sbi->sb);
3773	u64 t64;
3774	u16 t16;
3775	u32 t32;
3776
3777	/* Get the size of page. NOTE: To replay we can use default page */
3778#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2
3779	page_size = norm_file_page(PAGE_SIZE, &l_size, true);
3780#else
3781	page_size = norm_file_page(PAGE_SIZE, &l_size, false);
3782#endif
3783	if (!page_size)
3784		return -EINVAL;
3785
3786	log = ntfs_zalloc(sizeof(struct ntfs_log));
3787	if (!log)
3788		return -ENOMEM;
3789
3790	log->ni = ni;
3791	log->l_size = l_size;
3792	log->one_page_buf = ntfs_malloc(page_size);
3793
3794	if (!log->one_page_buf) {
3795		err = -ENOMEM;
3796		goto out;
3797	}
3798
3799	log->page_size = page_size;
3800	log->page_mask = page_size - 1;
3801	log->page_bits = blksize_bits(page_size);
3802
3803	/* Look for a restart area on the disk */
3804	err = log_read_rst(log, l_size, true, &rst_info);
3805	if (err)
3806		goto out;
3807
3808	/* remember 'initialized' */
3809	*initialized = rst_info.initialized;
3810
3811	if (!rst_info.restart) {
3812		if (rst_info.initialized) {
3813			/* no restart area but the file is not initialized */
3814			err = -EINVAL;
3815			goto out;
3816		}
3817
3818		log_init_pg_hdr(log, page_size, page_size, 1, 1);
3819		log_create(log, l_size, 0, get_random_int(), false, false);
3820
3821		log->ra = ra;
3822
3823		ra = log_create_ra(log);
3824		if (!ra) {
3825			err = -ENOMEM;
3826			goto out;
3827		}
3828		log->ra = ra;
3829		log->init_ra = true;
3830
3831		goto process_log;
3832	}
3833
3834	/*
3835	 * If the restart offset above wasn't zero then we won't
3836	 * look for a second restart
3837	 */
3838	if (rst_info.vbo)
3839		goto check_restart_area;
3840
3841	err = log_read_rst(log, l_size, false, &rst_info2);
3842
3843	/* Determine which restart area to use */
3844	if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn)
3845		goto use_first_page;
3846
3847	use_second_page = true;
3848
3849	if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) {
3850		struct RECORD_PAGE_HDR *sp = NULL;
3851		bool usa_error;
3852
3853		if (!read_log_page(log, page_size, &sp, &usa_error) &&
3854		    sp->rhdr.sign == NTFS_CHKD_SIGNATURE) {
3855			use_second_page = false;
3856		}
3857		ntfs_free(sp);
3858	}
3859
3860	if (use_second_page) {
3861		ntfs_free(rst_info.r_page);
3862		memcpy(&rst_info, &rst_info2, sizeof(struct restart_info));
3863		rst_info2.r_page = NULL;
3864	}
3865
3866use_first_page:
3867	ntfs_free(rst_info2.r_page);
3868
3869check_restart_area:
3870	/* If the restart area is at offset 0, we want to write the second restart area first */
3871	log->init_ra = !!rst_info.vbo;
3872
3873	/* If we have a valid page then grab a pointer to the restart area */
3874	ra2 = rst_info.valid_page
3875		      ? Add2Ptr(rst_info.r_page,
3876				le16_to_cpu(rst_info.r_page->ra_off))
3877		      : NULL;
3878
3879	if (rst_info.chkdsk_was_run ||
3880	    (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) {
3881		bool wrapped = false;
3882		bool use_multi_page = false;
3883		u32 open_log_count;
3884
3885		/* Do some checks based on whether we have a valid log page */
3886		if (!rst_info.valid_page) {
3887			open_log_count = get_random_int();
3888			goto init_log_instance;
3889		}
3890		open_log_count = le32_to_cpu(ra2->open_log_count);
3891
3892		/*
3893		 * If the restart page size isn't changing then we want to
3894		 * check how much work we need to do
3895		 */
3896		if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size))
3897			goto init_log_instance;
3898
3899init_log_instance:
3900		log_init_pg_hdr(log, page_size, page_size, 1, 1);
3901
3902		log_create(log, l_size, rst_info.last_lsn, open_log_count,
3903			   wrapped, use_multi_page);
3904
3905		ra = log_create_ra(log);
3906		if (!ra) {
3907			err = -ENOMEM;
3908			goto out;
3909		}
3910		log->ra = ra;
3911
3912		/* Put the restart areas and initialize the log file as required */
3913		goto process_log;
3914	}
3915
3916	if (!ra2) {
3917		err = -EINVAL;
3918		goto out;
3919	}
3920
3921	/*
3922	 * If the log page or the system page sizes have changed, we can't use the log file
3923	 * We must use the system page size instead of the default size
3924	 * if there is not a clean shutdown
3925	 */
3926	t32 = le32_to_cpu(rst_info.r_page->sys_page_size);
3927	if (page_size != t32) {
3928		l_size = orig_file_size;
3929		page_size =
3930			norm_file_page(t32, &l_size, t32 == DefaultLogPageSize);
3931	}
3932
3933	if (page_size != t32 ||
3934	    page_size != le32_to_cpu(rst_info.r_page->page_size)) {
3935		err = -EINVAL;
3936		goto out;
3937	}
3938
3939	/* If the file size has shrunk then we won't mount it */
3940	if (l_size < le64_to_cpu(ra2->l_size)) {
3941		err = -EINVAL;
3942		goto out;
3943	}
3944
3945	log_init_pg_hdr(log, page_size, page_size,
3946			le16_to_cpu(rst_info.r_page->major_ver),
3947			le16_to_cpu(rst_info.r_page->minor_ver));
3948
3949	log->l_size = le64_to_cpu(ra2->l_size);
3950	log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits);
3951	log->file_data_bits = sizeof(u64) * 8 - log->seq_num_bits;
3952	log->seq_num_mask = (8 << log->file_data_bits) - 1;
3953	log->last_lsn = le64_to_cpu(ra2->current_lsn);
3954	log->seq_num = log->last_lsn >> log->file_data_bits;
3955	log->ra_off = le16_to_cpu(rst_info.r_page->ra_off);
3956	log->restart_size = log->sys_page_size - log->ra_off;
3957	log->record_header_len = le16_to_cpu(ra2->rec_hdr_len);
3958	log->ra_size = le16_to_cpu(ra2->ra_len);
3959	log->data_off = le16_to_cpu(ra2->data_off);
3960	log->data_size = log->page_size - log->data_off;
3961	log->reserved = log->data_size - log->record_header_len;
3962
3963	vbo = lsn_to_vbo(log, log->last_lsn);
3964
3965	if (vbo < log->first_page) {
3966		/* This is a pseudo lsn */
3967		log->l_flags |= NTFSLOG_NO_LAST_LSN;
3968		log->next_page = log->first_page;
3969		goto find_oldest;
3970	}
3971
3972	/* Find the end of this log record */
3973	off = final_log_off(log, log->last_lsn,
3974			    le32_to_cpu(ra2->last_lsn_data_len));
3975
3976	/* If we wrapped the file then increment the sequence number */
3977	if (off <= vbo) {
3978		log->seq_num += 1;
3979		log->l_flags |= NTFSLOG_WRAPPED;
3980	}
3981
3982	/* Now compute the next log page to use */
3983	vbo &= ~log->sys_page_mask;
3984	tail = log->page_size - (off & log->page_mask) - 1;
3985
3986	/* If we can fit another log record on the page, move back a page the log file */
3987	if (tail >= log->record_header_len) {
3988		log->l_flags |= NTFSLOG_REUSE_TAIL;
3989		log->next_page = vbo;
3990	} else {
3991		log->next_page = next_page_off(log, vbo);
3992	}
3993
3994find_oldest:
3995	/* Find the oldest client lsn. Use the last flushed lsn as a starting point */
3996	log->oldest_lsn = log->last_lsn;
3997	oldest_client_lsn(Add2Ptr(ra2, le16_to_cpu(ra2->client_off)),
3998			  ra2->client_idx[1], &log->oldest_lsn);
3999	log->oldest_lsn_off = lsn_to_vbo(log, log->oldest_lsn);
4000
4001	if (log->oldest_lsn_off < log->first_page)
4002		log->l_flags |= NTFSLOG_NO_OLDEST_LSN;
4003
4004	if (!(ra2->flags & RESTART_SINGLE_PAGE_IO))
4005		log->l_flags |= NTFSLOG_WRAPPED | NTFSLOG_MULTIPLE_PAGE_IO;
4006
4007	log->current_openlog_count = le32_to_cpu(ra2->open_log_count);
4008	log->total_avail_pages = log->l_size - log->first_page;
4009	log->total_avail = log->total_avail_pages >> log->page_bits;
4010	log->max_current_avail = log->total_avail * log->reserved;
4011	log->total_avail = log->total_avail * log->data_size;
4012
4013	log->current_avail = current_log_avail(log);
4014
4015	ra = ntfs_zalloc(log->restart_size);
4016	if (!ra) {
4017		err = -ENOMEM;
4018		goto out;
4019	}
4020	log->ra = ra;
4021
4022	t16 = le16_to_cpu(ra2->client_off);
4023	if (t16 == offsetof(struct RESTART_AREA, clients)) {
4024		memcpy(ra, ra2, log->ra_size);
4025	} else {
4026		memcpy(ra, ra2, offsetof(struct RESTART_AREA, clients));
4027		memcpy(ra->clients, Add2Ptr(ra2, t16),
4028		       le16_to_cpu(ra2->ra_len) - t16);
4029
4030		log->current_openlog_count = get_random_int();
4031		ra->open_log_count = cpu_to_le32(log->current_openlog_count);
4032		log->ra_size = offsetof(struct RESTART_AREA, clients) +
4033			       sizeof(struct CLIENT_REC);
4034		ra->client_off =
4035			cpu_to_le16(offsetof(struct RESTART_AREA, clients));
4036		ra->ra_len = cpu_to_le16(log->ra_size);
4037	}
4038
4039	le32_add_cpu(&ra->open_log_count, 1);
4040
4041	/* Now we need to walk through looking for the last lsn */
4042	err = last_log_lsn(log);
4043	if (err)
4044		goto out;
4045
4046	log->current_avail = current_log_avail(log);
4047
4048	/* Remember which restart area to write first */
4049	log->init_ra = rst_info.vbo;
4050
4051process_log:
4052	/* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values */
4053	switch ((log->major_ver << 16) + log->minor_ver) {
4054	case 0x10000:
4055	case 0x10001:
4056	case 0x20000:
4057		break;
4058	default:
4059		ntfs_warn(sbi->sb, "\x24LogFile version %d.%d is not supported",
4060			  log->major_ver, log->minor_ver);
4061		err = -EOPNOTSUPP;
4062		log->set_dirty = true;
4063		goto out;
4064	}
4065
4066	/* One client "NTFS" per logfile */
4067	ca = Add2Ptr(ra, le16_to_cpu(ra->client_off));
4068
4069	for (client = ra->client_idx[1];; client = cr->next_client) {
4070		if (client == LFS_NO_CLIENT_LE) {
4071			/* Insert "NTFS" client LogFile */
4072			client = ra->client_idx[0];
4073			if (client == LFS_NO_CLIENT_LE)
4074				return -EINVAL;
4075
4076			t16 = le16_to_cpu(client);
4077			cr = ca + t16;
4078
4079			remove_client(ca, cr, &ra->client_idx[0]);
4080
4081			cr->restart_lsn = 0;
4082			cr->oldest_lsn = cpu_to_le64(log->oldest_lsn);
4083			cr->name_bytes = cpu_to_le32(8);
4084			cr->name[0] = cpu_to_le16('N');
4085			cr->name[1] = cpu_to_le16('T');
4086			cr->name[2] = cpu_to_le16('F');
4087			cr->name[3] = cpu_to_le16('S');
4088
4089			add_client(ca, t16, &ra->client_idx[1]);
4090			break;
4091		}
4092
4093		cr = ca + le16_to_cpu(client);
4094
4095		if (cpu_to_le32(8) == cr->name_bytes &&
4096		    cpu_to_le16('N') == cr->name[0] &&
4097		    cpu_to_le16('T') == cr->name[1] &&
4098		    cpu_to_le16('F') == cr->name[2] &&
4099		    cpu_to_le16('S') == cr->name[3])
4100			break;
4101	}
4102
4103	/* Update the client handle with the client block information */
4104	log->client_id.seq_num = cr->seq_num;
4105	log->client_id.client_idx = client;
4106
4107	err = read_rst_area(log, &rst, &ra_lsn);
4108	if (err)
4109		goto out;
4110
4111	if (!rst)
4112		goto out;
4113
4114	bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28;
4115
4116	checkpt_lsn = le64_to_cpu(rst->check_point_start);
4117	if (!checkpt_lsn)
4118		checkpt_lsn = ra_lsn;
4119
4120	/* Allocate and Read the Transaction Table */
4121	if (!rst->transact_table_len)
4122		goto check_dirty_page_table;
4123
4124	t64 = le64_to_cpu(rst->transact_table_lsn);
4125	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4126	if (err)
4127		goto out;
4128
4129	lrh = lcb->log_rec;
4130	frh = lcb->lrh;
4131	rec_len = le32_to_cpu(frh->client_data_len);
4132
4133	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4134			   bytes_per_attr_entry)) {
4135		err = -EINVAL;
4136		goto out;
4137	}
4138
4139	t16 = le16_to_cpu(lrh->redo_off);
4140
4141	rt = Add2Ptr(lrh, t16);
4142	t32 = rec_len - t16;
4143
4144	/* Now check that this is a valid restart table */
4145	if (!check_rstbl(rt, t32)) {
4146		err = -EINVAL;
4147		goto out;
4148	}
4149
4150	trtbl = ntfs_memdup(rt, t32);
4151	if (!trtbl) {
4152		err = -ENOMEM;
4153		goto out;
4154	}
4155
4156	lcb_put(lcb);
4157	lcb = NULL;
4158
4159check_dirty_page_table:
4160	/* The next record back should be the Dirty Pages Table */
4161	if (!rst->dirty_pages_len)
4162		goto check_attribute_names;
4163
4164	t64 = le64_to_cpu(rst->dirty_pages_table_lsn);
4165	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4166	if (err)
4167		goto out;
4168
4169	lrh = lcb->log_rec;
4170	frh = lcb->lrh;
4171	rec_len = le32_to_cpu(frh->client_data_len);
4172
4173	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4174			   bytes_per_attr_entry)) {
4175		err = -EINVAL;
4176		goto out;
4177	}
4178
4179	t16 = le16_to_cpu(lrh->redo_off);
4180
4181	rt = Add2Ptr(lrh, t16);
4182	t32 = rec_len - t16;
4183
4184	/* Now check that this is a valid restart table */
4185	if (!check_rstbl(rt, t32)) {
4186		err = -EINVAL;
4187		goto out;
4188	}
4189
4190	dptbl = ntfs_memdup(rt, t32);
4191	if (!dptbl) {
4192		err = -ENOMEM;
4193		goto out;
4194	}
4195
4196	/* Convert Ra version '0' into version '1' */
4197	if (rst->major_ver)
4198		goto end_conv_1;
4199
4200	dp = NULL;
4201	while ((dp = enum_rstbl(dptbl, dp))) {
4202		struct DIR_PAGE_ENTRY_32 *dp0 = (struct DIR_PAGE_ENTRY_32 *)dp;
4203		// NOTE: Danger. Check for of boundary
4204		memmove(&dp->vcn, &dp0->vcn_low,
4205			2 * sizeof(u64) +
4206				le32_to_cpu(dp->lcns_follow) * sizeof(u64));
4207	}
4208
4209end_conv_1:
4210	lcb_put(lcb);
4211	lcb = NULL;
4212
4213	/* Go through the table and remove the duplicates, remembering the oldest lsn values */
4214	if (sbi->cluster_size <= log->page_size)
4215		goto trace_dp_table;
4216
4217	dp = NULL;
4218	while ((dp = enum_rstbl(dptbl, dp))) {
4219		struct DIR_PAGE_ENTRY *next = dp;
4220
4221		while ((next = enum_rstbl(dptbl, next))) {
4222			if (next->target_attr == dp->target_attr &&
4223			    next->vcn == dp->vcn) {
4224				if (le64_to_cpu(next->oldest_lsn) <
4225				    le64_to_cpu(dp->oldest_lsn)) {
4226					dp->oldest_lsn = next->oldest_lsn;
4227				}
4228
4229				free_rsttbl_idx(dptbl, PtrOffset(dptbl, next));
4230			}
4231		}
4232	}
4233trace_dp_table:
4234check_attribute_names:
4235	/* The next record should be the Attribute Names */
4236	if (!rst->attr_names_len)
4237		goto check_attr_table;
4238
4239	t64 = le64_to_cpu(rst->attr_names_lsn);
4240	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4241	if (err)
4242		goto out;
4243
4244	lrh = lcb->log_rec;
4245	frh = lcb->lrh;
4246	rec_len = le32_to_cpu(frh->client_data_len);
4247
4248	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4249			   bytes_per_attr_entry)) {
4250		err = -EINVAL;
4251		goto out;
4252	}
4253
4254	t32 = lrh_length(lrh);
4255	rec_len -= t32;
4256
4257	attr_names = ntfs_memdup(Add2Ptr(lrh, t32), rec_len);
4258
4259	lcb_put(lcb);
4260	lcb = NULL;
4261
4262check_attr_table:
4263	/* The next record should be the attribute Table */
4264	if (!rst->open_attr_len)
4265		goto check_attribute_names2;
4266
4267	t64 = le64_to_cpu(rst->open_attr_table_lsn);
4268	err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb);
4269	if (err)
4270		goto out;
4271
4272	lrh = lcb->log_rec;
4273	frh = lcb->lrh;
4274	rec_len = le32_to_cpu(frh->client_data_len);
4275
4276	if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id),
4277			   bytes_per_attr_entry)) {
4278		err = -EINVAL;
4279		goto out;
4280	}
4281
4282	t16 = le16_to_cpu(lrh->redo_off);
4283
4284	rt = Add2Ptr(lrh, t16);
4285	t32 = rec_len - t16;
4286
4287	if (!check_rstbl(rt, t32)) {
4288		err = -EINVAL;
4289		goto out;
4290	}
4291
4292	oatbl = ntfs_memdup(rt, t32);
4293	if (!oatbl) {
4294		err = -ENOMEM;
4295		goto out;
4296	}
4297
4298	log->open_attr_tbl = oatbl;
4299
4300	/* Clear all of the Attr pointers */
4301	oe = NULL;
4302	while ((oe = enum_rstbl(oatbl, oe))) {
4303		if (!rst->major_ver) {
4304			struct OPEN_ATTR_ENRTY_32 oe0;
4305
4306			/* Really 'oe' points to OPEN_ATTR_ENRTY_32 */
4307			memcpy(&oe0, oe, SIZEOF_OPENATTRIBUTEENTRY0);
4308
4309			oe->bytes_per_index = oe0.bytes_per_index;
4310			oe->type = oe0.type;
4311			oe->is_dirty_pages = oe0.is_dirty_pages;
4312			oe->name_len = 0;
4313			oe->ref = oe0.ref;
4314			oe->open_record_lsn = oe0.open_record_lsn;
4315		}
4316
4317		oe->is_attr_name = 0;
4318		oe->ptr = NULL;
4319	}
4320
4321	lcb_put(lcb);
4322	lcb = NULL;
4323
4324check_attribute_names2:
4325	if (!rst->attr_names_len)
4326		goto trace_attribute_table;
4327
4328	ane = attr_names;
4329	if (!oatbl)
4330		goto trace_attribute_table;
4331	while (ane->off) {
4332		/* TODO: Clear table on exit! */
4333		oe = Add2Ptr(oatbl, le16_to_cpu(ane->off));
4334		t16 = le16_to_cpu(ane->name_bytes);
4335		oe->name_len = t16 / sizeof(short);
4336		oe->ptr = ane->name;
4337		oe->is_attr_name = 2;
4338		ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16);
4339	}
4340
4341trace_attribute_table:
4342	/*
4343	 * If the checkpt_lsn is zero, then this is a freshly
4344	 * formatted disk and we have no work to do
4345	 */
4346	if (!checkpt_lsn) {
4347		err = 0;
4348		goto out;
4349	}
4350
4351	if (!oatbl) {
4352		oatbl = init_rsttbl(bytes_per_attr_entry, 8);
4353		if (!oatbl) {
4354			err = -ENOMEM;
4355			goto out;
4356		}
4357	}
4358
4359	log->open_attr_tbl = oatbl;
4360
4361	/* Start the analysis pass from the Checkpoint lsn. */
4362	rec_lsn = checkpt_lsn;
4363
4364	/* Read the first lsn */
4365	err = read_log_rec_lcb(log, checkpt_lsn, lcb_ctx_next, &lcb);
4366	if (err)
4367		goto out;
4368
4369	/* Loop to read all subsequent records to the end of the log file */
4370next_log_record_analyze:
4371	err = read_next_log_rec(log, lcb, &rec_lsn);
4372	if (err)
4373		goto out;
4374
4375	if (!rec_lsn)
4376		goto end_log_records_enumerate;
4377
4378	frh = lcb->lrh;
4379	transact_id = le32_to_cpu(frh->transact_id);
4380	rec_len = le32_to_cpu(frh->client_data_len);
4381	lrh = lcb->log_rec;
4382
4383	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
4384		err = -EINVAL;
4385		goto out;
4386	}
4387
4388	/*
4389	 * The first lsn after the previous lsn remembered
4390	 * the checkpoint is the first candidate for the rlsn
4391	 */
4392	if (!rlsn)
4393		rlsn = rec_lsn;
4394
4395	if (LfsClientRecord != frh->record_type)
4396		goto next_log_record_analyze;
4397
4398	/*
4399	 * Now update the Transaction Table for this transaction
4400	 * If there is no entry present or it is unallocated we allocate the entry
4401	 */
4402	if (!trtbl) {
4403		trtbl = init_rsttbl(sizeof(struct TRANSACTION_ENTRY),
4404				    INITIAL_NUMBER_TRANSACTIONS);
4405		if (!trtbl) {
4406			err = -ENOMEM;
4407			goto out;
4408		}
4409	}
4410
4411	tr = Add2Ptr(trtbl, transact_id);
4412
4413	if (transact_id >= bytes_per_rt(trtbl) ||
4414	    tr->next != RESTART_ENTRY_ALLOCATED_LE) {
4415		tr = alloc_rsttbl_from_idx(&trtbl, transact_id);
4416		if (!tr) {
4417			err = -ENOMEM;
4418			goto out;
4419		}
4420		tr->transact_state = TransactionActive;
4421		tr->first_lsn = cpu_to_le64(rec_lsn);
4422	}
4423
4424	tr->prev_lsn = tr->undo_next_lsn = cpu_to_le64(rec_lsn);
4425
4426	/*
4427	 * If this is a compensation log record, then change
4428	 * the undo_next_lsn to be the undo_next_lsn of this record
4429	 */
4430	if (lrh->undo_op == cpu_to_le16(CompensationLogRecord))
4431		tr->undo_next_lsn = frh->client_undo_next_lsn;
4432
4433	/* Dispatch to handle log record depending on type */
4434	switch (le16_to_cpu(lrh->redo_op)) {
4435	case InitializeFileRecordSegment:
4436	case DeallocateFileRecordSegment:
4437	case WriteEndOfFileRecordSegment:
4438	case CreateAttribute:
4439	case DeleteAttribute:
4440	case UpdateResidentValue:
4441	case UpdateNonresidentValue:
4442	case UpdateMappingPairs:
4443	case SetNewAttributeSizes:
4444	case AddIndexEntryRoot:
4445	case DeleteIndexEntryRoot:
4446	case AddIndexEntryAllocation:
4447	case DeleteIndexEntryAllocation:
4448	case WriteEndOfIndexBuffer:
4449	case SetIndexEntryVcnRoot:
4450	case SetIndexEntryVcnAllocation:
4451	case UpdateFileNameRoot:
4452	case UpdateFileNameAllocation:
4453	case SetBitsInNonresidentBitMap:
4454	case ClearBitsInNonresidentBitMap:
4455	case UpdateRecordDataRoot:
4456	case UpdateRecordDataAllocation:
4457	case ZeroEndOfFileRecord:
4458		t16 = le16_to_cpu(lrh->target_attr);
4459		t64 = le64_to_cpu(lrh->target_vcn);
4460		dp = find_dp(dptbl, t16, t64);
4461
4462		if (dp)
4463			goto copy_lcns;
4464
4465		/*
4466		 * Calculate the number of clusters per page the system
4467		 * which wrote the checkpoint, possibly creating the table
4468		 */
4469		if (dptbl) {
4470			t32 = (le16_to_cpu(dptbl->size) -
4471			       sizeof(struct DIR_PAGE_ENTRY)) /
4472			      sizeof(u64);
4473		} else {
4474			t32 = log->clst_per_page;
4475			ntfs_free(dptbl);
4476			dptbl = init_rsttbl(struct_size(dp, page_lcns, t32),
4477					    32);
4478			if (!dptbl) {
4479				err = -ENOMEM;
4480				goto out;
4481			}
4482		}
4483
4484		dp = alloc_rsttbl_idx(&dptbl);
4485		dp->target_attr = cpu_to_le32(t16);
4486		dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits);
4487		dp->lcns_follow = cpu_to_le32(t32);
4488		dp->vcn = cpu_to_le64(t64 & ~((u64)t32 - 1));
4489		dp->oldest_lsn = cpu_to_le64(rec_lsn);
4490
4491copy_lcns:
4492		/*
4493		 * Copy the Lcns from the log record into the Dirty Page Entry
4494		 * TODO: for different page size support, must somehow make
4495		 * whole routine a loop, case Lcns do not fit below
4496		 */
4497		t16 = le16_to_cpu(lrh->lcns_follow);
4498		for (i = 0; i < t16; i++) {
4499			size_t j = (size_t)(le64_to_cpu(lrh->target_vcn) -
4500					    le64_to_cpu(dp->vcn));
4501			dp->page_lcns[j + i] = lrh->page_lcns[i];
4502		}
4503
4504		goto next_log_record_analyze;
4505
4506	case DeleteDirtyClusters: {
4507		u32 range_count =
4508			le16_to_cpu(lrh->redo_len) / sizeof(struct LCN_RANGE);
4509		const struct LCN_RANGE *r =
4510			Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
4511
4512		/* Loop through all of the Lcn ranges this log record */
4513		for (i = 0; i < range_count; i++, r++) {
4514			u64 lcn0 = le64_to_cpu(r->lcn);
4515			u64 lcn_e = lcn0 + le64_to_cpu(r->len) - 1;
4516
4517			dp = NULL;
4518			while ((dp = enum_rstbl(dptbl, dp))) {
4519				u32 j;
4520
4521				t32 = le32_to_cpu(dp->lcns_follow);
4522				for (j = 0; j < t32; j++) {
4523					t64 = le64_to_cpu(dp->page_lcns[j]);
4524					if (t64 >= lcn0 && t64 <= lcn_e)
4525						dp->page_lcns[j] = 0;
4526				}
4527			}
4528		}
4529		goto next_log_record_analyze;
4530		;
4531	}
4532
4533	case OpenNonresidentAttribute:
4534		t16 = le16_to_cpu(lrh->target_attr);
4535		if (t16 >= bytes_per_rt(oatbl)) {
4536			/*
4537			 * Compute how big the table needs to be.
4538			 * Add 10 extra entries for some cushion
4539			 */
4540			u32 new_e = t16 / le16_to_cpu(oatbl->size);
4541
4542			new_e += 10 - le16_to_cpu(oatbl->used);
4543
4544			oatbl = extend_rsttbl(oatbl, new_e, ~0u);
4545			log->open_attr_tbl = oatbl;
4546			if (!oatbl) {
4547				err = -ENOMEM;
4548				goto out;
4549			}
4550		}
4551
4552		/* Point to the entry being opened */
4553		oe = alloc_rsttbl_from_idx(&oatbl, t16);
4554		log->open_attr_tbl = oatbl;
4555		if (!oe) {
4556			err = -ENOMEM;
4557			goto out;
4558		}
4559
4560		/* Initialize this entry from the log record */
4561		t16 = le16_to_cpu(lrh->redo_off);
4562		if (!rst->major_ver) {
4563			/* Convert version '0' into version '1' */
4564			struct OPEN_ATTR_ENRTY_32 *oe0 = Add2Ptr(lrh, t16);
4565
4566			oe->bytes_per_index = oe0->bytes_per_index;
4567			oe->type = oe0->type;
4568			oe->is_dirty_pages = oe0->is_dirty_pages;
4569			oe->name_len = 0; //oe0.name_len;
4570			oe->ref = oe0->ref;
4571			oe->open_record_lsn = oe0->open_record_lsn;
4572		} else {
4573			memcpy(oe, Add2Ptr(lrh, t16), bytes_per_attr_entry);
4574		}
4575
4576		t16 = le16_to_cpu(lrh->undo_len);
4577		if (t16) {
4578			oe->ptr = ntfs_malloc(t16);
4579			if (!oe->ptr) {
4580				err = -ENOMEM;
4581				goto out;
4582			}
4583			oe->name_len = t16 / sizeof(short);
4584			memcpy(oe->ptr,
4585			       Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)), t16);
4586			oe->is_attr_name = 1;
4587		} else {
4588			oe->ptr = NULL;
4589			oe->is_attr_name = 0;
4590		}
4591
4592		goto next_log_record_analyze;
4593
4594	case HotFix:
4595		t16 = le16_to_cpu(lrh->target_attr);
4596		t64 = le64_to_cpu(lrh->target_vcn);
4597		dp = find_dp(dptbl, t16, t64);
4598		if (dp) {
4599			size_t j = le64_to_cpu(lrh->target_vcn) -
4600				   le64_to_cpu(dp->vcn);
4601			if (dp->page_lcns[j])
4602				dp->page_lcns[j] = lrh->page_lcns[0];
4603		}
4604		goto next_log_record_analyze;
4605
4606	case EndTopLevelAction:
4607		tr = Add2Ptr(trtbl, transact_id);
4608		tr->prev_lsn = cpu_to_le64(rec_lsn);
4609		tr->undo_next_lsn = frh->client_undo_next_lsn;
4610		goto next_log_record_analyze;
4611
4612	case PrepareTransaction:
4613		tr = Add2Ptr(trtbl, transact_id);
4614		tr->transact_state = TransactionPrepared;
4615		goto next_log_record_analyze;
4616
4617	case CommitTransaction:
4618		tr = Add2Ptr(trtbl, transact_id);
4619		tr->transact_state = TransactionCommitted;
4620		goto next_log_record_analyze;
4621
4622	case ForgetTransaction:
4623		free_rsttbl_idx(trtbl, transact_id);
4624		goto next_log_record_analyze;
4625
4626	case Noop:
4627	case OpenAttributeTableDump:
4628	case AttributeNamesDump:
4629	case DirtyPageTableDump:
4630	case TransactionTableDump:
4631		/* The following cases require no action in the Analysis Pass */
4632		goto next_log_record_analyze;
4633
4634	default:
4635		/*
4636		 * All codes will be explicitly handled.
4637		 * If we see a code we do not expect, then we are in trouble
4638		 */
4639		goto next_log_record_analyze;
4640	}
4641
4642end_log_records_enumerate:
4643	lcb_put(lcb);
4644	lcb = NULL;
4645
4646	/*
4647	 * Scan the Dirty Page Table and Transaction Table for
4648	 * the lowest lsn, and return it as the Redo lsn
4649	 */
4650	dp = NULL;
4651	while ((dp = enum_rstbl(dptbl, dp))) {
4652		t64 = le64_to_cpu(dp->oldest_lsn);
4653		if (t64 && t64 < rlsn)
4654			rlsn = t64;
4655	}
4656
4657	tr = NULL;
4658	while ((tr = enum_rstbl(trtbl, tr))) {
4659		t64 = le64_to_cpu(tr->first_lsn);
4660		if (t64 && t64 < rlsn)
4661			rlsn = t64;
4662	}
4663
4664	/* Only proceed if the Dirty Page Table or Transaction table are not empty */
4665	if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total))
4666		goto end_reply;
4667
4668	sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
4669	if (is_ro)
4670		goto out;
4671
4672	/* Reopen all of the attributes with dirty pages */
4673	oe = NULL;
4674next_open_attribute:
4675
4676	oe = enum_rstbl(oatbl, oe);
4677	if (!oe) {
4678		err = 0;
4679		dp = NULL;
4680		goto next_dirty_page;
4681	}
4682
4683	oa = ntfs_zalloc(sizeof(struct OpenAttr));
4684	if (!oa) {
4685		err = -ENOMEM;
4686		goto out;
4687	}
4688
4689	inode = ntfs_iget5(sbi->sb, &oe->ref, NULL);
4690	if (IS_ERR(inode))
4691		goto fake_attr;
4692
4693	if (is_bad_inode(inode)) {
4694		iput(inode);
4695fake_attr:
4696		if (oa->ni) {
4697			iput(&oa->ni->vfs_inode);
4698			oa->ni = NULL;
4699		}
4700
4701		attr = attr_create_nonres_log(sbi, oe->type, 0, oe->ptr,
4702					      oe->name_len, 0);
4703		if (!attr) {
4704			ntfs_free(oa);
4705			err = -ENOMEM;
4706			goto out;
4707		}
4708		oa->attr = attr;
4709		oa->run1 = &oa->run0;
4710		goto final_oe;
4711	}
4712
4713	ni_oe = ntfs_i(inode);
4714	oa->ni = ni_oe;
4715
4716	attr = ni_find_attr(ni_oe, NULL, NULL, oe->type, oe->ptr, oe->name_len,
4717			    NULL, NULL);
4718
4719	if (!attr)
4720		goto fake_attr;
4721
4722	t32 = le32_to_cpu(attr->size);
4723	oa->attr = ntfs_memdup(attr, t32);
4724	if (!oa->attr)
4725		goto fake_attr;
4726
4727	if (!S_ISDIR(inode->i_mode)) {
4728		if (attr->type == ATTR_DATA && !attr->name_len) {
4729			oa->run1 = &ni_oe->file.run;
4730			goto final_oe;
4731		}
4732	} else {
4733		if (attr->type == ATTR_ALLOC &&
4734		    attr->name_len == ARRAY_SIZE(I30_NAME) &&
4735		    !memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) {
4736			oa->run1 = &ni_oe->dir.alloc_run;
4737			goto final_oe;
4738		}
4739	}
4740
4741	if (attr->non_res) {
4742		u16 roff = le16_to_cpu(attr->nres.run_off);
4743		CLST svcn = le64_to_cpu(attr->nres.svcn);
4744
4745		err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn,
4746				 le64_to_cpu(attr->nres.evcn), svcn,
4747				 Add2Ptr(attr, roff), t32 - roff);
4748		if (err < 0) {
4749			ntfs_free(oa->attr);
4750			oa->attr = NULL;
4751			goto fake_attr;
4752		}
4753		err = 0;
4754	}
4755	oa->run1 = &oa->run0;
4756	attr = oa->attr;
4757
4758final_oe:
4759	if (oe->is_attr_name == 1)
4760		ntfs_free(oe->ptr);
4761	oe->is_attr_name = 0;
4762	oe->ptr = oa;
4763	oe->name_len = attr->name_len;
4764
4765	goto next_open_attribute;
4766
4767	/*
4768	 * Now loop through the dirty page table to extract all of the Vcn/Lcn
4769	 * Mapping that we have, and insert it into the appropriate run
4770	 */
4771next_dirty_page:
4772	dp = enum_rstbl(dptbl, dp);
4773	if (!dp)
4774		goto do_redo_1;
4775
4776	oe = Add2Ptr(oatbl, le32_to_cpu(dp->target_attr));
4777
4778	if (oe->next != RESTART_ENTRY_ALLOCATED_LE)
4779		goto next_dirty_page;
4780
4781	oa = oe->ptr;
4782	if (!oa)
4783		goto next_dirty_page;
4784
4785	i = -1;
4786next_dirty_page_vcn:
4787	i += 1;
4788	if (i >= le32_to_cpu(dp->lcns_follow))
4789		goto next_dirty_page;
4790
4791	vcn = le64_to_cpu(dp->vcn) + i;
4792	size = (vcn + 1) << sbi->cluster_bits;
4793
4794	if (!dp->page_lcns[i])
4795		goto next_dirty_page_vcn;
4796
4797	rno = ino_get(&oe->ref);
4798	if (rno <= MFT_REC_MIRR &&
4799	    size < (MFT_REC_VOL + 1) * sbi->record_size &&
4800	    oe->type == ATTR_DATA) {
4801		goto next_dirty_page_vcn;
4802	}
4803
4804	lcn = le64_to_cpu(dp->page_lcns[i]);
4805
4806	if ((!run_lookup_entry(oa->run1, vcn, &lcn0, &len0, NULL) ||
4807	     lcn0 != lcn) &&
4808	    !run_add_entry(oa->run1, vcn, lcn, 1, false)) {
4809		err = -ENOMEM;
4810		goto out;
4811	}
4812	attr = oa->attr;
4813	t64 = le64_to_cpu(attr->nres.alloc_size);
4814	if (size > t64) {
4815		attr->nres.valid_size = attr->nres.data_size =
4816			attr->nres.alloc_size = cpu_to_le64(size);
4817	}
4818	goto next_dirty_page_vcn;
4819
4820do_redo_1:
4821	/*
4822	 * Perform the Redo Pass, to restore all of the dirty pages to the same
4823	 * contents that they had immediately before the crash
4824	 * If the dirty page table is empty, then we can skip the entire Redo Pass
4825	 */
4826	if (!dptbl || !dptbl->total)
4827		goto do_undo_action;
4828
4829	rec_lsn = rlsn;
4830
4831	/*
4832	 * Read the record at the Redo lsn, before falling
4833	 * into common code to handle each record
4834	 */
4835	err = read_log_rec_lcb(log, rlsn, lcb_ctx_next, &lcb);
4836	if (err)
4837		goto out;
4838
4839	/*
4840	 * Now loop to read all of our log records forwards,
4841	 * until we hit the end of the file, cleaning up at the end
4842	 */
4843do_action_next:
4844	frh = lcb->lrh;
4845
4846	if (LfsClientRecord != frh->record_type)
4847		goto read_next_log_do_action;
4848
4849	transact_id = le32_to_cpu(frh->transact_id);
4850	rec_len = le32_to_cpu(frh->client_data_len);
4851	lrh = lcb->log_rec;
4852
4853	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
4854		err = -EINVAL;
4855		goto out;
4856	}
4857
4858	/* Ignore log records that do not update pages */
4859	if (lrh->lcns_follow)
4860		goto find_dirty_page;
4861
4862	goto read_next_log_do_action;
4863
4864find_dirty_page:
4865	t16 = le16_to_cpu(lrh->target_attr);
4866	t64 = le64_to_cpu(lrh->target_vcn);
4867	dp = find_dp(dptbl, t16, t64);
4868
4869	if (!dp)
4870		goto read_next_log_do_action;
4871
4872	if (rec_lsn < le64_to_cpu(dp->oldest_lsn))
4873		goto read_next_log_do_action;
4874
4875	t16 = le16_to_cpu(lrh->target_attr);
4876	if (t16 >= bytes_per_rt(oatbl)) {
4877		err = -EINVAL;
4878		goto out;
4879	}
4880
4881	oe = Add2Ptr(oatbl, t16);
4882
4883	if (oe->next != RESTART_ENTRY_ALLOCATED_LE) {
4884		err = -EINVAL;
4885		goto out;
4886	}
4887
4888	oa = oe->ptr;
4889
4890	if (!oa) {
4891		err = -EINVAL;
4892		goto out;
4893	}
4894	attr = oa->attr;
4895
4896	vcn = le64_to_cpu(lrh->target_vcn);
4897
4898	if (!run_lookup_entry(oa->run1, vcn, &lcn, NULL, NULL) ||
4899	    lcn == SPARSE_LCN) {
4900		goto read_next_log_do_action;
4901	}
4902
4903	/* Point to the Redo data and get its length */
4904	data = Add2Ptr(lrh, le16_to_cpu(lrh->redo_off));
4905	dlen = le16_to_cpu(lrh->redo_len);
4906
4907	/* Shorten length by any Lcns which were deleted */
4908	saved_len = dlen;
4909
4910	for (i = le16_to_cpu(lrh->lcns_follow); i; i--) {
4911		size_t j;
4912		u32 alen, voff;
4913
4914		voff = le16_to_cpu(lrh->record_off) +
4915		       le16_to_cpu(lrh->attr_off);
4916		voff += le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT;
4917
4918		/* If the Vcn in question is allocated, we can just get out. */
4919		j = le64_to_cpu(lrh->target_vcn) - le64_to_cpu(dp->vcn);
4920		if (dp->page_lcns[j + i - 1])
4921			break;
4922
4923		if (!saved_len)
4924			saved_len = 1;
4925
4926		/*
4927		 * Calculate the allocated space left relative to the
4928		 * log record Vcn, after removing this unallocated Vcn
4929		 */
4930		alen = (i - 1) << sbi->cluster_bits;
4931
4932		/*
4933		 * If the update described in this log record goes beyond
4934		 * the allocated space, then we will have to reduce the length
4935		 */
4936		if (voff >= alen)
4937			dlen = 0;
4938		else if (voff + dlen > alen)
4939			dlen = alen - voff;
4940	}
4941
4942	/* If the resulting dlen from above is now zero, we can skip this log record */
4943	if (!dlen && saved_len)
4944		goto read_next_log_do_action;
4945
4946	t16 = le16_to_cpu(lrh->redo_op);
4947	if (can_skip_action(t16))
4948		goto read_next_log_do_action;
4949
4950	/* Apply the Redo operation in a common routine */
4951	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, &rec_lsn);
4952	if (err)
4953		goto out;
4954
4955	/* Keep reading and looping back until end of file */
4956read_next_log_do_action:
4957	err = read_next_log_rec(log, lcb, &rec_lsn);
4958	if (!err && rec_lsn)
4959		goto do_action_next;
4960
4961	lcb_put(lcb);
4962	lcb = NULL;
4963
4964do_undo_action:
4965	/* Scan Transaction Table */
4966	tr = NULL;
4967transaction_table_next:
4968	tr = enum_rstbl(trtbl, tr);
4969	if (!tr)
4970		goto undo_action_done;
4971
4972	if (TransactionActive != tr->transact_state || !tr->undo_next_lsn) {
4973		free_rsttbl_idx(trtbl, PtrOffset(trtbl, tr));
4974		goto transaction_table_next;
4975	}
4976
4977	log->transaction_id = PtrOffset(trtbl, tr);
4978	undo_next_lsn = le64_to_cpu(tr->undo_next_lsn);
4979
4980	/*
4981	 * We only have to do anything if the transaction has
4982	 * something in its undo_next_lsn field
4983	 */
4984	if (!undo_next_lsn)
4985		goto commit_undo;
4986
4987	/* Read the first record to be undone by this transaction */
4988	err = read_log_rec_lcb(log, undo_next_lsn, lcb_ctx_undo_next, &lcb);
4989	if (err)
4990		goto out;
4991
4992	/*
4993	 * Now loop to read all of our log records forwards,
4994	 * until we hit the end of the file, cleaning up at the end
4995	 */
4996undo_action_next:
4997
4998	lrh = lcb->log_rec;
4999	frh = lcb->lrh;
5000	transact_id = le32_to_cpu(frh->transact_id);
5001	rec_len = le32_to_cpu(frh->client_data_len);
5002
5003	if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) {
5004		err = -EINVAL;
5005		goto out;
5006	}
5007
5008	if (lrh->undo_op == cpu_to_le16(Noop))
5009		goto read_next_log_undo_action;
5010
5011	oe = Add2Ptr(oatbl, le16_to_cpu(lrh->target_attr));
5012	oa = oe->ptr;
5013
5014	t16 = le16_to_cpu(lrh->lcns_follow);
5015	if (!t16)
5016		goto add_allocated_vcns;
5017
5018	is_mapped = run_lookup_entry(oa->run1, le64_to_cpu(lrh->target_vcn),
5019				     &lcn, &clen, NULL);
5020
5021	/*
5022	 * If the mapping isn't already in the table or the mapping
5023	 * corresponds to a hole in the mapping, we need to make sure
5024	 * there is no partial page already in memory
5025	 */
5026	if (is_mapped && lcn != SPARSE_LCN && clen >= t16)
5027		goto add_allocated_vcns;
5028
5029	vcn = le64_to_cpu(lrh->target_vcn);
5030	vcn &= ~(log->clst_per_page - 1);
5031
5032add_allocated_vcns:
5033	for (i = 0, vcn = le64_to_cpu(lrh->target_vcn),
5034	    size = (vcn + 1) << sbi->cluster_bits;
5035	     i < t16; i++, vcn += 1, size += sbi->cluster_size) {
5036		attr = oa->attr;
5037		if (!attr->non_res) {
5038			if (size > le32_to_cpu(attr->res.data_size))
5039				attr->res.data_size = cpu_to_le32(size);
5040		} else {
5041			if (size > le64_to_cpu(attr->nres.data_size))
5042				attr->nres.valid_size = attr->nres.data_size =
5043					attr->nres.alloc_size =
5044						cpu_to_le64(size);
5045		}
5046	}
5047
5048	t16 = le16_to_cpu(lrh->undo_op);
5049	if (can_skip_action(t16))
5050		goto read_next_log_undo_action;
5051
5052	/* Point to the Undo data and get its length */
5053	data = Add2Ptr(lrh, le16_to_cpu(lrh->undo_off));
5054	dlen = le16_to_cpu(lrh->undo_len);
5055
5056	/* it is time to apply the undo action */
5057	err = do_action(log, oe, lrh, t16, data, dlen, rec_len, NULL);
5058
5059read_next_log_undo_action:
5060	/*
5061	 * Keep reading and looping back until we have read the
5062	 * last record for this transaction
5063	 */
5064	err = read_next_log_rec(log, lcb, &rec_lsn);
5065	if (err)
5066		goto out;
5067
5068	if (rec_lsn)
5069		goto undo_action_next;
5070
5071	lcb_put(lcb);
5072	lcb = NULL;
5073
5074commit_undo:
5075	free_rsttbl_idx(trtbl, log->transaction_id);
5076
5077	log->transaction_id = 0;
5078
5079	goto transaction_table_next;
5080
5081undo_action_done:
5082
5083	ntfs_update_mftmirr(sbi, 0);
5084
5085	sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY;
5086
5087end_reply:
5088
5089	err = 0;
5090	if (is_ro)
5091		goto out;
5092
5093	rh = ntfs_zalloc(log->page_size);
5094	if (!rh) {
5095		err = -ENOMEM;
5096		goto out;
5097	}
5098
5099	rh->rhdr.sign = NTFS_RSTR_SIGNATURE;
5100	rh->rhdr.fix_off = cpu_to_le16(offsetof(struct RESTART_HDR, fixups));
5101	t16 = (log->page_size >> SECTOR_SHIFT) + 1;
5102	rh->rhdr.fix_num = cpu_to_le16(t16);
5103	rh->sys_page_size = cpu_to_le32(log->page_size);
5104	rh->page_size = cpu_to_le32(log->page_size);
5105
5106	t16 = ALIGN(offsetof(struct RESTART_HDR, fixups) +
5107		    sizeof(short) * t16, 8);
5108	rh->ra_off = cpu_to_le16(t16);
5109	rh->minor_ver = cpu_to_le16(1); // 0x1A:
5110	rh->major_ver = cpu_to_le16(1); // 0x1C:
5111
5112	ra2 = Add2Ptr(rh, t16);
5113	memcpy(ra2, ra, sizeof(struct RESTART_AREA));
5114
5115	ra2->client_idx[0] = 0;
5116	ra2->client_idx[1] = LFS_NO_CLIENT_LE;
5117	ra2->flags = cpu_to_le16(2);
5118
5119	le32_add_cpu(&ra2->open_log_count, 1);
5120
5121	ntfs_fix_pre_write(&rh->rhdr, log->page_size);
5122
5123	err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rh, log->page_size);
5124	if (!err)
5125		err = ntfs_sb_write_run(sbi, &log->ni->file.run, log->page_size,
5126					rh, log->page_size);
5127
5128	ntfs_free(rh);
5129	if (err)
5130		goto out;
5131
5132out:
5133	ntfs_free(rst);
5134	if (lcb)
5135		lcb_put(lcb);
5136
5137	/* Scan the Open Attribute Table to close all of the open attributes */
5138	oe = NULL;
5139	while ((oe = enum_rstbl(oatbl, oe))) {
5140		rno = ino_get(&oe->ref);
5141
5142		if (oe->is_attr_name == 1) {
5143			ntfs_free(oe->ptr);
5144			oe->ptr = NULL;
5145			continue;
5146		}
5147
5148		if (oe->is_attr_name)
5149			continue;
5150
5151		oa = oe->ptr;
5152		if (!oa)
5153			continue;
5154
5155		run_close(&oa->run0);
5156		ntfs_free(oa->attr);
5157		if (oa->ni)
5158			iput(&oa->ni->vfs_inode);
5159		ntfs_free(oa);
5160	}
5161
5162	ntfs_free(trtbl);
5163	ntfs_free(oatbl);
5164	ntfs_free(dptbl);
5165	ntfs_free(attr_names);
5166	ntfs_free(rst_info.r_page);
5167
5168	ntfs_free(ra);
5169	ntfs_free(log->one_page_buf);
5170
5171	if (err)
5172		sbi->flags |= NTFS_FLAGS_NEED_REPLAY;
5173
5174	if (err == -EROFS)
5175		err = 0;
5176	else if (log->set_dirty)
5177		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
5178
5179	ntfs_free(log);
5180
5181	return err;
5182}
5183