// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_cache.h"
#include "disk_groups.h"
#include "opts.h"
#include "replicas.h"
#include "sb-members.h"
#include "super-io.h"

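/*
 * Expand the x() macro lists into arrays of name strings: each x(t, n, ...)
 * entry stringifies the name t at index n, and the trailing NULL terminates
 * the array.
 */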
#define x(t, n, ...) [n] = #t,
static const char * const bch2_iops_measurements[] = {
	BCH_IOPS_MEASUREMENTS()
	NULL
};

char * const bch2_member_error_strs[] = {
	BCH_MEMBER_ERROR_TYPES()
	NULL
};
#undef x

/* Accessors for bch_sb_field_members_v1 and bch_sb_field_members_v2: */

struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
{
	return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
}

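/*
 * On-disk member entries may be smaller than the current struct bch_member
 * (mi->member_bytes gives the on-disk size), so reads go through a zeroed
 * struct: fields the on-disk entry doesn't have come back as zero.
 */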
static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
{
	struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
	memset(&ret, 0, sizeof(ret));
	memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
	return ret;
}

static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
{
	return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
}

static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
{
	struct bch_member ret, *p = members_v1_get_mut(mi, i);
	memset(&ret, 0, sizeof(ret));
	memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
	return ret;
}

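/* Prefer members_v2; fall back to members_v1 on older superblocks. */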
struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
{
	struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
	if (mi2)
		return members_v2_get(mi2, i);
	struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
	return members_v1_get(mi1, i);
}

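/*
 * Widen each on-disk member entry to the current sizeof(struct bch_member),
 * in place: after growing the section, entries are moved starting with the
 * last so that a widened copy never overwrites an entry that hasn't been
 * moved yet.
 */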
static int sb_members_v2_resize_entries(struct bch_fs *c)
{
	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

	if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
		unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
					      c->disk_sb.sb->nr_devices), 8);

		mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
		if (!mi)
			return -BCH_ERR_ENOSPC_sb_members_v2;

		for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
			void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
			memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
			memset(dst + le16_to_cpu(mi->member_bytes),
			       0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
		}
		mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
	}
	return 0;
}

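/*
 * Ensure the superblock has a members_v2 section, creating it from
 * members_v1 if necessary, then widen its entries to the current member
 * size.
 */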
int bch2_sb_members_v2_init(struct bch_fs *c)
{
	struct bch_sb_field_members_v1 *mi1;
	struct bch_sb_field_members_v2 *mi2;

	if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
		mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
				DIV_ROUND_UP(sizeof(*mi2) +
					     sizeof(struct bch_member) * c->sb.nr_devices,
					     sizeof(u64)));
		if (!mi2)
			return -BCH_ERR_ENOSPC_sb_members_v2;

		mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
		memcpy(&mi2->_members[0], &mi1->_members[0],
		       BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
		memset(&mi2->pad[0], 0, sizeof(mi2->pad));
		mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
	}

	return sb_members_v2_resize_entries(c);
}

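/*
 * Mirror the first BCH_MEMBER_V1_BYTES of each members_v2 entry back into a
 * members_v1 section, for compatibility with versions that only read
 * members_v1.
 */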
int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
{
	struct bch_sb_field_members_v1 *mi1;
	struct bch_sb_field_members_v2 *mi2;

	mi1 = bch2_sb_field_resize(disk_sb, members_v1,
			DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
				     disk_sb->sb->nr_devices, sizeof(u64)));
	if (!mi1)
		return -BCH_ERR_ENOSPC_sb_members;

	mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);

	for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
		memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);

	return 0;
}

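/* Sanity checks shared by the members_v1 and members_v2 validate hooks: */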
static int validate_member(struct printbuf *err,
			   struct bch_member m,
			   struct bch_sb *sb,
			   int i)
{
	if (le64_to_cpu(m.nbuckets) > LONG_MAX) {
		prt_printf(err, "device %u: too many buckets (got %llu, max %lu)",
			   i, le64_to_cpu(m.nbuckets), LONG_MAX);
		return -BCH_ERR_invalid_sb_members;
	}

	if (le16_to_cpu(m.first_bucket) >= le64_to_cpu(m.nbuckets) ||
	    le64_to_cpu(m.nbuckets) -
	    le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
		prt_printf(err, "device %u: not enough buckets (got %llu, min %u)",
			   i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
		return -BCH_ERR_invalid_sb_members;
	}

	if (le16_to_cpu(m.bucket_size) <
	    le16_to_cpu(sb->block_size)) {
		prt_printf(err, "device %u: bucket size %u smaller than block size %u",
			   i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
		return -BCH_ERR_invalid_sb_members;
	}

	if (le16_to_cpu(m.bucket_size) <
	    BCH_SB_BTREE_NODE_SIZE(sb)) {
		prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
			   i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
		return -BCH_ERR_invalid_sb_members;
	}

	return 0;
}

static void member_to_text(struct printbuf *out,
			   struct bch_member m,
			   struct bch_sb_field_disk_groups *gi,
			   struct bch_sb *sb,
			   int i)
{
	unsigned data_have = bch2_sb_dev_has_data(sb, i);
	u64 bucket_size = le16_to_cpu(m.bucket_size);
	u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;

	if (!bch2_member_exists(&m))
		return;

	prt_printf(out, "Device:");
	prt_tab(out);
	prt_printf(out, "%u", i);
	prt_newline(out);

	printbuf_indent_add(out, 2);

	prt_printf(out, "Label:");
	prt_tab(out);
	if (BCH_MEMBER_GROUP(&m)) {
		unsigned idx = BCH_MEMBER_GROUP(&m) - 1;

		if (idx < disk_groups_nr(gi))
			prt_printf(out, "%s (%u)",
				   gi->entries[idx].label, idx);
		else
			prt_printf(out, "(bad disk labels section)");
	} else {
		prt_printf(out, "(none)");
	}
	prt_newline(out);

	prt_printf(out, "UUID:");
	prt_tab(out);
	pr_uuid(out, m.uuid.b);
	prt_newline(out);

	prt_printf(out, "Size:");
	prt_tab(out);
	prt_units_u64(out, device_size << 9);
	prt_newline(out);

	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
		prt_printf(out, "%s errors:", bch2_member_error_strs[i]);
		prt_tab(out);
		prt_u64(out, le64_to_cpu(m.errors[i]));
		prt_newline(out);
	}

	for (unsigned i = 0; i < BCH_IOPS_NR; i++) {
		prt_printf(out, "%s iops:", bch2_iops_measurements[i]);
		prt_tab(out);
		prt_printf(out, "%u", le32_to_cpu(m.iops[i]));
		prt_newline(out);
	}

	prt_printf(out, "Bucket size:");
	prt_tab(out);
	prt_units_u64(out, bucket_size << 9);
	prt_newline(out);

	prt_printf(out, "First bucket:");
	prt_tab(out);
	prt_printf(out, "%u", le16_to_cpu(m.first_bucket));
	prt_newline(out);

	prt_printf(out, "Buckets:");
	prt_tab(out);
	prt_printf(out, "%llu", le64_to_cpu(m.nbuckets));
	prt_newline(out);

	prt_printf(out, "Last mount:");
	prt_tab(out);
	if (m.last_mount)
		bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
	else
		prt_printf(out, "(never)");
	prt_newline(out);

	prt_printf(out, "Last superblock write:");
	prt_tab(out);
	prt_u64(out, le64_to_cpu(m.seq));
	prt_newline(out);

	prt_printf(out, "State:");
	prt_tab(out);
	prt_printf(out, "%s",
		   BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
		   ? bch2_member_states[BCH_MEMBER_STATE(&m)]
		   : "unknown");
	prt_newline(out);

	prt_printf(out, "Data allowed:");
	prt_tab(out);
	if (BCH_MEMBER_DATA_ALLOWED(&m))
		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
	else
		prt_printf(out, "(none)");
	prt_newline(out);

	prt_printf(out, "Has data:");
	prt_tab(out);
	if (data_have)
		prt_bitflags(out, __bch2_data_types, data_have);
	else
		prt_printf(out, "(none)");
	prt_newline(out);

	prt_str(out, "Durability:");
	prt_tab(out);
	prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
	prt_newline(out);

	prt_printf(out, "Discard:");
	prt_tab(out);
	prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
	prt_newline(out);

	prt_printf(out, "Freespace initialized:");
	prt_tab(out);
	prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
	prt_newline(out);

	printbuf_indent_sub(out, 2);
}

static int bch2_sb_members_v1_validate(struct bch_sb *sb,
				       struct bch_sb_field *f,
				       struct printbuf *err)
{
	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
	unsigned i;

	if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
		prt_printf(err, "too many devices for section size");
		return -BCH_ERR_invalid_sb_members;
	}

	for (i = 0; i < sb->nr_devices; i++) {
		struct bch_member m = members_v1_get(mi, i);

		int ret = validate_member(err, m, sb, i);
		if (ret)
			return ret;
	}

	return 0;
}

static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
				       struct bch_sb_field *f)
{
	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
	unsigned i;

	for (i = 0; i < sb->nr_devices; i++)
		member_to_text(out, members_v1_get(mi, i), gi, sb, i);
}

const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
	.validate	= bch2_sb_members_v1_validate,
	.to_text	= bch2_sb_members_v1_to_text,
};

static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
				       struct bch_sb_field *f)
{
	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
	unsigned i;

	for (i = 0; i < sb->nr_devices; i++)
		member_to_text(out, members_v2_get(mi, i), gi, sb, i);
}

static int bch2_sb_members_v2_validate(struct bch_sb *sb,
				       struct bch_sb_field *f,
				       struct printbuf *err)
{
	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
	size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
		(void *) mi;

	if (mi_bytes > vstruct_bytes(&mi->field)) {
		prt_printf(err, "section too small (%zu > %zu)",
			   mi_bytes, vstruct_bytes(&mi->field));
		return -BCH_ERR_invalid_sb_members;
	}

	for (unsigned i = 0; i < sb->nr_devices; i++) {
		int ret = validate_member(err, members_v2_get(mi, i), sb, i);
		if (ret)
			return ret;
	}

	return 0;
}

const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
	.validate	= bch2_sb_members_v2_validate,
	.to_text	= bch2_sb_members_v2_to_text,
};

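/* Flush the in-memory per-device IO error counters out to members_v2: */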
void bch2_sb_members_from_cpu(struct bch_fs *c)
{
	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

	rcu_read_lock();
	for_each_member_device_rcu(c, ca, NULL) {
		struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);

		for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
			m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
	}
	rcu_read_unlock();
}

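/*
 * Print both the lifetime IO error counters and the counters accumulated
 * since the last bch2_dev_errors_reset().
 */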
void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;
	struct bch_member m;

	mutex_lock(&c->sb_lock);
	m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	printbuf_tabstop_push(out, 12);

	prt_str(out, "IO errors since filesystem creation");
	prt_newline(out);

	printbuf_indent_add(out, 2);
	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
		prt_printf(out, "%s:", bch2_member_error_strs[i]);
		prt_tab(out);
		prt_u64(out, atomic64_read(&ca->errors[i]));
		prt_newline(out);
	}
	printbuf_indent_sub(out, 2);

	prt_str(out, "IO errors since ");
	bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
	prt_str(out, " ago");
	prt_newline(out);

	printbuf_indent_add(out, 2);
	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
		prt_printf(out, "%s:", bch2_member_error_strs[i]);
		prt_tab(out);
		prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
		prt_newline(out);
	}
	printbuf_indent_sub(out, 2);
}

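/*
 * Snapshot the current error counters into errors_at_reset and record the
 * reset time, so future reports can show errors relative to this point.
 */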
void bch2_dev_errors_reset(struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;
	struct bch_member *m;

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
}

/*
 * Per-member "range has btree nodes" bitmap:
 *
 * If we ever have to run the btree node scan for repair, this lets us skip
 * ranges known not to contain btree nodes instead of scanning full devices.
 */

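/*
 * Bitmap encoding: bit i covers device sectors [i << btree_bitmap_shift,
 * (i + 1) << btree_bitmap_shift); a set bit means the range may contain
 * btree nodes, a clear bit means it definitely does not. E.g. with
 * btree_bitmap_shift = 20, the 64-bit bitmap covers the first 32 GiB of
 * the device at 512 MiB granularity.
 */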
bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
{
	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
		if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
							  ptr->offset, btree_sectors(c)))
			return false;
	return true;
}

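/*
 * Mark [start, start + sectors) as possibly containing btree nodes. If the
 * 64 bits at the current shift can't reach the end of the range, the
 * granularity is coarsened first: each increment of btree_bitmap_shift
 * folds bit i into bit i >> 1.
 */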
static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
				u64 start, unsigned sectors)
{
	struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
	u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
	u64 end = start + sectors;

	int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
	if (resize > 0) {
		u64 new_bitmap = 0;

		for (unsigned i = 0; i < 64; i++)
			if (bitmap & BIT_ULL(i))
				new_bitmap |= BIT_ULL(i >> resize);
		bitmap = new_bitmap;
		m->btree_bitmap_shift += resize;
	}

	for (unsigned bit = start >> m->btree_bitmap_shift;
	     (u64) bit << m->btree_bitmap_shift < end;
	     bit++)
		bitmap |= BIT_ULL(bit);

	m->btree_allocated_bitmap = cpu_to_le64(bitmap);
}

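/* Mark every pointer in @k in its device's btree-allocated bitmap. */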
void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
		__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
}