1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (c) 2018 Red Hat, Inc.
4 *
5 * This is a test "dust" device, which fails reads on specified
6 * sectors, emulating the behavior of a hard disk drive sending
7 * a "Read Medium Error" sense.
8 *
9 */
10
11#include <linux/device-mapper.h>
12#include <linux/module.h>
13#include <linux/rbtree.h>
14
15#define DM_MSG_PREFIX "dust"
16
/*
 * One entry in a dust device's bad block tree.
 *
 * NOTE: keep @node as the first member. __dust_clear_badblocks() hands the
 * rb_node pointer itself to kfree(), which is only the start of the
 * allocation while @node sits at offset 0.
 */
struct badblock {
	struct rb_node node;		/* linkage in dust_device.badblocklist */
	sector_t bb;			/* block number; the tree's sort key */
	unsigned char wr_fail_cnt;	/* writes still to be failed before a write clears the entry */
};
22
23struct dust_device {
24	struct dm_dev *dev;
25	struct rb_root badblocklist;
26	unsigned long long badblock_count;
27	spinlock_t dust_lock;
28	unsigned int blksz;
29	int sect_per_block_shift;
30	unsigned int sect_per_block;
31	sector_t start;
32	bool fail_read_on_bb:1;
33	bool quiet_mode:1;
34};
35
36static struct badblock *dust_rb_search(struct rb_root *root, sector_t blk)
37{
38	struct rb_node *node = root->rb_node;
39
40	while (node) {
41		struct badblock *bblk = rb_entry(node, struct badblock, node);
42
43		if (bblk->bb > blk)
44			node = node->rb_left;
45		else if (bblk->bb < blk)
46			node = node->rb_right;
47		else
48			return bblk;
49	}
50
51	return NULL;
52}
53
54static bool dust_rb_insert(struct rb_root *root, struct badblock *new)
55{
56	struct badblock *bblk;
57	struct rb_node **link = &root->rb_node, *parent = NULL;
58	sector_t value = new->bb;
59
60	while (*link) {
61		parent = *link;
62		bblk = rb_entry(parent, struct badblock, node);
63
64		if (bblk->bb > value)
65			link = &(*link)->rb_left;
66		else if (bblk->bb < value)
67			link = &(*link)->rb_right;
68		else
69			return false;
70	}
71
72	rb_link_node(&new->node, parent, link);
73	rb_insert_color(&new->node, root);
74
75	return true;
76}
77
78static int dust_remove_block(struct dust_device *dd, unsigned long long block)
79{
80	struct badblock *bblock;
81	unsigned long flags;
82
83	spin_lock_irqsave(&dd->dust_lock, flags);
84	bblock = dust_rb_search(&dd->badblocklist, block);
85
86	if (bblock == NULL) {
87		if (!dd->quiet_mode) {
88			DMERR("%s: block %llu not found in badblocklist",
89			      __func__, block);
90		}
91		spin_unlock_irqrestore(&dd->dust_lock, flags);
92		return -EINVAL;
93	}
94
95	rb_erase(&bblock->node, &dd->badblocklist);
96	dd->badblock_count--;
97	if (!dd->quiet_mode)
98		DMINFO("%s: badblock removed at block %llu", __func__, block);
99	kfree(bblock);
100	spin_unlock_irqrestore(&dd->dust_lock, flags);
101
102	return 0;
103}
104
105static int dust_add_block(struct dust_device *dd, unsigned long long block,
106			  unsigned char wr_fail_cnt)
107{
108	struct badblock *bblock;
109	unsigned long flags;
110
111	bblock = kmalloc(sizeof(*bblock), GFP_KERNEL);
112	if (bblock == NULL) {
113		if (!dd->quiet_mode)
114			DMERR("%s: badblock allocation failed", __func__);
115		return -ENOMEM;
116	}
117
118	spin_lock_irqsave(&dd->dust_lock, flags);
119	bblock->bb = block;
120	bblock->wr_fail_cnt = wr_fail_cnt;
121	if (!dust_rb_insert(&dd->badblocklist, bblock)) {
122		if (!dd->quiet_mode) {
123			DMERR("%s: block %llu already in badblocklist",
124			      __func__, block);
125		}
126		spin_unlock_irqrestore(&dd->dust_lock, flags);
127		kfree(bblock);
128		return -EINVAL;
129	}
130
131	dd->badblock_count++;
132	if (!dd->quiet_mode) {
133		DMINFO("%s: badblock added at block %llu with write fail count %u",
134		       __func__, block, wr_fail_cnt);
135	}
136	spin_unlock_irqrestore(&dd->dust_lock, flags);
137
138	return 0;
139}
140
141static int dust_query_block(struct dust_device *dd, unsigned long long block, char *result,
142			    unsigned int maxlen, unsigned int *sz_ptr)
143{
144	struct badblock *bblock;
145	unsigned long flags;
146	unsigned int sz = *sz_ptr;
147
148	spin_lock_irqsave(&dd->dust_lock, flags);
149	bblock = dust_rb_search(&dd->badblocklist, block);
150	if (bblock != NULL)
151		DMEMIT("%s: block %llu found in badblocklist", __func__, block);
152	else
153		DMEMIT("%s: block %llu not found in badblocklist", __func__, block);
154	spin_unlock_irqrestore(&dd->dust_lock, flags);
155
156	return 1;
157}
158
159static int __dust_map_read(struct dust_device *dd, sector_t thisblock)
160{
161	struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);
162
163	if (bblk)
164		return DM_MAPIO_KILL;
165
166	return DM_MAPIO_REMAPPED;
167}
168
169static int dust_map_read(struct dust_device *dd, sector_t thisblock,
170			 bool fail_read_on_bb)
171{
172	unsigned long flags;
173	int r = DM_MAPIO_REMAPPED;
174
175	if (fail_read_on_bb) {
176		thisblock >>= dd->sect_per_block_shift;
177		spin_lock_irqsave(&dd->dust_lock, flags);
178		r = __dust_map_read(dd, thisblock);
179		spin_unlock_irqrestore(&dd->dust_lock, flags);
180	}
181
182	return r;
183}
184
185static int __dust_map_write(struct dust_device *dd, sector_t thisblock)
186{
187	struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock);
188
189	if (bblk && bblk->wr_fail_cnt > 0) {
190		bblk->wr_fail_cnt--;
191		return DM_MAPIO_KILL;
192	}
193
194	if (bblk) {
195		rb_erase(&bblk->node, &dd->badblocklist);
196		dd->badblock_count--;
197		kfree(bblk);
198		if (!dd->quiet_mode) {
199			sector_div(thisblock, dd->sect_per_block);
200			DMINFO("block %llu removed from badblocklist by write",
201			       (unsigned long long)thisblock);
202		}
203	}
204
205	return DM_MAPIO_REMAPPED;
206}
207
208static int dust_map_write(struct dust_device *dd, sector_t thisblock,
209			  bool fail_read_on_bb)
210{
211	unsigned long flags;
212	int r = DM_MAPIO_REMAPPED;
213
214	if (fail_read_on_bb) {
215		thisblock >>= dd->sect_per_block_shift;
216		spin_lock_irqsave(&dd->dust_lock, flags);
217		r = __dust_map_write(dd, thisblock);
218		spin_unlock_irqrestore(&dd->dust_lock, flags);
219	}
220
221	return r;
222}
223
224static int dust_map(struct dm_target *ti, struct bio *bio)
225{
226	struct dust_device *dd = ti->private;
227	int r;
228
229	bio_set_dev(bio, dd->dev->bdev);
230	bio->bi_iter.bi_sector = dd->start + dm_target_offset(ti, bio->bi_iter.bi_sector);
231
232	if (bio_data_dir(bio) == READ)
233		r = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
234	else
235		r = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb);
236
237	return r;
238}
239
240static bool __dust_clear_badblocks(struct rb_root *tree,
241				   unsigned long long count)
242{
243	struct rb_node *node = NULL, *nnode = NULL;
244
245	nnode = rb_first(tree);
246	if (nnode == NULL) {
247		BUG_ON(count != 0);
248		return false;
249	}
250
251	while (nnode) {
252		node = nnode;
253		nnode = rb_next(node);
254		rb_erase(node, tree);
255		count--;
256		kfree(node);
257	}
258	BUG_ON(count != 0);
259	BUG_ON(tree->rb_node != NULL);
260
261	return true;
262}
263
264static int dust_clear_badblocks(struct dust_device *dd, char *result, unsigned int maxlen,
265				unsigned int *sz_ptr)
266{
267	unsigned long flags;
268	struct rb_root badblocklist;
269	unsigned long long badblock_count;
270	unsigned int sz = *sz_ptr;
271
272	spin_lock_irqsave(&dd->dust_lock, flags);
273	badblocklist = dd->badblocklist;
274	badblock_count = dd->badblock_count;
275	dd->badblocklist = RB_ROOT;
276	dd->badblock_count = 0;
277	spin_unlock_irqrestore(&dd->dust_lock, flags);
278
279	if (!__dust_clear_badblocks(&badblocklist, badblock_count))
280		DMEMIT("%s: no badblocks found", __func__);
281	else
282		DMEMIT("%s: badblocks cleared", __func__);
283
284	return 1;
285}
286
287static int dust_list_badblocks(struct dust_device *dd, char *result, unsigned int maxlen,
288				unsigned int *sz_ptr)
289{
290	unsigned long flags;
291	struct rb_root badblocklist;
292	struct rb_node *node;
293	struct badblock *bblk;
294	unsigned int sz = *sz_ptr;
295	unsigned long long num = 0;
296
297	spin_lock_irqsave(&dd->dust_lock, flags);
298	badblocklist = dd->badblocklist;
299	for (node = rb_first(&badblocklist); node; node = rb_next(node)) {
300		bblk = rb_entry(node, struct badblock, node);
301		DMEMIT("%llu\n", bblk->bb);
302		num++;
303	}
304
305	spin_unlock_irqrestore(&dd->dust_lock, flags);
306	if (!num)
307		DMEMIT("No blocks in badblocklist");
308
309	return 1;
310}
311
312/*
313 * Target parameters:
314 *
315 * <device_path> <offset> <blksz>
316 *
317 * device_path: path to the block device
318 * offset: offset to data area from start of device_path
319 * blksz: block size (minimum 512, maximum 1073741824, must be a power of 2)
320 */
321static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv)
322{
323	struct dust_device *dd;
324	unsigned long long tmp;
325	char dummy;
326	unsigned int blksz;
327	unsigned int sect_per_block;
328	sector_t DUST_MAX_BLKSZ_SECTORS = 2097152;
329	sector_t max_block_sectors = min(ti->len, DUST_MAX_BLKSZ_SECTORS);
330
331	if (argc != 3) {
332		ti->error = "Invalid argument count";
333		return -EINVAL;
334	}
335
336	if (kstrtouint(argv[2], 10, &blksz) || !blksz) {
337		ti->error = "Invalid block size parameter";
338		return -EINVAL;
339	}
340
341	if (blksz < 512) {
342		ti->error = "Block size must be at least 512";
343		return -EINVAL;
344	}
345
346	if (!is_power_of_2(blksz)) {
347		ti->error = "Block size must be a power of 2";
348		return -EINVAL;
349	}
350
351	if (to_sector(blksz) > max_block_sectors) {
352		ti->error = "Block size is too large";
353		return -EINVAL;
354	}
355
356	sect_per_block = (blksz >> SECTOR_SHIFT);
357
358	if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) {
359		ti->error = "Invalid device offset sector";
360		return -EINVAL;
361	}
362
363	dd = kzalloc(sizeof(struct dust_device), GFP_KERNEL);
364	if (dd == NULL) {
365		ti->error = "Cannot allocate context";
366		return -ENOMEM;
367	}
368
369	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dd->dev)) {
370		ti->error = "Device lookup failed";
371		kfree(dd);
372		return -EINVAL;
373	}
374
375	dd->sect_per_block = sect_per_block;
376	dd->blksz = blksz;
377	dd->start = tmp;
378
379	dd->sect_per_block_shift = __ffs(sect_per_block);
380
381	/*
382	 * Whether to fail a read on a "bad" block.
383	 * Defaults to false; enabled later by message.
384	 */
385	dd->fail_read_on_bb = false;
386
387	/*
388	 * Initialize bad block list rbtree.
389	 */
390	dd->badblocklist = RB_ROOT;
391	dd->badblock_count = 0;
392	spin_lock_init(&dd->dust_lock);
393
394	dd->quiet_mode = false;
395
396	BUG_ON(dm_set_target_max_io_len(ti, dd->sect_per_block) != 0);
397
398	ti->num_discard_bios = 1;
399	ti->num_flush_bios = 1;
400	ti->private = dd;
401
402	return 0;
403}
404
405static void dust_dtr(struct dm_target *ti)
406{
407	struct dust_device *dd = ti->private;
408
409	__dust_clear_badblocks(&dd->badblocklist, dd->badblock_count);
410	dm_put_device(ti, dd->dev);
411	kfree(dd);
412}
413
414static int dust_message(struct dm_target *ti, unsigned int argc, char **argv,
415			char *result, unsigned int maxlen)
416{
417	struct dust_device *dd = ti->private;
418	sector_t size = bdev_nr_sectors(dd->dev->bdev);
419	bool invalid_msg = false;
420	int r = -EINVAL;
421	unsigned long long tmp, block;
422	unsigned char wr_fail_cnt;
423	unsigned int tmp_ui;
424	unsigned long flags;
425	unsigned int sz = 0;
426	char dummy;
427
428	if (argc == 1) {
429		if (!strcasecmp(argv[0], "addbadblock") ||
430		    !strcasecmp(argv[0], "removebadblock") ||
431		    !strcasecmp(argv[0], "queryblock")) {
432			DMERR("%s requires an additional argument", argv[0]);
433		} else if (!strcasecmp(argv[0], "disable")) {
434			DMINFO("disabling read failures on bad sectors");
435			dd->fail_read_on_bb = false;
436			r = 0;
437		} else if (!strcasecmp(argv[0], "enable")) {
438			DMINFO("enabling read failures on bad sectors");
439			dd->fail_read_on_bb = true;
440			r = 0;
441		} else if (!strcasecmp(argv[0], "countbadblocks")) {
442			spin_lock_irqsave(&dd->dust_lock, flags);
443			DMEMIT("countbadblocks: %llu badblock(s) found",
444			       dd->badblock_count);
445			spin_unlock_irqrestore(&dd->dust_lock, flags);
446			r = 1;
447		} else if (!strcasecmp(argv[0], "clearbadblocks")) {
448			r = dust_clear_badblocks(dd, result, maxlen, &sz);
449		} else if (!strcasecmp(argv[0], "quiet")) {
450			if (!dd->quiet_mode)
451				dd->quiet_mode = true;
452			else
453				dd->quiet_mode = false;
454			r = 0;
455		} else if (!strcasecmp(argv[0], "listbadblocks")) {
456			r = dust_list_badblocks(dd, result, maxlen, &sz);
457		} else {
458			invalid_msg = true;
459		}
460	} else if (argc == 2) {
461		if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
462			return r;
463
464		block = tmp;
465		sector_div(size, dd->sect_per_block);
466		if (block > size) {
467			DMERR("selected block value out of range");
468			return r;
469		}
470
471		if (!strcasecmp(argv[0], "addbadblock"))
472			r = dust_add_block(dd, block, 0);
473		else if (!strcasecmp(argv[0], "removebadblock"))
474			r = dust_remove_block(dd, block);
475		else if (!strcasecmp(argv[0], "queryblock"))
476			r = dust_query_block(dd, block, result, maxlen, &sz);
477		else
478			invalid_msg = true;
479
480	} else if (argc == 3) {
481		if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1)
482			return r;
483
484		if (sscanf(argv[2], "%u%c", &tmp_ui, &dummy) != 1)
485			return r;
486
487		block = tmp;
488		if (tmp_ui > 255) {
489			DMERR("selected write fail count out of range");
490			return r;
491		}
492		wr_fail_cnt = tmp_ui;
493		sector_div(size, dd->sect_per_block);
494		if (block > size) {
495			DMERR("selected block value out of range");
496			return r;
497		}
498
499		if (!strcasecmp(argv[0], "addbadblock"))
500			r = dust_add_block(dd, block, wr_fail_cnt);
501		else
502			invalid_msg = true;
503
504	} else
505		DMERR("invalid number of arguments '%d'", argc);
506
507	if (invalid_msg)
508		DMERR("unrecognized message '%s' received", argv[0]);
509
510	return r;
511}
512
513static void dust_status(struct dm_target *ti, status_type_t type,
514			unsigned int status_flags, char *result, unsigned int maxlen)
515{
516	struct dust_device *dd = ti->private;
517	unsigned int sz = 0;
518
519	switch (type) {
520	case STATUSTYPE_INFO:
521		DMEMIT("%s %s %s", dd->dev->name,
522		       dd->fail_read_on_bb ? "fail_read_on_bad_block" : "bypass",
523		       dd->quiet_mode ? "quiet" : "verbose");
524		break;
525
526	case STATUSTYPE_TABLE:
527		DMEMIT("%s %llu %u", dd->dev->name,
528		       (unsigned long long)dd->start, dd->blksz);
529		break;
530
531	case STATUSTYPE_IMA:
532		*result = '\0';
533		break;
534	}
535}
536
537static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
538{
539	struct dust_device *dd = ti->private;
540	struct dm_dev *dev = dd->dev;
541
542	*bdev = dev->bdev;
543
544	/*
545	 * Only pass ioctls through if the device sizes match exactly.
546	 */
547	if (dd->start || ti->len != bdev_nr_sectors(dev->bdev))
548		return 1;
549
550	return 0;
551}
552
553static int dust_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
554				void *data)
555{
556	struct dust_device *dd = ti->private;
557
558	return fn(ti, dd->dev, dd->start, ti->len, data);
559}
560
561static struct target_type dust_target = {
562	.name = "dust",
563	.version = {1, 0, 0},
564	.module = THIS_MODULE,
565	.ctr = dust_ctr,
566	.dtr = dust_dtr,
567	.iterate_devices = dust_iterate_devices,
568	.map = dust_map,
569	.message = dust_message,
570	.status = dust_status,
571	.prepare_ioctl = dust_prepare_ioctl,
572};
/*
 * NOTE(review): module_dm() is not defined in this file; presumably it
 * generates the module init/exit hooks that register and unregister
 * dust_target - confirm against linux/device-mapper.h.
 */
module_dm(dust);

/* Module metadata. */
MODULE_DESCRIPTION(DM_NAME " dust test target");
MODULE_AUTHOR("Bryan Gurney <dm-devel@lists.linux.dev>");
MODULE_LICENSE("GPL");
578