1// SPDX-License-Identifier: GPL-2.0-only
2/*
 * Copyright © 2012 NetCommWireless
4 * Iwo Mergler <Iwo.Mergler@netcommwireless.com.au>
5 *
 * Test for multi-bit error recovery on a NAND page. This mostly tests the
 * ECC controller / driver.
8 *
9 * There are two test modes:
10 *
11 *	0 - artificially inserting bit errors until the ECC fails
12 *	    This is the default method and fairly quick. It should
13 *	    be independent of the quality of the FLASH.
14 *
15 *	1 - re-writing the same pattern repeatedly until the ECC fails.
16 *	    This method relies on the physics of NAND FLASH to eventually
17 *	    generate '0' bits if '1' has been written sufficient times.
18 *	    Depending on the NAND, the first bit errors will appear after
19 *	    1000 or more writes and then will usually snowball, reaching the
20 *	    limits of the ECC quickly.
21 *
22 *	    The test stops after 10000 cycles, should your FLASH be
23 *	    exceptionally good and not generate bit errors before that. Try
24 *	    a different page in that case.
25 *
26 * Please note that neither of these tests will significantly 'use up' any
27 * FLASH endurance. Only a maximum of two erase operations will be performed.
28 */
29
30#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
31
32#include <linux/init.h>
33#include <linux/module.h>
34#include <linux/moduleparam.h>
35#include <linux/mtd/mtd.h>
36#include <linux/err.h>
37#include <linux/mtd/rawnand.h>
38#include <linux/slab.h>
39#include "mtd_test.h"
40
41static int dev;
42module_param(dev, int, S_IRUGO);
43MODULE_PARM_DESC(dev, "MTD device number to use");
44
45static unsigned page_offset;
46module_param(page_offset, uint, S_IRUGO);
47MODULE_PARM_DESC(page_offset, "Page number relative to dev start");
48
49static unsigned seed;
50module_param(seed, uint, S_IRUGO);
51MODULE_PARM_DESC(seed, "Random seed");
52
53static int mode;
54module_param(mode, int, S_IRUGO);
55MODULE_PARM_DESC(mode, "0=incremental errors, 1=overwrite test");
56
57static unsigned max_overwrite = 10000;
58
59static loff_t   offset;     /* Offset of the page we're using. */
60static unsigned eraseblock; /* Eraseblock number for our page. */
61
62/* We assume that the ECC can correct up to a certain number
63 * of biterrors per subpage. */
64static unsigned subsize;  /* Size of subpages */
65static unsigned subcount; /* Number of subpages per page */
66
67static struct mtd_info *mtd;   /* MTD device */
68
69static uint8_t *wbuffer; /* One page write / compare buffer */
70static uint8_t *rbuffer; /* One page read buffer */
71
/* Deterministic pseudo-random byte for a given offset: the offset is
 * XORed with a constant, mixed with three xor-shift steps, and the low
 * byte of the result is returned with its bit order reversed. */
static uint8_t hash(unsigned offset)
{
	unsigned mixed = offset ^ 0x7f7edfd3;
	uint8_t low;
	uint8_t reversed = 0;
	int bit;

	mixed ^= mixed >> 3;
	mixed ^= mixed >> 5;
	mixed ^= mixed >> 13;

	/* Reverse the bits of the low byte, one bit per iteration. */
	low = mixed & 0xFF;
	for (bit = 0; bit < 8; bit++) {
		reversed = (uint8_t)((reversed << 1) | (low & 1));
		low >>= 1;
	}

	return reversed;
}
88
89/* Writes wbuffer to page */
90static int write_page(int log)
91{
92	if (log)
93		pr_info("write_page\n");
94
95	return mtdtest_write(mtd, offset, mtd->writesize, wbuffer);
96}
97
98/* Re-writes the data area while leaving the OOB alone. */
99static int rewrite_page(int log)
100{
101	int err = 0;
102	struct mtd_oob_ops ops = { };
103
104	if (log)
105		pr_info("rewrite page\n");
106
107	ops.mode      = MTD_OPS_RAW; /* No ECC */
108	ops.len       = mtd->writesize;
109	ops.retlen    = 0;
110	ops.ooblen    = 0;
111	ops.oobretlen = 0;
112	ops.ooboffs   = 0;
113	ops.datbuf    = wbuffer;
114	ops.oobbuf    = NULL;
115
116	err = mtd_write_oob(mtd, offset, &ops);
117	if (err || ops.retlen != mtd->writesize) {
118		pr_err("error: write_oob failed (%d)\n", err);
119		if (!err)
120			err = -EIO;
121	}
122
123	return err;
124}
125
126/* Reads page into rbuffer. Returns number of corrected bit errors (>=0)
127 * or error (<0) */
128static int read_page(int log)
129{
130	int err = 0;
131	size_t read;
132	struct mtd_ecc_stats oldstats;
133
134	if (log)
135		pr_info("read_page\n");
136
137	/* Saving last mtd stats */
138	memcpy(&oldstats, &mtd->ecc_stats, sizeof(oldstats));
139
140	err = mtd_read(mtd, offset, mtd->writesize, &read, rbuffer);
141	if (!err || err == -EUCLEAN)
142		err = mtd->ecc_stats.corrected - oldstats.corrected;
143
144	if (err < 0 || read != mtd->writesize) {
145		pr_err("error: read failed at %#llx\n", (long long)offset);
146		if (err >= 0)
147			err = -EIO;
148	}
149
150	return err;
151}
152
153/* Verifies rbuffer against random sequence */
154static int verify_page(int log)
155{
156	unsigned i, errs = 0;
157
158	if (log)
159		pr_info("verify_page\n");
160
161	for (i = 0; i < mtd->writesize; i++) {
162		if (rbuffer[i] != hash(i+seed)) {
163			pr_err("Error: page offset %u, expected %02x, got %02x\n",
164				i, hash(i+seed), rbuffer[i]);
165			errs++;
166		}
167	}
168
169	if (errs)
170		return -EIO;
171	else
172		return 0;
173}
174
175#define CBIT(v, n) ((v) & (1 << (n)))
176#define BCLR(v, n) ((v) = (v) & ~(1 << (n)))
177
178/* Finds the first '1' bit in wbuffer starting at offset 'byte'
179 * and sets it to '0'. */
180static int insert_biterror(unsigned byte)
181{
182	int bit;
183
184	while (byte < mtd->writesize) {
185		for (bit = 7; bit >= 0; bit--) {
186			if (CBIT(wbuffer[byte], bit)) {
187				BCLR(wbuffer[byte], bit);
188				pr_info("Inserted biterror @ %u/%u\n", byte, bit);
189				return 0;
190			}
191		}
192		byte++;
193	}
194	pr_err("biterror: Failed to find a '1' bit\n");
195	return -EIO;
196}
197
198/* Writes 'random' data to page and then introduces deliberate bit
199 * errors into the page, while verifying each step. */
200static int incremental_errors_test(void)
201{
202	int err = 0;
203	unsigned i;
204	unsigned errs_per_subpage = 0;
205
206	pr_info("incremental biterrors test\n");
207
208	for (i = 0; i < mtd->writesize; i++)
209		wbuffer[i] = hash(i+seed);
210
211	err = write_page(1);
212	if (err)
213		goto exit;
214
215	while (1) {
216
217		err = rewrite_page(1);
218		if (err)
219			goto exit;
220
221		err = read_page(1);
222		if (err > 0)
223			pr_info("Read reported %d corrected bit errors\n", err);
224		if (err < 0) {
225			pr_err("After %d biterrors per subpage, read reported error %d\n",
226				errs_per_subpage, err);
227			err = 0;
228			goto exit;
229		}
230
231		err = verify_page(1);
232		if (err) {
233			pr_err("ECC failure, read data is incorrect despite read success\n");
234			goto exit;
235		}
236
237		pr_info("Successfully corrected %d bit errors per subpage\n",
238			errs_per_subpage);
239
240		for (i = 0; i < subcount; i++) {
241			err = insert_biterror(i * subsize);
242			if (err < 0)
243				goto exit;
244		}
245		errs_per_subpage++;
246	}
247
248exit:
249	return err;
250}
251
252
253/* Writes 'random' data to page and then re-writes that same data repeatedly.
254   This eventually develops bit errors (bits written as '1' will slowly become
255   '0'), which are corrected as far as the ECC is capable of. */
256static int overwrite_test(void)
257{
258	int err = 0;
259	unsigned i;
260	unsigned max_corrected = 0;
261	unsigned opno = 0;
262	/* We don't expect more than this many correctable bit errors per
263	 * page. */
264	#define MAXBITS 512
265	static unsigned bitstats[MAXBITS]; /* bit error histogram. */
266
267	memset(bitstats, 0, sizeof(bitstats));
268
269	pr_info("overwrite biterrors test\n");
270
271	for (i = 0; i < mtd->writesize; i++)
272		wbuffer[i] = hash(i+seed);
273
274	err = write_page(1);
275	if (err)
276		goto exit;
277
278	while (opno < max_overwrite) {
279
280		err = write_page(0);
281		if (err)
282			break;
283
284		err = read_page(0);
285		if (err >= 0) {
286			if (err >= MAXBITS) {
287				pr_info("Implausible number of bit errors corrected\n");
288				err = -EIO;
289				break;
290			}
291			bitstats[err]++;
292			if (err > max_corrected) {
293				max_corrected = err;
294				pr_info("Read reported %d corrected bit errors\n",
295					err);
296			}
297		} else { /* err < 0 */
298			pr_info("Read reported error %d\n", err);
299			err = 0;
300			break;
301		}
302
303		err = verify_page(0);
304		if (err) {
305			bitstats[max_corrected] = opno;
306			pr_info("ECC failure, read data is incorrect despite read success\n");
307			break;
308		}
309
310		err = mtdtest_relax();
311		if (err)
312			break;
313
314		opno++;
315	}
316
317	/* At this point bitstats[0] contains the number of ops with no bit
318	 * errors, bitstats[1] the number of ops with 1 bit error, etc. */
319	pr_info("Bit error histogram (%d operations total):\n", opno);
320	for (i = 0; i < max_corrected; i++)
321		pr_info("Page reads with %3d corrected bit errors: %d\n",
322			i, bitstats[i]);
323
324exit:
325	return err;
326}
327
328static int __init mtd_nandbiterrs_init(void)
329{
330	int err = 0;
331
332	printk("\n");
333	printk(KERN_INFO "==================================================\n");
334	pr_info("MTD device: %d\n", dev);
335
336	mtd = get_mtd_device(NULL, dev);
337	if (IS_ERR(mtd)) {
338		err = PTR_ERR(mtd);
339		pr_err("error: cannot get MTD device\n");
340		goto exit_mtddev;
341	}
342
343	if (!mtd_type_is_nand(mtd)) {
344		pr_info("this test requires NAND flash\n");
345		err = -ENODEV;
346		goto exit_nand;
347	}
348
349	pr_info("MTD device size %llu, eraseblock=%u, page=%u, oob=%u\n",
350		(unsigned long long)mtd->size, mtd->erasesize,
351		mtd->writesize, mtd->oobsize);
352
353	subsize  = mtd->writesize >> mtd->subpage_sft;
354	subcount = mtd->writesize / subsize;
355
356	pr_info("Device uses %d subpages of %d bytes\n", subcount, subsize);
357
358	offset     = (loff_t)page_offset * mtd->writesize;
359	eraseblock = mtd_div_by_eb(offset, mtd);
360
361	pr_info("Using page=%u, offset=%llu, eraseblock=%u\n",
362		page_offset, offset, eraseblock);
363
364	wbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
365	if (!wbuffer) {
366		err = -ENOMEM;
367		goto exit_wbuffer;
368	}
369
370	rbuffer = kmalloc(mtd->writesize, GFP_KERNEL);
371	if (!rbuffer) {
372		err = -ENOMEM;
373		goto exit_rbuffer;
374	}
375
376	err = mtdtest_erase_eraseblock(mtd, eraseblock);
377	if (err)
378		goto exit_error;
379
380	if (mode == 0)
381		err = incremental_errors_test();
382	else
383		err = overwrite_test();
384
385	if (err)
386		goto exit_error;
387
388	/* We leave the block un-erased in case of test failure. */
389	err = mtdtest_erase_eraseblock(mtd, eraseblock);
390	if (err)
391		goto exit_error;
392
393	err = -EIO;
394	pr_info("finished successfully.\n");
395	printk(KERN_INFO "==================================================\n");
396
397exit_error:
398	kfree(rbuffer);
399exit_rbuffer:
400	kfree(wbuffer);
401exit_wbuffer:
402	/* Nothing */
403exit_nand:
404	put_mtd_device(mtd);
405exit_mtddev:
406	return err;
407}
408
409static void __exit mtd_nandbiterrs_exit(void)
410{
411	return;
412}
413
414module_init(mtd_nandbiterrs_init);
415module_exit(mtd_nandbiterrs_exit);
416
417MODULE_DESCRIPTION("NAND bit error recovery test");
418MODULE_AUTHOR("Iwo Mergler");
419MODULE_LICENSE("GPL");
420