1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * This file contains an ECC algorithm that detects and corrects 1 bit
4 * errors in a 256 byte block of data.
5 *
 * Copyright © 2008 Koninklijke Philips Electronics NV.
7 *                  Author: Frans Meulenbroeks
8 *
9 * Completely replaces the previous ECC implementation which was written by:
10 *   Steven J. Hill (sjhill@realitydiluted.com)
11 *   Thomas Gleixner (tglx@linutronix.de)
12 *
13 * Information on how this algorithm works and how it was developed
14 * can be found in Documentation/driver-api/mtd/nand_ecc.rst
15 */
16
17#include <linux/types.h>
18#include <linux/kernel.h>
19#include <linux/module.h>
20#include <linux/mtd/nand.h>
21#include <linux/mtd/nand-ecc-sw-hamming.h>
22#include <linux/slab.h>
23#include <asm/byteorder.h>
24
25/*
26 * invparity is a 256 byte table that contains the odd parity
27 * for each byte. So if the number of bits in a byte is even,
28 * the array element is 1, and when the number of bits is odd
29 * the array eleemnt is 0.
30 */
31static const char invparity[256] = {
32	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
33	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
34	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
35	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
36	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
37	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
38	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
39	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
40	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
41	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
42	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
43	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
44	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
45	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
46	0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
47	1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
48};
49
50/*
51 * bitsperbyte contains the number of bits per byte
52 * this is only used for testing and repairing parity
53 * (a precalculated value slightly improves performance)
54 */
55static const char bitsperbyte[256] = {
56	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
57	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
58	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
59	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
60	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
61	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
62	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
63	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
64	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
65	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
66	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
67	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
68	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
69	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
70	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
71	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
72};
73
74/*
75 * addressbits is a lookup table to filter out the bits from the xor-ed
76 * ECC data that identify the faulty location.
77 * this is only used for repairing parity
78 * see the comments in nand_ecc_sw_hamming_correct for more details
79 */
80static const char addressbits[256] = {
81	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
82	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
83	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
84	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
85	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
86	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
87	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
88	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
89	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
90	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
91	0x00, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x01,
92	0x02, 0x02, 0x03, 0x03, 0x02, 0x02, 0x03, 0x03,
93	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
94	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
95	0x04, 0x04, 0x05, 0x05, 0x04, 0x04, 0x05, 0x05,
96	0x06, 0x06, 0x07, 0x07, 0x06, 0x06, 0x07, 0x07,
97	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
98	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
99	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
100	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
101	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
102	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
103	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
104	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
105	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
106	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
107	0x08, 0x08, 0x09, 0x09, 0x08, 0x08, 0x09, 0x09,
108	0x0a, 0x0a, 0x0b, 0x0b, 0x0a, 0x0a, 0x0b, 0x0b,
109	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
110	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f,
111	0x0c, 0x0c, 0x0d, 0x0d, 0x0c, 0x0c, 0x0d, 0x0d,
112	0x0e, 0x0e, 0x0f, 0x0f, 0x0e, 0x0e, 0x0f, 0x0f
113};
114
115int ecc_sw_hamming_calculate(const unsigned char *buf, unsigned int step_size,
116			     unsigned char *code, bool sm_order)
117{
118	const u32 *bp = (uint32_t *)buf;
119	const u32 eccsize_mult = (step_size == 256) ? 1 : 2;
120	/* current value in buffer */
121	u32 cur;
122	/* rp0..rp17 are the various accumulated parities (per byte) */
123	u32 rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7, rp8, rp9, rp10, rp11, rp12,
124		rp13, rp14, rp15, rp16, rp17;
125	/* Cumulative parity for all data */
126	u32 par;
127	/* Cumulative parity at the end of the loop (rp12, rp14, rp16) */
128	u32 tmppar;
129	int i;
130
131	par = 0;
132	rp4 = 0;
133	rp6 = 0;
134	rp8 = 0;
135	rp10 = 0;
136	rp12 = 0;
137	rp14 = 0;
138	rp16 = 0;
139	rp17 = 0;
140
141	/*
142	 * The loop is unrolled a number of times;
143	 * This avoids if statements to decide on which rp value to update
144	 * Also we process the data by longwords.
145	 * Note: passing unaligned data might give a performance penalty.
146	 * It is assumed that the buffers are aligned.
147	 * tmppar is the cumulative sum of this iteration.
148	 * needed for calculating rp12, rp14, rp16 and par
149	 * also used as a performance improvement for rp6, rp8 and rp10
150	 */
151	for (i = 0; i < eccsize_mult << 2; i++) {
152		cur = *bp++;
153		tmppar = cur;
154		rp4 ^= cur;
155		cur = *bp++;
156		tmppar ^= cur;
157		rp6 ^= tmppar;
158		cur = *bp++;
159		tmppar ^= cur;
160		rp4 ^= cur;
161		cur = *bp++;
162		tmppar ^= cur;
163		rp8 ^= tmppar;
164
165		cur = *bp++;
166		tmppar ^= cur;
167		rp4 ^= cur;
168		rp6 ^= cur;
169		cur = *bp++;
170		tmppar ^= cur;
171		rp6 ^= cur;
172		cur = *bp++;
173		tmppar ^= cur;
174		rp4 ^= cur;
175		cur = *bp++;
176		tmppar ^= cur;
177		rp10 ^= tmppar;
178
179		cur = *bp++;
180		tmppar ^= cur;
181		rp4 ^= cur;
182		rp6 ^= cur;
183		rp8 ^= cur;
184		cur = *bp++;
185		tmppar ^= cur;
186		rp6 ^= cur;
187		rp8 ^= cur;
188		cur = *bp++;
189		tmppar ^= cur;
190		rp4 ^= cur;
191		rp8 ^= cur;
192		cur = *bp++;
193		tmppar ^= cur;
194		rp8 ^= cur;
195
196		cur = *bp++;
197		tmppar ^= cur;
198		rp4 ^= cur;
199		rp6 ^= cur;
200		cur = *bp++;
201		tmppar ^= cur;
202		rp6 ^= cur;
203		cur = *bp++;
204		tmppar ^= cur;
205		rp4 ^= cur;
206		cur = *bp++;
207		tmppar ^= cur;
208
209		par ^= tmppar;
210		if ((i & 0x1) == 0)
211			rp12 ^= tmppar;
212		if ((i & 0x2) == 0)
213			rp14 ^= tmppar;
214		if (eccsize_mult == 2 && (i & 0x4) == 0)
215			rp16 ^= tmppar;
216	}
217
218	/*
219	 * handle the fact that we use longword operations
220	 * we'll bring rp4..rp14..rp16 back to single byte entities by
221	 * shifting and xoring first fold the upper and lower 16 bits,
222	 * then the upper and lower 8 bits.
223	 */
224	rp4 ^= (rp4 >> 16);
225	rp4 ^= (rp4 >> 8);
226	rp4 &= 0xff;
227	rp6 ^= (rp6 >> 16);
228	rp6 ^= (rp6 >> 8);
229	rp6 &= 0xff;
230	rp8 ^= (rp8 >> 16);
231	rp8 ^= (rp8 >> 8);
232	rp8 &= 0xff;
233	rp10 ^= (rp10 >> 16);
234	rp10 ^= (rp10 >> 8);
235	rp10 &= 0xff;
236	rp12 ^= (rp12 >> 16);
237	rp12 ^= (rp12 >> 8);
238	rp12 &= 0xff;
239	rp14 ^= (rp14 >> 16);
240	rp14 ^= (rp14 >> 8);
241	rp14 &= 0xff;
242	if (eccsize_mult == 2) {
243		rp16 ^= (rp16 >> 16);
244		rp16 ^= (rp16 >> 8);
245		rp16 &= 0xff;
246	}
247
248	/*
249	 * we also need to calculate the row parity for rp0..rp3
250	 * This is present in par, because par is now
251	 * rp3 rp3 rp2 rp2 in little endian and
252	 * rp2 rp2 rp3 rp3 in big endian
253	 * as well as
254	 * rp1 rp0 rp1 rp0 in little endian and
255	 * rp0 rp1 rp0 rp1 in big endian
256	 * First calculate rp2 and rp3
257	 */
258#ifdef __BIG_ENDIAN
259	rp2 = (par >> 16);
260	rp2 ^= (rp2 >> 8);
261	rp2 &= 0xff;
262	rp3 = par & 0xffff;
263	rp3 ^= (rp3 >> 8);
264	rp3 &= 0xff;
265#else
266	rp3 = (par >> 16);
267	rp3 ^= (rp3 >> 8);
268	rp3 &= 0xff;
269	rp2 = par & 0xffff;
270	rp2 ^= (rp2 >> 8);
271	rp2 &= 0xff;
272#endif
273
274	/* reduce par to 16 bits then calculate rp1 and rp0 */
275	par ^= (par >> 16);
276#ifdef __BIG_ENDIAN
277	rp0 = (par >> 8) & 0xff;
278	rp1 = (par & 0xff);
279#else
280	rp1 = (par >> 8) & 0xff;
281	rp0 = (par & 0xff);
282#endif
283
284	/* finally reduce par to 8 bits */
285	par ^= (par >> 8);
286	par &= 0xff;
287
288	/*
289	 * and calculate rp5..rp15..rp17
290	 * note that par = rp4 ^ rp5 and due to the commutative property
291	 * of the ^ operator we can say:
292	 * rp5 = (par ^ rp4);
293	 * The & 0xff seems superfluous, but benchmarking learned that
294	 * leaving it out gives slightly worse results. No idea why, probably
295	 * it has to do with the way the pipeline in pentium is organized.
296	 */
297	rp5 = (par ^ rp4) & 0xff;
298	rp7 = (par ^ rp6) & 0xff;
299	rp9 = (par ^ rp8) & 0xff;
300	rp11 = (par ^ rp10) & 0xff;
301	rp13 = (par ^ rp12) & 0xff;
302	rp15 = (par ^ rp14) & 0xff;
303	if (eccsize_mult == 2)
304		rp17 = (par ^ rp16) & 0xff;
305
306	/*
307	 * Finally calculate the ECC bits.
308	 * Again here it might seem that there are performance optimisations
309	 * possible, but benchmarks showed that on the system this is developed
310	 * the code below is the fastest
311	 */
312	if (sm_order) {
313		code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
314			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
315			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
316			  (invparity[rp1] << 1) | (invparity[rp0]);
317		code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
318			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
319			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
320			  (invparity[rp9] << 1) | (invparity[rp8]);
321	} else {
322		code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
323			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
324			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
325			  (invparity[rp1] << 1) | (invparity[rp0]);
326		code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
327			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
328			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
329			  (invparity[rp9] << 1) | (invparity[rp8]);
330	}
331
332	if (eccsize_mult == 1)
333		code[2] =
334		    (invparity[par & 0xf0] << 7) |
335		    (invparity[par & 0x0f] << 6) |
336		    (invparity[par & 0xcc] << 5) |
337		    (invparity[par & 0x33] << 4) |
338		    (invparity[par & 0xaa] << 3) |
339		    (invparity[par & 0x55] << 2) |
340		    3;
341	else
342		code[2] =
343		    (invparity[par & 0xf0] << 7) |
344		    (invparity[par & 0x0f] << 6) |
345		    (invparity[par & 0xcc] << 5) |
346		    (invparity[par & 0x33] << 4) |
347		    (invparity[par & 0xaa] << 3) |
348		    (invparity[par & 0x55] << 2) |
349		    (invparity[rp17] << 1) |
350		    (invparity[rp16] << 0);
351
352	return 0;
353}
354EXPORT_SYMBOL(ecc_sw_hamming_calculate);
355
356/**
357 * nand_ecc_sw_hamming_calculate - Calculate 3-byte ECC for 256/512-byte block
358 * @nand: NAND device
359 * @buf: Input buffer with raw data
360 * @code: Output buffer with ECC
361 */
362int nand_ecc_sw_hamming_calculate(struct nand_device *nand,
363				  const unsigned char *buf, unsigned char *code)
364{
365	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
366	unsigned int step_size = nand->ecc.ctx.conf.step_size;
367	bool sm_order = engine_conf ? engine_conf->sm_order : false;
368
369	return ecc_sw_hamming_calculate(buf, step_size, code, sm_order);
370}
371EXPORT_SYMBOL(nand_ecc_sw_hamming_calculate);
372
373int ecc_sw_hamming_correct(unsigned char *buf, unsigned char *read_ecc,
374			   unsigned char *calc_ecc, unsigned int step_size,
375			   bool sm_order)
376{
377	const u32 eccsize_mult = step_size >> 8;
378	unsigned char b0, b1, b2, bit_addr;
379	unsigned int byte_addr;
380
381	/*
382	 * b0 to b2 indicate which bit is faulty (if any)
383	 * we might need the xor result  more than once,
384	 * so keep them in a local var
385	*/
386	if (sm_order) {
387		b0 = read_ecc[0] ^ calc_ecc[0];
388		b1 = read_ecc[1] ^ calc_ecc[1];
389	} else {
390		b0 = read_ecc[1] ^ calc_ecc[1];
391		b1 = read_ecc[0] ^ calc_ecc[0];
392	}
393
394	b2 = read_ecc[2] ^ calc_ecc[2];
395
396	/* check if there are any bitfaults */
397
398	/* repeated if statements are slightly more efficient than switch ... */
399	/* ordered in order of likelihood */
400
401	if ((b0 | b1 | b2) == 0)
402		return 0;	/* no error */
403
404	if ((((b0 ^ (b0 >> 1)) & 0x55) == 0x55) &&
405	    (((b1 ^ (b1 >> 1)) & 0x55) == 0x55) &&
406	    ((eccsize_mult == 1 && ((b2 ^ (b2 >> 1)) & 0x54) == 0x54) ||
407	     (eccsize_mult == 2 && ((b2 ^ (b2 >> 1)) & 0x55) == 0x55))) {
408	/* single bit error */
409		/*
410		 * rp17/rp15/13/11/9/7/5/3/1 indicate which byte is the faulty
411		 * byte, cp 5/3/1 indicate the faulty bit.
412		 * A lookup table (called addressbits) is used to filter
413		 * the bits from the byte they are in.
414		 * A marginal optimisation is possible by having three
415		 * different lookup tables.
416		 * One as we have now (for b0), one for b2
417		 * (that would avoid the >> 1), and one for b1 (with all values
418		 * << 4). However it was felt that introducing two more tables
419		 * hardly justify the gain.
420		 *
421		 * The b2 shift is there to get rid of the lowest two bits.
422		 * We could also do addressbits[b2] >> 1 but for the
423		 * performance it does not make any difference
424		 */
425		if (eccsize_mult == 1)
426			byte_addr = (addressbits[b1] << 4) + addressbits[b0];
427		else
428			byte_addr = (addressbits[b2 & 0x3] << 8) +
429				    (addressbits[b1] << 4) + addressbits[b0];
430		bit_addr = addressbits[b2 >> 2];
431		/* flip the bit */
432		buf[byte_addr] ^= (1 << bit_addr);
433		return 1;
434
435	}
436	/* count nr of bits; use table lookup, faster than calculating it */
437	if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
438		return 1;	/* error in ECC data; no action needed */
439
440	pr_err("%s: uncorrectable ECC error\n", __func__);
441	return -EBADMSG;
442}
443EXPORT_SYMBOL(ecc_sw_hamming_correct);
444
445/**
446 * nand_ecc_sw_hamming_correct - Detect and correct bit error(s)
447 * @nand: NAND device
448 * @buf: Raw data read from the chip
449 * @read_ecc: ECC bytes read from the chip
450 * @calc_ecc: ECC calculated from the raw data
451 *
452 * Detect and correct up to 1 bit error per 256/512-byte block.
453 */
454int nand_ecc_sw_hamming_correct(struct nand_device *nand, unsigned char *buf,
455				unsigned char *read_ecc,
456				unsigned char *calc_ecc)
457{
458	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
459	unsigned int step_size = nand->ecc.ctx.conf.step_size;
460	bool sm_order = engine_conf ? engine_conf->sm_order : false;
461
462	return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, step_size,
463				      sm_order);
464}
465EXPORT_SYMBOL(nand_ecc_sw_hamming_correct);
466
467int nand_ecc_sw_hamming_init_ctx(struct nand_device *nand)
468{
469	struct nand_ecc_props *conf = &nand->ecc.ctx.conf;
470	struct nand_ecc_sw_hamming_conf *engine_conf;
471	struct mtd_info *mtd = nanddev_to_mtd(nand);
472	int ret;
473
474	if (!mtd->ooblayout) {
475		switch (mtd->oobsize) {
476		case 8:
477		case 16:
478			mtd_set_ooblayout(mtd, nand_get_small_page_ooblayout());
479			break;
480		case 64:
481		case 128:
482			mtd_set_ooblayout(mtd,
483					  nand_get_large_page_hamming_ooblayout());
484			break;
485		default:
486			return -ENOTSUPP;
487		}
488	}
489
490	conf->engine_type = NAND_ECC_ENGINE_TYPE_SOFT;
491	conf->algo = NAND_ECC_ALGO_HAMMING;
492	conf->step_size = nand->ecc.user_conf.step_size;
493	conf->strength = 1;
494
495	/* Use the strongest configuration by default */
496	if (conf->step_size != 256 && conf->step_size != 512)
497		conf->step_size = 256;
498
499	engine_conf = kzalloc(sizeof(*engine_conf), GFP_KERNEL);
500	if (!engine_conf)
501		return -ENOMEM;
502
503	ret = nand_ecc_init_req_tweaking(&engine_conf->req_ctx, nand);
504	if (ret)
505		goto free_engine_conf;
506
507	engine_conf->code_size = 3;
508	engine_conf->calc_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
509	engine_conf->code_buf = kzalloc(mtd->oobsize, GFP_KERNEL);
510	if (!engine_conf->calc_buf || !engine_conf->code_buf) {
511		ret = -ENOMEM;
512		goto free_bufs;
513	}
514
515	nand->ecc.ctx.priv = engine_conf;
516	nand->ecc.ctx.nsteps = mtd->writesize / conf->step_size;
517	nand->ecc.ctx.total = nand->ecc.ctx.nsteps * engine_conf->code_size;
518
519	return 0;
520
521free_bufs:
522	nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
523	kfree(engine_conf->calc_buf);
524	kfree(engine_conf->code_buf);
525free_engine_conf:
526	kfree(engine_conf);
527
528	return ret;
529}
530EXPORT_SYMBOL(nand_ecc_sw_hamming_init_ctx);
531
532void nand_ecc_sw_hamming_cleanup_ctx(struct nand_device *nand)
533{
534	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
535
536	if (engine_conf) {
537		nand_ecc_cleanup_req_tweaking(&engine_conf->req_ctx);
538		kfree(engine_conf->calc_buf);
539		kfree(engine_conf->code_buf);
540		kfree(engine_conf);
541	}
542}
543EXPORT_SYMBOL(nand_ecc_sw_hamming_cleanup_ctx);
544
545static int nand_ecc_sw_hamming_prepare_io_req(struct nand_device *nand,
546					      struct nand_page_io_req *req)
547{
548	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
549	struct mtd_info *mtd = nanddev_to_mtd(nand);
550	int eccsize = nand->ecc.ctx.conf.step_size;
551	int eccbytes = engine_conf->code_size;
552	int eccsteps = nand->ecc.ctx.nsteps;
553	int total = nand->ecc.ctx.total;
554	u8 *ecccalc = engine_conf->calc_buf;
555	const u8 *data;
556	int i;
557
558	/* Nothing to do for a raw operation */
559	if (req->mode == MTD_OPS_RAW)
560		return 0;
561
562	/* This engine does not provide BBM/free OOB bytes protection */
563	if (!req->datalen)
564		return 0;
565
566	nand_ecc_tweak_req(&engine_conf->req_ctx, req);
567
568	/* No more preparation for page read */
569	if (req->type == NAND_PAGE_READ)
570		return 0;
571
572	/* Preparation for page write: derive the ECC bytes and place them */
573	for (i = 0, data = req->databuf.out;
574	     eccsteps;
575	     eccsteps--, i += eccbytes, data += eccsize)
576		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
577
578	return mtd_ooblayout_set_eccbytes(mtd, ecccalc, (void *)req->oobbuf.out,
579					  0, total);
580}
581
582static int nand_ecc_sw_hamming_finish_io_req(struct nand_device *nand,
583					     struct nand_page_io_req *req)
584{
585	struct nand_ecc_sw_hamming_conf *engine_conf = nand->ecc.ctx.priv;
586	struct mtd_info *mtd = nanddev_to_mtd(nand);
587	int eccsize = nand->ecc.ctx.conf.step_size;
588	int total = nand->ecc.ctx.total;
589	int eccbytes = engine_conf->code_size;
590	int eccsteps = nand->ecc.ctx.nsteps;
591	u8 *ecccalc = engine_conf->calc_buf;
592	u8 *ecccode = engine_conf->code_buf;
593	unsigned int max_bitflips = 0;
594	u8 *data = req->databuf.in;
595	int i, ret;
596
597	/* Nothing to do for a raw operation */
598	if (req->mode == MTD_OPS_RAW)
599		return 0;
600
601	/* This engine does not provide BBM/free OOB bytes protection */
602	if (!req->datalen)
603		return 0;
604
605	/* No more preparation for page write */
606	if (req->type == NAND_PAGE_WRITE) {
607		nand_ecc_restore_req(&engine_conf->req_ctx, req);
608		return 0;
609	}
610
611	/* Finish a page read: retrieve the (raw) ECC bytes*/
612	ret = mtd_ooblayout_get_eccbytes(mtd, ecccode, req->oobbuf.in, 0,
613					 total);
614	if (ret)
615		return ret;
616
617	/* Calculate the ECC bytes */
618	for (i = 0; eccsteps; eccsteps--, i += eccbytes, data += eccsize)
619		nand_ecc_sw_hamming_calculate(nand, data, &ecccalc[i]);
620
621	/* Finish a page read: compare and correct */
622	for (eccsteps = nand->ecc.ctx.nsteps, i = 0, data = req->databuf.in;
623	     eccsteps;
624	     eccsteps--, i += eccbytes, data += eccsize) {
625		int stat =  nand_ecc_sw_hamming_correct(nand, data,
626							&ecccode[i],
627							&ecccalc[i]);
628		if (stat < 0) {
629			mtd->ecc_stats.failed++;
630		} else {
631			mtd->ecc_stats.corrected += stat;
632			max_bitflips = max_t(unsigned int, max_bitflips, stat);
633		}
634	}
635
636	nand_ecc_restore_req(&engine_conf->req_ctx, req);
637
638	return max_bitflips;
639}
640
641static struct nand_ecc_engine_ops nand_ecc_sw_hamming_engine_ops = {
642	.init_ctx = nand_ecc_sw_hamming_init_ctx,
643	.cleanup_ctx = nand_ecc_sw_hamming_cleanup_ctx,
644	.prepare_io_req = nand_ecc_sw_hamming_prepare_io_req,
645	.finish_io_req = nand_ecc_sw_hamming_finish_io_req,
646};
647
648static struct nand_ecc_engine nand_ecc_sw_hamming_engine = {
649	.ops = &nand_ecc_sw_hamming_engine_ops,
650};
651
652struct nand_ecc_engine *nand_ecc_sw_hamming_get_engine(void)
653{
654	return &nand_ecc_sw_hamming_engine;
655}
656EXPORT_SYMBOL(nand_ecc_sw_hamming_get_engine);
657
658MODULE_LICENSE("GPL");
659MODULE_AUTHOR("Frans Meulenbroeks <fransmeulenbroeks@gmail.com>");
660MODULE_DESCRIPTION("NAND software Hamming ECC support");
661