1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Cryptographic API for the NX-842 hardware compression.
4 *
5 * Copyright (C) IBM Corporation, 2011-2015
6 *
7 * Designer of the Power data compression engine:
8 *   Bulent Abali <abali@us.ibm.com>
9 *
10 * Original Authors: Robert Jennings <rcj@linux.vnet.ibm.com>
11 *                   Seth Jennings <sjenning@linux.vnet.ibm.com>
12 *
13 * Rewrite: Dan Streetman <ddstreet@ieee.org>
14 *
15 * This is an interface to the NX-842 compression hardware in PowerPC
16 * processors.  Most of the complexity of this driver is due to the fact that
17 * the NX-842 compression hardware requires the input and output data buffers
18 * to be specifically aligned, to be a specific multiple in length, and within
19 * specific minimum and maximum lengths.  Those restrictions, provided by the
20 * nx-842 driver via nx842_constraints, mean this driver must use bounce
21 * buffers and headers to correct misaligned in or out buffers, and to split
22 * input buffers that are too large.
23 *
24 * This driver will fall back to software decompression if the hardware
25 * decompression fails, so this driver's decompression should never fail as
26 * long as the provided compressed buffer is valid.  Any compressed buffer
27 * created by this driver will have a header (except ones where the input
28 * perfectly matches the constraints); so users of this driver cannot simply
29 * pass a compressed buffer created by this driver over to the 842 software
30 * decompression library.  Instead, users must use this driver to decompress;
31 * if the hardware fails or is unavailable, the compressed buffer will be
32 * parsed and the header removed, and the raw 842 buffer(s) passed to the 842
33 * software decompression library.
34 *
35 * This does not fall back to software compression, however, since the caller
36 * of this function is specifically requesting hardware compression; if the
37 * hardware compression fails, the caller can fall back to software
38 * compression, and the raw 842 compressed buffer that the software compressor
39 * creates can be passed to this driver for hardware decompression; any
40 * buffer without our specific header magic is assumed to be a raw 842 buffer
41 * and passed directly to the hardware.  Note that the software compression
42 * library will produce a compressed buffer that is incompatible with the
43 * hardware decompressor if the original input buffer length is not a multiple
44 * of 8; if such a compressed buffer is passed to this driver for
45 * decompression, the hardware will reject it and this driver will then pass
46 * it over to the software library for decompression.
47 */
48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
51#include <linux/vmalloc.h>
52#include <linux/sw842.h>
53#include <linux/spinlock.h>
54
55#include "nx-842.h"
56
/*
 * Magic tag placed at the start of every header written by this driver.
 * Its first 5 bits are 0x1f, which is not a valid 842 5-bit template (see
 * lib/842/842.h), so a raw 842 compressed buffer can never begin with this
 * value.  That matters because any buffer handed to us without this magic is
 * assumed to be a raw 842 compressed buffer and is passed directly to the
 * hardware to decompress.
 */
#define NX842_CRYPTO_MAGIC	(0xf842)

/* size in bytes of a header that describes @g groups */
#define NX842_CRYPTO_HEADER_SIZE(g)				\
	(sizeof(struct nx842_crypto_header) +			\
	 (g) * sizeof(struct nx842_crypto_header_group))

/* size in bytes of a header that describes the maximum number of groups */
#define NX842_CRYPTO_HEADER_MAX_SIZE				\
	NX842_CRYPTO_HEADER_SIZE(NX842_CRYPTO_GROUP_MAX)

/* each bounce buffer is an allocation of 2^BOUNCE_BUFFER_ORDER pages */
#define BOUNCE_BUFFER_ORDER	(2)
#define BOUNCE_BUFFER_SIZE					\
	((unsigned int)(PAGE_SIZE << BOUNCE_BUFFER_ORDER))

/*
 * How long to keep retrying while the hardware reports busy.  Compression
 * retries for longer because a busy decompression can fall back to the
 * software decompressor, whereas compression has no fallback.
 */
#define COMP_BUSY_TIMEOUT	(250) /* ms */
#define DECOMP_BUSY_TIMEOUT	(50) /* ms */
78
79struct nx842_crypto_param {
80	u8 *in;
81	unsigned int iremain;
82	u8 *out;
83	unsigned int oremain;
84	unsigned int ototal;
85};
86
87static int update_param(struct nx842_crypto_param *p,
88			unsigned int slen, unsigned int dlen)
89{
90	if (p->iremain < slen)
91		return -EOVERFLOW;
92	if (p->oremain < dlen)
93		return -ENOSPC;
94
95	p->in += slen;
96	p->iremain -= slen;
97	p->out += dlen;
98	p->oremain -= dlen;
99	p->ototal += dlen;
100
101	return 0;
102}
103
104int nx842_crypto_init(struct crypto_tfm *tfm, struct nx842_driver *driver)
105{
106	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
107
108	spin_lock_init(&ctx->lock);
109	ctx->driver = driver;
110	ctx->wmem = kmalloc(driver->workmem_size, GFP_KERNEL);
111	ctx->sbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
112	ctx->dbounce = (u8 *)__get_free_pages(GFP_KERNEL, BOUNCE_BUFFER_ORDER);
113	if (!ctx->wmem || !ctx->sbounce || !ctx->dbounce) {
114		kfree(ctx->wmem);
115		free_page((unsigned long)ctx->sbounce);
116		free_page((unsigned long)ctx->dbounce);
117		return -ENOMEM;
118	}
119
120	return 0;
121}
122EXPORT_SYMBOL_GPL(nx842_crypto_init);
123
124void nx842_crypto_exit(struct crypto_tfm *tfm)
125{
126	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
127
128	kfree(ctx->wmem);
129	free_page((unsigned long)ctx->sbounce);
130	free_page((unsigned long)ctx->dbounce);
131}
132EXPORT_SYMBOL_GPL(nx842_crypto_exit);
133
134static void check_constraints(struct nx842_constraints *c)
135{
136	/* limit maximum, to always have enough bounce buffer to decompress */
137	if (c->maximum > BOUNCE_BUFFER_SIZE)
138		c->maximum = BOUNCE_BUFFER_SIZE;
139}
140
141static int nx842_crypto_add_header(struct nx842_crypto_header *hdr, u8 *buf)
142{
143	int s = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
144
145	/* compress should have added space for header */
146	if (s > be16_to_cpu(hdr->group[0].padding)) {
147		pr_err("Internal error: no space for header\n");
148		return -EINVAL;
149	}
150
151	memcpy(buf, hdr, s);
152
153	print_hex_dump_debug("header ", DUMP_PREFIX_OFFSET, 16, 1, buf, s, 0);
154
155	return 0;
156}
157
158static int compress(struct nx842_crypto_ctx *ctx,
159		    struct nx842_crypto_param *p,
160		    struct nx842_crypto_header_group *g,
161		    struct nx842_constraints *c,
162		    u16 *ignore,
163		    unsigned int hdrsize)
164{
165	unsigned int slen = p->iremain, dlen = p->oremain, tmplen;
166	unsigned int adj_slen = slen;
167	u8 *src = p->in, *dst = p->out;
168	int ret, dskip = 0;
169	ktime_t timeout;
170
171	if (p->iremain == 0)
172		return -EOVERFLOW;
173
174	if (p->oremain == 0 || hdrsize + c->minimum > dlen)
175		return -ENOSPC;
176
177	if (slen % c->multiple)
178		adj_slen = round_up(slen, c->multiple);
179	if (slen < c->minimum)
180		adj_slen = c->minimum;
181	if (slen > c->maximum)
182		adj_slen = slen = c->maximum;
183	if (adj_slen > slen || (u64)src % c->alignment) {
184		adj_slen = min(adj_slen, BOUNCE_BUFFER_SIZE);
185		slen = min(slen, BOUNCE_BUFFER_SIZE);
186		if (adj_slen > slen)
187			memset(ctx->sbounce + slen, 0, adj_slen - slen);
188		memcpy(ctx->sbounce, src, slen);
189		src = ctx->sbounce;
190		slen = adj_slen;
191		pr_debug("using comp sbounce buffer, len %x\n", slen);
192	}
193
194	dst += hdrsize;
195	dlen -= hdrsize;
196
197	if ((u64)dst % c->alignment) {
198		dskip = (int)(PTR_ALIGN(dst, c->alignment) - dst);
199		dst += dskip;
200		dlen -= dskip;
201	}
202	if (dlen % c->multiple)
203		dlen = round_down(dlen, c->multiple);
204	if (dlen < c->minimum) {
205nospc:
206		dst = ctx->dbounce;
207		dlen = min(p->oremain, BOUNCE_BUFFER_SIZE);
208		dlen = round_down(dlen, c->multiple);
209		dskip = 0;
210		pr_debug("using comp dbounce buffer, len %x\n", dlen);
211	}
212	if (dlen > c->maximum)
213		dlen = c->maximum;
214
215	tmplen = dlen;
216	timeout = ktime_add_ms(ktime_get(), COMP_BUSY_TIMEOUT);
217	do {
218		dlen = tmplen; /* reset dlen, if we're retrying */
219		ret = ctx->driver->compress(src, slen, dst, &dlen, ctx->wmem);
220		/* possibly we should reduce the slen here, instead of
221		 * retrying with the dbounce buffer?
222		 */
223		if (ret == -ENOSPC && dst != ctx->dbounce)
224			goto nospc;
225	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
226	if (ret)
227		return ret;
228
229	dskip += hdrsize;
230
231	if (dst == ctx->dbounce)
232		memcpy(p->out + dskip, dst, dlen);
233
234	g->padding = cpu_to_be16(dskip);
235	g->compressed_length = cpu_to_be32(dlen);
236	g->uncompressed_length = cpu_to_be32(slen);
237
238	if (p->iremain < slen) {
239		*ignore = slen - p->iremain;
240		slen = p->iremain;
241	}
242
243	pr_debug("compress slen %x ignore %x dlen %x padding %x\n",
244		 slen, *ignore, dlen, dskip);
245
246	return update_param(p, slen, dskip + dlen);
247}
248
249int nx842_crypto_compress(struct crypto_tfm *tfm,
250			  const u8 *src, unsigned int slen,
251			  u8 *dst, unsigned int *dlen)
252{
253	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
254	struct nx842_crypto_header *hdr = &ctx->header;
255	struct nx842_crypto_param p;
256	struct nx842_constraints c = *ctx->driver->constraints;
257	unsigned int groups, hdrsize, h;
258	int ret, n;
259	bool add_header;
260	u16 ignore = 0;
261
262	check_constraints(&c);
263
264	p.in = (u8 *)src;
265	p.iremain = slen;
266	p.out = dst;
267	p.oremain = *dlen;
268	p.ototal = 0;
269
270	*dlen = 0;
271
272	groups = min_t(unsigned int, NX842_CRYPTO_GROUP_MAX,
273		       DIV_ROUND_UP(p.iremain, c.maximum));
274	hdrsize = NX842_CRYPTO_HEADER_SIZE(groups);
275
276	spin_lock_bh(&ctx->lock);
277
278	/* skip adding header if the buffers meet all constraints */
279	add_header = (p.iremain % c.multiple	||
280		      p.iremain < c.minimum	||
281		      p.iremain > c.maximum	||
282		      (u64)p.in % c.alignment	||
283		      p.oremain % c.multiple	||
284		      p.oremain < c.minimum	||
285		      p.oremain > c.maximum	||
286		      (u64)p.out % c.alignment);
287
288	hdr->magic = cpu_to_be16(NX842_CRYPTO_MAGIC);
289	hdr->groups = 0;
290	hdr->ignore = 0;
291
292	while (p.iremain > 0) {
293		n = hdr->groups++;
294		ret = -ENOSPC;
295		if (hdr->groups > NX842_CRYPTO_GROUP_MAX)
296			goto unlock;
297
298		/* header goes before first group */
299		h = !n && add_header ? hdrsize : 0;
300
301		if (ignore)
302			pr_warn("internal error, ignore is set %x\n", ignore);
303
304		ret = compress(ctx, &p, &hdr->group[n], &c, &ignore, h);
305		if (ret)
306			goto unlock;
307	}
308
309	if (!add_header && hdr->groups > 1) {
310		pr_err("Internal error: No header but multiple groups\n");
311		ret = -EINVAL;
312		goto unlock;
313	}
314
315	/* ignore indicates the input stream needed to be padded */
316	hdr->ignore = cpu_to_be16(ignore);
317	if (ignore)
318		pr_debug("marked %d bytes as ignore\n", ignore);
319
320	if (add_header)
321		ret = nx842_crypto_add_header(hdr, dst);
322	if (ret)
323		goto unlock;
324
325	*dlen = p.ototal;
326
327	pr_debug("compress total slen %x dlen %x\n", slen, *dlen);
328
329unlock:
330	spin_unlock_bh(&ctx->lock);
331	return ret;
332}
333EXPORT_SYMBOL_GPL(nx842_crypto_compress);
334
335static int decompress(struct nx842_crypto_ctx *ctx,
336		      struct nx842_crypto_param *p,
337		      struct nx842_crypto_header_group *g,
338		      struct nx842_constraints *c,
339		      u16 ignore)
340{
341	unsigned int slen = be32_to_cpu(g->compressed_length);
342	unsigned int required_len = be32_to_cpu(g->uncompressed_length);
343	unsigned int dlen = p->oremain, tmplen;
344	unsigned int adj_slen = slen;
345	u8 *src = p->in, *dst = p->out;
346	u16 padding = be16_to_cpu(g->padding);
347	int ret, spadding = 0;
348	ktime_t timeout;
349
350	if (!slen || !required_len)
351		return -EINVAL;
352
353	if (p->iremain <= 0 || padding + slen > p->iremain)
354		return -EOVERFLOW;
355
356	if (p->oremain <= 0 || required_len - ignore > p->oremain)
357		return -ENOSPC;
358
359	src += padding;
360
361	if (slen % c->multiple)
362		adj_slen = round_up(slen, c->multiple);
363	if (slen < c->minimum)
364		adj_slen = c->minimum;
365	if (slen > c->maximum)
366		goto usesw;
367	if (slen < adj_slen || (u64)src % c->alignment) {
368		/* we can append padding bytes because the 842 format defines
369		 * an "end" template (see lib/842/842_decompress.c) and will
370		 * ignore any bytes following it.
371		 */
372		if (slen < adj_slen)
373			memset(ctx->sbounce + slen, 0, adj_slen - slen);
374		memcpy(ctx->sbounce, src, slen);
375		src = ctx->sbounce;
376		spadding = adj_slen - slen;
377		slen = adj_slen;
378		pr_debug("using decomp sbounce buffer, len %x\n", slen);
379	}
380
381	if (dlen % c->multiple)
382		dlen = round_down(dlen, c->multiple);
383	if (dlen < required_len || (u64)dst % c->alignment) {
384		dst = ctx->dbounce;
385		dlen = min(required_len, BOUNCE_BUFFER_SIZE);
386		pr_debug("using decomp dbounce buffer, len %x\n", dlen);
387	}
388	if (dlen < c->minimum)
389		goto usesw;
390	if (dlen > c->maximum)
391		dlen = c->maximum;
392
393	tmplen = dlen;
394	timeout = ktime_add_ms(ktime_get(), DECOMP_BUSY_TIMEOUT);
395	do {
396		dlen = tmplen; /* reset dlen, if we're retrying */
397		ret = ctx->driver->decompress(src, slen, dst, &dlen, ctx->wmem);
398	} while (ret == -EBUSY && ktime_before(ktime_get(), timeout));
399	if (ret) {
400usesw:
401		/* reset everything, sw doesn't have constraints */
402		src = p->in + padding;
403		slen = be32_to_cpu(g->compressed_length);
404		spadding = 0;
405		dst = p->out;
406		dlen = p->oremain;
407		if (dlen < required_len) { /* have ignore bytes */
408			dst = ctx->dbounce;
409			dlen = BOUNCE_BUFFER_SIZE;
410		}
411		pr_info_ratelimited("using software 842 decompression\n");
412		ret = sw842_decompress(src, slen, dst, &dlen);
413	}
414	if (ret)
415		return ret;
416
417	slen -= spadding;
418
419	dlen -= ignore;
420	if (ignore)
421		pr_debug("ignoring last %x bytes\n", ignore);
422
423	if (dst == ctx->dbounce)
424		memcpy(p->out, dst, dlen);
425
426	pr_debug("decompress slen %x padding %x dlen %x ignore %x\n",
427		 slen, padding, dlen, ignore);
428
429	return update_param(p, slen + padding, dlen);
430}
431
432int nx842_crypto_decompress(struct crypto_tfm *tfm,
433			    const u8 *src, unsigned int slen,
434			    u8 *dst, unsigned int *dlen)
435{
436	struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm);
437	struct nx842_crypto_header *hdr;
438	struct nx842_crypto_param p;
439	struct nx842_constraints c = *ctx->driver->constraints;
440	int n, ret, hdr_len;
441	u16 ignore = 0;
442
443	check_constraints(&c);
444
445	p.in = (u8 *)src;
446	p.iremain = slen;
447	p.out = dst;
448	p.oremain = *dlen;
449	p.ototal = 0;
450
451	*dlen = 0;
452
453	hdr = (struct nx842_crypto_header *)src;
454
455	spin_lock_bh(&ctx->lock);
456
457	/* If it doesn't start with our header magic number, assume it's a raw
458	 * 842 compressed buffer and pass it directly to the hardware driver
459	 */
460	if (be16_to_cpu(hdr->magic) != NX842_CRYPTO_MAGIC) {
461		struct nx842_crypto_header_group g = {
462			.padding =		0,
463			.compressed_length =	cpu_to_be32(p.iremain),
464			.uncompressed_length =	cpu_to_be32(p.oremain),
465		};
466
467		ret = decompress(ctx, &p, &g, &c, 0);
468		if (ret)
469			goto unlock;
470
471		goto success;
472	}
473
474	if (!hdr->groups) {
475		pr_err("header has no groups\n");
476		ret = -EINVAL;
477		goto unlock;
478	}
479	if (hdr->groups > NX842_CRYPTO_GROUP_MAX) {
480		pr_err("header has too many groups %x, max %x\n",
481		       hdr->groups, NX842_CRYPTO_GROUP_MAX);
482		ret = -EINVAL;
483		goto unlock;
484	}
485
486	hdr_len = NX842_CRYPTO_HEADER_SIZE(hdr->groups);
487	if (hdr_len > slen) {
488		ret = -EOVERFLOW;
489		goto unlock;
490	}
491
492	memcpy(&ctx->header, src, hdr_len);
493	hdr = &ctx->header;
494
495	for (n = 0; n < hdr->groups; n++) {
496		/* ignore applies to last group */
497		if (n + 1 == hdr->groups)
498			ignore = be16_to_cpu(hdr->ignore);
499
500		ret = decompress(ctx, &p, &hdr->group[n], &c, ignore);
501		if (ret)
502			goto unlock;
503	}
504
505success:
506	*dlen = p.ototal;
507
508	pr_debug("decompress total slen %x dlen %x\n", slen, *dlen);
509
510	ret = 0;
511
512unlock:
513	spin_unlock_bh(&ctx->lock);
514
515	return ret;
516}
517EXPORT_SYMBOL_GPL(nx842_crypto_decompress);
518
519MODULE_LICENSE("GPL");
520MODULE_DESCRIPTION("IBM PowerPC Nest (NX) 842 Hardware Compression Driver");
521MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
522