/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Hardware interface of the NX-GZIP compression accelerator
 *
 * Copyright (C) IBM Corporation, 2020
 *
 * Author: Bulent Abali <abali@us.ibm.com>
 *
 */
10
11#ifndef _NXU_H
12#define _NXU_H
13
#include <endian.h>
#include <signal.h>
#include <stdint.h>
#include <string.h>
#include "nx.h"
17
/* deflate */
#define LLSZ   286	/* literal/length (LL) symbol counters in out_lzcount[] */
#define DSZ    30	/* distance (D) symbol counters in out_lzcount[] */

/* nx */
#define DHTSZ  18	/* DHT size in 16-byte quadwords; sizes in_dht[], out_dht[] */
#define DHT_MAXSZ 288	/* DHT size in bytes (DHTSZ * 16); sizes in_dht_char[] */
/*
 * NOTE(review): not referenced in this header; presumably the largest
 * number of direct ddes in one list.  dde_count_mask is only 8 bits
 * wide (max 255), so confirm that 256 is the intended limit.
 */
#define MAX_DDE_COUNT 256
26
/* util */

/*
 * NXPRT(X) emits X only when the build defines NXDBG; otherwise it
 * expands to nothing, so X is never evaluated.
 */
#ifdef NXDBG
#define NXPRT(X)	X
#else
#define NXPRT(X)
#endif
33
/*
 * Timing helpers.  With NXTIMER defined, NX_CLK(X) emits X and the
 * time and frequency are read through __ppc_get_timebase() and
 * __ppc_get_timebase_freq().  Without it, NX_CLK(X) expands to nothing
 * and both getters evaluate to -1.
 */
#ifdef NXTIMER
#include <sys/platform/ppc.h>
#define NX_CLK(X)	X
#define nx_get_time()	__ppc_get_timebase()
#define nx_get_freq()	__ppc_get_timebase_freq()
#else
#define NX_CLK(X)
#define nx_get_time()  (-1)
#define nx_get_freq()  (-1)
#endif
44
/* NOTE(review): unused in this header; presumably a fault retry limit */
#define NX_MAX_FAULTS  500
46
/*
 * Definitions of acronyms used here. See
 * P9 NX Gzip Accelerator User's Manual for details:
 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
 *
 * adler/crc: 32-bit checksums appended to stream tail
 * ce:       completion extension
 * cpb:      coprocessor parameter block (metadata)
 * crb:      coprocessor request block (command)
 * csb:      coprocessor status block (status)
 * dht:      dynamic huffman table
 * dde:      data descriptor element (address, length)
 * ddl:      list of ddes
 * dh/fh:    dynamic and fixed huffman types
 * fc:       coprocessor function code
 * histlen:  history/dictionary length
 * history:  sliding window of up to 32KB of data
 * lzcount:  Deflate LZ symbol counts
 * rembytecnt: remaining byte count
 * sfbt:     source final block type; last block's type during decomp
 * spbc:     source processed byte count
 * subc:     source unprocessed bit count
 * tebc:     target ending bit count; valid bits in the last byte
 * tpbc:     target processed byte count
 * vas:      virtual accelerator switch; the user mode interface
 */
73
/*
 * One 16-byte, 16-byte-aligned NX quadword, viewable either as four
 * 32-bit words or as two 64-bit doublewords.
 */
union nx_qw_t {
	uint32_t word[4];
	uint64_t dword[2];
} __aligned(16);
78
/*
 * Note: NX registers with fewer than 32 bits are declared by
 * convention as uint32_t variables in unions. If *_offset and *_mask
 * are defined for a variable, then use the getnn/putnn macros to
 * conveniently access the register fields with endian conversion.
 */
85
struct nx_dde_t {
	/* Data Descriptor Element, Section 6.4 */
	union {
		/* 32-bit word holding the count field; see dde_count_mask
		 * and dde_count_offset for its position inside the word.
		 */
		uint32_t dde_count;
		/* When dde_count == 0 ddead is a pointer to a data buffer;
		 * ddebc is the buffer length bytes.
		 * When dde_count > 0 dde is an indirect dde; ddead is a
		 * pointer to a contiguous list of direct ddes; ddebc is the
		 * total length of all data pointed to by the list of direct
		 * ddes. Note that only one level of indirection is permitted.
		 * See Section 6.4 of the user manual for additional details.
		 */
	};
	uint32_t ddebc; /* dde byte count */
	uint64_t ddead; /* dde address */
} __aligned(16);
102
struct nx_csb_t {
	/* Coprocessor Status Block, Section 6.6  */

	/*
	 * The five csb_* names below all alias the same first 32-bit word;
	 * each one selects its own bit field through the matching
	 * csb_*_mask and csb_*_offset definitions.
	 */
	union {
		uint32_t csb_v;
		/* Valid bit. v must be set to 0 by the program
		 * before submitting the coprocessor command.
		 * Software can poll for the v bit
		 */

		uint32_t csb_f;
		/* 16B CSB size. Written to 0 by DMA when it writes the CPB */

		uint32_t csb_cs;
		/* cs completion sequence; unused */

		uint32_t csb_cc;
		/* cc completion code; cc != 0 exception occurred */

		uint32_t csb_ce;
		/* ce completion extension */

	};
	uint32_t tpbc;
	/* target processed byte count TPBC */

	uint64_t fsaddr;
	/* Section 6.12.1 CSB NonZero error summary.  FSA Failing storage
	 * address.  Address where error occurred. When available, written
	 * to A field of CSB
	 */
} __aligned(16);
134
struct nx_ccb_t {
	/* Coprocessor Completion Block, Section 6.7 */

	uint32_t reserved[3];
	/* Last 32-bit word of the block: cm field or raw word view */
	union {
		/* When crb.c==0 (no ccb defined) it is reserved;
		 * When crb.c==1 (ccb defined) it is cm
		 */

		uint32_t ccb_cm;
		/* Signal interrupt of crb.c==1 and cm==1 */

		uint32_t word;
		/* generic access to the 32bit word */
	};
} __aligned(16);
151
struct vas_stamped_crb_t {
	/*
	 * CRB operand of the paste coprocessor instruction is stamped
	 * in quadword 4 with the information shown here as it is written
	 * into the receive FIFO of the coprocessor.
	 *
	 * Four 32-bit words; names sharing a union alias the same word
	 * and are separated by their *_mask and *_offset definitions.
	 */

	union {
		uint32_t vas_buf_num;
		/* Verification only vas buffer number which correlates to
		 * the low order bits of the atag in the paste command
		 */

		uint32_t send_wc_id;
		/* Pointer to Send Window Context that provides for NX address
		 * translation information, such as MSR and LPCR bits, job
		 * completion interrupt RA, PSWID, and job utilization counter.
		 */

	};
	union {
		uint32_t recv_wc_id;
		/* Pointer to Receive Window Context. NX uses this to return
		 * credits to a Receive FIFO as entries are dequeued.
		 */

	};
	uint32_t reserved2;
	union {
		uint32_t vas_invalid;
		/* Invalid bit. If this bit is 1 the CRB is discarded by
		 * NX upon fetching from the receive FIFO. If this bit is 0
		 * the CRB is processed normally. The bit is stamped to 0
		 * by VAS and may be written to 1 by hypervisor while
		 * the CRB is in the receive FIFO (in memory).
		 */

	};
};
191
struct nx_stamped_fault_crb_t {
	/*
	 * A CRB that has a translation fault is stamped by NX in quadword 4
	 * and pasted to the Fault Send Window in VAS.
	 */
	uint64_t fsa;
	/* nxsf_t and nxsf_fs alias one 32-bit word; the fields are
	 * separated by nxsf_t_mask/offset and nxsf_fs_mask/offset.
	 */
	union {
		uint32_t nxsf_t;
		uint32_t nxsf_fs;
	};
	uint32_t pswid;
};
204
205union stamped_crb_t {
206	struct vas_stamped_crb_t      vas;
207	struct nx_stamped_fault_crb_t nx;
208};
209
210struct nx_gzip_cpb_t {
211	/*
212	 * Coprocessor Parameter Block In/Out are used to pass metadata
213	 * to/from accelerator.  Tables 6.5 and 6.6 of the user manual.
214	 */
215
216	/* CPBInput */
217
218	struct {
219		union {
220		union nx_qw_t qw0;
221			struct {
222				uint32_t in_adler;            /* bits 0:31  */
223				uint32_t in_crc;              /* bits 32:63 */
224				union {
225					uint32_t in_histlen;  /* bits 64:75 */
226					uint32_t in_subc;     /* bits 93:95 */
227				};
228				union {
229					/* bits 108:111 */
230					uint32_t in_sfbt;
231					/* bits 112:127 */
232					uint32_t in_rembytecnt;
233					/* bits 116:127 */
234					uint32_t in_dhtlen;
235				};
236			};
237		};
238		union {
239			union nx_qw_t  in_dht[DHTSZ];	/* qw[1:18]     */
240			char in_dht_char[DHT_MAXSZ];	/* byte access  */
241		};
242		union nx_qw_t  reserved[5];		/* qw[19:23]    */
243	};
244
245	/* CPBOutput */
246
247	volatile struct {
248		union {
249			union nx_qw_t qw24;
250			struct {
251				uint32_t out_adler;    /* bits 0:31  qw[24] */
252				uint32_t out_crc;      /* bits 32:63 qw[24] */
253				union {
254					/* bits 77:79 qw[24] */
255					uint32_t out_tebc;
256					/* bits 80:95 qw[24] */
257					uint32_t out_subc;
258				};
259				union {
260					/* bits 108:111 qw[24] */
261					uint32_t out_sfbt;
262					/* bits 112:127 qw[24] */
263					uint32_t out_rembytecnt;
264					/* bits 116:127 qw[24] */
265					uint32_t out_dhtlen;
266				};
267			};
268		};
269		union {
270			union nx_qw_t  qw25[79];        /* qw[25:103] */
271			/* qw[25] compress no lzcounts or wrap */
272			uint32_t out_spbc_comp_wrap;
273			uint32_t out_spbc_wrap;         /* qw[25] wrap */
274			/* qw[25] compress no lzcounts */
275			uint32_t out_spbc_comp;
276			 /* 286 LL and 30 D symbol counts */
277			uint32_t out_lzcount[LLSZ+DSZ];
278			struct {
279				union nx_qw_t  out_dht[DHTSZ];  /* qw[25:42] */
280				/* qw[43] decompress */
281				uint32_t out_spbc_decomp;
282			};
283		};
284		/* qw[104] compress with lzcounts */
285		uint32_t out_spbc_comp_with_count;
286	};
287} __aligned(128);
288
289struct nx_gzip_crb_t {
290	union {                   /* byte[0:3]   */
291		uint32_t gzip_fc;     /* bits[24-31] */
292	};
293	uint32_t reserved1;       /* byte[4:7]   */
294	union {
295		uint64_t csb_address; /* byte[8:15]  */
296		struct {
297			uint32_t reserved2;
298			union {
299				uint32_t crb_c;
300				/* c==0 no ccb defined */
301
302				uint32_t crb_at;
303				/* at==0 address type is ignored;
304				 * all addrs effective assumed.
305				 */
306
307			};
308		};
309	};
310	struct nx_dde_t source_dde;           /* byte[16:31] */
311	struct nx_dde_t target_dde;           /* byte[32:47] */
312	volatile struct nx_ccb_t ccb;         /* byte[48:63] */
313	volatile union {
314		/* byte[64:239] shift csb by 128 bytes out of the crb; csb was
315		 * in crb earlier; JReilly says csb written with partial inject
316		 */
317		union nx_qw_t reserved64[11];
318		union stamped_crb_t stamp;       /* byte[64:79] */
319	};
320	volatile struct nx_csb_t csb;
321} __aligned(128);
322
323struct nx_gzip_crb_cpb_t {
324	struct nx_gzip_crb_t crb;
325	struct nx_gzip_cpb_t cpb;
326} __aligned(2048);
327
328
/*
 * NX hardware convention has the msb bit on the left numbered 0.
 * The defines below have *_offset defined as the rightmost bit
 * position of a field.  x of size_mask(x) is the field width in bits.
 *
 * size_mask(x) yields an unsigned value with the low x bits set.
 * x must be in the range 0..31: the shift is done on a 32-bit
 * unsigned constant.
 */

#define size_mask(x)          ((1U<<(x))-1)
336
/*
 * Offsets and Widths within the containing 32 bits of the various NX
 * gzip hardware registers.  Use the getnn/putnn macros to access
 * these regs
 *
 * For each field REG, REG_mask is the right-justified width mask and
 * REG_offset is the msb-0 bit number of the field's rightmost bit, so
 * the field sits (31 - REG_offset) bits above the word's lsb.
 */

#define dde_count_mask        size_mask(8)
#define dde_count_offset      23

/* CSB */

#define csb_v_mask            size_mask(1)
#define csb_v_offset          0
#define csb_f_mask            size_mask(1)
#define csb_f_offset          6
#define csb_cs_mask           size_mask(8)
#define csb_cs_offset         15
#define csb_cc_mask           size_mask(8)
#define csb_cc_offset         23
#define csb_ce_mask           size_mask(8)
#define csb_ce_offset         31

/* CCB */

#define ccb_cm_mask           size_mask(3)
#define ccb_cm_offset         31

/* VAS stamped CRB fields */

#define vas_buf_num_mask      size_mask(6)
#define vas_buf_num_offset    5
#define send_wc_id_mask       size_mask(16)
#define send_wc_id_offset     31
#define recv_wc_id_mask       size_mask(16)
#define recv_wc_id_offset     31
#define vas_invalid_mask      size_mask(1)
#define vas_invalid_offset    31

/* NX stamped fault CRB fields */

#define nxsf_t_mask           size_mask(1)
#define nxsf_t_offset         23
#define nxsf_fs_mask          size_mask(8)
#define nxsf_fs_offset        31

/* CPB input */

#define in_histlen_mask       size_mask(12)
#define in_histlen_offset     11
#define in_dhtlen_mask        size_mask(12)
#define in_dhtlen_offset      31
#define in_subc_mask          size_mask(3)
#define in_subc_offset        31
#define in_sfbt_mask          size_mask(4)
#define in_sfbt_offset        15
#define in_rembytecnt_mask    size_mask(16)
#define in_rembytecnt_offset  31

/* CPB output */

#define out_tebc_mask         size_mask(3)
#define out_tebc_offset       15
#define out_subc_mask         size_mask(16)
#define out_subc_offset       31
#define out_sfbt_mask         size_mask(4)
#define out_sfbt_offset       15
#define out_rembytecnt_mask   size_mask(16)
#define out_rembytecnt_offset 31
#define out_dhtlen_mask       size_mask(12)
#define out_dhtlen_offset     31
407
/* CRB */

#define gzip_fc_mask          size_mask(8)
#define gzip_fc_offset        31
#define crb_c_mask            size_mask(1)
#define crb_c_offset          28
#define crb_at_mask           size_mask(1)
#define crb_at_offset         30
/*
 * Mask off the bottom 4 bits of the 64-bit csb_address.  The constant is
 * uint64_t rather than unsigned long so that the upper 32 address bits
 * survive on targets where long is 32 bits wide.
 */
#define csb_address_mask      (~(uint64_t)15)
417
/*
 * Access macros for the registers.  Do not access registers directly
 * because of the endian conversion.  P9 processor may run either as
 * Little or Big endian. However the NX coprocessor regs are always
 * big endian.
 * Use the 32 and 64b macros to access respective
 * register sizes.
 * Use nn forms for the register fields shorter than 32 bits.
 *
 * The plain forms take a struct lvalue ST, the "p" forms take a pointer
 * to it.  REG is a member name; the nn forms also need REG_mask and
 * REG_offset to be defined.  ST is parenthesized in every expansion, so
 * expressions such as *ptr or arr[i] are valid arguments.  The put and
 * clear forms expand ST more than once: do not pass expressions with
 * side effects.
 */

/* Read the REG bit field: host byte order, right-justified */
#define getnn(ST, REG)      ((be32toh((ST).REG) >> (31-REG##_offset)) \
				 & REG##_mask)
#define getpnn(ST, REG)     ((be32toh((ST)->REG) >> (31-REG##_offset)) \
				 & REG##_mask)
/* Read a whole 32-bit or 64-bit register in host byte order */
#define get32(ST, REG)      (be32toh((ST).REG))
#define getp32(ST, REG)     (be32toh((ST)->REG))
#define get64(ST, REG)      (be64toh((ST).REG))
#define getp64(ST, REG)     (be64toh((ST)->REG))

#define unget32(ST, REG)    (get32(ST, REG) & ~((REG##_mask) \
				<< (31-REG##_offset)))
/* get 32bits less the REG field */

#define ungetp32(ST, REG)   (getp32(ST, REG) & ~((REG##_mask) \
				<< (31-REG##_offset)))
/* get 32bits less the REG field */

/* Zero a whole object; clear_regs and clear_struct are identical */
#define clear_regs(ST)      memset((void *)(&(ST)), 0, sizeof(ST))
/* Zero the three members of a dde (count word, byte count, address) */
#define clear_dde(ST)       do { (ST).dde_count = (ST).ddebc = 0; \
				 (ST).ddead = 0; \
				} while (0)
#define clearp_dde(ST)      do { (ST)->dde_count = (ST)->ddebc = 0; \
				 (ST)->ddead = 0; \
				} while (0)
#define clear_struct(ST)    memset((void *)(&(ST)), 0, sizeof(ST))
/* Read-modify-write: replace only the REG field, keep the other bits */
#define putnn(ST, REG, X)   ((ST).REG = htobe32(unget32(ST, REG) | (((X) \
				 & REG##_mask) << (31-REG##_offset))))
#define putpnn(ST, REG, X)  ((ST)->REG = htobe32(ungetp32(ST, REG) \
				| (((X) & REG##_mask) << (31-REG##_offset))))

/* Store a whole 32-bit or 64-bit register in big-endian byte order */
#define put32(ST, REG, X)   ((ST).REG = htobe32(X))
#define putp32(ST, REG, X)  ((ST)->REG = htobe32(X))
#define put64(ST, REG, X)   ((ST).REG = htobe64(X))
#define putp64(ST, REG, X)  ((ST)->REG = htobe64(X))
461
/*
 * Completion extension ce(0) ce(1) ce(2).  Bits ce(3-7)
 * unused.  Section 6.6 Figure 6.7.
 *
 * get_csb_ce() reads the 8-bit ce field; the _ms3b forms shift by 5 to
 * read or write only its three most significant bits.  The CSB_CE_*
 * flags and csb_ce_*() predicates are expected to be applied to that
 * 3-bit value: 0x4 is ce(0), 0x2 is ce(1), 0x1 is ce(2).
 */

#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))

#define CSB_CE_PARTIAL         0x4
#define CSB_CE_TERMINATE       0x2
#define CSB_CE_TPBC_VALID      0x1

#define csb_ce_termination(X)         (!!((X) & CSB_CE_TERMINATE))
/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */

#define csb_ce_check_completion(X)    (!csb_ce_termination(X))
/* if not terminated then check full or partial completion */

#define csb_ce_partial_completion(X)  (!!((X) & CSB_CE_PARTIAL))
#define csb_ce_full_completion(X)     (!csb_ce_partial_completion(X))
#define csb_ce_tpbc_valid(X)          (!!((X) & CSB_CE_TPBC_VALID))
/* TPBC indicates successfully stored data count */

#define csb_ce_default_err(X)         csb_ce_termination(X)
/* most error CEs have CE(0)=0 and CE(1)=1 */

#define csb_ce_cc3_partial(X)         csb_ce_partial_completion(X)
/* some CC=3 are partially completed, Table 6-8 */

/*
 * True when neither the partial nor the terminate bit is set.  The
 * masking must be parenthesized: == binds tighter than &, so without
 * the inner parentheses the expression is always 0.
 */
#define csb_ce_cc64(X)                ((((X)) & (CSB_CE_PARTIAL \
					| CSB_CE_TERMINATE)) == 0)
/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
 * compress smaller than source.
 */
497
/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */

/*
 * Values for the 4-bit in_sfbt and out_sfbt fields.
 * NOTE(review): SFBT_BFINAL looks like a flag that combines with the
 * block-type values below; confirm against the tables cited above.
 */
#define SFBT_BFINAL 0x1
#define SFBT_LIT    0x4
#define SFBT_FHT    0x5
#define SFBT_DHT    0x6
#define SFBT_HDR    0x7
505
/*
 * NX gzip function codes. Table 6.2.
 * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
 * select one of the two Byte Count Limits.
 */

#define GZIP_FC_LIMIT_MASK                               0x01
#define GZIP_FC_COMPRESS_FHT                             0x00
#define GZIP_FC_COMPRESS_DHT                             0x02
#define GZIP_FC_COMPRESS_FHT_COUNT                       0x04
#define GZIP_FC_COMPRESS_DHT_COUNT                       0x06
#define GZIP_FC_COMPRESS_RESUME_FHT                      0x08
#define GZIP_FC_COMPRESS_RESUME_DHT                      0x0a
#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT                0x0c
#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT                0x0e
#define GZIP_FC_DECOMPRESS                               0x10
#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND          0x12
#define GZIP_FC_DECOMPRESS_RESUME                        0x14
#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND   0x16
#define GZIP_FC_WRAP                                     0x1e

/* Compress codes are exactly those with the 0x10 bit clear */
#define fc_is_compress(fc)  (((fc) & 0x10) == 0)
/*
 * A compress code (0x10 clear) that also has the 0x4 "count" bit set.
 * Written as one masked compare so that fc is evaluated only once.
 */
#define fc_has_count(fc)    (((fc) & (0x10 | 0x4)) == 0x4)
529
/* CSB.CC Error codes */

/*
 * Values of the 8-bit csb_cc completion code; 0 means no exception.
 * ERR_NX_END (256) lies outside the 8-bit range.
 * NOTE(review): presumably an end-of-table sentinel; confirm.
 */
#define ERR_NX_OK             0
#define ERR_NX_ALIGNMENT      1
#define ERR_NX_OPOVERLAP      2
#define ERR_NX_DATA_LENGTH    3
#define ERR_NX_TRANSLATION    5
#define ERR_NX_PROTECTION     6
#define ERR_NX_EXTERNAL_UE7   7
#define ERR_NX_INVALID_OP     8
#define ERR_NX_PRIVILEGE      9
#define ERR_NX_INTERNAL_UE   10
#define ERR_NX_EXTERN_UE_WR  12
#define ERR_NX_TARGET_SPACE  13
#define ERR_NX_EXCESSIVE_DDE 14
#define ERR_NX_TRANSL_WR     15
#define ERR_NX_PROTECT_WR    16
#define ERR_NX_SUBFUNCTION   17
#define ERR_NX_FUNC_ABORT    18
#define ERR_NX_BYTE_MAX      19
#define ERR_NX_CORRUPT_CRB   20
#define ERR_NX_INVALID_CRB   21
#define ERR_NX_INVALID_DDE   30
#define ERR_NX_SEGMENTED_DDL 31
#define ERR_NX_DDE_OVERFLOW  33
#define ERR_NX_TPBC_GT_SPBC  64
#define ERR_NX_MISSING_CODE  66
#define ERR_NX_INVALID_DIST  67
#define ERR_NX_INVALID_DHT   68
#define ERR_NX_EXTERNAL_UE90 90
#define ERR_NX_WDOG_TIMER   224
#define ERR_NX_AT_FAULT     250
#define ERR_NX_INTR_SERVER  252
#define ERR_NX_UE253        253
#define ERR_NX_NO_HW        254
#define ERR_NX_HUNG_OP      255
#define ERR_NX_END          256
567
/* initial values for non-resume operations */
#define INIT_CRC   0  /* crc32(0L, Z_NULL, 0) */
#define INIT_ADLER 1  /* adler32(0L, Z_NULL, 0)  adler is initialized to 1 */
571
572/* prototypes */
573int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
574
575extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
576extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
577
578/* caller supplies a print buffer 4*sizeof(crb) */
579
580char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
581char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
582char *nx_prt_hex(void *cp, int sz, char *prbuf);
583char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
584char *nx_strerror(int e);
585
/*
 * Entry points declared only when the build defines NX_SIM.
 * NOTE(review): presumably a software simulator of the accelerator;
 * the definitions are not visible from this header.
 */
#ifdef NX_SIM
#include <stdio.h>
int nx_sim_init(void *ctx);
int nx_sim_end(void *ctx);
int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
#endif /* NX_SIM */
592
/* Deflate stream manipulation */

/* Set or clear bit 0 of the lvalue x (the final-block bit) */
#define set_final_bit(x)	((x) |= (unsigned char)1)
#define clr_final_bit(x)	((x) &= ~(unsigned char)1)

/*
 * Write two bytes at p: the first is 2 with bit 0 taken from b, the
 * second is 0.  p is expanded twice, so it must have no side effects.
 */
#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
					} while (0)
/* append 10 bits 0000001b 00...... ;
 * assumes appending starts on a byte boundary; b is the final bit.
 */
603
604
605#ifdef NX_842
606
607/* 842 Engine */
608
609struct nx_eft_crb_t {
610	union {                   /* byte[0:3]   */
611		uint32_t eft_fc;      /* bits[29-31] */
612	};
613	uint32_t reserved1;       /* byte[4:7]   */
614	union {
615		uint64_t csb_address; /* byte[8:15]  */
616		struct {
617			uint32_t reserved2;
618			union {
619				uint32_t crb_c;
620				/* c==0 no ccb defined */
621
622				uint32_t crb_at;
623				/* at==0 address type is ignored;
624				 * all addrs effective assumed.
625				 */
626
627			};
628		};
629	};
630	struct nx_dde_t source_dde;           /* byte[16:31] */
631	struct nx_dde_t target_dde;           /* byte[32:47] */
632	struct nx_ccb_t ccb;                  /* byte[48:63] */
633	union {
634		union nx_qw_t reserved64[3];     /* byte[64:96] */
635	};
636	struct nx_csb_t csb;
637} __aligned(128);
638
/* 842 CRB */

#define EFT_FC_MASK                 size_mask(3)
#define EFT_FC_OFFSET               31
/*
 * Lower-case aliases: getnn/putnn paste REG_mask and REG_offset from
 * the member name, so the eft_fc member needs these spellings to be
 * usable with them.
 */
#define eft_fc_mask                 EFT_FC_MASK
#define eft_fc_offset               EFT_FC_OFFSET
#define EFT_FC_COMPRESS             0x0
#define EFT_FC_COMPRESS_WITH_CRC    0x1
#define EFT_FC_DECOMPRESS           0x2
#define EFT_FC_DECOMPRESS_WITH_CRC  0x3
#define EFT_FC_BLK_DATA_MOVE        0x4
648#endif /* NX_842 */
649
650#endif /* _NXU_H */
651