1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * IBM Accelerator Family 'GenWQE'
4 *
5 * (C) Copyright IBM Corp. 2013
6 *
7 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
8 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
9 * Author: Michael Jung <mijung@gmx.net>
10 * Author: Michael Ruettger <michael@ibmra.de>
11 */
12
13/*
 * Miscellaneous functionality used in the other GenWQE driver parts.
15 */
16
17#include <linux/kernel.h>
18#include <linux/sched.h>
19#include <linux/vmalloc.h>
20#include <linux/page-flags.h>
21#include <linux/scatterlist.h>
22#include <linux/hugetlb.h>
23#include <linux/iommu.h>
24#include <linux/pci.h>
25#include <linux/dma-mapping.h>
26#include <linux/ctype.h>
27#include <linux/module.h>
28#include <linux/platform_device.h>
29#include <linux/delay.h>
30#include <linux/pgtable.h>
31
32#include "genwqe_driver.h"
33#include "card_base.h"
34#include "card_ddcb.h"
35
36/**
37 * __genwqe_writeq() - Write 64-bit register
38 * @cd:	        genwqe device descriptor
39 * @byte_offs:  byte offset within BAR
40 * @val:        64-bit value
41 *
42 * Return: 0 if success; < 0 if error
43 */
44int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
45{
46	struct pci_dev *pci_dev = cd->pci_dev;
47
48	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
49		return -EIO;
50
51	if (cd->mmio == NULL)
52		return -EIO;
53
54	if (pci_channel_offline(pci_dev))
55		return -EIO;
56
57	__raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
58	return 0;
59}
60
61/**
62 * __genwqe_readq() - Read 64-bit register
63 * @cd:         genwqe device descriptor
64 * @byte_offs:  offset within BAR
65 *
66 * Return: value from register
67 */
68u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
69{
70	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
71		return 0xffffffffffffffffull;
72
73	if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) &&
74	    (byte_offs == IO_SLC_CFGREG_GFIR))
75		return 0x000000000000ffffull;
76
77	if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) &&
78	    (byte_offs == IO_SLC_CFGREG_GFIR))
79		return 0x00000000ffff0000ull;
80
81	if (cd->mmio == NULL)
82		return 0xffffffffffffffffull;
83
84	return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs));
85}
86
87/**
88 * __genwqe_writel() - Write 32-bit register
89 * @cd:	        genwqe device descriptor
90 * @byte_offs:  byte offset within BAR
91 * @val:        32-bit value
92 *
93 * Return: 0 if success; < 0 if error
94 */
95int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
96{
97	struct pci_dev *pci_dev = cd->pci_dev;
98
99	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
100		return -EIO;
101
102	if (cd->mmio == NULL)
103		return -EIO;
104
105	if (pci_channel_offline(pci_dev))
106		return -EIO;
107
108	__raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
109	return 0;
110}
111
112/**
113 * __genwqe_readl() - Read 32-bit register
114 * @cd:         genwqe device descriptor
115 * @byte_offs:  offset within BAR
116 *
117 * Return: Value from register
118 */
119u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs)
120{
121	if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
122		return 0xffffffff;
123
124	if (cd->mmio == NULL)
125		return 0xffffffff;
126
127	return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs));
128}
129
130/**
131 * genwqe_read_app_id() - Extract app_id
132 * @cd:	        genwqe device descriptor
133 * @app_name:   carrier used to pass-back name
134 * @len:        length of data for name
135 *
136 * app_unitcfg need to be filled with valid data first
137 */
138int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len)
139{
140	int i, j;
141	u32 app_id = (u32)cd->app_unitcfg;
142
143	memset(app_name, 0, len);
144	for (i = 0, j = 0; j < min(len, 4); j++) {
145		char ch = (char)((app_id >> (24 - j*8)) & 0xff);
146
147		if (ch == ' ')
148			continue;
149		app_name[i++] = isprint(ch) ? ch : 'X';
150	}
151	return i;
152}
153
154#define CRC32_POLYNOMIAL	0x20044009
155static u32 crc32_tab[256];	/* crc32 lookup table */
156
157/**
158 * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations
159 *
160 * Existing kernel functions seem to use a different polynom,
161 * therefore we could not use them here.
162 *
163 * Genwqe's Polynomial = 0x20044009
164 */
165void genwqe_init_crc32(void)
166{
167	int i, j;
168	u32 crc;
169
170	for (i = 0;  i < 256;  i++) {
171		crc = i << 24;
172		for (j = 0;  j < 8;  j++) {
173			if (crc & 0x80000000)
174				crc = (crc << 1) ^ CRC32_POLYNOMIAL;
175			else
176				crc = (crc << 1);
177		}
178		crc32_tab[i] = crc;
179	}
180}
181
182/**
183 * genwqe_crc32() - Generate 32-bit crc as required for DDCBs
184 * @buff:       pointer to data buffer
185 * @len:        length of data for calculation
186 * @init:       initial crc (0xffffffff at start)
187 *
188 * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009)
189 *
190 * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should
191 * result in a crc32 of 0xf33cb7d3.
192 *
193 * The existing kernel crc functions did not cover this polynom yet.
194 *
195 * Return: crc32 checksum.
196 */
197u32 genwqe_crc32(u8 *buff, size_t len, u32 init)
198{
199	int i;
200	u32 crc;
201
202	crc = init;
203	while (len--) {
204		i = ((crc >> 24) ^ *buff++) & 0xFF;
205		crc = (crc << 8) ^ crc32_tab[i];
206	}
207	return crc;
208}
209
210void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size,
211			       dma_addr_t *dma_handle)
212{
213	if (get_order(size) > MAX_PAGE_ORDER)
214		return NULL;
215
216	return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle,
217				  GFP_KERNEL);
218}
219
220void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size,
221			     void *vaddr, dma_addr_t dma_handle)
222{
223	if (vaddr == NULL)
224		return;
225
226	dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle);
227}
228
229static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list,
230			      int num_pages)
231{
232	int i;
233	struct pci_dev *pci_dev = cd->pci_dev;
234
235	for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) {
236		dma_unmap_page(&pci_dev->dev, dma_list[i], PAGE_SIZE,
237			       DMA_BIDIRECTIONAL);
238		dma_list[i] = 0x0;
239	}
240}
241
242static int genwqe_map_pages(struct genwqe_dev *cd,
243			   struct page **page_list, int num_pages,
244			   dma_addr_t *dma_list)
245{
246	int i;
247	struct pci_dev *pci_dev = cd->pci_dev;
248
249	/* establish DMA mapping for requested pages */
250	for (i = 0; i < num_pages; i++) {
251		dma_addr_t daddr;
252
253		dma_list[i] = 0x0;
254		daddr = dma_map_page(&pci_dev->dev, page_list[i],
255				     0,	 /* map_offs */
256				     PAGE_SIZE,
257				     DMA_BIDIRECTIONAL);  /* FIXME rd/rw */
258
259		if (dma_mapping_error(&pci_dev->dev, daddr)) {
260			dev_err(&pci_dev->dev,
261				"[%s] err: no dma addr daddr=%016llx!\n",
262				__func__, (long long)daddr);
263			goto err;
264		}
265
266		dma_list[i] = daddr;
267	}
268	return 0;
269
270 err:
271	genwqe_unmap_pages(cd, dma_list, num_pages);
272	return -EIO;
273}
274
275static int genwqe_sgl_size(int num_pages)
276{
277	int len, num_tlb = num_pages / 7;
278
279	len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1);
280	return roundup(len, PAGE_SIZE);
281}
282
283/*
284 * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages
285 *
286 * Allocates memory for sgl and overlapping pages. Pages which might
287 * overlap other user-space memory blocks are being cached for DMAs,
288 * such that we do not run into syncronization issues. Data is copied
289 * from user-space into the cached pages.
290 */
291int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
292			  void __user *user_addr, size_t user_size, int write)
293{
294	int ret = -ENOMEM;
295	struct pci_dev *pci_dev = cd->pci_dev;
296
297	sgl->fpage_offs = offset_in_page((unsigned long)user_addr);
298	sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size);
299	sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE);
300	sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE;
301
302	dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n",
303		__func__, user_addr, user_size, sgl->nr_pages,
304		sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size);
305
306	sgl->user_addr = user_addr;
307	sgl->user_size = user_size;
308	sgl->write = write;
309	sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages);
310
311	if (get_order(sgl->sgl_size) > MAX_PAGE_ORDER) {
312		dev_err(&pci_dev->dev,
313			"[%s] err: too much memory requested!\n", __func__);
314		return ret;
315	}
316
317	sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size,
318					     &sgl->sgl_dma_addr);
319	if (sgl->sgl == NULL) {
320		dev_err(&pci_dev->dev,
321			"[%s] err: no memory available!\n", __func__);
322		return ret;
323	}
324
325	/* Only use buffering on incomplete pages */
326	if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) {
327		sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
328						       &sgl->fpage_dma_addr);
329		if (sgl->fpage == NULL)
330			goto err_out;
331
332		/* Sync with user memory */
333		if (copy_from_user(sgl->fpage + sgl->fpage_offs,
334				   user_addr, sgl->fpage_size)) {
335			ret = -EFAULT;
336			goto err_out;
337		}
338	}
339	if (sgl->lpage_size != 0) {
340		sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE,
341						       &sgl->lpage_dma_addr);
342		if (sgl->lpage == NULL)
343			goto err_out1;
344
345		/* Sync with user memory */
346		if (copy_from_user(sgl->lpage, user_addr + user_size -
347				   sgl->lpage_size, sgl->lpage_size)) {
348			ret = -EFAULT;
349			goto err_out2;
350		}
351	}
352	return 0;
353
354 err_out2:
355	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
356				 sgl->lpage_dma_addr);
357	sgl->lpage = NULL;
358	sgl->lpage_dma_addr = 0;
359 err_out1:
360	__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
361				 sgl->fpage_dma_addr);
362	sgl->fpage = NULL;
363	sgl->fpage_dma_addr = 0;
364 err_out:
365	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
366				 sgl->sgl_dma_addr);
367	sgl->sgl = NULL;
368	sgl->sgl_dma_addr = 0;
369	sgl->sgl_size = 0;
370
371	return ret;
372}
373
/*
 * genwqe_setup_sgl() - Fill the scatter gather list for a user buffer
 *
 * The list is written in sets of 8 entries. The first entry of each set
 * is a chaining entry (SG_CHAINED, len 128) whose target is
 * sgl->sgl_dma_addr + dma_offs, with dma_offs advancing by 128 per set;
 * the remaining 7 entries are data entries (SG_DATA).
 *
 * The DMA address of a page is taken from the cached first page
 * (sgl->fpage) for page 0, from the cached last page (sgl->lpage) for
 * the final page, and from dma_list[] otherwise. If a page starts
 * exactly where the previous data entry ended, no new entry is written;
 * the length of the previous entry is enlarged instead.
 *
 * Once all pages are processed, the chaining entry of the set in use is
 * dropped by moving its data entries one position up, and the list is
 * terminated with an SG_END_LIST entry.
 *
 * cd is not referenced. Always returns 0.
 */
int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl,
		     dma_addr_t *dma_list)
{
	int i = 0, j = 0, p;
	unsigned long dma_offs, map_offs;
	dma_addr_t prev_daddr = 0;
	struct sg_entry *s, *last_s = NULL;
	size_t size = sgl->user_size;	/* bytes not yet described */

	dma_offs = 128;		/* next block if needed/dma_offset */
	map_offs = sgl->fpage_offs; /* offset in first page */

	s = &sgl->sgl[0];	/* first set of 8 entries */
	p = 0;			/* page */
	while (p < sgl->nr_pages) {
		dma_addr_t daddr;
		unsigned int size_to_map;

		/* always write the chaining entry, cleanup is done later */
		j = 0;
		s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs);
		s[j].len	 = cpu_to_be32(128);
		s[j].flags	 = cpu_to_be32(SG_CHAINED);
		j++;

		/* entries 1..7 of this set carry data */
		while (j < 8) {
			/* DMA mapping for requested page, offs, size */
			size_to_map = min(size, PAGE_SIZE - map_offs);

			if ((p == 0) && (sgl->fpage != NULL)) {
				daddr = sgl->fpage_dma_addr + map_offs;

			} else if ((p == sgl->nr_pages - 1) &&
				   (sgl->lpage != NULL)) {
				daddr = sgl->lpage_dma_addr;
			} else {
				daddr = dma_list[p] + map_offs;
			}

			size -= size_to_map;
			map_offs = 0;	/* only the first page has an offset */

			/* page continues the previous entry: merge it */
			if (prev_daddr == daddr) {
				u32 prev_len = be32_to_cpu(last_s->len);

				/* pr_info("daddr combining: "
					"%016llx/%08x -> %016llx\n",
					prev_daddr, prev_len, daddr); */

				last_s->len = cpu_to_be32(prev_len +
							  size_to_map);

				p++; /* process next page */
				if (p == sgl->nr_pages)
					goto fixup;  /* nothing to do */

				prev_daddr = daddr + size_to_map;
				continue;	/* j unchanged: no entry used */
			}

			/* start new entry */
			s[j].target_addr = cpu_to_be64(daddr);
			s[j].len	 = cpu_to_be32(size_to_map);
			s[j].flags	 = cpu_to_be32(SG_DATA);
			prev_daddr = daddr + size_to_map;
			last_s = &s[j];
			j++;

			p++;	/* process next page */
			if (p == sgl->nr_pages)
				goto fixup;  /* nothing to do */
		}
		dma_offs += 128;
		s += 8;		/* continue 8 elements further */
	}
 fixup:
	/*
	 * j == 1 means the current set holds nothing but its chaining
	 * entry, because the last page was merged into an entry of the
	 * previous set. That previous set then becomes the final one.
	 */
	if (j == 1) {		/* combining happened on last entry! */
		s -= 8;		/* full shift needed on previous sgl block */
		j =  7;		/* shift all elements */
	}

	/* overwrite the chaining entry of the final set */
	for (i = 0; i < j; i++)	/* move elements 1 up */
		s[i] = s[i + 1];

	/* terminate the list right behind the last data entry */
	s[i].target_addr = cpu_to_be64(0);
	s[i].len	 = cpu_to_be32(0);
	s[i].flags	 = cpu_to_be32(SG_END_LIST);
	return 0;
}
463
464/**
465 * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages
466 * @cd:	        genwqe device descriptor
467 * @sgl:        scatter gather list describing user-space memory
468 *
469 * After the DMA transfer has been completed we free the memory for
470 * the sgl and the cached pages. Data is being transferred from cached
471 * pages into user-space buffers.
472 */
473int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl)
474{
475	int rc = 0;
476	size_t offset;
477	unsigned long res;
478	struct pci_dev *pci_dev = cd->pci_dev;
479
480	if (sgl->fpage) {
481		if (sgl->write) {
482			res = copy_to_user(sgl->user_addr,
483				sgl->fpage + sgl->fpage_offs, sgl->fpage_size);
484			if (res) {
485				dev_err(&pci_dev->dev,
486					"[%s] err: copying fpage! (res=%lu)\n",
487					__func__, res);
488				rc = -EFAULT;
489			}
490		}
491		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage,
492					 sgl->fpage_dma_addr);
493		sgl->fpage = NULL;
494		sgl->fpage_dma_addr = 0;
495	}
496	if (sgl->lpage) {
497		if (sgl->write) {
498			offset = sgl->user_size - sgl->lpage_size;
499			res = copy_to_user(sgl->user_addr + offset, sgl->lpage,
500					   sgl->lpage_size);
501			if (res) {
502				dev_err(&pci_dev->dev,
503					"[%s] err: copying lpage! (res=%lu)\n",
504					__func__, res);
505				rc = -EFAULT;
506			}
507		}
508		__genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage,
509					 sgl->lpage_dma_addr);
510		sgl->lpage = NULL;
511		sgl->lpage_dma_addr = 0;
512	}
513	__genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl,
514				 sgl->sgl_dma_addr);
515
516	sgl->sgl = NULL;
517	sgl->sgl_dma_addr = 0x0;
518	sgl->sgl_size = 0;
519	return rc;
520}
521
522/**
523 * genwqe_user_vmap() - Map user-space memory to virtual kernel memory
524 * @cd:         pointer to genwqe device
525 * @m:          mapping params
526 * @uaddr:      user virtual address
527 * @size:       size of memory to be mapped
528 *
529 * We need to think about how we could speed this up. Of course it is
530 * not a good idea to do this over and over again, like we are
531 * currently doing it. Nevertheless, I am curious where on the path
532 * the performance is spend. Most probably within the memory
533 * allocation functions, but maybe also in the DMA mapping code.
534 *
535 * Restrictions: The maximum size of the possible mapping currently depends
536 *               on the amount of memory we can get using kzalloc() for the
537 *               page_list and pci_alloc_consistent for the sg_list.
538 *               The sg_list is currently itself not scattered, which could
539 *               be fixed with some effort. The page_list must be split into
540 *               PAGE_SIZE chunks too. All that will make the complicated
541 *               code more complicated.
542 *
543 * Return: 0 if success
544 */
545int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr,
546		     unsigned long size)
547{
548	int rc = -EINVAL;
549	unsigned long data, offs;
550	struct pci_dev *pci_dev = cd->pci_dev;
551
552	if ((uaddr == NULL) || (size == 0)) {
553		m->size = 0;	/* mark unused and not added */
554		return -EINVAL;
555	}
556	m->u_vaddr = uaddr;
557	m->size    = size;
558
559	/* determine space needed for page_list. */
560	data = (unsigned long)uaddr;
561	offs = offset_in_page(data);
562	if (size > ULONG_MAX - PAGE_SIZE - offs) {
563		m->size = 0;	/* mark unused and not added */
564		return -EINVAL;
565	}
566	m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE);
567
568	m->page_list = kcalloc(m->nr_pages,
569			       sizeof(struct page *) + sizeof(dma_addr_t),
570			       GFP_KERNEL);
571	if (!m->page_list) {
572		dev_err(&pci_dev->dev, "err: alloc page_list failed\n");
573		m->nr_pages = 0;
574		m->u_vaddr = NULL;
575		m->size = 0;	/* mark unused and not added */
576		return -ENOMEM;
577	}
578	m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages);
579
580	/* pin user pages in memory */
581	rc = pin_user_pages_fast(data & PAGE_MASK, /* page aligned addr */
582				 m->nr_pages,
583				 m->write ? FOLL_WRITE : 0,	/* readable/writable */
584				 m->page_list);	/* ptrs to pages */
585	if (rc < 0)
586		goto fail_pin_user_pages;
587
588	/* assumption: pin_user_pages can be killed by signals. */
589	if (rc < m->nr_pages) {
590		unpin_user_pages_dirty_lock(m->page_list, rc, m->write);
591		rc = -EFAULT;
592		goto fail_pin_user_pages;
593	}
594
595	rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list);
596	if (rc != 0)
597		goto fail_free_user_pages;
598
599	return 0;
600
601 fail_free_user_pages:
602	unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, m->write);
603
604 fail_pin_user_pages:
605	kfree(m->page_list);
606	m->page_list = NULL;
607	m->dma_list = NULL;
608	m->nr_pages = 0;
609	m->u_vaddr = NULL;
610	m->size = 0;		/* mark unused and not added */
611	return rc;
612}
613
614/**
615 * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel
616 *                        memory
617 * @cd:         pointer to genwqe device
618 * @m:          mapping params
619 */
620int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m)
621{
622	struct pci_dev *pci_dev = cd->pci_dev;
623
624	if (!dma_mapping_used(m)) {
625		dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n",
626			__func__, m);
627		return -EINVAL;
628	}
629
630	if (m->dma_list)
631		genwqe_unmap_pages(cd, m->dma_list, m->nr_pages);
632
633	if (m->page_list) {
634		unpin_user_pages_dirty_lock(m->page_list, m->nr_pages,
635					    m->write);
636		kfree(m->page_list);
637		m->page_list = NULL;
638		m->dma_list = NULL;
639		m->nr_pages = 0;
640	}
641
642	m->u_vaddr = NULL;
643	m->size = 0;		/* mark as unused and not added */
644	return 0;
645}
646
647/**
648 * genwqe_card_type() - Get chip type SLU Configuration Register
649 * @cd:         pointer to the genwqe device descriptor
650 * Return: 0: Altera Stratix-IV 230
651 *         1: Altera Stratix-IV 530
652 *         2: Altera Stratix-V A4
653 *         3: Altera Stratix-V A7
654 */
655u8 genwqe_card_type(struct genwqe_dev *cd)
656{
657	u64 card_type = cd->slu_unitcfg;
658
659	return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20);
660}
661
662/**
663 * genwqe_card_reset() - Reset the card
664 * @cd:         pointer to the genwqe device descriptor
665 */
666int genwqe_card_reset(struct genwqe_dev *cd)
667{
668	u64 softrst;
669	struct pci_dev *pci_dev = cd->pci_dev;
670
671	if (!genwqe_is_privileged(cd))
672		return -ENODEV;
673
674	/* new SL */
675	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull);
676	msleep(1000);
677	__genwqe_readq(cd, IO_HSU_FIR_CLR);
678	__genwqe_readq(cd, IO_APP_FIR_CLR);
679	__genwqe_readq(cd, IO_SLU_FIR_CLR);
680
681	/*
682	 * Read-modify-write to preserve the stealth bits
683	 *
684	 * For SL >= 039, Stealth WE bit allows removing
685	 * the read-modify-wrote.
686	 * r-m-w may require a mask 0x3C to avoid hitting hard
687	 * reset again for error reset (should be 0, chicken).
688	 */
689	softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull;
690	__genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull);
691
692	/* give ERRORRESET some time to finish */
693	msleep(50);
694
695	if (genwqe_need_err_masking(cd)) {
696		dev_info(&pci_dev->dev,
697			 "[%s] masking errors for old bitstreams\n", __func__);
698		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
699	}
700	return 0;
701}
702
703int genwqe_read_softreset(struct genwqe_dev *cd)
704{
705	u64 bitstream;
706
707	if (!genwqe_is_privileged(cd))
708		return -ENODEV;
709
710	bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1;
711	cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull;
712	return 0;
713}
714
715/**
716 * genwqe_set_interrupt_capability() - Configure MSI capability structure
717 * @cd:         pointer to the device
718 * @count:      number of vectors to allocate
719 * Return: 0 if no error
720 */
721int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count)
722{
723	int rc;
724
725	rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI);
726	if (rc < 0)
727		return rc;
728	return 0;
729}
730
731/**
732 * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability()
733 * @cd:         pointer to the device
734 */
735void genwqe_reset_interrupt_capability(struct genwqe_dev *cd)
736{
737	pci_free_irq_vectors(cd->pci_dev);
738}
739
740/**
741 * set_reg_idx() - Fill array with data. Ignore illegal offsets.
742 * @cd:         card device
743 * @r:          debug register array
744 * @i:          index to desired entry
745 * @m:          maximum possible entries
746 * @addr:       addr which is read
747 * @idx:        index in debug array
748 * @val:        read value
749 */
750static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r,
751		       unsigned int *i, unsigned int m, u32 addr, u32 idx,
752		       u64 val)
753{
754	if (WARN_ON_ONCE(*i >= m))
755		return -EFAULT;
756
757	r[*i].addr = addr;
758	r[*i].idx = idx;
759	r[*i].val = val;
760	++*i;
761	return 0;
762}
763
764static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r,
765		   unsigned int *i, unsigned int m, u32 addr, u64 val)
766{
767	return set_reg_idx(cd, r, i, m, addr, 0, val);
768}
769
770int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs,
771			 unsigned int max_regs, int all)
772{
773	unsigned int i, j, idx = 0;
774	u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr;
775	u64 gfir, sluid, appid, ufir, ufec, sfir, sfec;
776
777	/* Global FIR */
778	gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
779	set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir);
780
781	/* UnitCfg for SLU */
782	sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */
783	set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid);
784
785	/* UnitCfg for APP */
786	appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */
787	set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid);
788
789	/* Check all chip Units */
790	for (i = 0; i < GENWQE_MAX_UNITS; i++) {
791
792		/* Unit FIR */
793		ufir_addr = (i << 24) | 0x008;
794		ufir = __genwqe_readq(cd, ufir_addr);
795		set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir);
796
797		/* Unit FEC */
798		ufec_addr = (i << 24) | 0x018;
799		ufec = __genwqe_readq(cd, ufec_addr);
800		set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec);
801
802		for (j = 0; j < 64; j++) {
803			/* wherever there is a primary 1, read the 2ndary */
804			if (!all && (!(ufir & (1ull << j))))
805				continue;
806
807			sfir_addr = (i << 24) | (0x100 + 8 * j);
808			sfir = __genwqe_readq(cd, sfir_addr);
809			set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir);
810
811			sfec_addr = (i << 24) | (0x300 + 8 * j);
812			sfec = __genwqe_readq(cd, sfec_addr);
813			set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec);
814		}
815	}
816
817	/* fill with invalid data until end */
818	for (i = idx; i < max_regs; i++) {
819		regs[i].addr = 0xffffffff;
820		regs[i].val = 0xffffffffffffffffull;
821	}
822	return idx;
823}
824
825/**
826 * genwqe_ffdc_buff_size() - Calculates the number of dump registers
827 * @cd:	        genwqe device descriptor
828 * @uid:	unit ID
829 */
830int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid)
831{
832	int entries = 0, ring, traps, traces, trace_entries;
833	u32 eevptr_addr, l_addr, d_len, d_type;
834	u64 eevptr, val, addr;
835
836	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
837	eevptr = __genwqe_readq(cd, eevptr_addr);
838
839	if ((eevptr != 0x0) && (eevptr != -1ull)) {
840		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
841
842		while (1) {
843			val = __genwqe_readq(cd, l_addr);
844
845			if ((val == 0x0) || (val == -1ull))
846				break;
847
848			/* 38:24 */
849			d_len  = (val & 0x0000007fff000000ull) >> 24;
850
851			/* 39 */
852			d_type = (val & 0x0000008000000000ull) >> 36;
853
854			if (d_type) {	/* repeat */
855				entries += d_len;
856			} else {	/* size in bytes! */
857				entries += d_len >> 3;
858			}
859
860			l_addr += 8;
861		}
862	}
863
864	for (ring = 0; ring < 8; ring++) {
865		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
866		val = __genwqe_readq(cd, addr);
867
868		if ((val == 0x0ull) || (val == -1ull))
869			continue;
870
871		traps = (val >> 24) & 0xff;
872		traces = (val >> 16) & 0xff;
873		trace_entries = val & 0xffff;
874
875		entries += traps + (traces * trace_entries);
876	}
877	return entries;
878}
879
880/**
881 * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure
882 * @cd:	        genwqe device descriptor
883 * @uid:	unit ID
884 * @regs:       register information
885 * @max_regs:   number of register entries
886 */
887int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid,
888			  struct genwqe_reg *regs, unsigned int max_regs)
889{
890	int i, traps, traces, trace, trace_entries, trace_entry, ring;
891	unsigned int idx = 0;
892	u32 eevptr_addr, l_addr, d_addr, d_len, d_type;
893	u64 eevptr, e, val, addr;
894
895	eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER;
896	eevptr = __genwqe_readq(cd, eevptr_addr);
897
898	if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) {
899		l_addr = GENWQE_UID_OFFS(uid) | eevptr;
900		while (1) {
901			e = __genwqe_readq(cd, l_addr);
902			if ((e == 0x0) || (e == 0xffffffffffffffffull))
903				break;
904
905			d_addr = (e & 0x0000000000ffffffull);	    /* 23:0 */
906			d_len  = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */
907			d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */
908			d_addr |= GENWQE_UID_OFFS(uid);
909
910			if (d_type) {
911				for (i = 0; i < (int)d_len; i++) {
912					val = __genwqe_readq(cd, d_addr);
913					set_reg_idx(cd, regs, &idx, max_regs,
914						    d_addr, i, val);
915				}
916			} else {
917				d_len >>= 3; /* Size in bytes! */
918				for (i = 0; i < (int)d_len; i++, d_addr += 8) {
919					val = __genwqe_readq(cd, d_addr);
920					set_reg_idx(cd, regs, &idx, max_regs,
921						    d_addr, 0, val);
922				}
923			}
924			l_addr += 8;
925		}
926	}
927
928	/*
929	 * To save time, there are only 6 traces poplulated on Uid=2,
930	 * Ring=1. each with iters=512.
931	 */
932	for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds,
933					      2...7 are ASI rings */
934		addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring);
935		val = __genwqe_readq(cd, addr);
936
937		if ((val == 0x0ull) || (val == -1ull))
938			continue;
939
940		traps = (val >> 24) & 0xff;	/* Number of Traps	*/
941		traces = (val >> 16) & 0xff;	/* Number of Traces	*/
942		trace_entries = val & 0xffff;	/* Entries per trace	*/
943
944		/* Note: This is a combined loop that dumps both the traps */
945		/* (for the trace == 0 case) as well as the traces 1 to    */
946		/* 'traces'.						   */
947		for (trace = 0; trace <= traces; trace++) {
948			u32 diag_sel =
949				GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace);
950
951			addr = (GENWQE_UID_OFFS(uid) |
952				IO_EXTENDED_DIAG_SELECTOR);
953			__genwqe_writeq(cd, addr, diag_sel);
954
955			for (trace_entry = 0;
956			     trace_entry < (trace ? trace_entries : traps);
957			     trace_entry++) {
958				addr = (GENWQE_UID_OFFS(uid) |
959					IO_EXTENDED_DIAG_READ_MBX);
960				val = __genwqe_readq(cd, addr);
961				set_reg_idx(cd, regs, &idx, max_regs, addr,
962					    (diag_sel<<16) | trace_entry, val);
963			}
964		}
965	}
966	return 0;
967}
968
969/**
970 * genwqe_write_vreg() - Write register in virtual window
971 * @cd:	        genwqe device descriptor
972 * @reg:	register (byte) offset within BAR
973 * @val:	value to write
974 * @func:	PCI virtual function
975 *
976 * Note, these registers are only accessible to the PF through the
977 * VF-window. It is not intended for the VF to access.
978 */
979int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func)
980{
981	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
982	__genwqe_writeq(cd, reg, val);
983	return 0;
984}
985
986/**
987 * genwqe_read_vreg() - Read register in virtual window
988 * @cd:	        genwqe device descriptor
989 * @reg:	register (byte) offset within BAR
990 * @func:	PCI virtual function
991 *
992 * Note, these registers are only accessible to the PF through the
993 * VF-window. It is not intended for the VF to access.
994 */
995u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func)
996{
997	__genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf);
998	return __genwqe_readq(cd, reg);
999}
1000
1001/**
1002 * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card
1003 * @cd:	        genwqe device descriptor
1004 *
1005 * Note: From a design perspective it turned out to be a bad idea to
1006 * use codes here to specifiy the frequency/speed values. An old
1007 * driver cannot understand new codes and is therefore always a
1008 * problem. Better is to measure out the value or put the
1009 * speed/frequency directly into a register which is always a valid
1010 * value for old as well as for new software.
1011 *
1012 * Return: Card clock in MHz
1013 */
1014int genwqe_base_clock_frequency(struct genwqe_dev *cd)
1015{
1016	u16 speed;		/*         MHz  MHz  MHz  MHz */
1017	static const int speed_grade[] = { 250, 200, 166, 175 };
1018
1019	speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full);
1020	if (speed >= ARRAY_SIZE(speed_grade))
1021		return 0;	/* illegal value */
1022
1023	return speed_grade[speed];
1024}
1025
1026/**
1027 * genwqe_stop_traps() - Stop traps
1028 * @cd:	        genwqe device descriptor
1029 *
1030 * Before reading out the analysis data, we need to stop the traps.
1031 */
1032void genwqe_stop_traps(struct genwqe_dev *cd)
1033{
1034	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull);
1035}
1036
1037/**
1038 * genwqe_start_traps() - Start traps
1039 * @cd:	        genwqe device descriptor
1040 *
1041 * After having read the data, we can/must enable the traps again.
1042 */
1043void genwqe_start_traps(struct genwqe_dev *cd)
1044{
1045	__genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull);
1046
1047	if (genwqe_need_err_masking(cd))
1048		__genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull);
1049}
1050