1// SPDX-License-Identifier: GPL-2.0
2/*
3 * channel program interfaces
4 *
5 * Copyright IBM Corp. 2017
6 *
7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
8 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
9 */
10
11#include <linux/ratelimit.h>
12#include <linux/mm.h>
13#include <linux/slab.h>
14#include <linux/highmem.h>
15#include <linux/iommu.h>
16#include <linux/vfio.h>
17#include <asm/idals.h>
18
19#include "vfio_ccw_cp.h"
20#include "vfio_ccw_private.h"
21
22struct page_array {
23	/* Array that stores pages need to pin. */
24	dma_addr_t		*pa_iova;
25	/* Array that receives the pinned pages. */
26	struct page		**pa_page;
27	/* Number of pages pinned from @pa_iova. */
28	int			pa_nr;
29};
30
31struct ccwchain {
32	struct list_head	next;
33	struct ccw1		*ch_ccw;
34	/* Guest physical address of the current chain. */
35	u64			ch_iova;
36	/* Count of the valid ccws in chain. */
37	int			ch_len;
38	/* Pinned PAGEs for the original data. */
39	struct page_array	*ch_pa;
40};
41
42/*
43 * page_array_alloc() - alloc memory for page array
44 * @pa: page_array on which to perform the operation
45 * @len: number of pages that should be pinned from @iova
46 *
47 * Attempt to allocate memory for page array.
48 *
49 * Usage of page_array:
50 * We expect (pa_nr == 0) and (pa_iova == NULL), any field in
51 * this structure will be filled in by this function.
52 *
53 * Returns:
54 *         0 if page array is allocated
55 *   -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova is not NULL
56 *   -ENOMEM if alloc failed
57 */
58static int page_array_alloc(struct page_array *pa, unsigned int len)
59{
60	if (pa->pa_nr || pa->pa_iova)
61		return -EINVAL;
62
63	if (len == 0)
64		return -EINVAL;
65
66	pa->pa_nr = len;
67
68	pa->pa_iova = kcalloc(len, sizeof(*pa->pa_iova), GFP_KERNEL);
69	if (!pa->pa_iova)
70		return -ENOMEM;
71
72	pa->pa_page = kcalloc(len, sizeof(*pa->pa_page), GFP_KERNEL);
73	if (!pa->pa_page) {
74		kfree(pa->pa_iova);
75		return -ENOMEM;
76	}
77
78	return 0;
79}
80
81/*
82 * page_array_unpin() - Unpin user pages in memory
83 * @pa: page_array on which to perform the operation
84 * @vdev: the vfio device to perform the operation
85 * @pa_nr: number of user pages to unpin
86 * @unaligned: were pages unaligned on the pin request
87 *
88 * Only unpin if any pages were pinned to begin with, i.e. pa_nr > 0,
89 * otherwise only clear pa->pa_nr
90 */
91static void page_array_unpin(struct page_array *pa,
92			     struct vfio_device *vdev, int pa_nr, bool unaligned)
93{
94	int unpinned = 0, npage = 1;
95
96	while (unpinned < pa_nr) {
97		dma_addr_t *first = &pa->pa_iova[unpinned];
98		dma_addr_t *last = &first[npage];
99
100		if (unpinned + npage < pa_nr &&
101		    *first + npage * PAGE_SIZE == *last &&
102		    !unaligned) {
103			npage++;
104			continue;
105		}
106
107		vfio_unpin_pages(vdev, *first, npage);
108		unpinned += npage;
109		npage = 1;
110	}
111
112	pa->pa_nr = 0;
113}
114
115/*
116 * page_array_pin() - Pin user pages in memory
117 * @pa: page_array on which to perform the operation
118 * @vdev: the vfio device to perform pin operations
119 * @unaligned: are pages aligned to 4K boundary?
120 *
121 * Returns number of pages pinned upon success.
122 * If the pin request partially succeeds, or fails completely,
123 * all pages are left unpinned and a negative error value is returned.
124 *
125 * Requests to pin "aligned" pages can be coalesced into a single
126 * vfio_pin_pages request for the sake of efficiency, based on the
127 * expectation of 4K page requests. Unaligned requests are probably
128 * dealing with 2K "pages", and cannot be coalesced without
129 * reworking this logic to incorporate that math.
130 */
131static int page_array_pin(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
132{
133	int pinned = 0, npage = 1;
134	int ret = 0;
135
136	while (pinned < pa->pa_nr) {
137		dma_addr_t *first = &pa->pa_iova[pinned];
138		dma_addr_t *last = &first[npage];
139
140		if (pinned + npage < pa->pa_nr &&
141		    *first + npage * PAGE_SIZE == *last &&
142		    !unaligned) {
143			npage++;
144			continue;
145		}
146
147		ret = vfio_pin_pages(vdev, *first, npage,
148				     IOMMU_READ | IOMMU_WRITE,
149				     &pa->pa_page[pinned]);
150		if (ret < 0) {
151			goto err_out;
152		} else if (ret > 0 && ret != npage) {
153			pinned += ret;
154			ret = -EINVAL;
155			goto err_out;
156		}
157		pinned += npage;
158		npage = 1;
159	}
160
161	return ret;
162
163err_out:
164	page_array_unpin(pa, vdev, pinned, unaligned);
165	return ret;
166}
167
168/* Unpin the pages before releasing the memory. */
169static void page_array_unpin_free(struct page_array *pa, struct vfio_device *vdev, bool unaligned)
170{
171	page_array_unpin(pa, vdev, pa->pa_nr, unaligned);
172	kfree(pa->pa_page);
173	kfree(pa->pa_iova);
174}
175
176static bool page_array_iova_pinned(struct page_array *pa, u64 iova, u64 length)
177{
178	u64 iova_pfn_start = iova >> PAGE_SHIFT;
179	u64 iova_pfn_end = (iova + length - 1) >> PAGE_SHIFT;
180	u64 pfn;
181	int i;
182
183	for (i = 0; i < pa->pa_nr; i++) {
184		pfn = pa->pa_iova[i] >> PAGE_SHIFT;
185		if (pfn >= iova_pfn_start && pfn <= iova_pfn_end)
186			return true;
187	}
188
189	return false;
190}
191/* Create the list of IDAL words for a page_array. */
192static inline void page_array_idal_create_words(struct page_array *pa,
193						dma64_t *idaws)
194{
195	int i;
196
197	/*
198	 * Idal words (execept the first one) rely on the memory being 4k
199	 * aligned. If a user virtual address is 4K aligned, then it's
200	 * corresponding kernel physical address will also be 4K aligned. Thus
201	 * there will be no problem here to simply use the phys to create an
202	 * idaw.
203	 */
204
205	for (i = 0; i < pa->pa_nr; i++) {
206		idaws[i] = virt_to_dma64(page_to_virt(pa->pa_page[i]));
207
208		/* Incorporate any offset from each starting address */
209		idaws[i] = dma64_add(idaws[i], pa->pa_iova[i] & ~PAGE_MASK);
210	}
211}
212
213static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
214{
215	struct ccw0 ccw0;
216	struct ccw1 *pccw1 = source;
217	int i;
218
219	for (i = 0; i < len; i++) {
220		ccw0 = *(struct ccw0 *)pccw1;
221		if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
222			pccw1->cmd_code = CCW_CMD_TIC;
223			pccw1->flags = 0;
224			pccw1->count = 0;
225		} else {
226			pccw1->cmd_code = ccw0.cmd_code;
227			pccw1->flags = ccw0.flags;
228			pccw1->count = ccw0.count;
229		}
230		pccw1->cda = u32_to_dma32(ccw0.cda);
231		pccw1++;
232	}
233}
234
/* 2K IDAWs are in use unless the ORB sets c64 without i2k. */
#define idal_is_2k(cp_) (!(cp_)->orb.cmd.c64 || (cp_)->orb.cmd.i2k)

/*
 * Helpers to classify a single ccw.
 */

/* Command code tests */
#define ccw_is_read(c_) (((c_)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(c_) (((c_)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(c_) (((c_)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)

#define ccw_is_noop(c_) ((c_)->cmd_code == CCW_CMD_NOOP)

#define ccw_is_tic(c_) ((c_)->cmd_code == CCW_CMD_TIC)

/* Flag tests */
#define ccw_is_idal(c_) ((c_)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(c_) ((c_)->flags & CCW_FLAG_SKIP)

/* Either command chaining or data chaining continues the chain */
#define ccw_is_chain(c_) ((c_)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
252
253/*
254 * ccw_does_data_transfer()
255 *
256 * Determine whether a CCW will move any data, such that the guest pages
257 * would need to be pinned before performing the I/O.
258 *
259 * Returns 1 if yes, 0 if no.
260 */
261static inline int ccw_does_data_transfer(struct ccw1 *ccw)
262{
263	/* If the count field is zero, then no data will be transferred */
264	if (ccw->count == 0)
265		return 0;
266
267	/* If the command is a NOP, then no data will be transferred */
268	if (ccw_is_noop(ccw))
269		return 0;
270
271	/* If the skip flag is off, then data will be transferred */
272	if (!ccw_is_skip(ccw))
273		return 1;
274
275	/*
276	 * If the skip flag is on, it is only meaningful if the command
277	 * code is a read, read backward, sense, or sense ID.  In those
278	 * cases, no data will be transferred.
279	 */
280	if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
281		return 0;
282
283	if (ccw_is_sense(ccw))
284		return 0;
285
286	/* The skip flag is on, but it is ignored for this command code. */
287	return 1;
288}
289
290/*
291 * is_cpa_within_range()
292 *
293 * @cpa: channel program address being questioned
294 * @head: address of the beginning of a CCW chain
295 * @len: number of CCWs within the chain
296 *
297 * Determine whether the address of a CCW (whether a new chain,
298 * or the target of a TIC) falls within a range (including the end points).
299 *
300 * Returns 1 if yes, 0 if no.
301 */
302static inline int is_cpa_within_range(dma32_t cpa, u32 head, int len)
303{
304	u32 tail = head + (len - 1) * sizeof(struct ccw1);
305	u32 gcpa = dma32_to_u32(cpa);
306
307	return head <= gcpa && gcpa <= tail;
308}
309
310static inline int is_tic_within_range(struct ccw1 *ccw, u32 head, int len)
311{
312	if (!ccw_is_tic(ccw))
313		return 0;
314
315	return is_cpa_within_range(ccw->cda, head, len);
316}
317
318static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
319{
320	struct ccwchain *chain;
321
322	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
323	if (!chain)
324		return NULL;
325
326	chain->ch_ccw = kcalloc(len, sizeof(*chain->ch_ccw), GFP_DMA | GFP_KERNEL);
327	if (!chain->ch_ccw)
328		goto out_err;
329
330	chain->ch_pa = kcalloc(len, sizeof(*chain->ch_pa), GFP_KERNEL);
331	if (!chain->ch_pa)
332		goto out_err;
333
334	list_add_tail(&chain->next, &cp->ccwchain_list);
335
336	return chain;
337
338out_err:
339	kfree(chain->ch_ccw);
340	kfree(chain);
341	return NULL;
342}
343
344static void ccwchain_free(struct ccwchain *chain)
345{
346	list_del(&chain->next);
347	kfree(chain->ch_pa);
348	kfree(chain->ch_ccw);
349	kfree(chain);
350}
351
352/* Free resource for a ccw that allocated memory for its cda. */
353static void ccwchain_cda_free(struct ccwchain *chain, int idx)
354{
355	struct ccw1 *ccw = &chain->ch_ccw[idx];
356
357	if (ccw_is_tic(ccw))
358		return;
359
360	kfree(dma32_to_virt(ccw->cda));
361}
362
363/**
364 * ccwchain_calc_length - calculate the length of the ccw chain.
365 * @iova: guest physical address of the target ccw chain
366 * @cp: channel_program on which to perform the operation
367 *
368 * This is the chain length not considering any TICs.
369 * You need to do a new round for each TIC target.
370 *
371 * The program is also validated for absence of not yet supported
372 * indirect data addressing scenarios.
373 *
374 * Returns: the length of the ccw chain or -errno.
375 */
376static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
377{
378	struct ccw1 *ccw = cp->guest_cp;
379	int cnt = 0;
380
381	do {
382		cnt++;
383
384		/*
385		 * We want to keep counting if the current CCW has the
386		 * command-chaining flag enabled, or if it is a TIC CCW
387		 * that loops back into the current chain.  The latter
388		 * is used for device orientation, where the CCW PRIOR to
389		 * the TIC can either jump to the TIC or a CCW immediately
390		 * after the TIC, depending on the results of its operation.
391		 */
392		if (!ccw_is_chain(ccw) && !is_tic_within_range(ccw, iova, cnt))
393			break;
394
395		ccw++;
396	} while (cnt < CCWCHAIN_LEN_MAX + 1);
397
398	if (cnt == CCWCHAIN_LEN_MAX + 1)
399		cnt = -EINVAL;
400
401	return cnt;
402}
403
404static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
405{
406	struct ccwchain *chain;
407	u32 ccw_head;
408
409	list_for_each_entry(chain, &cp->ccwchain_list, next) {
410		ccw_head = chain->ch_iova;
411		if (is_cpa_within_range(tic->cda, ccw_head, chain->ch_len))
412			return 1;
413	}
414
415	return 0;
416}
417
418static int ccwchain_loop_tic(struct ccwchain *chain,
419			     struct channel_program *cp);
420
421static int ccwchain_handle_ccw(dma32_t cda, struct channel_program *cp)
422{
423	struct vfio_device *vdev =
424		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
425	struct ccwchain *chain;
426	int len, ret;
427	u32 gcda;
428
429	gcda = dma32_to_u32(cda);
430	/* Copy 2K (the most we support today) of possible CCWs */
431	ret = vfio_dma_rw(vdev, gcda, cp->guest_cp, CCWCHAIN_LEN_MAX * sizeof(struct ccw1), false);
432	if (ret)
433		return ret;
434
435	/* Convert any Format-0 CCWs to Format-1 */
436	if (!cp->orb.cmd.fmt)
437		convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
438
439	/* Count the CCWs in the current chain */
440	len = ccwchain_calc_length(gcda, cp);
441	if (len < 0)
442		return len;
443
444	/* Need alloc a new chain for this one. */
445	chain = ccwchain_alloc(cp, len);
446	if (!chain)
447		return -ENOMEM;
448
449	chain->ch_len = len;
450	chain->ch_iova = gcda;
451
452	/* Copy the actual CCWs into the new chain */
453	memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
454
455	/* Loop for tics on this new chain. */
456	ret = ccwchain_loop_tic(chain, cp);
457
458	if (ret)
459		ccwchain_free(chain);
460
461	return ret;
462}
463
464/* Loop for TICs. */
465static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
466{
467	struct ccw1 *tic;
468	int i, ret;
469
470	for (i = 0; i < chain->ch_len; i++) {
471		tic = &chain->ch_ccw[i];
472
473		if (!ccw_is_tic(tic))
474			continue;
475
476		/* May transfer to an existing chain. */
477		if (tic_target_chain_exists(tic, cp))
478			continue;
479
480		/* Build a ccwchain for the next segment */
481		ret = ccwchain_handle_ccw(tic->cda, cp);
482		if (ret)
483			return ret;
484	}
485
486	return 0;
487}
488
489static int ccwchain_fetch_tic(struct ccw1 *ccw,
490			      struct channel_program *cp)
491{
492	struct ccwchain *iter;
493	u32 cda, ccw_head;
494
495	list_for_each_entry(iter, &cp->ccwchain_list, next) {
496		ccw_head = iter->ch_iova;
497		if (is_cpa_within_range(ccw->cda, ccw_head, iter->ch_len)) {
498			cda = (u64)iter->ch_ccw + dma32_to_u32(ccw->cda) - ccw_head;
499			ccw->cda = u32_to_dma32(cda);
500			return 0;
501		}
502	}
503
504	return -EFAULT;
505}
506
507static dma64_t *get_guest_idal(struct ccw1 *ccw, struct channel_program *cp, int idaw_nr)
508{
509	struct vfio_device *vdev =
510		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
511	dma64_t *idaws;
512	dma32_t *idaws_f1;
513	int idal_len = idaw_nr * sizeof(*idaws);
514	int idaw_size = idal_is_2k(cp) ? PAGE_SIZE / 2 : PAGE_SIZE;
515	int idaw_mask = ~(idaw_size - 1);
516	int i, ret;
517
518	idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
519	if (!idaws)
520		return ERR_PTR(-ENOMEM);
521
522	if (ccw_is_idal(ccw)) {
523		/* Copy IDAL from guest */
524		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), idaws, idal_len, false);
525		if (ret) {
526			kfree(idaws);
527			return ERR_PTR(ret);
528		}
529	} else {
530		/* Fabricate an IDAL based off CCW data address */
531		if (cp->orb.cmd.c64) {
532			idaws[0] = u64_to_dma64(dma32_to_u32(ccw->cda));
533			for (i = 1; i < idaw_nr; i++) {
534				idaws[i] = dma64_add(idaws[i - 1], idaw_size);
535				idaws[i] = dma64_and(idaws[i], idaw_mask);
536			}
537		} else {
538			idaws_f1 = (dma32_t *)idaws;
539			idaws_f1[0] = ccw->cda;
540			for (i = 1; i < idaw_nr; i++) {
541				idaws_f1[i] = dma32_add(idaws_f1[i - 1], idaw_size);
542				idaws_f1[i] = dma32_and(idaws_f1[i], idaw_mask);
543			}
544		}
545	}
546
547	return idaws;
548}
549
550/*
551 * ccw_count_idaws() - Calculate the number of IDAWs needed to transfer
552 * a specified amount of data
553 *
554 * @ccw: The Channel Command Word being translated
555 * @cp: Channel Program being processed
556 *
557 * The ORB is examined, since it specifies what IDAWs could actually be
558 * used by any CCW in the channel program, regardless of whether or not
559 * the CCW actually does. An ORB that does not specify Format-2-IDAW
560 * Control could still contain a CCW with an IDAL, which would be
561 * Format-1 and thus only move 2K with each IDAW. Thus all CCWs within
562 * the channel program must follow the same size requirements.
563 */
564static int ccw_count_idaws(struct ccw1 *ccw,
565			   struct channel_program *cp)
566{
567	struct vfio_device *vdev =
568		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
569	u64 iova;
570	int size = cp->orb.cmd.c64 ? sizeof(u64) : sizeof(u32);
571	int ret;
572	int bytes = 1;
573
574	if (ccw->count)
575		bytes = ccw->count;
576
577	if (ccw_is_idal(ccw)) {
578		/* Read first IDAW to check its starting address. */
579		/* All subsequent IDAWs will be 2K- or 4K-aligned. */
580		ret = vfio_dma_rw(vdev, dma32_to_u32(ccw->cda), &iova, size, false);
581		if (ret)
582			return ret;
583
584		/*
585		 * Format-1 IDAWs only occupy the first 32 bits,
586		 * and bit 0 is always off.
587		 */
588		if (!cp->orb.cmd.c64)
589			iova = iova >> 32;
590	} else {
591		iova = dma32_to_u32(ccw->cda);
592	}
593
594	/* Format-1 IDAWs operate on 2K each */
595	if (!cp->orb.cmd.c64)
596		return idal_2k_nr_words((void *)iova, bytes);
597
598	/* Using the 2K variant of Format-2 IDAWs? */
599	if (cp->orb.cmd.i2k)
600		return idal_2k_nr_words((void *)iova, bytes);
601
602	/* The 'usual' case is 4K Format-2 IDAWs */
603	return idal_nr_words((void *)iova, bytes);
604}
605
606static int ccwchain_fetch_ccw(struct ccw1 *ccw,
607			      struct page_array *pa,
608			      struct channel_program *cp)
609{
610	struct vfio_device *vdev =
611		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
612	dma64_t *idaws;
613	dma32_t *idaws_f1;
614	int ret;
615	int idaw_nr;
616	int i;
617
618	/* Calculate size of IDAL */
619	idaw_nr = ccw_count_idaws(ccw, cp);
620	if (idaw_nr < 0)
621		return idaw_nr;
622
623	/* Allocate an IDAL from host storage */
624	idaws = get_guest_idal(ccw, cp, idaw_nr);
625	if (IS_ERR(idaws)) {
626		ret = PTR_ERR(idaws);
627		goto out_init;
628	}
629
630	/*
631	 * Allocate an array of pages to pin/translate.
632	 * The number of pages is actually the count of the idaws
633	 * required for the data transfer, since we only only support
634	 * 4K IDAWs today.
635	 */
636	ret = page_array_alloc(pa, idaw_nr);
637	if (ret < 0)
638		goto out_free_idaws;
639
640	/*
641	 * Copy guest IDAWs into page_array, in case the memory they
642	 * occupy is not contiguous.
643	 */
644	idaws_f1 = (dma32_t *)idaws;
645	for (i = 0; i < idaw_nr; i++) {
646		if (cp->orb.cmd.c64)
647			pa->pa_iova[i] = dma64_to_u64(idaws[i]);
648		else
649			pa->pa_iova[i] = dma32_to_u32(idaws_f1[i]);
650	}
651
652	if (ccw_does_data_transfer(ccw)) {
653		ret = page_array_pin(pa, vdev, idal_is_2k(cp));
654		if (ret < 0)
655			goto out_unpin;
656	} else {
657		pa->pa_nr = 0;
658	}
659
660	ccw->cda = virt_to_dma32(idaws);
661	ccw->flags |= CCW_FLAG_IDA;
662
663	/* Populate the IDAL with pinned/translated addresses from page */
664	page_array_idal_create_words(pa, idaws);
665
666	return 0;
667
668out_unpin:
669	page_array_unpin_free(pa, vdev, idal_is_2k(cp));
670out_free_idaws:
671	kfree(idaws);
672out_init:
673	ccw->cda = 0;
674	return ret;
675}
676
/*
 * Translate a single ccw.
 * A TIC only needs its target address redirected. For every other ccw
 * the data pages are pinned in memory to avoid copying, and direct ccws
 * are turned into idal ccws to get rid of the 2G limitation of the cda
 * of a ccw1.
 */
static int ccwchain_fetch_one(struct ccw1 *ccw,
			      struct page_array *pa,
			      struct channel_program *cp)
{
	return ccw_is_tic(ccw) ? ccwchain_fetch_tic(ccw, cp) :
				 ccwchain_fetch_ccw(ccw, pa, cp);
}
693
694/**
695 * cp_init() - allocate ccwchains for a channel program.
696 * @cp: channel_program on which to perform the operation
697 * @orb: control block for the channel program from the guest
698 *
699 * This creates one or more ccwchain(s), and copies the raw data of
700 * the target channel program from @orb->cmd.iova to the new ccwchain(s).
701 *
702 * Limitations:
703 * 1. Supports idal(c64) ccw chaining.
704 * 2. Supports 4k idaw.
705 *
706 * Returns:
707 *   %0 on success and a negative error value on failure.
708 */
709int cp_init(struct channel_program *cp, union orb *orb)
710{
711	struct vfio_device *vdev =
712		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
713	/* custom ratelimit used to avoid flood during guest IPL */
714	static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1);
715	int ret;
716
717	/* this is an error in the caller */
718	if (cp->initialized)
719		return -EBUSY;
720
721	/*
722	 * We only support prefetching the channel program. We assume all channel
723	 * programs executed by supported guests likewise support prefetching.
724	 * Executing a channel program that does not specify prefetching will
725	 * typically not cause an error, but a warning is issued to help identify
726	 * the problem if something does break.
727	 */
728	if (!orb->cmd.pfch && __ratelimit(&ratelimit_state))
729		dev_warn(
730			vdev->dev,
731			"Prefetching channel program even though prefetch not specified in ORB");
732
733	INIT_LIST_HEAD(&cp->ccwchain_list);
734	memcpy(&cp->orb, orb, sizeof(*orb));
735
736	/* Build a ccwchain for the first CCW segment */
737	ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
738
739	if (!ret)
740		cp->initialized = true;
741
742	return ret;
743}
744
745
746/**
747 * cp_free() - free resources for channel program.
748 * @cp: channel_program on which to perform the operation
749 *
750 * This unpins the memory pages and frees the memory space occupied by
751 * @cp, which must have been returned by a previous call to cp_init().
752 * Otherwise, undefined behavior occurs.
753 */
754void cp_free(struct channel_program *cp)
755{
756	struct vfio_device *vdev =
757		&container_of(cp, struct vfio_ccw_private, cp)->vdev;
758	struct ccwchain *chain, *temp;
759	int i;
760
761	if (!cp->initialized)
762		return;
763
764	cp->initialized = false;
765	list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
766		for (i = 0; i < chain->ch_len; i++) {
767			page_array_unpin_free(&chain->ch_pa[i], vdev, idal_is_2k(cp));
768			ccwchain_cda_free(chain, i);
769		}
770		ccwchain_free(chain);
771	}
772}
773
774/**
775 * cp_prefetch() - translate a guest physical address channel program to
776 *                 a real-device runnable channel program.
777 * @cp: channel_program on which to perform the operation
778 *
779 * This function translates the guest-physical-address channel program
780 * and stores the result to ccwchain list. @cp must have been
781 * initialized by a previous call with cp_init(). Otherwise, undefined
782 * behavior occurs.
783 * For each chain composing the channel program:
784 * - On entry ch_len holds the count of CCWs to be translated.
785 * - On exit ch_len is adjusted to the count of successfully translated CCWs.
786 * This allows cp_free to find in ch_len the count of CCWs to free in a chain.
787 *
788 * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced
789 * as helpers to do ccw chain translation inside the kernel. Basically
790 * they accept a channel program issued by a virtual machine, and
791 * translate the channel program to a real-device runnable channel
792 * program.
793 *
794 * These APIs will copy the ccws into kernel-space buffers, and update
795 * the guest physical addresses with their corresponding host physical
796 * addresses.  Then channel I/O device drivers could issue the
797 * translated channel program to real devices to perform an I/O
798 * operation.
799 *
800 * These interfaces are designed to support translation only for
801 * channel programs, which are generated and formatted by a
802 * guest. Thus this will make it possible for things like VFIO to
803 * leverage the interfaces to passthrough a channel I/O mediated
804 * device in QEMU.
805 *
806 * We support direct ccw chaining by translating them to idal ccws.
807 *
808 * Returns:
809 *   %0 on success and a negative error value on failure.
810 */
811int cp_prefetch(struct channel_program *cp)
812{
813	struct ccwchain *chain;
814	struct ccw1 *ccw;
815	struct page_array *pa;
816	int len, idx, ret;
817
818	/* this is an error in the caller */
819	if (!cp->initialized)
820		return -EINVAL;
821
822	list_for_each_entry(chain, &cp->ccwchain_list, next) {
823		len = chain->ch_len;
824		for (idx = 0; idx < len; idx++) {
825			ccw = &chain->ch_ccw[idx];
826			pa = &chain->ch_pa[idx];
827
828			ret = ccwchain_fetch_one(ccw, pa, cp);
829			if (ret)
830				goto out_err;
831		}
832	}
833
834	return 0;
835out_err:
836	/* Only cleanup the chain elements that were actually translated. */
837	chain->ch_len = idx;
838	list_for_each_entry_continue(chain, &cp->ccwchain_list, next) {
839		chain->ch_len = 0;
840	}
841	return ret;
842}
843
844/**
845 * cp_get_orb() - get the orb of the channel program
846 * @cp: channel_program on which to perform the operation
847 * @sch: subchannel the operation will be performed against
848 *
849 * This function returns the address of the updated orb of the channel
850 * program. Channel I/O device drivers could use this orb to issue a
851 * ssch.
852 */
853union orb *cp_get_orb(struct channel_program *cp, struct subchannel *sch)
854{
855	union orb *orb;
856	struct ccwchain *chain;
857	struct ccw1 *cpa;
858
859	/* this is an error in the caller */
860	if (!cp->initialized)
861		return NULL;
862
863	orb = &cp->orb;
864
865	orb->cmd.intparm = (u32)virt_to_phys(sch);
866	orb->cmd.fmt = 1;
867
868	/*
869	 * Everything built by vfio-ccw is a Format-2 IDAL.
870	 * If the input was a Format-1 IDAL, indicate that
871	 * 2K Format-2 IDAWs were created here.
872	 */
873	if (!orb->cmd.c64)
874		orb->cmd.i2k = 1;
875	orb->cmd.c64 = 1;
876
877	if (orb->cmd.lpm == 0)
878		orb->cmd.lpm = sch->lpm;
879
880	chain = list_first_entry(&cp->ccwchain_list, struct ccwchain, next);
881	cpa = chain->ch_ccw;
882	orb->cmd.cpa = virt_to_dma32(cpa);
883
884	return orb;
885}
886
887/**
888 * cp_update_scsw() - update scsw for a channel program.
889 * @cp: channel_program on which to perform the operation
890 * @scsw: I/O results of the channel program and also the target to be
891 *        updated
892 *
893 * @scsw contains the I/O results of the channel program that pointed
894 * to by @cp. However what @scsw->cpa stores is a host physical
895 * address, which is meaningless for the guest, which is waiting for
896 * the I/O results.
897 *
898 * This function updates @scsw->cpa to its coressponding guest physical
899 * address.
900 */
901void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
902{
903	struct ccwchain *chain;
904	dma32_t cpa = scsw->cmd.cpa;
905	u32 ccw_head;
906
907	if (!cp->initialized)
908		return;
909
910	/*
911	 * LATER:
912	 * For now, only update the cmd.cpa part. We may need to deal with
913	 * other portions of the schib as well, even if we don't return them
914	 * in the ioctl directly. Path status changes etc.
915	 */
916	list_for_each_entry(chain, &cp->ccwchain_list, next) {
917		ccw_head = (u32)(u64)chain->ch_ccw;
918		/*
919		 * On successful execution, cpa points just beyond the end
920		 * of the chain.
921		 */
922		if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
923			/*
924			 * (cpa - ccw_head) is the offset value of the host
925			 * physical ccw to its chain head.
926			 * Adding this value to the guest physical ccw chain
927			 * head gets us the guest cpa:
928			 * cpa = chain->ch_iova + (cpa - ccw_head)
929			 */
930			cpa = dma32_add(cpa, chain->ch_iova - ccw_head);
931			break;
932		}
933	}
934
935	scsw->cmd.cpa = cpa;
936}
937
938/**
939 * cp_iova_pinned() - check if an iova is pinned for a ccw chain.
940 * @cp: channel_program on which to perform the operation
941 * @iova: the iova to check
942 * @length: the length to check from @iova
943 *
944 * If the @iova is currently pinned for the ccw chain, return true;
945 * else return false.
946 */
947bool cp_iova_pinned(struct channel_program *cp, u64 iova, u64 length)
948{
949	struct ccwchain *chain;
950	int i;
951
952	if (!cp->initialized)
953		return false;
954
955	list_for_each_entry(chain, &cp->ccwchain_list, next) {
956		for (i = 0; i < chain->ch_len; i++)
957			if (page_array_iova_pinned(&chain->ch_pa[i], iova, length))
958				return true;
959	}
960
961	return false;
962}
963