// SPDX-License-Identifier: GPL-2.0
/*
 * s390 kvm PCI passthrough support
 *
 * Copyright IBM Corp. 2022
 *
 *    Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
 */

#include <linux/kvm_host.h>
#include <linux/pci.h>
#include <asm/pci.h>
#include <asm/pci_insn.h>
#include <asm/pci_io.h>
#include <asm/sclp.h>
#include "pci.h"
#include "kvm-s390.h"

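/*
 * Anchor for the adapter interruption forwarding tables: tracks the
 * summary bit vector, the guest adapter interruption table (GAIT) and
 * the per-summary-bit kvm_zdev back-pointers used for event forwarding.
 */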
struct zpci_aift *aift;

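/* Set interruption controls for cases where no IIB contents are needed */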
static inline int __set_irq_noiib(u16 ctl, u8 isc)
{
	union zpci_sic_iib iib = {{0}};

	return zpci_set_irq_ctrl(ctl, isc, &iib);
}

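/*
 * Tear down the AEN tracking structures.  The caller must hold
 * aift->aift_lock.
 */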
void kvm_s390_pci_aen_exit(void)
{
	unsigned long flags;
	struct kvm_zdev **gait_kzdev;

	lockdep_assert_held(&aift->aift_lock);

	/*
	 * Contents of the aipb remain registered for the life of the host
	 * kernel; the information is preserved in zpci_aipb and zpci_aif_sbv
	 * in case the KVM module is inserted again later.  Clear the AIFT
	 * information and free anything not registered with underlying
	 * firmware.
	 */
	spin_lock_irqsave(&aift->gait_lock, flags);
	gait_kzdev = aift->kzdev;
	aift->gait = NULL;
	aift->sbv = NULL;
	aift->kzdev = NULL;
	spin_unlock_irqrestore(&aift->gait_lock, flags);

	kfree(gait_kzdev);
}

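/*
 * First-time AEN setup: allocate the summary bit vector and the GAIT,
 * describe them in a new adapter interruption parameters block (AIPB),
 * and register that block with firmware.
 */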
static int zpci_setup_aipb(u8 nisc)
{
	struct page *page;
	int size, rc;

	zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
	if (!zpci_aipb)
		return -ENOMEM;

	aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, NULL);
	if (!aift->sbv) {
		rc = -ENOMEM;
		goto free_aipb;
	}
	zpci_aif_sbv = aift->sbv;
	size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
						sizeof(struct zpci_gaite)));
	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
	if (!page) {
		rc = -ENOMEM;
		goto free_sbv;
	}
	aift->gait = (struct zpci_gaite *)page_to_virt(page);

	zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
	zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
	zpci_aipb->aipb.afi = nisc;
	zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;

	/* Setup Adapter Event Notification Interpretation */
	if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
		rc = -EIO;
		goto free_gait;
	}

	return 0;

free_gait:
	free_pages((unsigned long)aift->gait, size);
free_sbv:
	airq_iv_release(aift->sbv);
	zpci_aif_sbv = NULL;
free_aipb:
	kfree(zpci_aipb);
	zpci_aipb = NULL;

	return rc;
}

static int zpci_reset_aipb(u8 nisc)
{
	/*
	 * AEN registration can only happen once per system boot.  If
	 * an aipb already exists then AEN was already registered and
	 * we can reuse the aipb contents.  This can only happen if
	 * the KVM module was removed and re-inserted.  However, we must
	 * ensure that the same forwarding ISC is used, as this is assigned
	 * during KVM module load.
	 */
	if (zpci_aipb->aipb.afi != nisc)
		return -EINVAL;

	aift->sbv = zpci_aif_sbv;
	aift->gait = phys_to_virt(zpci_aipb->aipb.gait);

	return 0;
}

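/*
 * Enable adapter event notification for the given forwarding ISC,
 * setting up a new AIPB on first use or reusing the registered one on
 * subsequent KVM module loads.
 */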
int kvm_s390_pci_aen_init(u8 nisc)
{
	int rc = 0;

	/* If already enabled for AEN, bail out now */
	if (aift->gait || aift->sbv)
		return -EPERM;

	mutex_lock(&aift->aift_lock);
	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
			      GFP_KERNEL);
	if (!aift->kzdev) {
		rc = -ENOMEM;
		goto unlock;
	}

	if (!zpci_aipb)
		rc = zpci_setup_aipb(nisc);
	else
		rc = zpci_reset_aipb(nisc);
	if (rc)
		goto free_zdev;

	/* Enable floating IRQs */
	if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
		rc = -EIO;
		kvm_s390_pci_aen_exit();
	}

	goto unlock;

free_zdev:
	kfree(aift->kzdev);
unlock:
	mutex_unlock(&aift->aift_lock);
	return rc;
}

/* Modify PCI: Register floating adapter interruption forwarding */
static int kvm_zpci_set_airq(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
	struct zpci_fib fib = {};
	u8 status;

	fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
	fib.fmt0.sum = 1;       /* enable summary notifications */
	fib.fmt0.noi = airq_iv_end(zdev->aibv);
	fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
	fib.fmt0.aibvo = 0;
	fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib.fmt0.aisbo = zdev->aisb & 63;
	fib.gd = zdev->gisa;

	return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
}

/* Modify PCI: Unregister floating adapter interruption forwarding */
static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
{
	u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
	struct zpci_fib fib = {};
	u8 cc, status;

	fib.gd = zdev->gisa;

	cc = zpci_mod_fc(req, &fib, &status);
	if (cc == 3 || (cc == 1 && status == 24))
		/* Function already gone or IRQs already deregistered. */
		cc = 0;

	return cc ? -EIO : 0;
}

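/*
 * Pinned-page accounting against RLIMIT_MEMLOCK: pages are charged to
 * the current user's locked_vm and the current mm's pinned_vm.  The
 * cmpxchg loop in account_mem() ensures concurrent callers cannot race
 * past the limit.
 */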
static inline void unaccount_mem(unsigned long nr_pages)
{
	struct user_struct *user = get_uid(current_user());

	if (user)
		atomic_long_sub(nr_pages, &user->locked_vm);
	if (current->mm)
		atomic64_sub(nr_pages, &current->mm->pinned_vm);
}

static inline int account_mem(unsigned long nr_pages)
{
	struct user_struct *user = get_uid(current_user());
	unsigned long page_limit, cur_pages, new_pages;

	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	do {
		cur_pages = atomic_long_read(&user->locked_vm);
		new_pages = cur_pages + nr_pages;
		if (new_pages > page_limit)
			return -ENOMEM;
	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
					new_pages) != cur_pages);

	atomic64_add(nr_pages, &current->mm->pinned_vm);

	return 0;
}

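/*
 * Enable interruption forwarding for a device: pin the guest AIBV (and
 * AISB, if summary notifications were requested), allocate a host summary
 * bit and interruption vector, fill in this device's GAITE, and rewrite
 * the guest FIB with host values so that it can be re-issued to firmware.
 */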
static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
				   bool assist)
{
	struct page *pages[1], *aibv_page, *aisb_page = NULL;
	unsigned int msi_vecs, idx;
	struct zpci_gaite *gaite;
	unsigned long hva, bit;
	struct kvm *kvm;
	phys_addr_t gaddr;
	int rc = 0, gisc, npages, pcount = 0;

	/*
	 * Interrupt forwarding is only applicable if the device is already
	 * enabled for interpretation
	 */
	if (zdev->gisa == 0)
		return -EINVAL;

	kvm = zdev->kzdev->kvm;
	msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);

	/* Get the associated forwarding ISC - if invalid, return the error */
	gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
	if (gisc < 0)
		return gisc;

	/* Replace AIBV address */
	idx = srcu_read_lock(&kvm->srcu);
	hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
	npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
	srcu_read_unlock(&kvm->srcu, idx);
	if (npages < 1) {
		rc = -EIO;
		goto out;
	}
	aibv_page = pages[0];
	pcount++;
	gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
	fib->fmt0.aibv = gaddr;

	/* Pin the guest AISB if one was specified */
	if (fib->fmt0.sum == 1) {
		idx = srcu_read_lock(&kvm->srcu);
		hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
		npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
					     pages);
		srcu_read_unlock(&kvm->srcu, idx);
		if (npages < 1) {
			rc = -EIO;
			goto unpin1;
		}
		aisb_page = pages[0];
		pcount++;
	}

	/* Account for pinned pages, roll back on failure */
	if (account_mem(pcount)) {
		rc = -ENOMEM;
		goto unpin2;
	}

	/* AISB must be allocated before we can fill in GAITE */
	mutex_lock(&aift->aift_lock);
	bit = airq_iv_alloc_bit(aift->sbv);
	if (bit == -1UL) {
		rc = -EIO;
		goto unlock;
	}
	zdev->aisb = bit; /* store the summary bit number */
	zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
				    AIRQ_IV_BITLOCK |
				    AIRQ_IV_GUESTVEC,
				    phys_to_virt(fib->fmt0.aibv));
	if (!zdev->aibv) {
		rc = -ENOMEM;
		airq_iv_free_bit(aift->sbv, zdev->aisb);
		goto unlock;
	}

	spin_lock_irq(&aift->gait_lock);
	/* The GAIT is indexed by summary bit number */
	gaite = &aift->gait[zdev->aisb];

	/* If assist not requested, host will get all alerts */
	if (assist)
		gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
	else
		gaite->gisa = 0;

	gaite->gisc = fib->fmt0.isc;
	gaite->count++;
	gaite->aisbo = fib->fmt0.aisbo;
	gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
							      ~PAGE_MASK));
	aift->kzdev[zdev->aisb] = zdev->kzdev;
	spin_unlock_irq(&aift->gait_lock);

	/* Update guest FIB for re-issue */
	fib->fmt0.aisbo = zdev->aisb & 63;
	fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
	fib->fmt0.isc = gisc;

	/* Save some guest fib values in the host for later use */
	zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
	zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
	mutex_unlock(&aift->aift_lock);

	/* Issue the mpcifc to set up the IRQ now */
	rc = kvm_zpci_set_airq(zdev);
	return rc;

unlock:
	mutex_unlock(&aift->aift_lock);
	unaccount_mem(pcount);
unpin2:
	if (fib->fmt0.sum == 1)
		unpin_user_page(aisb_page);
unpin1:
	unpin_user_page(aibv_page);
out:
	return rc;
}

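/*
 * Reverse of kvm_s390_pci_aif_enable(): deregister forwarding with
 * firmware, clear the GAITE once its use count drops to zero, and unpin
 * and unaccount the guest AIBV/AISB pages.
 */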
static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
{
	struct kvm_zdev *kzdev = zdev->kzdev;
	struct zpci_gaite *gaite;
	struct page *vpage = NULL, *spage = NULL;
	int rc, pcount = 0;
	u8 isc;

	if (zdev->gisa == 0)
		return -EINVAL;

	mutex_lock(&aift->aift_lock);

	/*
	 * If the clear fails due to an error, leave now unless we know this
	 * device is about to go away (force); in that case, clear the GAITE
	 * regardless.
	 */
	rc = kvm_zpci_clear_airq(zdev);
	if (rc && !force)
		goto out;

	if (zdev->kzdev->fib.fmt0.aibv == 0)
		goto out;
	spin_lock_irq(&aift->gait_lock);
	gaite = &aift->gait[zdev->aisb];
	isc = gaite->gisc;
	gaite->count--;
	if (gaite->count == 0) {
		/* Release guest AIBV and AISB */
		vpage = phys_to_page(kzdev->fib.fmt0.aibv);
		if (gaite->aisb != 0)
			spage = phys_to_page(gaite->aisb);
		/* Clear the GAIT entry */
		gaite->aisb = 0;
		gaite->gisc = 0;
		gaite->aisbo = 0;
		gaite->gisa = 0;
		aift->kzdev[zdev->aisb] = NULL;
		/* Clear zdev info */
		airq_iv_free_bit(aift->sbv, zdev->aisb);
		airq_iv_release(zdev->aibv);
		zdev->aisb = 0;
		zdev->aibv = NULL;
	}
	spin_unlock_irq(&aift->gait_lock);
	kvm_s390_gisc_unregister(kzdev->kvm, isc);
	kzdev->fib.fmt0.isc = 0;
	kzdev->fib.fmt0.aibv = 0;

	if (vpage) {
		unpin_user_page(vpage);
		pcount++;
	}
	if (spage) {
		unpin_user_page(spage);
		pcount++;
	}
	if (pcount > 0)
		unaccount_mem(pcount);
out:
	mutex_unlock(&aift->aift_lock);

	return rc;
}

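/* Allocate the kvm_zdev structure that associates a zPCI device with KVM */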
static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
{
	struct kvm_zdev *kzdev;

	kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
	if (!kzdev)
		return -ENOMEM;

	kzdev->zdev = zdev;
	zdev->kzdev = kzdev;

	return 0;
}

static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
{
	struct kvm_zdev *kzdev;

	kzdev = zdev->kzdev;
	WARN_ON(kzdev->zdev != zdev);
	zdev->kzdev = NULL;
	kfree(kzdev);
}

/*
 * Register device with the specified KVM. If interpretation facilities are
 * available, enable them and let userspace indicate whether or not they will
 * be used (specify SHM bit to disable).
 */
static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
{
	struct zpci_dev *zdev = opaque;
	u8 status;
	int rc;

	if (!zdev)
		return -EINVAL;

	mutex_lock(&zdev->kzdev_lock);

	if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
		mutex_unlock(&zdev->kzdev_lock);
		return -EINVAL;
	}

	kvm_get_kvm(kvm);

	mutex_lock(&kvm->lock);

	rc = kvm_s390_pci_dev_open(zdev);
	if (rc)
		goto err;

	/*
	 * If interpretation facilities aren't available, add the device to
	 * the kzdev list but don't enable it for interpretation.
	 */
	if (!kvm_s390_pci_interp_allowed())
		goto out;

	/*
	 * If this is the first request to use an interpreted device, make the
	 * necessary vcpu changes.
	 */
	if (!kvm->arch.use_zpci_interp)
		kvm_s390_vcpu_pci_enable_interp(kvm);

	if (zdev_enabled(zdev)) {
		rc = zpci_disable_device(zdev);
		if (rc)
			goto err;
	}

	/*
	 * Store the identity of the kvm guest that is allowed to access this
	 * device via interpretation; host CLP uses this information later.
	 */
	zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);

	rc = zpci_enable_device(zdev);
	if (rc)
		goto clear_gisa;

	/* Re-register the IOMMU that was already created */
	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				virt_to_phys(zdev->dma_table), &status);
	if (rc)
		goto clear_gisa;

out:
	zdev->kzdev->kvm = kvm;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
	spin_unlock(&kvm->arch.kzdev_list_lock);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	return 0;

clear_gisa:
	zdev->gisa = 0;
err:
	if (zdev->kzdev)
		kvm_s390_pci_dev_release(zdev);
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	kvm_put_kvm(kvm);
	return rc;
}

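/*
 * Disassociate the device from its KVM guest: turn off interruption
 * forwarding, remove the guest designation, cycle the device so the
 * change takes effect, and drop it from the guest's kzdev list.
 */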
static void kvm_s390_pci_unregister_kvm(void *opaque)
{
	struct zpci_dev *zdev = opaque;
	struct kvm *kvm;
	u8 status;

	if (!zdev)
		return;

	mutex_lock(&zdev->kzdev_lock);

	if (WARN_ON(!zdev->kzdev)) {
		mutex_unlock(&zdev->kzdev_lock);
		return;
	}

	kvm = zdev->kzdev->kvm;
	mutex_lock(&kvm->lock);

	/*
	 * A 0 gisa means interpretation was never enabled; just remove the
	 * device from the list.
	 */
	if (zdev->gisa == 0)
		goto out;

	/* Forwarding must be turned off before interpretation */
	if (zdev->kzdev->fib.fmt0.aibv != 0)
		kvm_s390_pci_aif_disable(zdev, true);

	/* Remove the host CLP guest designation */
	zdev->gisa = 0;

	if (zdev_enabled(zdev)) {
		if (zpci_disable_device(zdev))
			goto out;
	}

	if (zpci_enable_device(zdev))
		goto out;

	/* Re-register the IOMMU that was already created */
	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
			   virt_to_phys(zdev->dma_table), &status);

out:
	spin_lock(&kvm->arch.kzdev_list_lock);
	list_del(&zdev->kzdev->entry);
	spin_unlock(&kvm->arch.kzdev_list_lock);
	kvm_s390_pci_dev_release(zdev);

	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);

	kvm_put_kvm(kvm);
}

void kvm_s390_pci_init_list(struct kvm *kvm)
{
	spin_lock_init(&kvm->arch.kzdev_list_lock);
	INIT_LIST_HEAD(&kvm->arch.kzdev_list);
}

void kvm_s390_pci_clear_list(struct kvm *kvm)
{
	/*
	 * This list should already be empty, either via vfio device closures
	 * or kvm fd cleanup.
	 */
	spin_lock(&kvm->arch.kzdev_list_lock);
	WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
	spin_unlock(&kvm->arch.kzdev_list_lock);
}

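/* Find a passthrough device on this KVM's kzdev list by function handle */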
static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
{
	struct zpci_dev *zdev = NULL;
	struct kvm_zdev *kzdev;

	spin_lock(&kvm->arch.kzdev_list_lock);
	list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
		if (kzdev->zdev->fh == fh) {
			zdev = kzdev->zdev;
			break;
		}
	}
	spin_unlock(&kvm->arch.kzdev_list_lock);

	return zdev;
}

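/*
 * Build a FIB from the userspace-provided AEN registration arguments; a
 * nonzero summary bit address indicates that summary notifications are
 * requested.
 */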
static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
				     struct kvm_s390_zpci_op *args)
{
	struct zpci_fib fib = {};
	bool hostflag;

	fib.fmt0.aibv = args->u.reg_aen.ibv;
	fib.fmt0.isc = args->u.reg_aen.isc;
	fib.fmt0.noi = args->u.reg_aen.noi;
	if (args->u.reg_aen.sb != 0) {
		fib.fmt0.aisb = args->u.reg_aen.sb;
		fib.fmt0.aisbo = args->u.reg_aen.sbo;
		fib.fmt0.sum = 1;
	} else {
		fib.fmt0.aisb = 0;
		fib.fmt0.aisbo = 0;
		fib.fmt0.sum = 0;
	}

	hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
	return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
}

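/*
 * Dispatch a zPCI operation from userspace for a device that must already
 * be associated with this KVM.
 */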
int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
{
	struct kvm_zdev *kzdev;
	struct zpci_dev *zdev;
	int r;

	zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
	if (!zdev)
		return -ENODEV;

	mutex_lock(&zdev->kzdev_lock);
	mutex_lock(&kvm->lock);

	kzdev = zdev->kzdev;
	if (!kzdev) {
		r = -ENODEV;
		goto out;
	}
	if (kzdev->kvm != kvm) {
		r = -EPERM;
		goto out;
	}

	switch (args->op) {
	case KVM_S390_ZPCIOP_REG_AEN:
		/* Fail on unknown flags */
		if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
			r = -EINVAL;
			break;
		}
		r = kvm_s390_pci_zpci_reg_aen(zdev, args);
		break;
	case KVM_S390_ZPCIOP_DEREG_AEN:
		r = kvm_s390_pci_aif_disable(zdev, false);
		break;
	default:
		r = -EINVAL;
	}

out:
	mutex_unlock(&kvm->lock);
	mutex_unlock(&zdev->kzdev_lock);
	return r;
}

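/*
 * Install the zPCI-KVM hooks and, if instruction interpretation is
 * available, allocate the global AIFT structure.
 */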
int __init kvm_s390_pci_init(void)
{
	zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm;
	zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm;

	if (!kvm_s390_pci_interp_allowed())
		return 0;

	aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
	if (!aift)
		return -ENOMEM;

	spin_lock_init(&aift->gait_lock);
	mutex_init(&aift->aift_lock);

	return 0;
}

void kvm_s390_pci_exit(void)
{
	zpci_kvm_hook.kvm_register = NULL;
	zpci_kvm_hook.kvm_unregister = NULL;

	if (!kvm_s390_pci_interp_allowed())
		return;

	mutex_destroy(&aift->aift_lock);

	kfree(aift);
}