1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Hosting Protected Virtual Machines
4 *
5 * Copyright IBM Corp. 2019, 2020
6 *    Author(s): Janosch Frank <frankja@linux.ibm.com>
7 */
8#include <linux/kvm.h>
9#include <linux/kvm_host.h>
10#include <linux/minmax.h>
11#include <linux/pagemap.h>
12#include <linux/sched/signal.h>
13#include <asm/gmap.h>
14#include <asm/uv.h>
15#include <asm/mman.h>
16#include <linux/pagewalk.h>
17#include <linux/sched/mm.h>
18#include <linux/mmu_notifier.h>
19#include "kvm-s390.h"
20
21bool kvm_s390_pv_is_protected(struct kvm *kvm)
22{
23	lockdep_assert_held(&kvm->lock);
24	return !!kvm_s390_pv_get_handle(kvm);
25}
26EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
27
28bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
29{
30	lockdep_assert_held(&vcpu->mutex);
31	return !!kvm_s390_pv_cpu_get_handle(vcpu);
32}
33EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
34
35/**
36 * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
37 * be destroyed
38 *
39 * @list: list head for the list of leftover VMs
40 * @old_gmap_table: the gmap table of the leftover protected VM
41 * @handle: the handle of the leftover protected VM
42 * @stor_var: pointer to the variable storage of the leftover protected VM
43 * @stor_base: address of the base storage of the leftover protected VM
44 *
45 * Represents a protected VM that is still registered with the Ultravisor,
46 * but which does not correspond any longer to an active KVM VM. It should
47 * be destroyed at some point later, either asynchronously or when the
48 * process terminates.
49 */
50struct pv_vm_to_be_destroyed {
51	struct list_head list;
52	unsigned long old_gmap_table;
53	u64 handle;
54	void *stor_var;
55	unsigned long stor_base;
56};
57
58static void kvm_s390_clear_pv_state(struct kvm *kvm)
59{
60	kvm->arch.pv.handle = 0;
61	kvm->arch.pv.guest_len = 0;
62	kvm->arch.pv.stor_base = 0;
63	kvm->arch.pv.stor_var = NULL;
64}
65
66int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
67{
68	int cc;
69
70	if (!kvm_s390_pv_cpu_get_handle(vcpu))
71		return 0;
72
73	cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc);
74
75	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x",
76		     vcpu->vcpu_id, *rc, *rrc);
77	WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc);
78
79	/* Intended memory leak for something that should never happen. */
80	if (!cc)
81		free_pages(vcpu->arch.pv.stor_base,
82			   get_order(uv_info.guest_cpu_stor_len));
83
84	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
85	vcpu->arch.sie_block->pv_handle_cpu = 0;
86	vcpu->arch.sie_block->pv_handle_config = 0;
87	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
88	vcpu->arch.sie_block->sdf = 0;
89	/*
90	 * The sidad field (for sdf == 2) is now the gbea field (for sdf == 0).
91	 * Use the reset value of gbea to avoid leaking the kernel pointer of
92	 * the just freed sida.
93	 */
94	vcpu->arch.sie_block->gbea = 1;
95	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
96
97	return cc ? EIO : 0;
98}
99
100int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
101{
102	struct uv_cb_csc uvcb = {
103		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
104		.header.len = sizeof(uvcb),
105	};
106	void *sida_addr;
107	int cc;
108
109	if (kvm_s390_pv_cpu_get_handle(vcpu))
110		return -EINVAL;
111
112	vcpu->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT,
113						   get_order(uv_info.guest_cpu_stor_len));
114	if (!vcpu->arch.pv.stor_base)
115		return -ENOMEM;
116
117	/* Input */
118	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
119	uvcb.num = vcpu->arch.sie_block->icpua;
120	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
121	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);
122
123	/* Alloc Secure Instruction Data Area Designation */
124	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
125	if (!sida_addr) {
126		free_pages(vcpu->arch.pv.stor_base,
127			   get_order(uv_info.guest_cpu_stor_len));
128		return -ENOMEM;
129	}
130	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);
131
132	cc = uv_call(0, (u64)&uvcb);
133	*rc = uvcb.header.rc;
134	*rrc = uvcb.header.rrc;
135	KVM_UV_EVENT(vcpu->kvm, 3,
136		     "PROTVIRT CREATE VCPU: cpu %d handle %llx rc %x rrc %x",
137		     vcpu->vcpu_id, uvcb.cpu_handle, uvcb.header.rc,
138		     uvcb.header.rrc);
139
140	if (cc) {
141		u16 dummy;
142
143		kvm_s390_pv_destroy_cpu(vcpu, &dummy, &dummy);
144		return -EIO;
145	}
146
147	/* Output */
148	vcpu->arch.pv.handle = uvcb.cpu_handle;
149	vcpu->arch.sie_block->pv_handle_cpu = uvcb.cpu_handle;
150	vcpu->arch.sie_block->pv_handle_config = kvm_s390_pv_get_handle(vcpu->kvm);
151	vcpu->arch.sie_block->sdf = 2;
152	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
153	return 0;
154}
155
156/* only free resources when the destroy was successful */
157static void kvm_s390_pv_dealloc_vm(struct kvm *kvm)
158{
159	vfree(kvm->arch.pv.stor_var);
160	free_pages(kvm->arch.pv.stor_base,
161		   get_order(uv_info.guest_base_stor_len));
162	kvm_s390_clear_pv_state(kvm);
163}
164
165static int kvm_s390_pv_alloc_vm(struct kvm *kvm)
166{
167	unsigned long base = uv_info.guest_base_stor_len;
168	unsigned long virt = uv_info.guest_virt_var_stor_len;
169	unsigned long npages = 0, vlen = 0;
170
171	kvm->arch.pv.stor_var = NULL;
172	kvm->arch.pv.stor_base = __get_free_pages(GFP_KERNEL_ACCOUNT, get_order(base));
173	if (!kvm->arch.pv.stor_base)
174		return -ENOMEM;
175
176	/*
177	 * Calculate current guest storage for allocation of the
178	 * variable storage, which is based on the length in MB.
179	 *
180	 * Slots are sorted by GFN
181	 */
182	mutex_lock(&kvm->slots_lock);
183	npages = kvm_s390_get_gfn_end(kvm_memslots(kvm));
184	mutex_unlock(&kvm->slots_lock);
185
186	kvm->arch.pv.guest_len = npages * PAGE_SIZE;
187
188	/* Allocate variable storage */
189	vlen = ALIGN(virt * ((npages * PAGE_SIZE) / HPAGE_SIZE), PAGE_SIZE);
190	vlen += uv_info.guest_virt_base_stor_len;
191	kvm->arch.pv.stor_var = vzalloc(vlen);
192	if (!kvm->arch.pv.stor_var)
193		goto out_err;
194	return 0;
195
196out_err:
197	kvm_s390_pv_dealloc_vm(kvm);
198	return -ENOMEM;
199}
200
201/**
202 * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
203 * @kvm: the KVM that was associated with this leftover protected VM
204 * @leftover: details about the leftover protected VM that needs a clean up
205 * @rc: the RC code of the Destroy Secure Configuration UVC
206 * @rrc: the RRC code of the Destroy Secure Configuration UVC
207 *
208 * Destroy one leftover protected VM.
209 * On success, kvm->mm->context.protected_count will be decremented atomically
210 * and all other resources used by the VM will be freed.
211 *
212 * Return: 0 in case of success, otherwise 1
213 */
214static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
215					    struct pv_vm_to_be_destroyed *leftover,
216					    u16 *rc, u16 *rrc)
217{
218	int cc;
219
220	/* It used the destroy-fast UVC, nothing left to do here */
221	if (!leftover->handle)
222		goto done_fast;
223	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
224	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
225	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
226	if (cc)
227		return cc;
228	/*
229	 * Intentionally leak unusable memory. If the UVC fails, the memory
230	 * used for the VM and its metadata is permanently unusable.
231	 * This can only happen in case of a serious KVM or hardware bug; it
232	 * is not expected to happen in normal operation.
233	 */
234	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
235	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
236	vfree(leftover->stor_var);
237done_fast:
238	atomic_dec(&kvm->mm->context.protected_count);
239	return 0;
240}
241
242/**
243 * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
244 * @kvm: the VM whose memory is to be cleared.
245 *
246 * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
247 * The CPUs of the protected VM need to be destroyed beforehand.
248 */
249static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
250{
251	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
252	struct kvm_memory_slot *slot;
253	unsigned long len;
254	int srcu_idx;
255
256	srcu_idx = srcu_read_lock(&kvm->srcu);
257
258	/* Take the memslot containing guest absolute address 0 */
259	slot = gfn_to_memslot(kvm, 0);
260	/* Clear all slots or parts thereof that are below 2GB */
261	while (slot && slot->base_gfn < pages_2g) {
262		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
263		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
264		/* Take the next memslot */
265		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
266	}
267
268	srcu_read_unlock(&kvm->srcu, srcu_idx);
269}
270
271static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
272{
273	struct uv_cb_destroy_fast uvcb = {
274		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
275		.header.len = sizeof(uvcb),
276		.handle = kvm_s390_pv_get_handle(kvm),
277	};
278	int cc;
279
280	cc = uv_call_sched(0, (u64)&uvcb);
281	if (rc)
282		*rc = uvcb.header.rc;
283	if (rrc)
284		*rrc = uvcb.header.rrc;
285	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
286	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
287		     uvcb.header.rc, uvcb.header.rrc);
288	WARN_ONCE(cc && uvcb.header.rc != 0x104,
289		  "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
290		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
291	/* Intended memory leak on "impossible" error */
292	if (!cc)
293		kvm_s390_pv_dealloc_vm(kvm);
294	return cc ? -EIO : 0;
295}
296
297static inline bool is_destroy_fast_available(void)
298{
299	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
300}
301
302/**
303 * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
304 * @kvm: the VM
305 * @rc: return value for the RC field of the UVCB
306 * @rrc: return value for the RRC field of the UVCB
307 *
308 * Set aside the protected VM for a subsequent teardown. The VM will be able
309 * to continue immediately as a non-secure VM, and the information needed to
310 * properly tear down the protected VM is set aside. If another protected VM
311 * was already set aside without starting its teardown, this function will
312 * fail.
313 * The CPUs of the protected VM need to be destroyed beforehand.
314 *
315 * Context: kvm->lock needs to be held
316 *
317 * Return: 0 in case of success, -EINVAL if another protected VM was already set
318 * aside, -ENOMEM if the system ran out of memory.
319 */
320int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
321{
322	struct pv_vm_to_be_destroyed *priv;
323	int res = 0;
324
325	lockdep_assert_held(&kvm->lock);
326	/*
327	 * If another protected VM was already prepared for teardown, refuse.
328	 * A normal deinitialization has to be performed instead.
329	 */
330	if (kvm->arch.pv.set_aside)
331		return -EINVAL;
332
333	/* Guest with segment type ASCE, refuse to destroy asynchronously */
334	if ((kvm->arch.gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
335		return -EINVAL;
336
337	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
338	if (!priv)
339		return -ENOMEM;
340
341	if (is_destroy_fast_available()) {
342		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
343	} else {
344		priv->stor_var = kvm->arch.pv.stor_var;
345		priv->stor_base = kvm->arch.pv.stor_base;
346		priv->handle = kvm_s390_pv_get_handle(kvm);
347		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
348		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
349		if (s390_replace_asce(kvm->arch.gmap))
350			res = -ENOMEM;
351	}
352
353	if (res) {
354		kfree(priv);
355		return res;
356	}
357
358	kvm_s390_destroy_lower_2g(kvm);
359	kvm_s390_clear_pv_state(kvm);
360	kvm->arch.pv.set_aside = priv;
361
362	*rc = UVC_RC_EXECUTED;
363	*rrc = 42;
364	return 0;
365}
366
367/**
368 * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
369 * @kvm: the KVM whose protected VM needs to be deinitialized
370 * @rc: the RC code of the UVC
371 * @rrc: the RRC code of the UVC
372 *
373 * Deinitialize the current protected VM. This function will destroy and
374 * cleanup the current protected VM, but it will not cleanup the guest
375 * memory. This function should only be called when the protected VM has
376 * just been created and therefore does not have any guest memory, or when
377 * the caller cleans up the guest memory separately.
378 *
379 * This function should not fail, but if it does, the donated memory must
380 * not be freed.
381 *
382 * Context: kvm->lock needs to be held
383 *
384 * Return: 0 in case of success, otherwise -EIO
385 */
386int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
387{
388	int cc;
389
390	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
391			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
392	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
393	if (!cc) {
394		atomic_dec(&kvm->mm->context.protected_count);
395		kvm_s390_pv_dealloc_vm(kvm);
396	} else {
397		/* Intended memory leak on "impossible" error */
398		s390_replace_asce(kvm->arch.gmap);
399	}
400	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc);
401	WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc);
402
403	return cc ? -EIO : 0;
404}
405
406/**
407 * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
408 * with a specific KVM.
409 * @kvm: the KVM to be cleaned up
410 * @rc: the RC code of the first failing UVC
411 * @rrc: the RRC code of the first failing UVC
412 *
413 * This function will clean up all protected VMs associated with a KVM.
414 * This includes the active one, the one prepared for deinitialization with
415 * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
416 *
417 * Context: kvm->lock needs to be held unless being called from
418 * kvm_arch_destroy_vm.
419 *
420 * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
421 */
422int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
423{
424	struct pv_vm_to_be_destroyed *cur;
425	bool need_zap = false;
426	u16 _rc, _rrc;
427	int cc = 0;
428
429	/*
430	 * Nothing to do if the counter was already 0. Otherwise make sure
431	 * the counter does not reach 0 before calling s390_uv_destroy_range.
432	 */
433	if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
434		return 0;
435
436	*rc = 1;
437	/* If the current VM is protected, destroy it */
438	if (kvm_s390_pv_get_handle(kvm)) {
439		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
440		need_zap = true;
441	}
442
443	/* If a previous protected VM was set aside, put it in the need_cleanup list */
444	if (kvm->arch.pv.set_aside) {
445		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
446		kvm->arch.pv.set_aside = NULL;
447	}
448
449	/* Cleanup all protected VMs in the need_cleanup list */
450	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
451		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
452		need_zap = true;
453		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
454			cc = 1;
455			/*
456			 * Only return the first error rc and rrc, so make
457			 * sure it is not overwritten. All destroys will
458			 * additionally be reported via KVM_UV_EVENT().
459			 */
460			if (*rc == UVC_RC_EXECUTED) {
461				*rc = _rc;
462				*rrc = _rrc;
463			}
464		}
465		list_del(&cur->list);
466		kfree(cur);
467	}
468
469	/*
470	 * If the mm still has a mapping, try to mark all its pages as
471	 * accessible. The counter should not reach zero before this
472	 * cleanup has been performed.
473	 */
474	if (need_zap && mmget_not_zero(kvm->mm)) {
475		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
476		mmput(kvm->mm);
477	}
478
479	/* Now the counter can safely reach 0 */
480	atomic_dec(&kvm->mm->context.protected_count);
481	return cc ? -EIO : 0;
482}
483
484/**
485 * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
486 * @kvm: the VM previously associated with the protected VM
487 * @rc: return value for the RC field of the UVCB
488 * @rrc: return value for the RRC field of the UVCB
489 *
490 * Tear down the protected VM that had been previously prepared for teardown
491 * using kvm_s390_pv_set_aside_vm. Ideally this should be called by
492 * userspace asynchronously from a separate thread.
493 *
494 * Context: kvm->lock must not be held.
495 *
496 * Return: 0 in case of success, -EINVAL if no protected VM had been
497 * prepared for asynchronous teardowm, -EIO in case of other errors.
498 */
499int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
500{
501	struct pv_vm_to_be_destroyed *p;
502	int ret = 0;
503
504	lockdep_assert_not_held(&kvm->lock);
505	mutex_lock(&kvm->lock);
506	p = kvm->arch.pv.set_aside;
507	kvm->arch.pv.set_aside = NULL;
508	mutex_unlock(&kvm->lock);
509	if (!p)
510		return -EINVAL;
511
512	/* When a fatal signal is received, stop immediately */
513	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
514		goto done;
515	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
516		ret = -EIO;
517	kfree(p);
518	p = NULL;
519done:
520	/*
521	 * p is not NULL if we aborted because of a fatal signal, in which
522	 * case queue the leftover for later cleanup.
523	 */
524	if (p) {
525		mutex_lock(&kvm->lock);
526		list_add(&p->list, &kvm->arch.pv.need_cleanup);
527		mutex_unlock(&kvm->lock);
528		/* Did not finish, but pretend things went well */
529		*rc = UVC_RC_EXECUTED;
530		*rrc = 42;
531	}
532	return ret;
533}
534
535static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
536					     struct mm_struct *mm)
537{
538	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
539	u16 dummy;
540	int r;
541
542	/*
543	 * No locking is needed since this is the last thread of the last user of this
544	 * struct mm.
545	 * When the struct kvm gets deinitialized, this notifier is also
546	 * unregistered. This means that if this notifier runs, then the
547	 * struct kvm is still valid.
548	 */
549	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
550	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
551		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
552}
553
554static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
555	.release = kvm_s390_pv_mmu_notifier_release,
556};
557
558int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
559{
560	struct uv_cb_cgc uvcb = {
561		.header.cmd = UVC_CMD_CREATE_SEC_CONF,
562		.header.len = sizeof(uvcb)
563	};
564	int cc, ret;
565	u16 dummy;
566
567	ret = kvm_s390_pv_alloc_vm(kvm);
568	if (ret)
569		return ret;
570
571	/* Inputs */
572	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
573	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
574	uvcb.guest_asce = kvm->arch.gmap->asce;
575	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
576	uvcb.conf_base_stor_origin =
577		virt_to_phys((void *)kvm->arch.pv.stor_base);
578	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
579	uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
580	uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
581
582	cc = uv_call_sched(0, (u64)&uvcb);
583	*rc = uvcb.header.rc;
584	*rrc = uvcb.header.rrc;
585	KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
586		     uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
587
588	/* Outputs */
589	kvm->arch.pv.handle = uvcb.guest_handle;
590
591	atomic_inc(&kvm->mm->context.protected_count);
592	if (cc) {
593		if (uvcb.header.rc & UVC_RC_NEED_DESTROY) {
594			kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
595		} else {
596			atomic_dec(&kvm->mm->context.protected_count);
597			kvm_s390_pv_dealloc_vm(kvm);
598		}
599		return -EIO;
600	}
601	kvm->arch.gmap->guest_handle = uvcb.guest_handle;
602	/* Add the notifier only once. No races because we hold kvm->lock */
603	if (kvm->arch.pv.mmu_notifier.ops != &kvm_s390_pv_mmu_notifier_ops) {
604		kvm->arch.pv.mmu_notifier.ops = &kvm_s390_pv_mmu_notifier_ops;
605		mmu_notifier_register(&kvm->arch.pv.mmu_notifier, kvm->mm);
606	}
607	return 0;
608}
609
610int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
611			      u16 *rrc)
612{
613	struct uv_cb_ssc uvcb = {
614		.header.cmd = UVC_CMD_SET_SEC_CONF_PARAMS,
615		.header.len = sizeof(uvcb),
616		.sec_header_origin = (u64)hdr,
617		.sec_header_len = length,
618		.guest_handle = kvm_s390_pv_get_handle(kvm),
619	};
620	int cc = uv_call(0, (u64)&uvcb);
621
622	*rc = uvcb.header.rc;
623	*rrc = uvcb.header.rrc;
624	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM SET PARMS: rc %x rrc %x",
625		     *rc, *rrc);
626	return cc ? -EINVAL : 0;
627}
628
629static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
630		      u64 offset, u16 *rc, u16 *rrc)
631{
632	struct uv_cb_unp uvcb = {
633		.header.cmd = UVC_CMD_UNPACK_IMG,
634		.header.len = sizeof(uvcb),
635		.guest_handle = kvm_s390_pv_get_handle(kvm),
636		.gaddr = addr,
637		.tweak[0] = tweak,
638		.tweak[1] = offset,
639	};
640	int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
641
642	*rc = uvcb.header.rc;
643	*rrc = uvcb.header.rrc;
644
645	if (ret && ret != -EAGAIN)
646		KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: failed addr %llx with rc %x rrc %x",
647			     uvcb.gaddr, *rc, *rrc);
648	return ret;
649}
650
651int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
652		       unsigned long tweak, u16 *rc, u16 *rrc)
653{
654	u64 offset = 0;
655	int ret = 0;
656
657	if (addr & ~PAGE_MASK || !size || size & ~PAGE_MASK)
658		return -EINVAL;
659
660	KVM_UV_EVENT(kvm, 3, "PROTVIRT VM UNPACK: start addr %lx size %lx",
661		     addr, size);
662
663	while (offset < size) {
664		ret = unpack_one(kvm, addr, tweak, offset, rc, rrc);
665		if (ret == -EAGAIN) {
666			cond_resched();
667			if (fatal_signal_pending(current))
668				break;
669			continue;
670		}
671		if (ret)
672			break;
673		addr += PAGE_SIZE;
674		offset += PAGE_SIZE;
675	}
676	if (!ret)
677		KVM_UV_EVENT(kvm, 3, "%s", "PROTVIRT VM UNPACK: successful");
678	return ret;
679}
680
681int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
682{
683	struct uv_cb_cpu_set_state uvcb = {
684		.header.cmd	= UVC_CMD_CPU_SET_STATE,
685		.header.len	= sizeof(uvcb),
686		.cpu_handle	= kvm_s390_pv_cpu_get_handle(vcpu),
687		.state		= state,
688	};
689	int cc;
690
691	cc = uv_call(0, (u64)&uvcb);
692	KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT SET CPU %d STATE %d rc %x rrc %x",
693		     vcpu->vcpu_id, state, uvcb.header.rc, uvcb.header.rrc);
694	if (cc)
695		return -EINVAL;
696	return 0;
697}
698
699int kvm_s390_pv_dump_cpu(struct kvm_vcpu *vcpu, void *buff, u16 *rc, u16 *rrc)
700{
701	struct uv_cb_dump_cpu uvcb = {
702		.header.cmd = UVC_CMD_DUMP_CPU,
703		.header.len = sizeof(uvcb),
704		.cpu_handle = vcpu->arch.pv.handle,
705		.dump_area_origin = (u64)buff,
706	};
707	int cc;
708
709	cc = uv_call_sched(0, (u64)&uvcb);
710	*rc = uvcb.header.rc;
711	*rrc = uvcb.header.rrc;
712	return cc;
713}
714
715/* Size of the cache for the storage state dump data. 1MB for now */
716#define DUMP_BUFF_LEN HPAGE_SIZE
717
718/**
719 * kvm_s390_pv_dump_stor_state
720 *
721 * @kvm: pointer to the guest's KVM struct
722 * @buff_user: Userspace pointer where we will write the results to
723 * @gaddr: Starting absolute guest address for which the storage state
724 *	   is requested.
725 * @buff_user_len: Length of the buff_user buffer
726 * @rc: Pointer to where the uvcb return code is stored
727 * @rrc: Pointer to where the uvcb return reason code is stored
728 *
729 * Stores buff_len bytes of tweak component values to buff_user
730 * starting with the 1MB block specified by the absolute guest address
731 * (gaddr). The gaddr pointer will be updated with the last address
732 * for which data was written when returning to userspace. buff_user
733 * might be written to even if an error rc is returned. For instance
734 * if we encounter a fault after writing the first page of data.
735 *
736 * Context: kvm->lock needs to be held
737 *
738 * Return:
739 *  0 on success
740 *  -ENOMEM if allocating the cache fails
741 *  -EINVAL if gaddr is not aligned to 1MB
742 *  -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
743 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
744 *  -EFAULT if copying the result to buff_user failed
745 */
746int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
747				u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
748{
749	struct uv_cb_dump_stor_state uvcb = {
750		.header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
751		.header.len = sizeof(uvcb),
752		.config_handle = kvm->arch.pv.handle,
753		.gaddr = *gaddr,
754		.dump_area_origin = 0,
755	};
756	const u64 increment_len = uv_info.conf_dump_storage_state_len;
757	size_t buff_kvm_size;
758	size_t size_done = 0;
759	u8 *buff_kvm = NULL;
760	int cc, ret;
761
762	ret = -EINVAL;
763	/* UV call processes 1MB guest storage chunks at a time */
764	if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
765		goto out;
766
767	/*
768	 * We provide the storage state for 1MB chunks of guest
769	 * storage. The buffer will need to be aligned to
770	 * conf_dump_storage_state_len so we don't end on a partial
771	 * chunk.
772	 */
773	if (!buff_user_len ||
774	    !IS_ALIGNED(buff_user_len, increment_len))
775		goto out;
776
777	/*
778	 * Allocate a buffer from which we will later copy to the user
779	 * process. We don't want userspace to dictate our buffer size
780	 * so we limit it to DUMP_BUFF_LEN.
781	 */
782	ret = -ENOMEM;
783	buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
784	buff_kvm = vzalloc(buff_kvm_size);
785	if (!buff_kvm)
786		goto out;
787
788	ret = 0;
789	uvcb.dump_area_origin = (u64)buff_kvm;
790	/* We will loop until the user buffer is filled or an error occurs */
791	do {
792		/* Get 1MB worth of guest storage state data */
793		cc = uv_call_sched(0, (u64)&uvcb);
794
795		/* All or nothing */
796		if (cc) {
797			ret = -EINVAL;
798			break;
799		}
800
801		size_done += increment_len;
802		uvcb.dump_area_origin += increment_len;
803		buff_user_len -= increment_len;
804		uvcb.gaddr += HPAGE_SIZE;
805
806		/* KVM Buffer full, time to copy to the process */
807		if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
808			if (copy_to_user(buff_user, buff_kvm, size_done)) {
809				ret = -EFAULT;
810				break;
811			}
812
813			buff_user += size_done;
814			size_done = 0;
815			uvcb.dump_area_origin = (u64)buff_kvm;
816		}
817	} while (buff_user_len);
818
819	/* Report back where we ended dumping */
820	*gaddr = uvcb.gaddr;
821
822	/* Lets only log errors, we don't want to spam */
823out:
824	if (ret)
825		KVM_UV_EVENT(kvm, 3,
826			     "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
827			     uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
828	*rc = uvcb.header.rc;
829	*rrc = uvcb.header.rrc;
830	vfree(buff_kvm);
831
832	return ret;
833}
834
835/**
836 * kvm_s390_pv_dump_complete
837 *
838 * @kvm: pointer to the guest's KVM struct
839 * @buff_user: Userspace pointer where we will write the results to
840 * @rc: Pointer to where the uvcb return code is stored
841 * @rrc: Pointer to where the uvcb return reason code is stored
842 *
843 * Completes the dumping operation and writes the completion data to
844 * user space.
845 *
846 * Context: kvm->lock needs to be held
847 *
848 * Return:
849 *  0 on success
850 *  -ENOMEM if allocating the completion buffer fails
851 *  -EINVAL if the UV call fails, rc and rrc will be set in this case
852 *  -EFAULT if copying the result to buff_user failed
853 */
854int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
855			      u16 *rc, u16 *rrc)
856{
857	struct uv_cb_dump_complete complete = {
858		.header.len = sizeof(complete),
859		.header.cmd = UVC_CMD_DUMP_COMPLETE,
860		.config_handle = kvm_s390_pv_get_handle(kvm),
861	};
862	u64 *compl_data;
863	int ret;
864
865	/* Allocate dump area */
866	compl_data = vzalloc(uv_info.conf_dump_finalize_len);
867	if (!compl_data)
868		return -ENOMEM;
869	complete.dump_area_origin = (u64)compl_data;
870
871	ret = uv_call_sched(0, (u64)&complete);
872	*rc = complete.header.rc;
873	*rrc = complete.header.rrc;
874	KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
875		     complete.header.rc, complete.header.rrc);
876
877	if (!ret) {
878		/*
879		 * kvm_s390_pv_dealloc_vm() will also (mem)set
880		 * this to false on a reboot or other destroy
881		 * operation for this vm.
882		 */
883		kvm->arch.pv.dumping = false;
884		kvm_s390_vcpu_unblock_all(kvm);
885		ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
886		if (ret)
887			ret = -EFAULT;
888	}
889	vfree(compl_data);
890	/* If the UVC returned an error, translate it to -EINVAL */
891	if (ret > 0)
892		ret = -EINVAL;
893	return ret;
894}
895