// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2020-21 IBM Corp.
 */

#define pr_fmt(fmt) "vas: " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>
#include <asm/machdep.h>
#include <asm/hvcall.h>
#include <asm/plpar_wrappers.h>
#include <asm/firmware.h>
#include <asm/vphn.h>
#include <asm/vas.h>
#include "vas.h"

#define VAS_INVALID_WIN_ADDRESS	0xFFFFFFFFFFFFFFFFul
#define VAS_DEFAULT_DOMAIN_ID	0xFFFFFFFFFFFFFFFFul
/* The hypervisor allows one credit per window right now */
#define DEF_WIN_CREDS		1

static struct vas_all_caps caps_all;
static bool copypaste_feat;
static struct hv_vas_cop_feat_caps hv_cop_caps;

static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
static DEFINE_MUTEX(vas_pseries_mutex);
static bool migration_in_progress;

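/*
 * Convert hypervisor busy return codes: sleep for long-busy delays and
 * return H_BUSY so that the caller retries the hcall.
 */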
static long hcall_return_busy_check(long rc)
{
	/* Check if we are stalled for some time */
	if (H_IS_LONG_BUSY(rc)) {
		msleep(get_longbusy_msecs(rc));
		rc = H_BUSY;
	} else if (rc == H_BUSY) {
		cond_resched();
	}

	return rc;
}

/*
 * Allocate VAS window hcall
 */
static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
				     u8 wintype, u16 credits)
{
	long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
	long rc;

	do {
		rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
				  credits, domain[0], domain[1], domain[2],
				  domain[3], domain[4], domain[5]);

		rc = hcall_return_busy_check(rc);
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS) {
		win->vas_win.winid = retbuf[0];
		win->win_addr = retbuf[1];
		win->complete_irq = retbuf[2];
		win->fault_irq = retbuf[3];
		if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
			pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
			return -ENOTSUPP;
		}
		return 0;
	}

	pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
		rc, wintype, credits);

	return -EIO;
}

/*
 * Deallocate VAS window hcall.
 */
static int h_deallocate_vas_window(u64 winid)
{
	long rc;

	do {
		rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);

		rc = hcall_return_busy_check(rc);
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS)
		return 0;

	pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
		rc, winid);
	return -EIO;
}

/*
 * Modify VAS window.
 * After the window is opened with allocate window hcall, configure it
 * with flags and LPAR PID before using.
 */
static int h_modify_vas_window(struct pseries_vas_window *win)
{
	long rc;

	/*
	 * AMR value is not supported in Linux VAS implementation.
	 * The hypervisor ignores it if 0 is passed.
	 */
	do {
		rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
					win->vas_win.winid, win->pid, 0,
					VAS_MOD_WIN_FLAGS, 0);

		rc = hcall_return_busy_check(rc);
	} while (rc == H_BUSY);

	if (rc == H_SUCCESS)
		return 0;

	pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
			rc, win->vas_win.winid, win->pid);
	return -EIO;
}

/*
 * This hcall is used to determine the capabilities from the hypervisor.
 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
 * @query_type: If 0 is passed, the hypervisor returns the overall
 *		capabilities which list all feature(s) that are
 *		available. Then query the hypervisor to get the
 *		corresponding capabilities for the specific feature.
 *		Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
 *			and VAS GZIP Default capabilities.
 *			H_QUERY_NX_CAPABILITIES provides NX GZIP
 *			capabilities.
 * @result: Return buffer to save capabilities.
 */
int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
{
	long rc;

	rc = plpar_hcall_norets(hcall, query_type, result);

	if (rc == H_SUCCESS)
		return 0;

	/* H_FUNCTION means HV does not support VAS so don't print an error */
	if (rc != H_FUNCTION) {
		pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
			(hcall == H_QUERY_VAS_CAPABILITIES) ?
				"H_QUERY_VAS_CAPABILITIES" :
				"H_QUERY_NX_CAPABILITIES",
			rc, query_type, result);
	}

	return -EIO;
}
EXPORT_SYMBOL_GPL(h_query_vas_capabilities);

169
170/*
171 * hcall to get fault CRB from the hypervisor.
172 */
173static int h_get_nx_fault(u32 winid, u64 buffer)
174{
175	long rc;
176
177	rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
178
179	if (rc == H_SUCCESS)
180		return 0;
181
182	pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
183		rc, winid, buffer);
184	return -EIO;
185
186}
187
/*
 * Handle the fault interrupt.
 * When the fault interrupt is received for each window, query the
 * hypervisor to get the fault CRB on the specific fault. Then
 * process the CRB by updating the CSB, or by sending a signal if the
 * user space CSB is invalid.
 * Note: The hypervisor forwards an interrupt for each fault request.
 *	So there is one fault CRB to process for each H_GET_NX_FAULT hcall.
 */
static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
{
	struct pseries_vas_window *txwin = data;
	struct coprocessor_request_block crb;
	struct vas_user_win_ref *tsk_ref;
	int rc;

	while (atomic_read(&txwin->pending_faults)) {
		rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
		if (!rc) {
			tsk_ref = &txwin->vas_win.task_ref;
			vas_dump_crb(&crb);
			vas_update_csb(&crb, tsk_ref);
		}
		atomic_dec(&txwin->pending_faults);
	}

	return IRQ_HANDLED;
}

216
217/*
218 * irq_default_primary_handler() can be used only with IRQF_ONESHOT
219 * which disables IRQ before executing the thread handler and enables
220 * it after. But this disabling interrupt sets the VAS IRQ OFF
221 * state in the hypervisor. If the NX generates fault interrupt
222 * during this window, the hypervisor will not deliver this
223 * interrupt to the LPAR. So use VAS specific IRQ handler instead
224 * of calling the default primary handler.
225 */
226static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
227{
228	struct pseries_vas_window *txwin = data;
229
230	/*
231	 * The thread hanlder will process this interrupt if it is
232	 * already running.
233	 */
234	atomic_inc(&txwin->pending_faults);
235
236	return IRQ_WAKE_THREAD;
237}
238
239/*
240 * Allocate window and setup IRQ mapping.
241 */
242static int allocate_setup_window(struct pseries_vas_window *txwin,
243				 u64 *domain, u8 wintype)
244{
245	int rc;
246
247	rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
248	if (rc)
249		return rc;
250	/*
251	 * On PowerVM, the hypervisor setup and forwards the fault
252	 * interrupt per window. So the IRQ setup and fault handling
253	 * will be done for each open window separately.
254	 */
255	txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
256	if (!txwin->fault_virq) {
257		pr_err("Failed irq mapping %d\n", txwin->fault_irq);
258		rc = -EINVAL;
259		goto out_win;
260	}
261
262	txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
263				txwin->vas_win.winid);
264	if (!txwin->name) {
265		rc = -ENOMEM;
266		goto out_irq;
267	}
268
269	rc = request_threaded_irq(txwin->fault_virq,
270				  pseries_vas_irq_handler,
271				  pseries_vas_fault_thread_fn, 0,
272				  txwin->name, txwin);
273	if (rc) {
274		pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
275		       txwin->vas_win.winid, txwin->fault_virq, rc);
276		goto out_free;
277	}
278
279	txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
280
281	return 0;
282out_free:
283	kfree(txwin->name);
284out_irq:
285	irq_dispose_mapping(txwin->fault_virq);
286out_win:
287	h_deallocate_vas_window(txwin->vas_win.winid);
288	return rc;
289}
290
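/*
 * Free the per-window fault IRQ resources: the handler, the IRQ name
 * and the IRQ mapping.
 */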
static inline void free_irq_setup(struct pseries_vas_window *txwin)
{
	free_irq(txwin->fault_virq, txwin);
	kfree(txwin->name);
	irq_dispose_mapping(txwin->fault_virq);
}

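/*
 * Open a send window for user space: allocate the window in the
 * hypervisor, set up the fault IRQ and configure the window with the
 * LPAR PID so that it is ready for copy/paste.
 */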
static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
					      enum vas_cop_type cop_type)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *cop_feat_caps;
	struct vas_caps *caps;
	struct pseries_vas_window *txwin;
	int rc;

	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
	if (!txwin)
		return ERR_PTR(-ENOMEM);

	/*
	 * A VAS window can have many credits which means that many
	 * requests can be issued simultaneously. But the hypervisor
	 * restricts one credit per window.
	 * The hypervisor introduces 2 different types of credits:
	 * Default credit type (Uses normal priority FIFO):
	 *	A limited number of credits are assigned to partitions
	 *	based on processor entitlement. But these credits may be
	 *	over-committed on a system depending on whether the CPUs
	 *	are in shared or dedicated modes - that is, more requests
	 *	may be issued across the system than NX can service at
	 *	once which can result in paste command failure (RMA_busy).
	 *	Then the process has to resend requests or fall back to
	 *	SW compression.
	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
	 *	To avoid NX HW contention, the system admins can assign
	 *	QoS credits for each LPAR so that this partition is
	 *	guaranteed access to NX resources. These credits are
	 *	assigned to partitions via the HMC.
	 *	Refer to PAPR for more information.
	 *
	 * Allocate the window with QoS credits if the user requested it.
	 * Otherwise default credits are used.
	 */
	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
	else
		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];

	cop_feat_caps = &caps->caps;

	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
			atomic_read(&cop_feat_caps->nr_total_credits)) {
		pr_err_ratelimited("Credits are not available to allocate window\n");
		rc = -EINVAL;
		goto out;
	}

	if (vas_id == -1) {
		/*
		 * The user space is requesting to allocate a window on
		 * the VAS instance where the process is executing.
		 * On PowerVM, domain values are passed to the hypervisor
		 * to select the VAS instance. Useful if the process has
		 * affinity to a NUMA node.
		 * The hypervisor selects the VAS instance if
		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
		 * The h_allocate_vas_window hcall is defined to take
		 * domain values as specified by h_home_node_associativity,
		 * so no unpacking needs to be done.
		 */
		rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
				  VPHN_FLAG_VCPU, hard_smp_processor_id());
		if (rc != H_SUCCESS) {
			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %d\n", rc);
			goto out;
		}
	}

	txwin->pid = mfspr(SPRN_PID);

	/*
	 * Allocate / Deallocate window hcalls and setup / free IRQs
	 * have to be protected with mutex.
	 * Open VAS window: Allocate window hcall and setup IRQ
	 * Close VAS window: Deallocate window hcall and free IRQ
	 *	The hypervisor waits until all NX requests are
	 *	completed before closing the window. So it expects the OS
	 *	to handle NX faults, meaning the IRQ can be freed only
	 *	after the deallocate window hcall returns.
	 * So if the window is closed with the deallocate hcall before
	 * the IRQ is freed, the hypervisor can assign the same fault
	 * IRQ to a new allocate hcall. Setting up the IRQ for the new
	 * window can then fail since the same fault IRQ has not been
	 * freed by the OS yet.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (migration_in_progress) {
		rc = -EBUSY;
	} else {
		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
				   cop_feat_caps->win_type);
		if (!rc)
			caps->nr_open_wins_progress++;
	}

	mutex_unlock(&vas_pseries_mutex);
	if (rc)
		goto out;

	/*
	 * Modify window and it is ready to use.
	 */
	rc = h_modify_vas_window(txwin);
	if (!rc)
		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
	if (rc)
		goto out_free;

	txwin->win_type = cop_feat_caps->win_type;

	/*
	 * The migration SUSPEND thread sets migration_in_progress and
	 * closes all open windows from the list. But the window is
	 * added to the list after the open and modify HCALLs. So it is
	 * possible that migration_in_progress is set before the modify
	 * HCALL, which may cause some windows to still be open when the
	 * hypervisor initiates the migration.
	 * So check the migration_in_progress flag again and close all
	 * open windows.
	 *
	 * It is possible to lose the acquired credit with DLPAR core
	 * removal after the window is opened. So if there are any
	 * closed windows (i.e. with lost credits), do not give a new
	 * window to user space. New windows will be opened only
	 * after the existing windows are reopened when credits are
	 * available.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (!caps->nr_close_wins && !migration_in_progress) {
		list_add(&txwin->win_list, &caps->list);
		caps->nr_open_windows++;
		caps->nr_open_wins_progress--;
		mutex_unlock(&vas_pseries_mutex);
		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
		return &txwin->vas_win;
	}
	mutex_unlock(&vas_pseries_mutex);

	put_vas_user_win_ref(&txwin->vas_win.task_ref);
	rc = -EBUSY;
	pr_err_ratelimited("No credit is available to allocate window\n");

out_free:
	/*
	 * Window is not operational. Free the IRQ before closing the
	 * window so that we do not have to hold the mutex.
	 */
	free_irq_setup(txwin);
	h_deallocate_vas_window(txwin->vas_win.winid);
	/*
	 * Hold the mutex and reduce the nr_open_wins_progress counter.
	 */
	mutex_lock(&vas_pseries_mutex);
	caps->nr_open_wins_progress--;
	mutex_unlock(&vas_pseries_mutex);
out:
	atomic_dec(&cop_feat_caps->nr_used_credits);
	kfree(txwin);
	return ERR_PTR(rc);
}

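/* Return the hypervisor-assigned paste address for this window. */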
static u64 vas_paste_address(struct vas_window *vwin)
{
	struct pseries_vas_window *win;

	win = container_of(vwin, struct pseries_vas_window, vas_win);
	return win->win_addr;
}

static int deallocate_free_window(struct pseries_vas_window *win)
{
	int rc = 0;

	/*
	 * The hypervisor waits until all requests including faults
	 * are processed before closing the window - meaning all
	 * credits have to be returned. In the case of a fault
	 * request, a credit is returned after the OS issues the
	 * H_GET_NX_FAULT hcall.
	 * So free the IRQ after executing the H_DEALLOCATE_VAS_WINDOW
	 * hcall.
	 */
	rc = h_deallocate_vas_window(win->vas_win.winid);
	if (!rc)
		free_irq_setup(win);

	return rc;
}

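/*
 * Close a send window: deallocate it in the hypervisor (unless it was
 * already closed due to a lost credit or migration), drop the task
 * reference and free the vas_window struct.
 */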
static int vas_deallocate_window(struct vas_window *vwin)
{
	struct pseries_vas_window *win;
	struct vas_cop_feat_caps *caps;
	int rc = 0;

	if (!vwin)
		return -EINVAL;

	win = container_of(vwin, struct pseries_vas_window, vas_win);

	/* Should not happen */
	if (win->win_type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Window (%u): Invalid window type %u\n",
				vwin->winid, win->win_type);
		return -EINVAL;
	}

	caps = &vascaps[win->win_type].caps;
	mutex_lock(&vas_pseries_mutex);
	/*
	 * The VAS window is already closed in the hypervisor if the
	 * credit was lost or during migration. So just remove the entry
	 * from the list, remove the task references and free the
	 * vas_window struct.
	 */
	if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
		!(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
		rc = deallocate_free_window(win);
		if (rc) {
			mutex_unlock(&vas_pseries_mutex);
			return rc;
		}
	} else
		vascaps[win->win_type].nr_close_wins--;

	list_del(&win->win_list);
	atomic_dec(&caps->nr_used_credits);
	vascaps[win->win_type].nr_open_windows--;
	mutex_unlock(&vas_pseries_mutex);

	mm_context_remove_vas_window(vwin->task_ref.mm);
	put_vas_user_win_ref(&vwin->task_ref);

	kfree(win);
	return 0;
}

static const struct vas_user_win_ops vops_pseries = {
	.open_win	= vas_allocate_window,	/* Open and configure window */
	.paste_addr	= vas_paste_address,	/* To do copy/paste */
	.close_win	= vas_deallocate_window, /* Close window */
};

/*
 * Only the nx-gzip coprocessor type is supported now, but this API code
 * can be extended to other coprocessor types later.
 */
int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
			     const char *name)
{
	if (!copypaste_feat)
		return -ENOTSUPP;

	return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
}
EXPORT_SYMBOL_GPL(vas_register_api_pseries);

void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);

/*
 * Get the specific capabilities based on the feature type.
 * Right now supports GZIP default and GZIP QoS capabilities.
 */
static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
				struct hv_vas_cop_feat_caps *hv_caps)
{
	struct vas_cop_feat_caps *caps;
	struct vas_caps *vcaps;
	int rc = 0;

	vcaps = &vascaps[type];
	memset(vcaps, 0, sizeof(*vcaps));
	INIT_LIST_HEAD(&vcaps->list);

	vcaps->feat = feat;
	caps = &vcaps->caps;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
					  (u64)virt_to_phys(hv_caps));
	if (rc)
		return rc;

	caps->user_mode = hv_caps->user_mode;
	if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
		pr_err("User space COPY/PASTE is not supported\n");
		return -ENOTSUPP;
	}

	caps->descriptor = be64_to_cpu(hv_caps->descriptor);
	caps->win_type = hv_caps->win_type;
	if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Unsupported window type %u\n", caps->win_type);
		return -EINVAL;
	}
	caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
	caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
	atomic_set(&caps->nr_total_credits,
		   be16_to_cpu(hv_caps->target_lpar_creds));
	if (feat == VAS_GZIP_DEF_FEAT) {
		caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);

		if (caps->max_win_creds < DEF_WIN_CREDS) {
			pr_err("Window creds(%u) > max allowed window creds(%u)\n",
			       DEF_WIN_CREDS, caps->max_win_creds);
			return -EINVAL;
		}
	}

	rc = sysfs_add_vas_caps(caps);
	if (rc)
		return rc;

	copypaste_feat = true;

	return 0;
}

/*
 * VAS windows can be closed due to lost credits when a core is
 * removed. So reopen them if credits are available due to a DLPAR
 * core add and set the window active status. When NX sees the page
 * fault on the unmapped paste address, the kernel handles the fault
 * by setting the remapping to the new paste address if the window is
 * active.
 */
static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
				 bool migrate)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *caps = &vcaps->caps;
	struct pseries_vas_window *win = NULL, *tmp;
	int rc, mv_ents = 0;
	int flag;

	/*
	 * Nothing to do if there are no closed windows.
	 */
	if (!vcaps->nr_close_wins)
		return 0;

	/*
	 * For core removal, the hypervisor reduces the credits
	 * assigned to the LPAR and the kernel closes VAS windows
	 * in the hypervisor depending on the reduced credits. The kernel
	 * uses LIFO (the last windows that are opened will be closed
	 * first) and expects to open them in the same order when credits
	 * are available.
	 * For example, 40 windows are closed when the LPAR lost 2 cores
	 * (dedicated). If 1 core is added, this LPAR can have 20 more
	 * credits. It means the kernel can reopen 20 windows. So skip
	 * 20 entries in the list of closed windows and reopen the next
	 * 20 windows.
	 * For partition migration, reopen all windows that are closed
	 * during resume.
	 */
	if ((vcaps->nr_close_wins > creds) && !migrate)
		mv_ents = vcaps->nr_close_wins - creds;

	list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
		if (!mv_ents)
			break;

		mv_ents--;
	}

	/*
	 * Open windows only if they were closed due to migration or
	 * DLPAR (lost credit) before.
	 */
	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
		/*
		 * This window was closed with both DLPAR and migration
		 * events. So reopen the window with the last event.
		 * The user space is not suspended with the current
		 * migration notifier. So the user space can issue DLPAR
		 * CPU hotplug while migration is in progress. In this case
		 * this window will be opened with the last event.
		 */
		if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
			(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
			win->vas_win.status &= ~flag;
			continue;
		}

		/*
		 * Nothing to do on this window if it is not closed
		 * with this flag.
		 */
		if (!(win->vas_win.status & flag))
			continue;

		rc = allocate_setup_window(win, (u64 *)&domain[0],
					   caps->win_type);
		if (rc)
			return rc;

		rc = h_modify_vas_window(win);
		if (rc)
			goto out;

		mutex_lock(&win->vas_win.task_ref.mmap_mutex);
		/*
		 * Set window status to active.
		 */
		win->vas_win.status &= ~flag;
		mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
		win->win_type = caps->win_type;
		if (!--vcaps->nr_close_wins)
			break;
	}

	return 0;
out:
	/*
	 * The window modify HCALL failed. So close the window in the
	 * hypervisor and return.
	 */
	free_irq_setup(win);
	h_deallocate_vas_window(win->vas_win.winid);
	return rc;
}

/*
 * The hypervisor reduces the available credits if the LPAR loses a core. It
 * means the excess windows should not be active and the user space
 * should not be using these windows to send compression requests to NX.
 * So the kernel closes the excess windows and unmaps the paste address
 * such that the user space receives paste instruction failure. Then it is
 * up to the user space to fall back to SW compression and manage with the
 * existing windows.
 */
static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
									bool migrate)
{
	struct pseries_vas_window *win, *tmp;
	struct vas_user_win_ref *task_ref;
	struct vm_area_struct *vma;
	int rc = 0, flag;

	if (migrate)
		flag = VAS_WIN_MIGRATE_CLOSE;
	else
		flag = VAS_WIN_NO_CRED_CLOSE;

	list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
		/*
		 * This window is already closed due to a lost credit
		 * or for migration before. Go to the next window.
		 * For migration, nothing to do since this window was
		 * closed for DLPAR and will be reopened even on
		 * the destination system with another DLPAR operation.
		 */
		if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
			(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
			win->vas_win.status |= flag;
			continue;
		}

		task_ref = &win->vas_win.task_ref;
		/*
		 * VAS mmap (coproc_mmap()) and its fault handler
		 * (vas_mmap_fault()) are called after holding mmap lock.
		 * So hold the mmap mutex after mmap_lock to avoid deadlock.
		 */
		mmap_write_lock(task_ref->mm);
		mutex_lock(&task_ref->mmap_mutex);
		vma = task_ref->vma;
		/*
		 * The number of available credits is reduced, so select
		 * and close windows.
		 */
		win->vas_win.status |= flag;

		/*
		 * vma is set in the original mapping. But this mapping
		 * is done with mmap() after the window is opened with ioctl.
		 * So we may not see the original mapping if the core removal
		 * is done before the original mmap() and after the ioctl.
		 */
		if (vma)
			zap_vma_pages(vma);

		mutex_unlock(&task_ref->mmap_mutex);
		mmap_write_unlock(task_ref->mm);
		/*
		 * Close the VAS window in the hypervisor, but do not
		 * free the vas_window struct since it may be reused
		 * when the credit is available later (DLPAR with
		 * adding cores). This struct will be used
		 * later when the process issues close(FD).
		 */
		rc = deallocate_free_window(win);
		/*
		 * This failure is from the hypervisor.
		 * There is no way to stop migration for these failures.
		 * So ignore the error and continue closing other windows.
		 */
		if (rc && !migrate)
			return rc;

		vcap->nr_close_wins++;

		/*
		 * For migration, do not depend on lpar_creds in case of a
		 * mismatch with the hypervisor value (should not happen).
		 * So close all active windows in the list; they will be
		 * reopened based on the new lpar_creds on the
		 * destination system during resume.
		 */
		if (!migrate && !--excess_creds)
			break;
	}

	return 0;
}

/*
 * Get new VAS capabilities when the core add/removal configuration
 * changes. Reconfigure the window configurations based on the credit
 * availability from these new capabilities.
 */
int vas_reconfig_capabilties(u8 type, int new_nr_creds)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds;
	struct vas_caps *vcaps;
	int rc = 0, nr_active_wins;

	if (type >= VAS_MAX_FEAT_TYPE) {
		pr_err("Invalid credit type %d\n", type);
		return -EINVAL;
	}

	vcaps = &vascaps[type];
	caps = &vcaps->caps;

	mutex_lock(&vas_pseries_mutex);

	old_nr_creds = atomic_read(&caps->nr_total_credits);

	atomic_set(&caps->nr_total_credits, new_nr_creds);
	/*
	 * The total number of available credits may be decreased or
	 * increased with a DLPAR operation. This means some windows have
	 * to be closed / reopened. Hold the vas_pseries_mutex so that the
	 * user space can not open new windows.
	 */
	if (old_nr_creds < new_nr_creds) {
		/*
		 * If the existing target credits are less than the new
		 * target, reopen windows if they are closed due to
		 * the previous DLPAR (core removal).
		 */
		rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
					   false);
	} else {
		/*
		 * The number of active windows is more than the new LPAR
		 * available credits. So close the excess windows.
		 * On pseries, each window will have 1 credit.
		 */
		nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
		if (nr_active_wins > new_nr_creds)
			rc = reconfig_close_windows(vcaps,
					nr_active_wins - new_nr_creds,
					false);
	}

	mutex_unlock(&vas_pseries_mutex);
	return rc;
}

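/*
 * Called for a DLPAR CPU add / removal event: query the new default GZIP
 * capabilities and adjust the VAS window configuration accordingly.
 */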
int pseries_vas_dlpar_cpu(void)
{
	int new_nr_creds, rc;

	/*
	 * NX-GZIP is not enabled. Nothing to do for DLPAR event.
	 */
	if (!copypaste_feat)
		return 0;

	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
				      vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
				      (u64)virt_to_phys(&hv_cop_caps));
	if (!rc) {
		new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
		rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
	}

	if (rc)
		pr_err("Failed reconfig VAS capabilities with DLPAR\n");

	return rc;
}

/*
 * The total number of default credits available (target_credits)
 * in the LPAR depends on the number of cores configured. It varies based on
 * whether processors are in shared mode or dedicated mode.
 * Get the notifier when the CPU configuration is changed with a DLPAR
 * operation so that the new target_credits (VAS default capabilities)
 * is read and the existing windows usage is updated if needed.
 */
static int pseries_vas_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	struct device_node *dn = rd->dn;
	const __be32 *intserv = NULL;
	int len;

	/*
	 * For a shared CPU partition, the hypervisor assigns total credits
	 * based on entitled core capacity. So updating VAS windows will
	 * be called from lparcfg_write().
	 */
	if (is_shared_processor())
		return NOTIFY_OK;

	if ((action == OF_RECONFIG_ATTACH_NODE) ||
		(action == OF_RECONFIG_DETACH_NODE))
		intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
					  &len);
	/*
	 * Processor config is not changed.
	 */
	if (!intserv)
		return NOTIFY_OK;

	return pseries_vas_dlpar_cpu();
}

static struct notifier_block pseries_vas_nb = {
	.notifier_call = pseries_vas_notifier,
};

/*
 * For LPM, all windows have to be closed on the source partition
 * before migration and reopened on the destination partition
 * after migration. So close windows during suspend and
 * reopen them during resume.
 */
int vas_migration_handler(int action)
{
	struct vas_cop_feat_caps *caps;
	int old_nr_creds, new_nr_creds = 0;
	struct vas_caps *vcaps;
	int i, rc = 0;

	pr_info("VAS migration event %d\n", action);

	/*
	 * NX-GZIP is not enabled. Nothing to do for migration.
	 */
	if (!copypaste_feat)
		return rc;

	if (action == VAS_SUSPEND)
		migration_in_progress = true;
	else
		migration_in_progress = false;

	for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
		vcaps = &vascaps[i];
		caps = &vcaps->caps;
		old_nr_creds = atomic_read(&caps->nr_total_credits);

		rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
					      vcaps->feat,
					      (u64)virt_to_phys(&hv_cop_caps));
		if (!rc) {
			new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
			/*
			 * Should not happen. But in case it does, print
			 * messages, close all windows in the list during
			 * suspend and reopen windows based on the new
			 * lpar_creds on the destination system.
			 */
			if (old_nr_creds != new_nr_creds) {
				pr_err("Target credits mismatch with the hypervisor\n");
				pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
					action, old_nr_creds, new_nr_creds);
				pr_err("Used creds: %d, Active creds: %d\n",
					atomic_read(&caps->nr_used_credits),
					vcaps->nr_open_windows - vcaps->nr_close_wins);
			}
		} else {
			pr_err("state(%d): Get VAS capabilities failed with %d\n",
				action, rc);
			/*
			 * We cannot stop migration with the current LPM
			 * implementation. So continue closing all windows in
			 * the list (during suspend) and return without
			 * opening windows (during resume) if the VAS
			 * capabilities HCALL failed.
			 */
			if (action == VAS_RESUME)
				goto out;
		}

		switch (action) {
		case VAS_SUSPEND:
			mutex_lock(&vas_pseries_mutex);
			rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
							true);
			/*
			 * Windows are included in the list after successful
			 * open. So wait for closing these in-progress open
			 * windows in vas_allocate_window() which will be
			 * done if the migration_in_progress is set.
			 */
			while (vcaps->nr_open_wins_progress) {
				mutex_unlock(&vas_pseries_mutex);
				msleep(10);
				mutex_lock(&vas_pseries_mutex);
			}
			mutex_unlock(&vas_pseries_mutex);
			break;
		case VAS_RESUME:
			mutex_lock(&vas_pseries_mutex);
			atomic_set(&caps->nr_total_credits, new_nr_creds);
			rc = reconfig_open_windows(vcaps, new_nr_creds, true);
			mutex_unlock(&vas_pseries_mutex);
			break;
		default:
			/* should not happen */
			pr_err("Invalid migration action %d\n", action);
			rc = -EINVAL;
			goto out;
		}

		/*
		 * Ignore errors during suspend and return for resume.
		 */
		if (rc && (action == VAS_RESUME))
			goto out;
	}

	pr_info("VAS migration event (%d) successful\n", action);

out:
	return rc;
}

static int __init pseries_vas_init(void)
{
	struct hv_vas_all_caps *hv_caps;
	int rc = 0;

	/*
	 * Linux supports user space COPY/PASTE only with Radix.
	 */
	if (!radix_enabled()) {
		copypaste_feat = false;
		pr_err("API is supported only with radix page tables\n");
		return -ENOTSUPP;
	}

	hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
	if (!hv_caps)
		return -ENOMEM;
	/*
	 * Get VAS overall capabilities by passing 0 for the feature type.
	 */
	rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
					  (u64)virt_to_phys(hv_caps));
	if (rc)
		goto out;

	caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
	caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);

	sysfs_pseries_vas_init(&caps_all);

	/*
	 * QoS capabilities available
	 */
	if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
		rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
					  VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);

		if (rc)
			goto out;
	}
	/*
	 * Default capabilities available
	 */
	if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
		rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
					  VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);

	if (!rc && copypaste_feat) {
		if (firmware_has_feature(FW_FEATURE_LPAR))
			of_reconfig_notifier_register(&pseries_vas_nb);

		pr_info("GZIP feature is available\n");
	} else {
		/*
		 * Should not happen, but only when the get default
		 * capabilities HCALL failed. So disable the copy paste
		 * feature.
		 */
		copypaste_feat = false;
	}

out:
	kfree(hv_caps);
	return rc;
}
machine_device_initcall(pseries, pseries_vas_init);
