// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2017 IBM Corp.
 */

#include <linux/hugetlb.h>
#include <linux/sched/mm.h>
#include <asm/opal-api.h>
#include <asm/pnv-pci.h>
#include <misc/cxllib.h>

#include "cxl.h"

#define CXL_INVALID_DRA                 ~0ull
#define CXL_DUMMY_READ_SIZE             128
#define CXL_DUMMY_READ_ALIGN            8
#define CXL_CAPI_WINDOW_START           0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE        48
#define CXL_XSL_CONFIG_CURRENT_VERSION  CXL_XSL_CONFIG_VERSION1

bool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	/* No flags currently supported */
	if (flags)
		return false;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return false;

	if (!cxl_is_power9())
		return false;

	if (cxl_slot_is_switched(dev))
		return false;

	/* On POWER9, some PCI slots are not connected to a CAPP unit */
	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return false;

	return true;
}
EXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
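
/*
 * Example (illustrative sketch, not part of the original driver): an
 * XSL/FPGA driver would typically gate its CAPI setup on this check
 * at probe time. The function name is hypothetical.
 */
static int __maybe_unused example_probe_capi_slot(struct pci_dev *dev)
{
	/* No flags are defined yet, so pass 0 */
	if (!cxllib_slot_is_supported(dev, 0)) {
		dev_info(&dev->dev, "slot does not support CAPI\n");
		return -ENODEV;
	}
	return 0;
}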

static DEFINE_MUTEX(dra_mutex);
static u64 dummy_read_addr = CXL_INVALID_DRA;

static int allocate_dummy_read_buf(void)
{
	u64 buf, vaddr;
	size_t buf_size;

	/*
	 * The dummy read buffer is 128 bytes long, aligned on a
	 * 256-byte boundary, and we need its physical address.
	 */
	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
					(~0ull << CXL_DUMMY_READ_ALIGN);

	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
		"Dummy read buffer alignment issue");
	dummy_read_addr = virt_to_phys((void *) vaddr);
	return 0;
}

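/*
 * Worked example of the rounding above (the address is illustrative):
 * with CXL_DUMMY_READ_ALIGN = 8, the required alignment is
 * 1 << 8 = 256 bytes and buf_size = 128 + 256 = 384 bytes.
 * If kzalloc() returned buf = 0x1040, then
 *   vaddr = (0x1040 + 255) & ~255ull = 0x1100,
 * which is 256-byte aligned, and the 128-byte window ending at
 * 0x1180 still fits within the allocation ending at 0x11c0, so the
 * WARN() above does not fire.
 */
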
int cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
{
	int rc;
	u32 phb_index;
	u64 chip_id, capp_unit_id;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	mutex_lock(&dra_mutex);
	if (dummy_read_addr == CXL_INVALID_DRA) {
		rc = allocate_dummy_read_buf();
		if (rc) {
			mutex_unlock(&dra_mutex);
			return rc;
		}
	}
	mutex_unlock(&dra_mutex);

	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
	if (rc)
		return rc;

	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
	if (rc)
		return rc;

	cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION;
	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
	cfg->bar_addr = CXL_CAPI_WINDOW_START;
	cfg->dra = dummy_read_addr;
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
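
/*
 * Example (illustrative sketch, not part of the original driver):
 * fetching the XSL configuration and handing it to device-specific
 * code. example_write_xsl_regs() is a hypothetical helper that would
 * program the snoop control value (dsnctl), the CAPI window
 * (bar_addr/log_bar_size) and the dummy read address (dra) into the
 * XSL registers.
 */
static int __maybe_unused example_configure_xsl(struct pci_dev *dev,
		int (*example_write_xsl_regs)(struct pci_dev *,
				const struct cxllib_xsl_config *))
{
	struct cxllib_xsl_config cfg;
	int rc;

	rc = cxllib_get_xsl_config(dev, &cfg);
	if (rc)
		return rc;

	/* Refuse layouts this (hypothetical) driver doesn't know about */
	if (cfg.version != CXL_XSL_CONFIG_CURRENT_VERSION)
		return -EINVAL;

	return example_write_xsl_regs(dev, &cfg);
}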

int cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
			unsigned long flags)
{
	int rc = 0;

	if (!cpu_has_feature(CPU_FTR_HVMODE))
		return -EINVAL;

	switch (mode) {
	case CXL_MODE_PCI:
		/*
		 * We currently don't support going back to PCI mode.
		 * However, we turn invalidations off so that the
		 * firmware doesn't have to ack them and can do things
		 * like resets with no worries.
		 * So we always return EPERM (can't go back to PCI) or
		 * EBUSY if we couldn't even turn off snooping.
		 */
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
		if (rc)
			rc = -EBUSY;
		else
			rc = -EPERM;
		break;
	case CXL_MODE_CXL:
		/* DMA is only supported on TVT#1 for the time being */
		if (flags != CXL_MODE_DMA_TVT1)
			return -EINVAL;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
		if (rc)
			return rc;
		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);

/*
 * When switching the PHB to CAPI mode, the TVT#1 entry for
 * the Partitionable Endpoint is set in bypass mode, like
 * in PCI mode.
 * Configure the device DMA to use TVT#1, which is done
 * by calling dma_set_mask() with a mask large enough.
 */
int cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
{
	int rc;

	if (flags)
		return -EINVAL;

	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
	return rc;
}
EXPORT_SYMBOL_GPL(cxllib_set_device_dma);
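
/*
 * Example (illustrative sketch, not part of the original driver):
 * switching a PHB to CAPI mode and then configuring the device for
 * DMA. As noted above, DMA currently requires TVT#1, and
 * cxllib_set_device_dma() selects it by setting a 64-bit DMA mask.
 * The function name is hypothetical.
 */
static int __maybe_unused example_enable_cxl_mode(struct pci_dev *dev)
{
	int rc;

	rc = cxllib_switch_phb_mode(dev, CXL_MODE_CXL, CXL_MODE_DMA_TVT1);
	if (rc)
		return rc;

	/* No flags are defined for cxllib_set_device_dma() yet */
	return cxllib_set_device_dma(dev, 0);
}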

int cxllib_get_PE_attributes(struct task_struct *task,
			     unsigned long translation_mode,
			     struct cxllib_pe_attributes *attr)
{
	if (translation_mode != CXL_TRANSLATED_MODE &&
		translation_mode != CXL_REAL_MODE)
		return -EINVAL;

	attr->sr = cxl_calculate_sr(false,
				task == NULL,
				translation_mode == CXL_REAL_MODE,
				true);
	attr->lpid = mfspr(SPRN_LPID);
	if (task) {
		struct mm_struct *mm = get_task_mm(task);

		if (mm == NULL)
			return -EINVAL;
		/*
		 * The caller keeps a reference on mm_users for as long
		 * as the XSL uses the memory context.
		 */
		attr->pid = mm->context.id;
		mmput(mm);
		attr->tid = task->thread.tidr;
	} else {
		attr->pid = 0;
		attr->tid = 0;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
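
/*
 * Example (illustrative sketch, not part of the original driver):
 * computing the PE attributes for the current process before
 * attaching it to the accelerator; the caller must keep a reference
 * on mm_users for as long as the XSL uses the context (see above).
 * For a kernel context, pass task = NULL and CXL_REAL_MODE instead.
 */
static int __maybe_unused example_current_pe_attr(struct cxllib_pe_attributes *attr)
{
	return cxllib_get_PE_attributes(current, CXL_TRANSLATED_MODE, attr);
}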

static int get_vma_info(struct mm_struct *mm, u64 addr,
			u64 *vma_start, u64 *vma_end,
			unsigned long *page_size)
{
	struct vm_area_struct *vma = NULL;
	int rc = 0;

	mmap_read_lock(mm);

	vma = find_vma(mm, addr);
	if (!vma) {
		rc = -EFAULT;
		goto out;
	}
	*page_size = vma_kernel_pagesize(vma);
	*vma_start = vma->vm_start;
	*vma_end = vma->vm_end;
out:
	mmap_read_unlock(mm);
	return rc;
}

int cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
{
	int rc;
	u64 dar, vma_start, vma_end;
	unsigned long page_size;

	if (mm == NULL)
		return -EFAULT;

	/*
	 * The buffer we have to process can extend over several pages
	 * and may also cover several VMAs.
	 * We iterate over all the pages. The page size could vary
	 * between VMAs.
	 */
	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
	if (rc)
		return rc;

	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
	     dar += page_size) {
		if (dar < vma_start || dar >= vma_end) {
			/*
			 * We don't hold mm->mmap_lock while iterating,
			 * since the lock is required by one of the
			 * lower-level page fault processing functions
			 * and holding it could create a deadlock.
			 *
			 * This means the VMAs can be altered between
			 * two loop iterations and we could
			 * theoretically miss a page (however
			 * unlikely). But that's not really a problem,
			 * as the driver will retry the access, get
			 * another page fault on the missing page and
			 * call us again.
			 */
			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
					&page_size);
			if (rc)
				return rc;
		}

		rc = cxl_handle_mm_fault(mm, flags, dar);
		if (rc)
			return -EFAULT;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(cxllib_handle_fault);
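
/*
 * Example (illustrative sketch, not part of the original driver): an
 * XSL fault handler asking the kernel to resolve the faulting range
 * before the device retries its access. The address, size and flags
 * would normally come from the XSL's fault registers.
 */
static int __maybe_unused example_resolve_xsl_fault(struct mm_struct *mm,
		u64 fault_addr, u64 fault_size, u64 fault_flags)
{
	int rc;

	rc = cxllib_handle_fault(mm, fault_addr, fault_size, fault_flags);
	if (rc)
		pr_err("cxl: could not resolve fault at %#llx\n", fault_addr);
	return rc;
}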