// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */

#include <linux/gfp.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_umem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include <linux/resource.h>

#include "siw.h"
#include "siw_mem.h"

/* STag lookup is based on its index part only (24 bits). */
#define SIW_STAG_MAX_INDEX	0x00ffffff
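
/*
 * A 32-bit STag consists of a 24-bit index in the upper three bytes
 * and an 8-bit key in the lowest byte: stag = (index << 8) | key.
 * mem_xa is therefore indexed by 'stag >> 8'.
 */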

/*
 * The code avoids the special STag value of zero and randomizes
 * STag index values between 1 and SIW_STAG_MAX_INDEX.
 */
int siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
{
	struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
	u32 id, next;

	get_random_bytes(&next, 4);
	next &= SIW_STAG_MAX_INDEX;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
	    GFP_KERNEL) < 0)
		return -ENOMEM;

	/* Set the STag index part */
	m->stag = id << 8;

	siw_dbg_mem(m, "new MEM object\n");

	return 0;
}

/*
 * siw_mem_id2obj()
 *
 * Resolves memory from an STag index. May be called from:
 * o process context, before sending data from an SGL, or
 * o softirq context, when resolving target memory
 */
struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
{
	struct siw_mem *mem;

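	/*
	 * Only hand out the object if a reference can still be taken,
	 * i.e. its refcount has not already dropped to zero.
	 */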
	rcu_read_lock();
	mem = xa_load(&sdev->mem_xa, stag_index);
	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
		rcu_read_unlock();
		return mem;
	}
	rcu_read_unlock();

	return NULL;
}

void siw_umem_release(struct siw_umem *umem)
{
	int i, num_pages = umem->num_pages;

	if (umem->base_mem)
		ib_umem_release(umem->base_mem);

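	/* Free the per-chunk page pointer lists; each chunk covers
	 * up to PAGES_PER_CHUNK pages.
	 */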
	for (i = 0; num_pages > 0; i++) {
		kfree(umem->page_chunk[i].plist);
		num_pages -= PAGES_PER_CHUNK;
	}
	kfree(umem->page_chunk);
	kfree(umem);
}

int siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
		   u64 start, u64 len, int rights)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	struct xa_limit limit = XA_LIMIT(1, SIW_STAG_MAX_INDEX);
	u32 id, next;

	if (!mem)
		return -ENOMEM;

	mem->mem_obj = mem_obj;
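	/* A new STag starts out invalid; it is enabled once the
	 * registration completes.
	 */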
	mem->stag_valid = 0;
	mem->sdev = sdev;
	mem->va = start;
	mem->len = len;
	mem->pd = pd;
	mem->perms = rights & IWARP_ACCESS_MASK;
	kref_init(&mem->ref);

	get_random_bytes(&next, 4);
	next &= SIW_STAG_MAX_INDEX;

	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
	    GFP_KERNEL) < 0) {
		kfree(mem);
		return -ENOMEM;
	}

	mr->mem = mem;
	/* Set the STag index part */
	mem->stag = id << 8;
	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;

	return 0;
}

void siw_mr_drop_mem(struct siw_mr *mr)
{
	struct siw_mem *mem = mr->mem, *found;

	mem->stag_valid = 0;

	/* Make the STag invalidation visible to concurrent readers asap */
	smp_mb();

	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
	WARN_ON(found != mem);
	siw_mem_put(mem);
}

void siw_free_mem(struct kref *ref)
{
	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);

	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");

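	/* A memory window does not own the underlying memory;
	 * only memory regions free their umem or pbl here.
	 */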
	if (!mem->is_mw && mem->mem_obj) {
		if (!mem->is_pbl)
			siw_umem_release(mem->umem);
		else
			kfree(mem->pbl);
	}
	kfree(mem);
}

/*
 * siw_check_mem()
 *
 * Check protection domain, STag state, access permissions and
 * address range for memory object.
 *
 * @pd:		Protection Domain memory should belong to
 * @mem:	memory to be checked
 * @addr:	starting addr of mem
 * @perms:	requested access permissions
 * @len:	len of memory interval to be checked
 */
int siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
		  enum ib_access_flags perms, int len)
{
	if (!mem->stag_valid) {
		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
		return -E_STAG_INVALID;
	}
	if (mem->pd != pd) {
		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
		return -E_PD_MISMATCH;
	}
	/*
	 * Check access permissions: all requested permission bits
	 * must be granted by the memory object.
	 */
	if ((mem->perms & perms) < perms) {
		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
			   mem->perms, perms);
		return -E_ACCESS_PERM;
	}
	/*
	 * Check if access falls into valid memory interval.
	 */
	if (addr < mem->va || addr + len > mem->va + mem->len) {
		siw_dbg_pd(pd, "MEM interval len %d\n", len);
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
			   (void *)(uintptr_t)addr,
			   (void *)(uintptr_t)(addr + len));
		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
			   (void *)(uintptr_t)mem->va,
			   (void *)(uintptr_t)(mem->va + mem->len),
			   mem->stag);

		return -E_BASE_BOUNDS;
	}
	return E_ACCESS_OK;
}

/*
 * siw_check_sge()
 *
 * Check SGE for access rights in given interval
 *
 * @pd:		Protection Domain memory should belong to
 * @sge:	SGE to be checked
 * @mem:	location of memory reference within array
 * @perms:	requested access permissions
 * @off:	starting offset in SGE
 * @len:	len of memory interval to be checked
 *
 * NOTE: Function references the SGE's memory object (*mem)
 * if not yet done. The new reference is kept if the check succeeded
 * and released if it failed. If *mem is already valid, no new
 * lookup is done and the reference is not dropped on failure.
 */
int siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
		  enum ib_access_flags perms, u32 off, int len)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *new = NULL;
	int rv = E_ACCESS_OK;

	if (len + off > sge->length) {
		rv = -E_BASE_BOUNDS;
		goto fail;
	}
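	/* Resolve the memory object from the STag index on first use */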
	if (*mem == NULL) {
		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
		if (unlikely(!new)) {
			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
			rv = -E_STAG_INVALID;
			goto fail;
		}
		*mem = new;
	}
	/* Check if user re-registered with different STag key */
	if (unlikely((*mem)->stag != sge->lkey)) {
		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
		rv = -E_STAG_INVALID;
		goto fail;
	}
	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
	if (unlikely(rv))
		goto fail;

	return 0;

fail:
	if (new) {
		*mem = NULL;
		siw_mem_put(new);
	}
	return rv;
}

void siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
{
	switch (op) {
	case SIW_OP_SEND:
	case SIW_OP_WRITE:
	case SIW_OP_SEND_WITH_IMM:
	case SIW_OP_SEND_REMOTE_INV:
	case SIW_OP_READ:
	case SIW_OP_READ_LOCAL_INV:
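		/* Inline data is carried within the SQE itself and
		 * holds no memory references.
		 */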
		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
		break;

	case SIW_OP_RECEIVE:
		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
		break;

	case SIW_OP_READ_RESPONSE:
		siw_unref_mem_sgl(wqe->mem, 1);
		break;

	default:
		/*
		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
		 * do not hold memory references
		 */
		break;
	}
}

int siw_invalidate_stag(struct ib_pd *pd, u32 stag)
{
	struct siw_device *sdev = to_siw_dev(pd->device);
	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
	int rv = 0;

	if (unlikely(!mem)) {
		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
		return -EINVAL;
	}
	if (unlikely(mem->pd != pd)) {
		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
		rv = -EACCES;
		goto out;
	}
	/*
	 * Per the RDMA verbs definition, an STag may already be in the
	 * invalid state when invalidation is requested. So no state
	 * check here.
	 */
	mem->stag_valid = 0;

	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
out:
	siw_mem_put(mem);
	return rv;
}

/*
 * Returns the physical address backed by a PBL element. The address
 * is referenced by a linear byte offset into the list of variably
 * sized PB elements. Optionally provides the remaining length within
 * the current element and the current PBL index, to allow resuming
 * later at the same element.
 */
dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
{
	int i = idx ? *idx : 0;

	while (i < pbl->num_buf) {
		struct siw_pble *pble = &pbl->pbe[i];

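		/* Does this element cover byte offset 'off'? */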
		if (pble->pbl_off + pble->size > off) {
			u64 pble_off = off - pble->pbl_off;

			if (len)
				*len = pble->size - pble_off;
			if (idx)
				*idx = i;

			return pble->addr + pble_off;
		}
		i++;
	}
	if (len)
		*len = 0;
	return 0;
}

struct siw_pbl *siw_pbl_alloc(u32 num_buf)
{
	struct siw_pbl *pbl;

	if (num_buf == 0)
		return ERR_PTR(-EINVAL);

	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
	if (!pbl)
		return ERR_PTR(-ENOMEM);

	pbl->max_buf = num_buf;

	return pbl;
}

struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
			      u64 len, int rights)
{
	struct siw_umem *umem;
	struct ib_umem *base_mem;
	struct sg_page_iter sg_iter;
	struct sg_table *sgt;
	u64 first_page_va;
	int num_pages, num_chunks, i, rv = 0;

	if (!len)
		return ERR_PTR(-EINVAL);

	first_page_va = start & PAGE_MASK;
	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
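	/* A chunk holds up to PAGES_PER_CHUNK page pointers; the '+ 1'
	 * rounds the number of chunks up.
	 */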
	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->page_chunk =
		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
	if (!umem->page_chunk) {
		rv = -ENOMEM;
		goto err_out;
	}
	base_mem = ib_umem_get(base_dev, start, len, rights);
	if (IS_ERR(base_mem)) {
		rv = PTR_ERR(base_mem);
		siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
		goto err_out;
	}
	umem->fp_addr = first_page_va;
	umem->base_mem = base_mem;

	sgt = &base_mem->sgt_append.sgt;
	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);

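	/* Position the iterator at the first page of the pinned range */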
	if (!__sg_page_iter_next(&sg_iter)) {
		rv = -EINVAL;
		goto err_out;
	}
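	/* Fill the per-chunk page pointer lists from the scatterlist */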
	for (i = 0; num_pages > 0; i++) {
		int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
		struct page **plist =
			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);

		if (!plist) {
			rv = -ENOMEM;
			goto err_out;
		}
		umem->page_chunk[i].plist = plist;
		while (nents--) {
			*plist = sg_page_iter_page(&sg_iter);
			umem->num_pages++;
			num_pages--;
			plist++;
			if (!__sg_page_iter_next(&sg_iter))
				break;
		}
	}
	return umem;
err_out:
	siw_umem_release(umem);

	return ERR_PTR(rv);
}