/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_core/mlx5_pagealloc.c 322151 2017-08-07 12:49:30Z hselasky $
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <dev/mlx5/driver.h>
#include "mlx5_core.h"

CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16	func_id;
	s32	npages;
	struct work_struct work;
};


struct mlx5_manage_pages_inbox {
	struct mlx5_inbox_hdr	hdr;
	__be16			rsvd;
	__be16			func_id;
	__be32			num_entries;
	__be64			pas[0];
};

struct mlx5_manage_pages_outbox {
	struct mlx5_outbox_hdr	hdr;
	__be32			num_entries;
	u8			rsvd[4];
	__be64			pas[0];
};

enum {
	MAX_RECLAIM_TIME_MSECS	= 5000,
};

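/*
 * Callback for bus_dmamap_load().  Records the single DMA segment
 * address in the firmware page structure, marks the load as either
 * successful or failed, and wakes up the thread waiting in
 * mlx5_fwp_alloc().  The command DMA lock is taken here unless the
 * caller already owns it.
 */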
static void
mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mlx5_fw_page *fwp;
	uint8_t owned;

	fwp = (struct mlx5_fw_page *)arg;
	owned = MLX5_DMA_OWNED(fwp->dev);

	if (!owned)
		MLX5_DMA_LOCK(fwp->dev);

	if (error == 0) {
		KASSERT(nseg == 1, ("Number of segments is different from 1"));
		fwp->dma_addr = segs->ds_addr;
		fwp->load_done = MLX5_LOAD_ST_SUCCESS;
	} else {
		fwp->load_done = MLX5_LOAD_ST_FAILURE;
	}
	MLX5_DMA_DONE(fwp->dev);

	if (!owned)
		MLX5_DMA_UNLOCK(fwp->dev);
}

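/*
 * Flush all CPU writes to the given firmware page array towards the
 * device before the firmware reads it.
 */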
void
mlx5_fwp_flush(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--)
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREWRITE);
}

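/*
 * Invalidate the CPU caches covering the given firmware page array
 * before reading data written by the device.
 */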
void
mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--) {
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREREAD);
	}
}

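/*
 * Allocate and DMA load "num" firmware pages of MLX5_ADAPTER_PAGE_SIZE
 * bytes each.  When "num" is zero only the tracking structure is
 * allocated.  A sleeping context is required; M_NOWAIT requests are
 * rejected.  Returns NULL on failure.
 *
 * A typical call sequence, shown here only as an illustrative sketch,
 * could look like:
 *
 *	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
 *	if (fwp == NULL)
 *		return (-ENOMEM);
 *	dma_addr = mlx5_fwp_get_dma(fwp, 0);
 *	...
 *	mlx5_fwp_free(fwp);
 */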
struct mlx5_fw_page *
mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
{
	struct mlx5_fw_page *fwp;
	unsigned x;
	int err;

	/* check for special case */
	if (num == 0) {
		fwp = kzalloc(sizeof(*fwp), flags);
		if (fwp != NULL)
			fwp->dev = dev;
		return (fwp);
	}

	/* we need sleeping context for this function */
	if (flags & M_NOWAIT)
		return (NULL);

	fwp = kzalloc(sizeof(*fwp) * num, flags);

	/* serialize loading the DMA map(s) */
	sx_xlock(&dev->cmd.dma_sx);

	for (x = 0; x != num; x++) {
		/* store pointer to MLX5 core device */
		fwp[x].dev = dev;
		/* store number of pages left from the array */
		fwp[x].numpages = num - x;

		/* allocate memory */
		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
		if (err != 0)
			goto failure;

		/* load memory into DMA */
		MLX5_DMA_LOCK(dev);
		err = bus_dmamap_load(
		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);

		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
			MLX5_DMA_WAIT(dev);
		MLX5_DMA_UNLOCK(dev);

		/* check for error */
		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
			    fwp[x].dma_map);
			goto failure;
		}
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (fwp);

failure:
	while (x--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr, fwp[x].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (NULL);
}

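/*
 * Unload and free all DMA pages backing "fwp" as well as the tracking
 * structure itself.  Passing NULL is allowed.
 */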
void
mlx5_fwp_free(struct mlx5_fw_page *fwp)
{
	struct mlx5_core_dev *dev;
	unsigned num;

	/* be NULL safe */
	if (fwp == NULL)
		return;

	/* check for special case */
	if (fwp->numpages == 0) {
		kfree(fwp);
		return;
	}

	num = fwp->numpages;
	dev = fwp->dev;

	while (num--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr, fwp[num].dma_map);
	}

	kfree(fwp);
}

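/*
 * Return the DMA (bus) address for the given byte offset into the
 * firmware page array.
 */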
u64
mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

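/*
 * Return the kernel virtual address for the given byte offset into
 * the firmware page array.
 */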
void *
mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((char *)(fwp + index)->virt_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

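/*
 * Insert a firmware page into the per-device red-black tree, keyed by
 * DMA address.  Returns -EEXIST when the address is already tracked.
 * The command DMA lock must be held by the caller.
 */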
static int
mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct mlx5_fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < nfp->dma_addr)
			new = &parent->rb_left;
		else if (tfp->dma_addr > nfp->dma_addr)
			new = &parent->rb_right;
		else
			return (-EEXIST);
	}

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	return (0);
}

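/*
 * Look up a firmware page by DMA address and remove it from the
 * red-black tree.  Returns NULL when no entry matches.  The command
 * DMA lock must be held by the caller.
 */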
static struct mlx5_fw_page *
mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct mlx5_fw_page *result = NULL;
	struct mlx5_fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->dma_addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, &dev->priv.page_root);
			result = tfp;
			break;
		}
	}
	return (result);
}

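/*
 * Allocate one 4K firmware page on behalf of "func_id", track it in
 * the page tree and return its DMA address through "addr".
 */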
static int
alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
	struct mlx5_fw_page *fwp;
	int err;

	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
	if (fwp == NULL)
		return (-ENOMEM);

	fwp->func_id = func_id;

	MLX5_DMA_LOCK(dev);
	err = mlx5_insert_fw_page_locked(dev, fwp);
	MLX5_DMA_UNLOCK(dev);

	if (err != 0) {
		mlx5_fwp_free(fwp);
	} else {
		/* make sure cached data is cleaned */
		mlx5_fwp_invalidate(fwp);

		/* store DMA address */
		*addr = fwp->dma_addr;
	}
	return (err);
}

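/*
 * Release the 4K firmware page previously handed out at DMA address
 * "addr".  A warning is logged when the address is not tracked.
 */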
static void
free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct mlx5_fw_page *fwp;

	MLX5_DMA_LOCK(dev);
	fwp = mlx5_remove_fw_page_locked(dev, addr);
	MLX5_DMA_UNLOCK(dev);

	if (fwp == NULL) {
		mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n", (long long)addr);
		return;
	}
	mlx5_fwp_free(fwp);
}

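/*
 * Ask the firmware how many pages it needs and for which function ID.
 * "boot" selects boot pages instead of init pages.
 */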
static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 in[MLX5_ST_SZ_DW(query_pages_in)];
	u32 out[MLX5_ST_SZ_DW(query_pages_out)];
	int err;

	memset(in, 0, sizeof(in));

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod,
		 boot ? MLX5_BOOT_PAGES : MLX5_INIT_PAGES);

	memset(out, 0, sizeof(out));
	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return 0;
}

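/*
 * Allocate "npages" 4K pages and hand them to the firmware using the
 * MANAGE_PAGES(GIVE) command.  On failure the pages are freed again
 * and, when "notify_fail" is set, the firmware is told that no pages
 * could be supplied.
 */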
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	struct mlx5_manage_pages_inbox *in;
	struct mlx5_manage_pages_outbox out;
	struct mlx5_manage_pages_inbox *nin;
	int inlen;
	u64 addr;
	int err;
	int i = 0;

	inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		err = -ENOMEM;
		goto out_alloc;
	}
	memset(&out, 0, sizeof(out));

	for (i = 0; i < npages; i++) {
		err = alloc_4k(dev, &addr, func_id);
		if (err)
			goto out_alloc;
		in->pas[i] = cpu_to_be64(addr);
	}

	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
	in->func_id = cpu_to_be16(func_id);
	in->num_entries = cpu_to_be32(npages);
	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;
	dev->priv.pages_per_func[func_id] += npages;

	if (out.hdr.status) {
		err = mlx5_cmd_status_to_err(&out.hdr);
		if (err) {
			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
				       func_id, npages, out.hdr.status);
			goto out_alloc;
		}
	}

	mlx5_core_dbg(dev, "err %d\n", err);

	goto out_free;

out_alloc:
	if (notify_fail) {
		nin = kzalloc(sizeof(*nin), GFP_KERNEL);
		if (!nin)
			goto out_4k;

		memset(&out, 0, sizeof(out));
		nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
		nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
		nin->func_id = cpu_to_be16(func_id);
		if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
		kfree(nin);
	}

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, be64_to_cpu(in->pas[i]));
out_free:
	kvfree(in);
	return err;
}

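/*
 * Ask the firmware to return up to "npages" pages for "func_id" using
 * the MANAGE_PAGES(TAKE) command and free every page that was
 * actually returned.  The number of reclaimed pages is reported
 * through "nclaimed" when it is not NULL.
 */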
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	struct mlx5_manage_pages_inbox   in;
	struct mlx5_manage_pages_outbox *out;
	int num_claimed;
	int outlen;
	u64 addr;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	memset(&in, 0, sizeof(in));
	outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
	in.func_id = cpu_to_be16(func_id);
	in.num_entries = cpu_to_be32(npages);
	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}

	if (out->hdr.status) {
		err = mlx5_cmd_status_to_err(&out->hdr);
		goto out_free;
	}

	num_claimed = be32_to_cpu(out->num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	dev->priv.pages_per_func[func_id] -= num_claimed;
	for (i = 0; i < num_claimed; i++) {
		addr = be64_to_cpu(out->pas[i]);
		free_4k(dev, addr);
	}

out_free:
	kvfree(out);
	return err;
}

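/*
 * Workqueue handler servicing one firmware page request: a negative
 * page count triggers a reclaim, a positive one a give.
 */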
static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
			       req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

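/*
 * Entry point for firmware page request events.  The request is
 * deferred to the page allocator workqueue because giving or
 * reclaiming pages may sleep.
 */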
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	if (!queue_work(dev->priv.pg_wq, &req->work))
		mlx5_core_warn(dev, "failed to queue pages handler work\n");
}

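/*
 * Query how many pages the firmware needs for the boot or init stage
 * and hand them over in a single give_pages() call.
 */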
int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
{
	u16 uninitialized_var(func_id);
	s32 uninitialized_var(npages);
	int err;

	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
	if (err)
		return err;

	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
		      npages, boot ? "boot" : "init", func_id);

	return give_pages(dev, func_id, npages, 0);
}

enum {
	MLX5_BLKS_FOR_RECLAIM_PAGES = 12
};

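/*
 * Wait up to MAX_RECLAIM_TIME_MSECS, extended while progress is being
 * made, for the firmware to return all pages given on behalf of VFs.
 * Returns the negated number of pages still outstanding.
 */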
s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev)
{
	int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	s64 prevpages = 0;
	s64 npages = 0;

	while (!time_after(jiffies, end)) {
		/* exclude own function, VFs only */
		npages = dev->priv.fw_pages - dev->priv.pages_per_func[0];
		if (!npages)
			break;

		if (npages != prevpages)
			end = end + msecs_to_jiffies(100);

		prevpages = npages;
		msleep(1);
	}

	if (npages)
		mlx5_core_warn(dev, "FW did not return all VF pages, this will cause a memory leak\n");

	return -npages;
}

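/*
 * Compute how many page addresses fit into a reclaim command mailbox
 * spanning MLX5_BLKS_FOR_RECLAIM_PAGES command blocks.
 */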
static int optimal_reclaimed_pages(void)
{
	struct mlx5_cmd_prot_block *block;
	struct mlx5_cmd_layout *lay;
	int ret;

	ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
	       sizeof(struct mlx5_manage_pages_outbox)) /
	       FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]);

	return ret;
}

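/*
 * Reclaim all pages currently given to the firmware, batch by batch,
 * until the page tree is empty or no progress is made within
 * MAX_RECLAIM_TIME_MSECS.  In the internal error state the pages are
 * freed directly without consulting the firmware.
 */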
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
{
	int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	struct mlx5_fw_page *fwp;
	struct rb_node *p;
	int nclaimed = 0;
	int err;

	do {
		p = rb_first(&dev->priv.page_root);
		if (p) {
			fwp = rb_entry(p, struct mlx5_fw_page, rb_node);
			if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
				--dev->priv.fw_pages;
				free_4k(dev, fwp->dma_addr);
				nclaimed = 1;
			} else {
				err = reclaim_pages(dev, fwp->func_id,
						    optimal_reclaimed_pages(),
						    &nclaimed);
				if (err) {
					mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
						       err);
					return err;
				}
			}

			if (nclaimed)
				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
		}
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "FW did not return all pages, giving up...\n");
			break;
		}
	} while (p);

	return 0;
}

void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{

	dev->priv.page_root = RB_ROOT;
}

void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
	/* nothing */
}

int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
	if (!dev->priv.pg_wq)
		return -ENOMEM;

	return 0;
}

void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.pg_wq);
}
