/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c 368226 2020-12-01 13:10:25Z hselasky $
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

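/*
 * Map an MR order (log2 number of pages) to an index in the MR cache.
 * Orders below the smallest cache entry fall back to index 0.
 */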
static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR before reg_umr has finished, to ensure that the
		 * MR initialization has finished before we start
		 * handling invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * a page fault could be handled and an invalidation
		 * could run before umem->odp_data->private == mr is
		 * visible to the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif

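/*
 * Completion callback for asynchronous mkey creation issued by add_keys().
 * On success the new mkey gets a variant key, is added to its cache entry
 * and inserted into the device mkey radix tree; on failure the MR is freed
 * and further cache refills are throttled via the delay timer.
 */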
static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5_ib_mr *mr =
		container_of(context, struct mlx5_ib_mr, cb_work);
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	spin_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	spin_unlock_irqrestore(&table->lock, flags);
}

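/*
 * Asynchronously create up to "num" free mkeys for cache entry "c".
 * At most MAX_PENDING_REG_MR creations may be outstanding per entry;
 * completions are handled by reg_mr_callback().
 */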
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
		MLX5_SET(mkc, mkc, log_page_size, 12);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       &dev->async_ctx, in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, &mr->cb_work);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

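/*
 * Destroy up to "num" cached mkeys from entry "c", starting with the
 * oldest, and free the corresponding MRs.
 */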
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

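/*
 * Cache maintenance work: grow an entry toward twice its limit while the
 * fill delay is not active, or shrink it (as a background GC task) when it
 * holds more than twice its limit and the cache has been idle long enough.
 */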
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage
		 * collection task. Such a task is intended to run when no
		 * other active processes are running.
		 *
		 * need_resched() returns TRUE if there are user tasks to be
		 * activated in the near future.
		 *
		 * In that case, we don't execute remove_keys() and postpone
		 * the garbage collection work to the next cycle, in order to
		 * free CPU resources for other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

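/*
 * Take a pre-created MR of at least the requested order from the cache.
 * Larger entries are tried if the exact one is empty; every entry touched
 * is asked to refill, and a miss is accounted on the original entry.
 */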
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

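/*
 * Return an MR to its cache entry; if the entry now holds more than twice
 * its limit, schedule the shrink work.
 */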
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

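/*
 * Drain cache entry "c" completely, destroying every cached mkey.
 * Used during teardown, after the cache has been stopped.
 */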
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

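/*
 * Initialize the MR cache: create the ordered workqueue, the async command
 * context and the delay timer, and queue initial fill work for every cache
 * entry (orders starting at 2).
 */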
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);
	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

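/*
 * Create a device-wide DMA MR (PA access mode, length64) for the given PD,
 * honouring the requested access flags.
 */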
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

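/*
 * Allocate a UMR-aligned page address (MTT) array, populate it from the
 * umem and DMA-map it for the UMR post; the raw allocation is returned in
 * *mr_pas and the mapped size in *size.
 */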
static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int npages, int page_shift, int *size,
			  __be64 **mr_pas, dma_addr_t *dma)
{
	__be64 *pas;
	struct device *ddev = dev->ib_dev.dma_device;

	/*
	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more.
	 */
	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!(*mr_pas))
		return -ENOMEM;

	pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, *size - npages * sizeof(u64));

	*dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, *dma)) {
		kfree(*mr_pas);
		return -ENOMEM;
	}

	return 0;
}

static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
				struct ib_sge *sg, u64 dma, int n, u32 key,
				int page_shift)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	wr->next = NULL;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);

	wr->send_flags = 0;

	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

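/*
 * Pin the user memory for [start, start + length) and report its page
 * count, best page shift, number of contiguous chunks and MR order.
 */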
static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
				   int access_flags, int *npages,
				   int *page_shift, int *ncont, int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
					   access_flags, 0);
	if (IS_ERR(umem)) {
		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(umem);
		return ERR_PTR(-EINVAL);
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return umem;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}

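/*
 * Register a user MR through the UMR QP: take an MR from the cache (asking
 * the cache to refill once if it is empty), DMA-map the page list and post
 * a UMR WQE to program the mkey with the new translation.
 */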
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
			     &dma);
	if (err)
		goto free_mr;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			 page_shift, virt_addr, len, access_flags);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
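/*
 * Update (or, with "zap", invalidate) a range of MTT entries of an
 * ODP-backed MR via UMR, in MLX5_UMR_MTT_ALIGNMENT-sized chunks. Falls back
 * to the pre-allocated emergency buffer when the atomic page allocation
 * fails.
 */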
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB. */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		mlx5_ib_init_umr_context(&umr_context);

		memset(&wr, 0, sizeof(wr));
		wr.wr.wr_cqe = &umr_context.cqe;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.pd->local_dma_lkey;

		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.wr.sg_list = &sg;
		wr.wr.num_sge = 1;
		wr.wr.opcode = MLX5_IB_WR_UMR;
		wr.npages = sg.length / sizeof(u64);
		wr.page_shift = PAGE_SHIFT;
		wr.mkey = mr->mmkey.key;
		wr.target.offset = start_page_index;

		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr.wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif

/*
 * If ibmr is NULL, the MR is allocated by reg_create; otherwise the given
 * ibmr is reused.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The pg_access bit allows setting the access flags
	 * in the page list submitted with the command. */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}

static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

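/*
 * ib_reg_user_mr entry point: pin the user memory and register it either
 * through the UMR fast path (cached mkeys) or, for regions UMR cannot
 * handle, through a firmware create_mkey command (reg_create).
 */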
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);
	umem = mr_umem_get(pd, start, length, access_flags, &npages,
			   &page_shift, &ncont, &order);

	if (IS_ERR(umem))
		return (void *)umem;

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
		goto error;
	}

	if (!mr) {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fileds(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

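/*
 * Release a UMR-registered mkey by posting an UNREG UMR WQE, so the mkey
 * can later be reused from the cache. Skipped when the device is in
 * internal error state.
 */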
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	int err;

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

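/*
 * Re-register an existing UMR mkey in place: optionally update the
 * translation (IB_MR_REREG_TRANS), PD (IB_MR_REREG_PD) and access flags
 * (IB_MR_REREG_ACCESS) with a single UMR WQE.
 */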
static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
		     u64 length, int npages, int page_shift, int order,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr umrwr = {};
	struct ib_sge sg;
	struct umr_common *umrc = &dev->umrc;
	dma_addr_t dma = 0;
	__be64 *mr_pas = NULL;
	int size;
	int err;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	if (flags & IB_MR_REREG_TRANS) {
		err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
				     &mr_pas, &dma);
		if (err)
			return err;

		umrwr.target.virt_addr = virt_addr;
		umrwr.length = length;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	}

	prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			    page_shift);

	if (flags & IB_MR_REREG_PD) {
		umrwr.pd = pd;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		umrwr.access_flags = access_flags;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
	}

	/* post send request to UMR QP */
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);

	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}

	up(&umrc->sem);
	if (flags & IB_MR_REREG_TRANS) {
		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
		kfree(mr_pas);
	}
	return err;
}

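/*
 * ib_rereg_user_mr entry point: re-pin the umem if the translation changes,
 * then either patch the existing mkey with a UMR WQE or, when UMR cannot
 * express the change, destroy the mkey and create a new one via reg_create.
 */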
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
	int page_shift = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
				       &page_shift, &ncont, &order);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
			return err;
		}
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
		 */
		if (mr->umred) {
			err = unreg_umr(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to unregister MR\n");
		} else {
			err = destroy_mkey(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
		}
		if (err)
			return err;

		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags);

		if (IS_ERR(mr))
			return PTR_ERR(mr);

		mr->umred = 0;
	} else {
		/*
		 * Send a UMR WQE
		 */
		err = rereg_umr(pd, mr, addr, len, npages, page_shift,
				order, access_flags, flags);
		if (err) {
			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
			return err;
		}
	}

	if (flags & IB_MR_REREG_PD) {
		ib_mr->pd = pd;
		mr->mmkey.pd = to_mpd(pd)->pdn;
	}

	if (flags & IB_MR_REREG_ACCESS)
		mr->access_flags = access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
		set_mr_fileds(dev, mr, npages, len, access_flags);
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
	}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return 0;
}

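/*
 * Allocate a UMR-aligned, DMA-mapped array of "ndescs" descriptors of
 * "desc_size" bytes each, used by memory registration work requests.
 */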
static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	add_size = max_t(int, MLX5_UMR_ALIGN - 1, 0);

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(device->dma_device, mr->descs,
				      size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dma_device, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;

		dma_unmap_single(device->dma_device, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

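/*
 * Tear down an MR: destroy any signature PSVs and private descriptors, then
 * either destroy the mkey (non-cached MRs) or unregister it via UMR and
 * return it to the cache.
 */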
1268331769Shselaskystatic int clean_mr(struct mlx5_ib_mr *mr)
1269331769Shselasky{
1270331769Shselasky	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
1271331769Shselasky	int umred = mr->umred;
1272331769Shselasky	int err;
1273331769Shselasky
1274331769Shselasky	if (mr->sig) {
1275331769Shselasky		if (mlx5_core_destroy_psv(dev->mdev,
1276331769Shselasky					  mr->sig->psv_memory.psv_idx))
1277331769Shselasky			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1278331769Shselasky				     mr->sig->psv_memory.psv_idx);
1279331769Shselasky		if (mlx5_core_destroy_psv(dev->mdev,
1280331769Shselasky					  mr->sig->psv_wire.psv_idx))
1281331769Shselasky			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1282331769Shselasky				     mr->sig->psv_wire.psv_idx);
1283331769Shselasky		kfree(mr->sig);
1284331769Shselasky		mr->sig = NULL;
1285331769Shselasky	}
1286331769Shselasky
1287331769Shselasky	mlx5_free_priv_descs(mr);
1288331769Shselasky
1289331769Shselasky	if (!umred) {
1290331769Shselasky		err = destroy_mkey(dev, mr);
1291331769Shselasky		if (err) {
1292331769Shselasky			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
1293331769Shselasky				     mr->mmkey.key, err);
1294331769Shselasky			return err;
1295331769Shselasky		}
1296331769Shselasky	} else {
1297331769Shselasky		err = unreg_umr(dev, mr);
1298331769Shselasky		if (err) {
1299331769Shselasky			mlx5_ib_warn(dev, "failed unregister\n");
1300331769Shselasky			return err;
1301331769Shselasky		}
1302331769Shselasky		free_cached_mr(dev, mr);
1303331769Shselasky	}
1304331769Shselasky
1305331769Shselasky	if (!umred)
1306331769Shselasky		kfree(mr);
1307331769Shselasky
1308331769Shselasky	return 0;
1309331769Shselasky}
1310331769Shselasky
1311322810ShselaskyCTASSERT(sizeof(((struct ib_phys_buf *)0)->size) == 8);
1312322810Shselasky
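/*
 * Register a memory region described by a list of physical buffers.
 * All buffers must share a common page alignment; the page shift is
 * derived from the buffer addresses and sizes, and the resulting page
 * list is written directly into the create_mkey command.
 */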
1313322810Shselaskystruct ib_mr *
1314322810Shselaskymlx5_ib_reg_phys_mr(struct ib_pd *pd,
1315322810Shselasky		    struct ib_phys_buf *buffer_list,
1316322810Shselasky		    int num_phys_buf,
1317322810Shselasky		    int access_flags,
1318322810Shselasky		    u64 *virt_addr)
1319322810Shselasky{
1320322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1321322810Shselasky	struct mlx5_ib_mr *mr;
1322331769Shselasky	__be64 *pas;
1323331769Shselasky	void *mkc;
1324331769Shselasky	u32 *in;
1325322810Shselasky	u64 total_size;
1326322810Shselasky	u32 octo_len;
1327322810Shselasky	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1328322810Shselasky	unsigned long mask;
1329322810Shselasky	int shift;
1330322810Shselasky	int npages;
1331322810Shselasky	int inlen;
1332322810Shselasky	int err;
1333322810Shselasky	int i, j, n;
1334322810Shselasky
1335322810Shselasky	mask = buffer_list[0].addr ^ *virt_addr;
1336322810Shselasky	total_size = 0;
1337322810Shselasky	for (i = 0; i < num_phys_buf; ++i) {
1338322810Shselasky		if (i != 0)
1339322810Shselasky			mask |= buffer_list[i].addr;
1340322810Shselasky		if (i != num_phys_buf - 1)
1341322810Shselasky			mask |= buffer_list[i].addr + buffer_list[i].size;
1342322810Shselasky
1343322810Shselasky		total_size += buffer_list[i].size;
1344322810Shselasky	}
1345322810Shselasky
1346322810Shselasky	if (mask & ~PAGE_MASK)
1347322810Shselasky		return ERR_PTR(-EINVAL);
1348322810Shselasky
1349322810Shselasky	shift = __ffs(mask | 1 << 31);
1350322810Shselasky
1351322810Shselasky	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
1352322810Shselasky	buffer_list[0].addr &= ~0ULL << shift;
1353322810Shselasky
1354322810Shselasky	npages = 0;
1355322810Shselasky	for (i = 0; i < num_phys_buf; ++i)
1356322810Shselasky		npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
1357322810Shselasky
1358322810Shselasky	if (!npages) {
1359322810Shselasky		mlx5_ib_warn(dev, "avoid zero region\n");
1360322810Shselasky		return ERR_PTR(-EINVAL);
1361322810Shselasky	}
1362322810Shselasky
1363322810Shselasky	mr = kzalloc(sizeof *mr, GFP_KERNEL);
1364322810Shselasky	if (!mr)
1365322810Shselasky		return ERR_PTR(-ENOMEM);
1366322810Shselasky
1367322810Shselasky	octo_len = get_octo_len(*virt_addr, total_size, 1ULL << shift);
1368322810Shselasky	octo_len = ALIGN(octo_len, 4);
1369322810Shselasky
1370331769Shselasky	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + (octo_len * 16);
1371322810Shselasky	in = mlx5_vzalloc(inlen);
1372322810Shselasky	if (!in) {
1373322810Shselasky		kfree(mr);
1374322810Shselasky		return ERR_PTR(-ENOMEM);
1375322810Shselasky	}
1376331769Shselasky	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1377322810Shselasky
1378322810Shselasky	n = 0;
1379322810Shselasky	for (i = 0; i < num_phys_buf; ++i) {
1380322810Shselasky		for (j = 0;
1381322810Shselasky		     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
1382322810Shselasky		     ++j) {
1383322810Shselasky			u64 temp = buffer_list[i].addr + ((u64) j << shift);
1384322810Shselasky			if (pg_cap)
1385322810Shselasky				temp |= MLX5_IB_MTT_PRESENT;
1386331769Shselasky			pas[n++] = cpu_to_be64(temp);
1387322810Shselasky		}
1388322810Shselasky	}
1389322810Shselasky
1390331769Shselasky	/*
1391331769Shselasky	 * The pg_access bit allows setting the access flags in the
1392331769Shselasky	 * page list submitted with the command.
1393331769Shselasky	 */
1394331769Shselasky	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1395331769Shselasky
1396331769Shselasky	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1397331769Shselasky	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
1398331769Shselasky	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
1399331769Shselasky	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
1400331769Shselasky	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
1401331769Shselasky	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
1402331769Shselasky	MLX5_SET(mkc, mkc, lr, 1);
1403331769Shselasky
1404331769Shselasky	MLX5_SET64(mkc, mkc, start_addr, *virt_addr);
1405331769Shselasky	MLX5_SET64(mkc, mkc, len, total_size);
1406331769Shselasky	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1407331769Shselasky	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1408331769Shselasky	MLX5_SET(mkc, mkc, translations_octword_size, octo_len);
1409331769Shselasky	MLX5_SET(mkc, mkc, log_page_size, shift);
1410331769Shselasky	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1411331769Shselasky	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octo_len);
1412331769Shselasky
1413331807Shselasky	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1414331807Shselasky
1415322810Shselasky	mr->umem = NULL;
1416322810Shselasky	mr->dev = dev;
1417331769Shselasky	mr->live = 1;
1418322810Shselasky	mr->npages = npages;
1419331769Shselasky	mr->ibmr.lkey = mr->mmkey.key;
1420331769Shselasky	mr->ibmr.rkey = mr->mmkey.key;
1421331769Shselasky	mr->ibmr.length = total_size;
1422331769Shselasky	mr->access_flags = access_flags;
1423322810Shselasky
1424322810Shselasky	kvfree(in);
1425322810Shselasky
1426322810Shselasky	if (err) {
1427322810Shselasky		kfree(mr);
1428322810Shselasky		return ERR_PTR(err);
1429322810Shselasky	}
1430322810Shselasky	return &mr->ibmr;
1431322810Shselasky}
1432322810Shselasky
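/*
 * Deregister a memory region. For ODP-backed regions the umem is
 * released first, after all running page-fault handlers have drained,
 * so that no invalidation can race with the destruction of the MR.
 */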
1433322810Shselaskyint mlx5_ib_dereg_mr(struct ib_mr *ibmr)
1434322810Shselasky{
1435322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
1436322810Shselasky	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1437331769Shselasky	int npages = mr->npages;
1438322810Shselasky	struct ib_umem *umem = mr->umem;
1439322810Shselasky
1440331769Shselasky#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
1441331769Shselasky	if (umem && umem->odp_data) {
1442331769Shselasky		/* Prevent new page faults from succeeding */
1443331769Shselasky		mr->live = 0;
1444331769Shselasky		/* Wait for all running page-fault handlers to finish. */
1445331769Shselasky		synchronize_srcu(&dev->mr_srcu);
1446331769Shselasky		/* Destroy all page mappings */
1447331769Shselasky		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
1448331769Shselasky					 ib_umem_end(umem));
1449331769Shselasky		/*
1450331769Shselasky		 * For ODP, release the umem before the MR so that no
1451331769Shselasky		 * invalidations remain in flight that could still be
1452331769Shselasky		 * looking at the *mr struct.
1453331769Shselasky		 */
1454331769Shselasky		ib_umem_release(umem);
1455331769Shselasky		atomic_sub(npages, &dev->mdev->priv.reg_pages);
1456322810Shselasky
1457331769Shselasky		/* Avoid double-freeing the umem. */
1458331769Shselasky		umem = NULL;
1459331769Shselasky	}
1460331769Shselasky#endif
1461331769Shselasky
1462331769Shselasky	clean_mr(mr);
1463331769Shselasky
1464322810Shselasky	if (umem) {
1465322810Shselasky		ib_umem_release(umem);
1466322810Shselasky		atomic_sub(npages, &dev->mdev->priv.reg_pages);
1467322810Shselasky	}
1468322810Shselasky
1469322810Shselasky	return 0;
1470322810Shselasky}
1471322810Shselasky
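/*
 * Allocate a memory region for fast registration work requests. The
 * descriptor layout depends on the MR type: plain page lists for
 * IB_MR_TYPE_MEM_REG, KLMs for IB_MR_TYPE_SG_GAPS, and a signature
 * context with memory/wire PSVs for IB_MR_TYPE_SIGNATURE.
 */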
1472331769Shselaskystruct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
1473331769Shselasky			       enum ib_mr_type mr_type,
1474331769Shselasky			       u32 max_num_sg)
1475322810Shselasky{
1476322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1477331769Shselasky	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1478331769Shselasky	int ndescs = ALIGN(max_num_sg, 4);
1479322810Shselasky	struct mlx5_ib_mr *mr;
1480331769Shselasky	void *mkc;
1481331769Shselasky	u32 *in;
1482322810Shselasky	int err;
1483322810Shselasky
1484322810Shselasky	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1485322810Shselasky	if (!mr)
1486322810Shselasky		return ERR_PTR(-ENOMEM);
1487322810Shselasky
1488331769Shselasky	in = kzalloc(inlen, GFP_KERNEL);
1489322810Shselasky	if (!in) {
1490322810Shselasky		err = -ENOMEM;
1491322810Shselasky		goto err_free;
1492322810Shselasky	}
1493322810Shselasky
1494331769Shselasky	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1495331769Shselasky	MLX5_SET(mkc, mkc, free, 1);
1496331769Shselasky	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1497331769Shselasky	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1498331769Shselasky	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1499322810Shselasky
1500331769Shselasky	if (mr_type == IB_MR_TYPE_MEM_REG) {
1501331769Shselasky		mr->access_mode = MLX5_ACCESS_MODE_MTT;
1502331769Shselasky		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
1503331769Shselasky		err = mlx5_alloc_priv_descs(pd->device, mr,
1504331769Shselasky					    ndescs, sizeof(u64));
1505331769Shselasky		if (err)
1506331769Shselasky			goto err_free_in;
1507322810Shselasky
1508331769Shselasky		mr->desc_size = sizeof(u64);
1509331769Shselasky		mr->max_descs = ndescs;
1510331769Shselasky	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
1511331769Shselasky		mr->access_mode = MLX5_ACCESS_MODE_KLM;
1512322810Shselasky
1513331769Shselasky		err = mlx5_alloc_priv_descs(pd->device, mr,
1514331769Shselasky					    ndescs, sizeof(struct mlx5_klm));
1515331769Shselasky		if (err)
1516331769Shselasky			goto err_free_in;
1517331769Shselasky		mr->desc_size = sizeof(struct mlx5_klm);
1518331769Shselasky		mr->max_descs = ndescs;
1519331769Shselasky	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
1520331769Shselasky		u32 psv_index[2];
1521322810Shselasky
1522331769Shselasky		MLX5_SET(mkc, mkc, bsf_en, 1);
1523331769Shselasky		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1524331769Shselasky		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1525331769Shselasky		if (!mr->sig) {
1526331769Shselasky			err = -ENOMEM;
1527331769Shselasky			goto err_free_in;
1528331769Shselasky		}
1529322810Shselasky
1530331769Shselasky		/* create mem & wire PSVs */
1531331769Shselasky		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
1532331769Shselasky					   2, psv_index);
1533331769Shselasky		if (err)
1534331769Shselasky			goto err_free_sig;
1535322810Shselasky
1536331769Shselasky		mr->access_mode = MLX5_ACCESS_MODE_KLM;
1537331769Shselasky		mr->sig->psv_memory.psv_idx = psv_index[0];
1538331769Shselasky		mr->sig->psv_wire.psv_idx = psv_index[1];
1539322810Shselasky
1540331769Shselasky		mr->sig->sig_status_checked = true;
1541331769Shselasky		mr->sig->sig_err_exists = false;
1542331769Shselasky		/* Arm SIGERR on the next UMR */
1543331769Shselasky		++mr->sig->sigerr_count;
1544331769Shselasky	} else {
1545331769Shselasky		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1546331769Shselasky		err = -EINVAL;
1547331769Shselasky		goto err_free_in;
1548331769Shselasky	}
1549322810Shselasky
1550331769Shselasky	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
1551331769Shselasky	MLX5_SET(mkc, mkc, umr_en, 1);
1552322810Shselasky
1553331807Shselasky	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1554331769Shselasky	if (err)
1555331769Shselasky		goto err_destroy_psv;
1556322810Shselasky
1557331769Shselasky	mr->ibmr.lkey = mr->mmkey.key;
1558331769Shselasky	mr->ibmr.rkey = mr->mmkey.key;
1559331769Shselasky	mr->umem = NULL;
1560331769Shselasky	kfree(in);
1561322810Shselasky
1562331769Shselasky	return &mr->ibmr;
1563331769Shselasky
1564331769Shselaskyerr_destroy_psv:
1565331769Shselasky	if (mr->sig) {
1566331769Shselasky		if (mlx5_core_destroy_psv(dev->mdev,
1567331769Shselasky					  mr->sig->psv_memory.psv_idx))
1568331769Shselasky			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1569331769Shselasky				     mr->sig->psv_memory.psv_idx);
1570331769Shselasky		if (mlx5_core_destroy_psv(dev->mdev,
1571331769Shselasky					  mr->sig->psv_wire.psv_idx))
1572331769Shselasky			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1573331769Shselasky				     mr->sig->psv_wire.psv_idx);
1574331769Shselasky	}
1575331769Shselasky	mlx5_free_priv_descs(mr);
1576331769Shselaskyerr_free_sig:
1577331769Shselasky	kfree(mr->sig);
1578331769Shselaskyerr_free_in:
1579331769Shselasky	kfree(in);
1580322810Shselaskyerr_free:
1581331769Shselasky	kfree(mr);
1582331769Shselasky	return ERR_PTR(err);
1583322810Shselasky}
1584322810Shselasky
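/*
 * Allocate a memory window. The window is backed by a free mkey in KLM
 * access mode; type 2 windows additionally enable remote invalidation.
 */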
1585331769Shselaskystruct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1586331769Shselasky			       struct ib_udata *udata)
1587322810Shselasky{
1588331769Shselasky	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1589331769Shselasky	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1590331769Shselasky	struct mlx5_ib_mw *mw = NULL;
1591331769Shselasky	u32 *in = NULL;
1592331769Shselasky	void *mkc;
1593331769Shselasky	int ndescs;
1594322810Shselasky	int err;
1595331769Shselasky	struct mlx5_ib_alloc_mw req = {};
1596331769Shselasky	struct {
1597331769Shselasky		__u32	comp_mask;
1598331769Shselasky		__u32	response_length;
1599331769Shselasky	} resp = {};
1600322810Shselasky
1601331769Shselasky	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1602331769Shselasky	if (err)
1603331769Shselasky		return ERR_PTR(err);
1604322810Shselasky
1605331769Shselasky	if (req.comp_mask || req.reserved1 || req.reserved2)
1606331769Shselasky		return ERR_PTR(-EOPNOTSUPP);
1607322810Shselasky
1608331769Shselasky	if (udata->inlen > sizeof(req) &&
1609331769Shselasky	    !ib_is_udata_cleared(udata, sizeof(req),
1610331769Shselasky				 udata->inlen - sizeof(req)))
1611331769Shselasky		return ERR_PTR(-EOPNOTSUPP);
1612322810Shselasky
1613331769Shselasky	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1614322810Shselasky
1615331769Shselasky	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1616331769Shselasky	in = kzalloc(inlen, GFP_KERNEL);
1617331769Shselasky	if (!mw || !in) {
1618331769Shselasky		err = -ENOMEM;
1619331769Shselasky		goto free;
1620322810Shselasky	}
1621322810Shselasky
1622331769Shselasky	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1623322810Shselasky
1624331769Shselasky	MLX5_SET(mkc, mkc, free, 1);
1625331769Shselasky	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1626331769Shselasky	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1627331769Shselasky	MLX5_SET(mkc, mkc, umr_en, 1);
1628331769Shselasky	MLX5_SET(mkc, mkc, lr, 1);
1629331769Shselasky	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_KLM);
1630331769Shselasky	MLX5_SET(mkc, mkc, en_rinval, !!(type == IB_MW_TYPE_2));
1631331769Shselasky	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1632322810Shselasky
1633331807Shselasky	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1634331769Shselasky	if (err)
1635331769Shselasky		goto free;
1636322810Shselasky
1637331769Shselasky	mw->ibmw.rkey = mw->mmkey.key;
1638322810Shselasky
1639331769Shselasky	resp.response_length = min(offsetof(typeof(resp), response_length) +
1640331769Shselasky				   sizeof(resp.response_length), udata->outlen);
1641331769Shselasky	if (resp.response_length) {
1642331769Shselasky		err = ib_copy_to_udata(udata, &resp, resp.response_length);
1643331769Shselasky		if (err) {
1644331769Shselasky			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1645331769Shselasky			goto free;
1646331769Shselasky		}
1647331769Shselasky	}
1648322810Shselasky
1649331769Shselasky	kfree(in);
1650331769Shselasky	return &mw->ibmw;
1651322810Shselasky
1652331769Shselaskyfree:
1653331769Shselasky	kfree(mw);
1654331769Shselasky	kfree(in);
1655331769Shselasky	return ERR_PTR(err);
1656322810Shselasky}
1657322810Shselasky
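/*
 * Deallocate a memory window by destroying its mkey.
 */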
1658331769Shselaskyint mlx5_ib_dealloc_mw(struct ib_mw *mw)
1659322810Shselasky{
1660331769Shselasky	struct mlx5_ib_mw *mmw = to_mmw(mw);
1661322810Shselasky	int err;
1662322810Shselasky
1663331769Shselasky	err = mlx5_core_destroy_mkey(to_mdev(mw->device)->mdev,
1664331769Shselasky				     &mmw->mmkey);
1665331769Shselasky	if (!err)
1666331769Shselasky		kfree(mmw);
1667322810Shselasky	return err;
1668322810Shselasky}
1669322810Shselasky
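/*
 * Report the status of a memory region. Only the signature error status
 * is supported; a pending signature error is copied to the caller and
 * then cleared.
 */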
1670331769Shselaskyint mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1671331769Shselasky			    struct ib_mr_status *mr_status)
1672322810Shselasky{
1673331769Shselasky	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1674331769Shselasky	int ret = 0;
1675322810Shselasky
1676331769Shselasky	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1677331769Shselasky		pr_err("Invalid status check mask\n");
1678331769Shselasky		ret = -EINVAL;
1679331769Shselasky		goto done;
1680322810Shselasky	}
1681322810Shselasky
1682331769Shselasky	mr_status->fail_status = 0;
1683331769Shselasky	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
1684331769Shselasky		if (!mmr->sig) {
1685331769Shselasky			ret = -EINVAL;
1686331769Shselasky			pr_err("signature status check requested on a non-signature enabled MR\n");
1687331769Shselasky			goto done;
1688331769Shselasky		}
1689322810Shselasky
1690331769Shselasky		mmr->sig->sig_status_checked = true;
1691331769Shselasky		if (!mmr->sig->sig_err_exists)
1692331769Shselasky			goto done;
1693322810Shselasky
1694331769Shselasky		if (ibmr->lkey == mmr->sig->err_item.key)
1695331769Shselasky			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
1696331769Shselasky			       sizeof(mr_status->sig_err));
1697331769Shselasky		else {
1698331769Shselasky			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
1699331769Shselasky			mr_status->sig_err.sig_err_offset = 0;
1700331769Shselasky			mr_status->sig_err.key = mmr->sig->err_item.key;
1701331769Shselasky		}
1702322810Shselasky
1703331769Shselasky		mmr->sig->sig_err_exists = false;
1704331769Shselasky		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
1705331769Shselasky	}
1706322810Shselasky
1707331769Shselaskydone:
1708331769Shselasky	return ret;
1709322810Shselasky}
1710322810Shselasky
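/*
 * Translate a scatterlist into KLM descriptors, one entry per SG
 * element, taking the byte offset of the first element into account.
 * Returns the number of KLM entries that were filled in.
 */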
1711331769Shselaskystatic int
1712331769Shselaskymlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
1713331769Shselasky		   struct scatterlist *sgl,
1714331769Shselasky		   unsigned short sg_nents,
1715331769Shselasky		   unsigned int *sg_offset_p)
1716322810Shselasky{
1717331769Shselasky	struct scatterlist *sg = sgl;
1718331769Shselasky	struct mlx5_klm *klms = mr->descs;
1719331769Shselasky	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1720331769Shselasky	u32 lkey = mr->ibmr.pd->local_dma_lkey;
1721322810Shselasky	int i;
1722322810Shselasky
1723331769Shselasky	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
1724331769Shselasky	mr->ibmr.length = 0;
1725331769Shselasky	mr->ndescs = min_t(unsigned int, sg_nents, mr->max_descs);
1726322810Shselasky
1727331769Shselasky	for_each_sg(sgl, sg, sg_nents, i) {
1728331769Shselasky		if (unlikely(i >= mr->max_descs))
1729331769Shselasky			break;
1730331769Shselasky		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
1731331769Shselasky		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
1732331769Shselasky		klms[i].key = cpu_to_be32(lkey);
1733331769Shselasky		mr->ibmr.length += sg_dma_len(sg) - sg_offset;
1734322810Shselasky
1735331769Shselasky		sg_offset = 0;
1736322810Shselasky	}
1737322810Shselasky
1738331769Shselasky	if (sg_offset_p)
1739331769Shselasky		*sg_offset_p = sg_offset;
1740322810Shselasky
1741331769Shselasky	return i;
1742322810Shselasky}
1743322810Shselasky
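/*
 * Callback used by ib_sg_to_pages() to store one page address in the
 * MR's descriptor list.
 */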
1744331769Shselaskystatic int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
1745322810Shselasky{
1746331769Shselasky	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1747331769Shselasky	__be64 *descs;
1748322810Shselasky
1749331769Shselasky	if (unlikely(mr->ndescs == mr->max_descs))
1750331769Shselasky		return -ENOMEM;
1751322810Shselasky
1752331769Shselasky	descs = mr->descs;
1753331769Shselasky	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
1754322810Shselasky
1755331769Shselasky	return 0;
1756322810Shselasky}
1757322810Shselasky
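/*
 * Map a scatterlist onto the memory region's descriptors, using KLMs
 * for gap-tolerant MRs and plain page lists otherwise. The descriptor
 * buffer is synced for CPU access around the update.
 */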
1758331769Shselaskyint mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
1759331769Shselasky		      unsigned int *sg_offset)
1760322810Shselasky{
1761331769Shselasky	struct mlx5_ib_mr *mr = to_mmr(ibmr);
1762331769Shselasky	int n;
1763322810Shselasky
1764331769Shselasky	mr->ndescs = 0;
1765322810Shselasky
1766331769Shselasky	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
1767331769Shselasky				   mr->desc_size * mr->max_descs,
1768331769Shselasky				   DMA_TO_DEVICE);
1769322810Shselasky
1770331769Shselasky	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
1771331769Shselasky		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
1772331769Shselasky	else
1773331769Shselasky		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
1774331769Shselasky				mlx5_set_page);
1775322810Shselasky
1776331769Shselasky	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
1777331769Shselasky				      mr->desc_size * mr->max_descs,
1778331769Shselasky				      DMA_TO_DEVICE);
1779322810Shselasky
1780331769Shselasky	return n;
1781322810Shselasky}
1782