/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008, 2014 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/vmalloc.h>

#include <dev/mlx4/cmd.h>

#include <linux/math64.h>

#include "mlx4.h"
#include "icm.h"

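/*
 * Memory region support: allocation of MTT (Memory Translation Table)
 * entries and MPT (Memory Protection Table) entries, plus the memory
 * windows and FMRs layered on top of them.
 */

/*
 * Buddy allocator for MTT segments.  bits[o] is a bitmap of free blocks
 * of order o (a block of order o spans 2^o segments) and num_free[o]
 * counts them.  Allocation takes the smallest free block of sufficient
 * order and splits it down, marking each split-off buddy free; the
 * return value is the first segment of the block, or -1 on failure.
 */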
static u32 mlx4_buddy_alloc(struct mlx4_buddy *buddy, int order)
{
	int o;
	int m;
	u32 seg;

	spin_lock(&buddy->lock);

	for (o = order; o <= buddy->max_order; ++o)
		if (buddy->num_free[o]) {
			m = 1 << (buddy->max_order - o);
			seg = find_first_bit(buddy->bits[o], m);
			if (seg < m)
				goto found;
		}

	spin_unlock(&buddy->lock);
	return -1;

 found:
	clear_bit(seg, buddy->bits[o]);
	--buddy->num_free[o];

	while (o > order) {
		--o;
		seg <<= 1;
		set_bit(seg ^ 1, buddy->bits[o]);
		++buddy->num_free[o];
	}

	spin_unlock(&buddy->lock);

	seg <<= order;

	return seg;
}

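/*
 * Free a block and coalesce upward: while the buddy block (seg ^ 1) at
 * the current order is also free, merge the two into a single block of
 * the next higher order.
 */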
static void mlx4_buddy_free(struct mlx4_buddy *buddy, u32 seg, int order)
{
	seg >>= order;

	spin_lock(&buddy->lock);

	while (test_bit(seg ^ 1, buddy->bits[order])) {
		clear_bit(seg ^ 1, buddy->bits[order]);
		--buddy->num_free[order];
		seg >>= 1;
		++order;
	}

	set_bit(seg, buddy->bits[order]);
	++buddy->num_free[order];

	spin_unlock(&buddy->lock);
}

static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order)
{
	int i, s;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);

	buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *),
			      GFP_KERNEL);
	buddy->num_free = kcalloc(buddy->max_order + 1, sizeof *buddy->num_free,
				  GFP_KERNEL);
	if (!buddy->bits || !buddy->num_free)
		goto err_out;

	for (i = 0; i <= buddy->max_order; ++i) {
		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
		buddy->bits[i] = kcalloc(s, sizeof (long), GFP_KERNEL | __GFP_NOWARN);
		if (!buddy->bits[i]) {
			buddy->bits[i] = vzalloc(s * sizeof(long));
			if (!buddy->bits[i])
				goto err_out_free;
		}
	}

	set_bit(0, buddy->bits[buddy->max_order]);
	buddy->num_free[buddy->max_order] = 1;

	return 0;

err_out_free:
	for (i = 0; i <= buddy->max_order; ++i)
		kvfree(buddy->bits[i]);

err_out:
	kfree(buddy->bits);
	kfree(buddy->num_free);

	return -ENOMEM;
}

static void mlx4_buddy_cleanup(struct mlx4_buddy *buddy)
{
	int i;

	for (i = 0; i <= buddy->max_order; ++i)
		kvfree(buddy->bits[i]);

	kfree(buddy->bits);
	kfree(buddy->num_free);
}

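/*
 * MTT entries are handed out in segments of 2^log_mtts_per_seg entries,
 * so the buddy works in segment units.  The return value is an offset in
 * MTT entries; the ICM backing that range is made resident through
 * mlx4_table_get_range() before the range is returned.
 */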
u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
	u32 seg;
	int seg_order;
	u32 offset;

	seg_order = max_t(int, order - log_mtts_per_seg, 0);

	seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
	if (seg == -1)
		return -1;

	offset = seg * (1 << log_mtts_per_seg);

	if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
				 offset + (1 << order) - 1)) {
		mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
		return -1;
	}

	return offset;
}

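/*
 * On a multi-function device (SR-IOV) only the master owns the buddy, so
 * slaves request the range through a wrapped ALLOC_RES command that the
 * master executes on their behalf.
 */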
static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
{
	u64 in_param = 0;
	u64 out_param;
	int err;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&in_param, order);
		err = mlx4_cmd_imm(dev, in_param, &out_param, RES_MTT,
						       RES_OP_RESERVE_AND_MAP,
						       MLX4_CMD_ALLOC_RES,
						       MLX4_CMD_TIME_CLASS_A,
						       MLX4_CMD_WRAPPED);
		if (err)
			return -1;
		return get_param_l(&out_param);
	}
	return __mlx4_alloc_mtt_range(dev, order);
}

int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
		  struct mlx4_mtt *mtt)
{
	int i;

	if (!npages) {
		mtt->order      = -1;
		mtt->page_shift = MLX4_ICM_PAGE_SHIFT;
		return 0;
	} else
		mtt->page_shift = page_shift;

	for (mtt->order = 0, i = 1; i < npages; i <<= 1)
		++mtt->order;

	mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
	if (mtt->offset == -1)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_init);

void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
{
	u32 first_seg;
	int seg_order;
	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;

	seg_order = max_t(int, order - log_mtts_per_seg, 0);
	first_seg = offset / (1 << log_mtts_per_seg);

	mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
	mlx4_table_put_range(dev, &mr_table->mtt_table, offset,
			     offset + (1 << order) - 1);
}

static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
{
	u64 in_param = 0;
	int err;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&in_param, offset);
		set_param_h(&in_param, order);
		err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
						       MLX4_CMD_FREE_RES,
						       MLX4_CMD_TIME_CLASS_A,
						       MLX4_CMD_WRAPPED);
		if (err)
			mlx4_warn(dev, "Failed to free mtt range at:%d order:%d\n",
				  offset, order);
		return;
	}
	__mlx4_free_mtt_range(dev, offset, order);
}

void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
	if (mtt->order < 0)
		return;

	mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
}
EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);

u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
{
	return (u64) mtt->offset * dev->caps.mtt_entry_sz;
}
EXPORT_SYMBOL_GPL(mlx4_mtt_addr);

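/*
 * A memory key is the hardware MPT index rotated left by 8 bits.  The
 * rotation keeps the index recoverable (key_to_hw_index() rotates it
 * back) while letting the bits above the index range vary, which
 * mlx4_map_phys_fmr() uses to hand out a fresh key for the same entry.
 */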
static u32 hw_index_to_key(u32 ind)
{
	return (ind >> 24) | (ind << 8);
}

static u32 key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

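/*
 * SW2HW_MPT passes ownership of an MPT entry to the device; HW2SW_MPT
 * takes it back so that software may inspect or modify the entry.
 */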
static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
			  int mpt_index)
{
	return mlx4_cmd(dev, mailbox->dma, mpt_index,
			0, MLX4_CMD_SW2HW_MPT, MLX4_CMD_TIME_CLASS_B,
			MLX4_CMD_WRAPPED);
}

static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
			  int mpt_index)
{
	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
			    !mailbox, MLX4_CMD_HW2SW_MPT,
			    MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}

/* Must protect against concurrent access */
int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
		       struct mlx4_mpt_entry ***mpt_entry)
{
	int err;
	int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
	struct mlx4_cmd_mailbox *mailbox = NULL;

	if (mmr->enabled != MLX4_MPT_EN_HW)
		return -EINVAL;

	err = mlx4_HW2SW_MPT(dev, NULL, key);
	if (err) {
		mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);
		mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
		return err;
	}

	mmr->enabled = MLX4_MPT_EN_SW;

	if (!mlx4_is_mfunc(dev)) {
		**mpt_entry = mlx4_table_find(
				&mlx4_priv(dev)->mr_table.dmpt_table,
				key, NULL);
	} else {
		mailbox = mlx4_alloc_cmd_mailbox(dev);
		if (IS_ERR(mailbox))
			return PTR_ERR(mailbox);

		err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
				   0, MLX4_CMD_QUERY_MPT,
				   MLX4_CMD_TIME_CLASS_B,
				   MLX4_CMD_WRAPPED);
		if (err)
			goto free_mailbox;

		*mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
	}

	if (!(*mpt_entry) || !(**mpt_entry)) {
		err = -ENOMEM;
		goto free_mailbox;
	}

	return 0;

free_mailbox:
	mlx4_free_cmd_mailbox(dev, mailbox);
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);

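/*
 * Hand a modified MPT entry back to hardware.  Natively this is done by
 * flipping the status byte of the in-ICM entry to HW and issuing
 * SYNC_TPT; slaves go through the SW2HW_MPT command instead.
 */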
int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
			 struct mlx4_mpt_entry **mpt_entry)
{
	int err;

	if (!mlx4_is_mfunc(dev)) {
		/* Make sure any changes to this entry are flushed */
		wmb();

		*(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;

		/* Make sure the new status is written */
		wmb();

		err = mlx4_SYNC_TPT(dev);
	} else {
		int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);

		struct mlx4_cmd_mailbox *mailbox =
			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
				     buf);

		err = mlx4_SW2HW_MPT(dev, mailbox, key);
	}

	if (!err) {
		mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
		mmr->enabled = MLX4_MPT_EN_HW;
	}
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);

void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
			struct mlx4_mpt_entry **mpt_entry)
{
	if (mlx4_is_mfunc(dev)) {
		struct mlx4_cmd_mailbox *mailbox =
			container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
				     buf);
		mlx4_free_cmd_mailbox(dev, mailbox);
	}
}
EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);

int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
			 u32 pdn)
{
	u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags) & ~MLX4_MPT_PD_MASK;
	/* The wrapper function will put the slave's id here */
	if (mlx4_is_mfunc(dev))
		pd_flags &= ~MLX4_MPT_PD_VF_MASK;

	mpt_entry->pd_flags = cpu_to_be32(pd_flags |
					  (pdn & MLX4_MPT_PD_MASK)
					  | MLX4_MPT_PD_FLAG_EN_INV);
	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);

int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
			     struct mlx4_mpt_entry *mpt_entry,
			     u32 access)
{
	u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
		    (access & MLX4_PERM_MASK);

	mpt_entry->flags = cpu_to_be32(flags);
	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);

static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
			   u64 iova, u64 size, u32 access, int npages,
			   int page_shift, struct mlx4_mr *mr)
{
	mr->iova       = iova;
	mr->size       = size;
	mr->pd	       = pd;
	mr->access     = access;
	mr->enabled    = MLX4_MPT_DISABLED;
	mr->key	       = hw_index_to_key(mridx);

	return mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
}

static int mlx4_WRITE_MTT(struct mlx4_dev *dev,
			  struct mlx4_cmd_mailbox *mailbox,
			  int num_entries)
{
	return mlx4_cmd(dev, mailbox->dma, num_entries, 0, MLX4_CMD_WRITE_MTT,
			MLX4_CMD_TIME_CLASS_A,  MLX4_CMD_WRAPPED);
}

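/*
 * MPT entries come to life in stages: an index is reserved from the
 * bitmap (mpt_reserve), the ICM backing it is made resident
 * (mpt_alloc_icm), and the entry is finally handed to hardware with
 * SW2HW_MPT (see mlx4_mr_enable()).  Teardown reverses the same steps.
 */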
int __mlx4_mpt_reserve(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	return mlx4_bitmap_alloc(&priv->mr_table.mpt_bitmap);
}

static int mlx4_mpt_reserve(struct mlx4_dev *dev)
{
	u64 out_param;

	if (mlx4_is_mfunc(dev)) {
		if (mlx4_cmd_imm(dev, 0, &out_param, RES_MPT, RES_OP_RESERVE,
				   MLX4_CMD_ALLOC_RES,
				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
			return -1;
		return get_param_l(&out_param);
	}
	return __mlx4_mpt_reserve(dev);
}

void __mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
{
	struct mlx4_priv *priv = mlx4_priv(dev);

	mlx4_bitmap_free(&priv->mr_table.mpt_bitmap, index, MLX4_NO_RR);
}

static void mlx4_mpt_release(struct mlx4_dev *dev, u32 index)
{
	u64 in_param = 0;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&in_param, index);
		if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_RESERVE,
			       MLX4_CMD_FREE_RES,
			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED))
			mlx4_warn(dev, "Failed to release mr index:%d\n",
				  index);
		return;
	}
	__mlx4_mpt_release(dev, index);
}

int __mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
{
	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;

	return mlx4_table_get(dev, &mr_table->dmpt_table, index, gfp);
}

static int mlx4_mpt_alloc_icm(struct mlx4_dev *dev, u32 index, gfp_t gfp)
{
	u64 param = 0;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&param, index);
		return mlx4_cmd_imm(dev, param, &param, RES_MPT, RES_OP_MAP_ICM,
							MLX4_CMD_ALLOC_RES,
							MLX4_CMD_TIME_CLASS_A,
							MLX4_CMD_WRAPPED);
	}
	return __mlx4_mpt_alloc_icm(dev, index, gfp);
}

void __mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
{
	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;

	mlx4_table_put(dev, &mr_table->dmpt_table, index);
}

static void mlx4_mpt_free_icm(struct mlx4_dev *dev, u32 index)
{
	u64 in_param = 0;

	if (mlx4_is_mfunc(dev)) {
		set_param_l(&in_param, index);
		if (mlx4_cmd(dev, in_param, RES_MPT, RES_OP_MAP_ICM,
			     MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
			     MLX4_CMD_WRAPPED))
			mlx4_warn(dev, "Failed to free icm of mr index:%d\n",
				  index);
		return;
	}
	__mlx4_mpt_free_icm(dev, index);
}

int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
		  int npages, int page_shift, struct mlx4_mr *mr)
{
	u32 index;
	int err;

	index = mlx4_mpt_reserve(dev);
	if (index == -1)
		return -ENOMEM;

	err = mlx4_mr_alloc_reserved(dev, index, pd, iova, size,
				     access, npages, page_shift, mr);
	if (err)
		mlx4_mpt_release(dev, index);

	return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_alloc);

static int mlx4_mr_free_reserved(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
	int err;

	if (mr->enabled == MLX4_MPT_EN_HW) {
		err = mlx4_HW2SW_MPT(dev, NULL,
				     key_to_hw_index(mr->key) &
				     (dev->caps.num_mpts - 1));
		if (err) {
			mlx4_warn(dev, "HW2SW_MPT failed (%d), MR has MWs bound to it\n",
				  err);
			return err;
		}

		mr->enabled = MLX4_MPT_EN_SW;
	}
	mlx4_mtt_cleanup(dev, &mr->mtt);

	return 0;
}

int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
	int ret;

	ret = mlx4_mr_free_reserved(dev, mr);
	if (ret)
		return ret;
	if (mr->enabled)
		mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
	mlx4_mpt_release(dev, key_to_hw_index(mr->key));

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mr_free);

void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
	mlx4_mtt_cleanup(dev, &mr->mtt);
	mr->mtt.order = -1;
}
EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);

int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
			    u64 iova, u64 size, int npages,
			    int page_shift, struct mlx4_mpt_entry *mpt_entry)
{
	int err;

	err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
	if (err)
		return err;

	mpt_entry->start       = cpu_to_be64(iova);
	mpt_entry->length      = cpu_to_be64(size);
	mpt_entry->entity_size = cpu_to_be32(page_shift);
	mpt_entry->flags    &= ~(cpu_to_be32(MLX4_MPT_FLAG_FREE |
					   MLX4_MPT_FLAG_SW_OWNS));
	if (mr->mtt.order < 0) {
		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
		mpt_entry->mtt_addr = 0;
	} else {
		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
						  &mr->mtt));
		if (mr->mtt.page_shift == 0)
			mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
	}
	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
		/* fast register MR in free state */
		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
						   MLX4_MPT_PD_FLAG_RAE);
	} else {
		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
	}
	mr->enabled = MLX4_MPT_EN_SW;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);

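/*
 * Make a reserved MR usable by hardware: back its MPT entry with ICM,
 * fill the entry in a mailbox and hand it over with SW2HW_MPT.  An MR
 * created with page_shift 0 and a non-empty MTT is set up in free state
 * for fast registration.
 */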
int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
{
	struct mlx4_cmd_mailbox *mailbox;
	struct mlx4_mpt_entry *mpt_entry;
	int err;

	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mr->key), GFP_KERNEL);
	if (err)
		return err;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto err_table;
	}
	mpt_entry = mailbox->buf;
	mpt_entry->flags = cpu_to_be32(MLX4_MPT_FLAG_MIO	 |
				       MLX4_MPT_FLAG_REGION	 |
				       mr->access);

	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mr->key));
	mpt_entry->pd_flags    = cpu_to_be32(mr->pd | MLX4_MPT_PD_FLAG_EN_INV);
	mpt_entry->start       = cpu_to_be64(mr->iova);
	mpt_entry->length      = cpu_to_be64(mr->size);
	mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);

	if (mr->mtt.order < 0) {
		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
		mpt_entry->mtt_addr = 0;
	} else {
		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
						  &mr->mtt));
	}

	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
		/* fast register MR in free state */
		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
						   MLX4_MPT_PD_FLAG_RAE);
		mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
	} else {
		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
	}

	err = mlx4_SW2HW_MPT(dev, mailbox,
			     key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1));
	if (err) {
		mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_cmd;
	}
	mr->enabled = MLX4_MPT_EN_HW;

	mlx4_free_cmd_mailbox(dev, mailbox);

	return 0;

err_cmd:
	mlx4_free_cmd_mailbox(dev, mailbox);

err_table:
	mlx4_mpt_free_icm(dev, key_to_hw_index(mr->key));
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_enable);

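/*
 * Write a run of MTT entries directly into ICM.  mlx4_table_find() maps
 * a single ICM page, so a chunk must not cross a page boundary; the
 * callers below split their writes accordingly.
 */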
static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
				int start_index, int npages, u64 *page_list)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	__be64 *mtts;
	dma_addr_t dma_handle;
	int i;

	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
			       start_index, &dma_handle);

	if (!mtts)
		return -ENOMEM;

	dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle,
				npages * sizeof (u64), DMA_TO_DEVICE);

	for (i = 0; i < npages; ++i)
		mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);

	dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle,
				   npages * sizeof (u64), DMA_TO_DEVICE);

	return 0;
}

int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
		     int start_index, int npages, u64 *page_list)
{
	int err = 0;
	int chunk;
	int mtts_per_page;
	int max_mtts_first_page;

	/* compute how many mtts fit in the first page */
	mtts_per_page = PAGE_SIZE / sizeof(u64);
	max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
			      % mtts_per_page;

	chunk = min_t(int, max_mtts_first_page, npages);

	while (npages > 0) {
		err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
		if (err)
			return err;
		npages      -= chunk;
		start_index += chunk;
		page_list   += chunk;

		chunk = min_t(int, mtts_per_page, npages);
	}
	return err;
}

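/*
 * Public MTT-write entry point.  On a multi-function device the entries
 * travel to the master in a mailbox: the first 64-bit word carries the
 * starting MTT index, the second is reserved, and the entries follow.
 */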
int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
		   int start_index, int npages, u64 *page_list)
{
	struct mlx4_cmd_mailbox *mailbox = NULL;
	__be64 *inbox = NULL;
	int chunk;
	int err = 0;
	int i;

	if (mtt->order < 0)
		return -EINVAL;

	if (mlx4_is_mfunc(dev)) {
		mailbox = mlx4_alloc_cmd_mailbox(dev);
		if (IS_ERR(mailbox))
			return PTR_ERR(mailbox);
		inbox = mailbox->buf;

		while (npages > 0) {
			chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
				      npages);
			inbox[0] = cpu_to_be64(mtt->offset + start_index);
			inbox[1] = 0;
			for (i = 0; i < chunk; ++i)
				inbox[i + 2] = cpu_to_be64(page_list[i] |
					       MLX4_MTT_FLAG_PRESENT);
			err = mlx4_WRITE_MTT(dev, mailbox, chunk);
			if (err) {
				mlx4_free_cmd_mailbox(dev, mailbox);
				return err;
			}

			npages      -= chunk;
			start_index += chunk;
			page_list   += chunk;
		}
		mlx4_free_cmd_mailbox(dev, mailbox);
		return err;
	}

	return __mlx4_write_mtt(dev, mtt, start_index, npages, page_list);
}
EXPORT_SYMBOL_GPL(mlx4_write_mtt);

int mlx4_buf_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
		       struct mlx4_buf *buf, gfp_t gfp)
{
	u64 *page_list;
	int err;
	int i;

	page_list = kmalloc(buf->npages * sizeof *page_list,
			    gfp);
	if (!page_list)
		return -ENOMEM;

	for (i = 0; i < buf->npages; ++i)
		if (buf->nbufs == 1)
			page_list[i] = buf->direct.map + (i << buf->page_shift);
		else
			page_list[i] = buf->page_list[i].map;

	err = mlx4_write_mtt(dev, mtt, 0, buf->npages, page_list);

	kfree(page_list);
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_buf_write_mtt);

int mlx4_mw_alloc(struct mlx4_dev *dev, u32 pd, enum mlx4_mw_type type,
		  struct mlx4_mw *mw)
{
	u32 index;

	if ((type == MLX4_MW_TYPE_1 &&
	     !(dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)) ||
	     (type == MLX4_MW_TYPE_2 &&
	     !(dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)))
		return -ENOTSUPP;

	index = mlx4_mpt_reserve(dev);
	if (index == -1)
		return -ENOMEM;

	mw->key	    = hw_index_to_key(index);
	mw->pd      = pd;
	mw->type    = type;
	mw->enabled = MLX4_MPT_DISABLED;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_mw_alloc);

int mlx4_mw_enable(struct mlx4_dev *dev, struct mlx4_mw *mw)
{
	struct mlx4_cmd_mailbox *mailbox;
	struct mlx4_mpt_entry *mpt_entry;
	int err;

	err = mlx4_mpt_alloc_icm(dev, key_to_hw_index(mw->key), GFP_KERNEL);
	if (err)
		return err;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		goto err_table;
	}
	mpt_entry = mailbox->buf;

	/* Note that the MLX4_MPT_FLAG_REGION bit in mpt_entry->flags is turned
	 * off, thus creating a memory window and not a memory region.
	 */
	mpt_entry->key	       = cpu_to_be32(key_to_hw_index(mw->key));
	mpt_entry->pd_flags    = cpu_to_be32(mw->pd);
	if (mw->type == MLX4_MW_TYPE_2) {
		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
		mpt_entry->qpn       = cpu_to_be32(MLX4_MPT_QP_FLAG_BOUND_QP);
		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_EN_INV);
	}

	err = mlx4_SW2HW_MPT(dev, mailbox,
			     key_to_hw_index(mw->key) &
			     (dev->caps.num_mpts - 1));
	if (err) {
		mlx4_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_cmd;
	}
	mw->enabled = MLX4_MPT_EN_HW;

	mlx4_free_cmd_mailbox(dev, mailbox);

	return 0;

err_cmd:
	mlx4_free_cmd_mailbox(dev, mailbox);

err_table:
	mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_mw_enable);

void mlx4_mw_free(struct mlx4_dev *dev, struct mlx4_mw *mw)
{
	int err;

	if (mw->enabled == MLX4_MPT_EN_HW) {
		err = mlx4_HW2SW_MPT(dev, NULL,
				     key_to_hw_index(mw->key) &
				     (dev->caps.num_mpts - 1));
		if (err)
			mlx4_warn(dev, "HW2SW_MPT failed (%d)\n", err);

		mw->enabled = MLX4_MPT_EN_SW;
	}
	if (mw->enabled)
		mlx4_mpt_free_icm(dev, key_to_hw_index(mw->key));
	mlx4_mpt_release(dev, key_to_hw_index(mw->key));
}
EXPORT_SYMBOL_GPL(mlx4_mw_free);

int mlx4_init_mr_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_mr_table *mr_table = &priv->mr_table;
	int err;

	/* Nothing to do for slaves - all MR handling is forwarded
	 * to the master */
	if (mlx4_is_slave(dev))
		return 0;

	if (!is_power_of_2(dev->caps.num_mpts))
		return -EINVAL;

	err = mlx4_bitmap_init(&mr_table->mpt_bitmap, dev->caps.num_mpts,
			       ~0, dev->caps.reserved_mrws, 0);
	if (err)
		return err;

	err = mlx4_buddy_init(&mr_table->mtt_buddy,
			      ilog2((u32)dev->caps.num_mtts /
			      (1 << log_mtts_per_seg)));
	if (err)
		goto err_buddy;

	if (dev->caps.reserved_mtts) {
		priv->reserved_mtts =
			mlx4_alloc_mtt_range(dev,
					     fls(dev->caps.reserved_mtts - 1));
		if (priv->reserved_mtts < 0) {
			mlx4_warn(dev, "MTT table of order %u is too small\n",
				  mr_table->mtt_buddy.max_order);
			err = -ENOMEM;
			goto err_reserve_mtts;
		}
	}

	return 0;

err_reserve_mtts:
	mlx4_buddy_cleanup(&mr_table->mtt_buddy);

err_buddy:
	mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);

	return err;
}

void mlx4_cleanup_mr_table(struct mlx4_dev *dev)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	struct mlx4_mr_table *mr_table = &priv->mr_table;

	if (mlx4_is_slave(dev))
		return;
	if (priv->reserved_mtts >= 0)
		mlx4_free_mtt_range(dev, priv->reserved_mtts,
				    fls(dev->caps.reserved_mtts - 1));
	mlx4_buddy_cleanup(&mr_table->mtt_buddy);
	mlx4_bitmap_cleanup(&mr_table->mpt_bitmap);
}

static inline int mlx4_check_fmr(struct mlx4_fmr *fmr, u64 *page_list,
				  int npages, u64 iova)
{
	int i, page_mask;

	if (npages > fmr->max_pages)
		return -EINVAL;

	page_mask = (1 << fmr->page_shift) - 1;

	/* We are getting page lists, so va must be page aligned. */
	if (iova & page_mask)
		return -EINVAL;

	/* Trust the user not to pass misaligned data in page_list */
	if (0)
		for (i = 0; i < npages; ++i) {
			if (page_list[i] & page_mask)
				return -EINVAL;
		}

	if (fmr->maps >= fmr->max_maps)
		return -EINVAL;

	return 0;
}

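/*
 * Remap an FMR to a new set of pages without firmware commands: move the
 * MPT entry to SW ownership by writing its status byte, update the MTT
 * entries and MPT fields in place, then flip the entry back to HW
 * ownership.
 */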
int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list,
		      int npages, u64 iova, u32 *lkey, u32 *rkey)
{
	u32 key;
	int i, err;

	err = mlx4_check_fmr(fmr, page_list, npages, iova);
	if (err)
		return err;

	++fmr->maps;

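	/*
	 * Derive a fresh key for the same MPT entry: adding num_mpts (a
	 * power of two) leaves the hardware index modulo num_mpts
	 * unchanged but yields a different memory key, so stale remote
	 * references to the old key no longer match.
	 */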
	key = key_to_hw_index(fmr->mr.key);
	key += dev->caps.num_mpts;
	*lkey = *rkey = fmr->mr.key = hw_index_to_key(key);

	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_SW;

	/* Make sure MPT status is visible before writing MTT entries */
	wmb();

	dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle,
				npages * sizeof(u64), DMA_TO_DEVICE);

	for (i = 0; i < npages; ++i)
		fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT);

	dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle,
				   npages * sizeof(u64), DMA_TO_DEVICE);

	fmr->mpt->key    = cpu_to_be32(key);
	fmr->mpt->lkey   = cpu_to_be32(key);
	fmr->mpt->length = cpu_to_be64(npages * (1ull << fmr->page_shift));
	fmr->mpt->start  = cpu_to_be64(iova);

	/* Make sure MTT entries are visible before setting MPT status */
	wmb();

	*(u8 *) fmr->mpt = MLX4_MPT_STATUS_HW;

	/* Make sure MPT status is visible before consumer can use FMR */
	wmb();

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_map_phys_fmr);

int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err = -ENOMEM;

	if (max_maps > dev->caps.max_fmr_maps)
		return -EINVAL;

	if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
		return -EINVAL;

	/* All MTTs must fit in the same page */
	if (max_pages * sizeof *fmr->mtts > PAGE_SIZE)
		return -EINVAL;

	fmr->page_shift = page_shift;
	fmr->max_pages  = max_pages;
	fmr->max_maps   = max_maps;
	fmr->maps = 0;

	err = mlx4_mr_alloc(dev, pd, 0, 0, access, max_pages,
			    page_shift, &fmr->mr);
	if (err)
		return err;

	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
				    fmr->mr.mtt.offset,
				    &fmr->dma_handle);

	if (!fmr->mtts) {
		err = -ENOMEM;
		goto err_free;
	}

	return 0;

err_free:
	(void) mlx4_mr_free(dev, &fmr->mr);
	return err;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_alloc);

int mlx4_fmr_enable(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	int err;

	err = mlx4_mr_enable(dev, &fmr->mr);
	if (err)
		return err;

	fmr->mpt = mlx4_table_find(&priv->mr_table.dmpt_table,
				    key_to_hw_index(fmr->mr.key), NULL);
	if (!fmr->mpt)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_enable);

void mlx4_fmr_unmap(struct mlx4_dev *dev, struct mlx4_fmr *fmr,
		    u32 *lkey, u32 *rkey)
{
	struct mlx4_cmd_mailbox *mailbox;
	int err;

	if (!fmr->maps)
		return;

	fmr->maps = 0;

	mailbox = mlx4_alloc_cmd_mailbox(dev);
	if (IS_ERR(mailbox)) {
		err = PTR_ERR(mailbox);
		pr_warn("mlx4_ib: mlx4_alloc_cmd_mailbox failed (%d)\n", err);
		return;
	}

	err = mlx4_HW2SW_MPT(dev, NULL,
			     key_to_hw_index(fmr->mr.key) &
			     (dev->caps.num_mpts - 1));
	mlx4_free_cmd_mailbox(dev, mailbox);
	if (err) {
		pr_warn("mlx4_ib: mlx4_HW2SW_MPT failed (%d)\n", err);
		return;
	}
	fmr->mr.enabled = MLX4_MPT_EN_SW;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_unmap);

int mlx4_fmr_free(struct mlx4_dev *dev, struct mlx4_fmr *fmr)
{
	int ret;

	if (fmr->maps)
		return -EBUSY;

	ret = mlx4_mr_free(dev, &fmr->mr);
	if (ret)
		return ret;
	fmr->mr.enabled = MLX4_MPT_DISABLED;

	return 0;
}
EXPORT_SYMBOL_GPL(mlx4_fmr_free);

int mlx4_SYNC_TPT(struct mlx4_dev *dev)
{
	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT,
			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
}
EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);