mlx4_ib_mr.c revision 296382
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sched.h>

#include "mlx4_ib.h"

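/*
 * Translate IB access flags into the mlx4 MPT permission bits.
 * Local read access is always granted.
 */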
static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
	       MLX4_PERM_LOCAL_READ;
}
/* No support for the Shared MR feature */
#if 0
static ssize_t shared_mr_proc_read(struct file *file,
			  char __user *buffer,
			  size_t len,
			  loff_t *offset)
{

	return -ENOSYS;

}

static ssize_t shared_mr_proc_write(struct file *file,
			   const char __user *buffer,
			   size_t len,
			   loff_t *offset)
{

	return -ENOSYS;
}

static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{

	struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
	struct mlx4_shared_mr_info *smr_info =
		(struct mlx4_shared_mr_info *)pde->data;

	/* Prevent any mapping that does not start at the beginning of the area */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return ib_umem_map_to_vma(smr_info->umem, vma);

}

static const struct file_operations shared_mr_proc_ops = {
	.owner	= THIS_MODULE,
	.read	= shared_mr_proc_read,
	.write	= shared_mr_proc_write,
	.mmap	= shared_mr_mmap
};

static mode_t convert_shared_access(int acc)
{

	return (acc & IB_ACCESS_SHARED_MR_USER_READ   ? S_IRUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_USER_WRITE  ? S_IWUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_READ  ? S_IRGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_READ  ? S_IROTH : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);

}
#endif
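
/*
 * Allocate a DMA memory region: an MR starting at address 0 and covering
 * the full 64-bit address space (~0ull), usable for kernel DMA with the
 * access rights requested in 'acc'.
 */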
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

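/*
 * Write the MTT entries for a single block of contiguous DMA addresses.
 * The block start and length are rounded to the MTT entry size, and the
 * resulting entries are accumulated in 'pages' and flushed to the device
 * in page-sized batches via mlx4_write_mtt().
 */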
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
						struct mlx4_mtt *mtt,
						u64 mtt_size,
						u64 mtt_shift,
						u64 len,
						u64 cur_start_addr,
						u64 *pages,
						int *start_index,
						int *npages)
{
	int k;
	int err = 0;
	u64 mtt_entries;
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		WARN(1,
		"write_block: len %llx is not aligned to mtt_size %llx\n",
			(unsigned long long)len, (unsigned long long)mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to the mtt_size.  This is required to
	 * handle cases where the MR starts in the middle of an MTT record.
	 * It was not required in the old code, since the physical addresses
	 * provided by the DMA subsystem were page aligned, which was also
	 * the MTT size.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);
	/* A new block is started ... */
	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and
		 * pass it chunks of appropriate size.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev,
					mtt, *start_index,
					*npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}

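/*
 * Walk the umem scatterlist, merge physically contiguous entries into
 * blocks, and write an MTT translation for each block.  The 'pages'
 * scratch page is shared by all blocks and flushed as it fills up.
 */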
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;
	struct scatterlist *sg;
	int i;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		if (cur_start_addr + len == sg_dma_address(sg)) {
			/* Still the same block */
			len += sg_dma_len(sg);
			continue;
		}
		/*
		 * A new block is started ...
		 * If len is misaligned, write an extra MTT entry to cover
		 * the misaligned area (round up the division).
		 */
		err = mlx4_ib_umem_write_mtt_block(dev,
						mtt, mtt_size, mtt_shift,
						len, cur_start_addr,
						pages,
						&start_index,
						&npages);
		if (err)
			goto out;

		cur_start_addr = sg_dma_address(sg);
		len = sg_dma_len(sg);
	}

	/* Handle the last block */
	if (len > 0) {
		/*
		 * If len is misaligned, write an extra MTT entry to cover
		 * the misaligned area (round up the division).
		 */
		err = mlx4_ib_umem_write_mtt_block(dev,
						mtt, mtt_size, mtt_shift,
						len, cur_start_addr,
						pages,
						&start_index,
						&npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}

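/*
 * Return the alignment of 'ptr' expressed as a shift, i.e. the index of
 * its least-significant set bit.
 */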
static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}

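/*
 * Reduce 'block_shift' so that both the start of the next block and the
 * end of the current block are aligned to the resulting MTT entry size.
 */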
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
						u64 current_block_end,
						u64 block_shift)
{
	/*
	 * Check whether the new block is aligned as well as the previous
	 * block was.  A block address must start with zeros up to the
	 * entity size.
	 */
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the previous block - reduce
		 * the MTT size accordingly.  Here we take the last set bit
		 * on the right.
		 */
		block_shift = alignment_of(next_block_start);

	/*
	 * Check whether the end of the previous block is aligned as well
	 * as the start of this block.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the start of the block -
		 * reduce the MTT size accordingly.
		 */
		block_shift = alignment_of(current_block_end);

	return block_shift;
}

/*
 * Calculate the optimal MTT size based on contiguous pages.
 * The function also accounts for the pages that are not aligned to the
 * calculated mtt_size, adding them to the total number of pages.  For that
 * we check the first and last chunk lengths: if either is not aligned to
 * mtt_size, the non-aligned pages count is incremented.  All chunks in the
 * middle are already handled as part of the MTT shift calculation for both
 * their start and end addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
						u64 start_va,
						int *num_of_mtts)
{
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 current_block_len = 0;
	u64 current_block_start = 0;
	u64 misalignment_bits;
	u64 first_block_start = 0;
	u64 last_block_end = 0;
	u64 total_len = 0;
	u64 last_block_aligned_end = 0;
	u64 min_shift = ilog2(umem->page_size);
	struct scatterlist *sg;
	int i;
	u64 next_block_start;
	u64 current_block_end;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		/*
		 * Initialization - save the first chunk start as the
		 * current_block_start - a block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			first_block_start = current_block_start =
				sg_dma_address(sg);
			/*
			 * Find the bits that are different between the
			 * physical address and the virtual address for the
			 * start of the MR.
			 *
			 * umem_get aligned start_va to a page boundary.
			 * Therefore, we need to align the start VA to the
			 * same boundary.
			 *
			 * misalignment_bits is needed to handle the case of
			 * a single memory region.  In this case, the rest of
			 * the logic will not reduce the block size.  If we
			 * used a block size which is bigger than the
			 * alignment of the misalignment bits, we might use
			 * the virtual page number instead of the physical
			 * page number, resulting in access to the wrong
			 * data.
			 */
			misalignment_bits =
			(start_va & (~(((u64)(umem->page_size)) - 1ULL)))
						^ current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
				block_shift);
		}

		/*
		 * Go over the scatter entries and check whether they
		 * continue the previous scatter entry.
		 */
		next_block_start = sg_dma_address(sg);
		current_block_end = current_block_start + current_block_len;
		/* If we have a split (non-contiguous) between two blocks */
		if (current_block_end != next_block_start) {
			block_shift = mlx4_ib_umem_calc_block_mtt(
					next_block_start,
					current_block_end,
					block_shift);

			/*
			 * If we reached the minimum shift for a 4k page,
			 * stop the loop.
			 */
			if (block_shift <= min_shift)
				goto end;

			/*
			 * If not saved yet, we are in the first block -
			 * save the length of the first block in order to
			 * calculate the non_aligned_pages number at the end.
			 */
			total_len += current_block_len;

			/* Start a new block */
			current_block_start = next_block_start;
			current_block_len = sg_dma_len(sg);
			continue;
		}
		/*
		 * The scatter entry is another part of the current block:
		 * increase the block size.  An entry in the scatter list can
		 * be larger than 4k (a page) because the DMA mapping may
		 * merge some blocks together.
		 */
		current_block_len += sg_dma_len(sg);
	}

	/* Account for the last block in the total len */
	total_len += current_block_len;
	/* Add to the first block the misalignment that it suffers from. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	WARN((total_len & ((1ULL << block_shift) - 1ULL)),
		" misaligned total length detected (%llu, %llu)!",
		(unsigned long long)total_len, (unsigned long long)block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * If the shift is less than the minimum, warn and return
		 * the minimum shift.
		 */
		WARN(1,
		"mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
		(unsigned long long)block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}

/* No support for Shared MR */
#if 0
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{

	struct proc_dir_entry *mr_proc_entry;
	mode_t mode = S_IFREG;
	char name_buff[16];

	mode |= convert_shared_access(access_flags);
	sprintf(name_buff, "%X", mr_id);
	mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
	mr->smr_info->mr_id = mr_id;
	mr->smr_info->umem = mr->umem;

	mr_proc_entry = proc_create_data(name_buff, mode,
				mlx4_mrs_dir_entry,
				&shared_mr_proc_ops,
				mr->smr_info);

	if (!mr_proc_entry) {
		pr_err("prepare_shared_mr failed via proc\n");
		kfree(mr->smr_info);
		return -ENODEV;
	}

	current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
	mr_proc_entry->size = mr->umem->length;
	return 0;

}

static int is_shared_mr(int access_flags)
{
	/*
	 * Check whether IB_ACCESS_SHARED_MR_USER_READ or any of the other
	 * shared bits were turned on.
	 */
	return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
				IB_ACCESS_SHARED_MR_USER_WRITE |
				IB_ACCESS_SHARED_MR_GROUP_READ |
				IB_ACCESS_SHARED_MR_GROUP_WRITE |
				IB_ACCESS_SHARED_MR_OTHER_READ |
				IB_ACCESS_SHARED_MR_OTHER_WRITE));

}

static void free_smr_info(struct mlx4_ib_mr *mr)
{
	/*
	 * When the master/parent shared MR is deregistered there is no way
	 * to share this MR any more - its mr_id will be returned to the
	 * kernel as part of ib_uverbs_dereg_mr and may be allocated again
	 * as part of another reg_mr.
	 */
	char name_buff[16];

	sprintf(name_buff, "%X", mr->smr_info->mr_id);
	/*
	 * remove_proc_entry checks internally that no operation was started
	 * on that procfs file; if one is in progress, the current process
	 * waits until it ends.  That is why no sync mechanism is needed when
	 * we release the shared umem below.
	 */
	remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
	kfree(mr->smr_info);
	mr->smr_info = NULL;
}
#endif

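/*
 * Invalidation callback for umems backed by a peer memory client.  The
 * first side to invalidate (this callback or mlx4_ib_dereg_mr()) frees
 * the hardware MR and releases the umem; the other side only flags or
 * waits for the in-flight invalidation.
 */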
static void mlx4_invalidate_umem(void *invalidation_cookie,
				struct ib_umem *umem,
				unsigned long addr, size_t size)
{
	struct mlx4_ib_mr *mr = (struct mlx4_ib_mr *)invalidation_cookie;

	/*
	 * This function is called under the client peer lock, so its
	 * resources are race protected.
	 */
	if (atomic_inc_return(&mr->invalidated) > 1) {
		umem->invalidation_ctx->inflight_invalidation = 1;
		goto end;
	}

	umem->invalidation_ctx->peer_callback = 1;
	mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr);
	ib_umem_release(umem);
	complete(&mr->invalidation_comp);

end:
	return;
}

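/*
 * Register a user memory region: pin the user pages, pick the largest
 * MTT entry size the page layout allows, allocate and populate the MTT,
 * and enable the MR.  For umems backed by a peer memory client an
 * invalidation notifier is registered so the MR can be torn down
 * asynchronously.
 */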
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;
	struct ib_peer_memory_client *ib_peer_mem;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get_ex(pd->uobject->context, start, length,
			access_flags, 0, 1);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	ib_peer_mem = mr->umem->ib_peer_mem;
	n = ib_umem_page_count(mr->umem);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			 convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
/* No support for Shared MR */
#if 0
	/* Check whether the MR should be shared */
	if (is_shared_mr(access_flags)) {
		/*
		 * The start address and length must be aligned to the page
		 * size in order to map a full page and prevent leakage of
		 * data.
		 */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			err = -EINVAL;
			goto err_mr;
		}

		err = prepare_shared_mr(mr, access_flags, mr_id);
		if (err)
			goto err_mr;
	}
#endif
	if (ib_peer_mem) {
		if (access_flags & IB_ACCESS_MW_BIND) {
			/*
			 * Prevent binding MWs on peer clients.
			 * mlx4_invalidate_umem must be void, therefore
			 * mlx4_mr_free should not fail when using peer
			 * clients.
			 */
			err = -ENOSYS;
			pr_err("MW is not supported with peer memory client");
			goto err_smr;
		}
		init_completion(&mr->invalidation_comp);
		ib_umem_activate_invalidation_notifier(mr->umem,
					mlx4_invalidate_umem, mr);
	}

	atomic_set(&mr->invalidated, 0);
	return &mr->ibmr;

err_smr:
/* No support for Shared MR */
#if 0
	if (mr->smr_info)
		free_smr_info(mr);
#endif
err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

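/*
 * Deregister an MR.  'invalidated' arbitrates with mlx4_invalidate_umem():
 * whichever side gets there first frees the hardware MR and releases the
 * umem; the loser here just waits for the invalidation to complete.
 */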
int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int ret;

/* No support for Shared MR */
#if 0
	if (mr->smr_info)
		free_smr_info(mr);
#endif

	if (atomic_inc_return(&mr->invalidated) > 1) {
		wait_for_completion(&mr->invalidation_comp);
		goto end;
	}

	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (ret) {
		/*
		 * An error is not expected here, except when memory windows
		 * are bound to an MR, which is not supported with peer
		 * memory clients.
		 */
		atomic_set(&mr->invalidated, 0);
		return ret;
	}

	if (!umem)
		goto end;

	ib_umem_release(mr->umem);
end:
	kfree(mr);

	return 0;
}

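/* Allocate a memory window of the requested type on the given PD. */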
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mw *mw;
	int err;

	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
	if (!mw)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn, (enum mlx4_mw_type)type, &mw->mmw);
	if (err)
		goto err_free;

	err = mlx4_mw_enable(dev->dev, &mw->mmw);
	if (err)
		goto err_mw;

	mw->ibmw.rkey = mw->mmw.key;

	return &mw->ibmw;

err_mw:
	mlx4_mw_free(dev->dev, &mw->mmw);

err_free:
	kfree(mw);

	return ERR_PTR(err);
}

int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
		    struct ib_mw_bind *mw_bind)
{
	struct ib_send_wr  wr;
	struct ib_send_wr *bad_wr;
	int ret;

	memset(&wr, 0, sizeof(wr));
	wr.opcode               = IB_WR_BIND_MW;
	wr.wr_id                = mw_bind->wr_id;
	wr.send_flags           = mw_bind->send_flags;
	wr.wr.bind_mw.mw        = mw;
	wr.wr.bind_mw.bind_info = mw_bind->bind_info;
	wr.wr.bind_mw.rkey      = ib_inc_rkey(mw->rkey);

	ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
	if (!ret)
		mw->rkey = wr.wr.bind_mw.rkey;

	return ret;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = to_mmw(ibmw);

	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
	kfree(mw);

	return 0;
}

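/*
 * Allocate an MR for fast register work requests: no translation is
 * written here, only an MTT large enough for 'max_page_list_len' pages.
 */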
struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_page_list_len, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof (u64);

	if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	int size = page_list->max_page_list_len * sizeof (u64);

	dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
				 struct ib_fmr_attr *fmr_attr)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_fmr *fmr;
	int err = -ENOMEM;

	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
	if (!fmr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
			     fmr_attr->max_pages, fmr_attr->max_maps,
			     fmr_attr->page_shift, &fmr->mfmr);
	if (err)
		goto err_free;

	err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
	if (err)
		goto err_mr;

	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

	return &fmr->ibfmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
	kfree(fmr);

	return ERR_PTR(err);
}

int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
		      int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

	return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
				 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}

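/*
 * Unmap a list of FMRs.  All FMRs must belong to the same device; the
 * MPTs are invalidated first and a single SYNC_TPT command then flushes
 * the translation caches.
 */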
int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
	struct ib_fmr *ibfmr;
	int err;
	struct mlx4_dev *mdev = NULL;

	list_for_each_entry(ibfmr, fmr_list, list) {
		if (mdev && to_mdev(ibfmr->device)->dev != mdev)
			return -EINVAL;
		mdev = to_mdev(ibfmr->device)->dev;
	}

	if (!mdev)
		return 0;

	list_for_each_entry(ibfmr, fmr_list, list) {
		struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

		mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
	}

	/*
	 * Make sure all MPT status updates are visible before issuing
	 * SYNC_TPT firmware command.
	 */
	wmb();

	err = mlx4_SYNC_TPT(mdev);
	if (err)
		pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

	return 0;
}

int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
	int err;

	err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);

	if (!err)
		kfree(ifmr);

	return err;
}
886