mlx4_ib_mr.c revision 255932
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sched.h>

#ifdef __linux__
#include <linux/proc_fs.h>
#include <linux/cred.h>
#endif

#include "mlx4_ib.h"

static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       MLX4_PERM_LOCAL_READ;
}
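
/*
 * Example (illustrative only, not part of the driver):
 *
 *	convert_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ)
 *	    == MLX4_PERM_LOCAL_WRITE | MLX4_PERM_REMOTE_READ | MLX4_PERM_LOCAL_READ
 *
 * i.e. local read permission is always granted regardless of the
 * requested access flags.
 */
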
#ifdef __linux__
static ssize_t shared_mr_proc_read(struct file *file,
				   char __user *buffer,
				   size_t len,
				   loff_t *offset)
{
	return -ENOSYS;
}

static ssize_t shared_mr_proc_write(struct file *file,
				    const char __user *buffer,
				    size_t len,
				    loff_t *offset)
{
	return -ENOSYS;
}

static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
	struct mlx4_shared_mr_info *smr_info =
		(struct mlx4_shared_mr_info *)pde->data;

	/* Prevent any mapping not on start of area */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return ib_umem_map_to_vma(smr_info->umem, vma);
}

static const struct file_operations shared_mr_proc_ops = {
	.owner	= THIS_MODULE,
	.read	= shared_mr_proc_read,
	.write	= shared_mr_proc_write,
	.mmap	= shared_mr_mmap
};

static mode_t convert_shared_access(int acc)
{
	return (acc & IB_ACCESS_SHARED_MR_USER_READ   ? S_IRUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_USER_WRITE  ? S_IWUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_READ  ? S_IRGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_READ  ? S_IROTH : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
}
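
/*
 * Example (illustrative only): access_flags that include
 * IB_ACCESS_SHARED_MR_USER_READ | IB_ACCESS_SHARED_MR_GROUP_READ map to
 * S_IRUSR | S_IRGRP (mode 0440), which prepare_shared_mr() below ORs into
 * the mode of the proc file it creates for the shared MR.
 */
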
#endif

struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	int k;
	int err = 0;
	u64 mtt_entries;
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		WARN(1,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     len, mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to the mtt_size.  This is required to
	 * handle cases where the MR starts in the middle of an MTT record.
	 * It was not needed in the old code, since the physical addresses
	 * provided by the DMA subsystem were page aligned, which was also
	 * the MTT size.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);
	/* Write one MTT entry per mtt_size chunk of the block. */
	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and
		 * pass it chunks of appropriate size.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev,
					     mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}
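
/*
 * Worked example (illustrative numbers only): with mtt_size = 0x1000,
 * cur_start_addr = 0x12345800 and len = 0x1000, the block spans
 * [0x12345800, 0x12346800).  len is first padded to 0x2000 to cover the
 * unaligned head and tail, the start is rounded down to 0x12345000, and
 * two MTT entries are written: 0x12345000 and 0x12346000.  Together they
 * cover the whole (misaligned) block.
 */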

int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	struct ib_umem_chunk *chunk;
	int j;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (j = 0; j < chunk->nmap; ++j) {
			if (cur_start_addr + len ==
			    sg_dma_address(&chunk->page_list[j])) {
				/* Still the same block */
				len += sg_dma_len(&chunk->page_list[j]);
				continue;
			}
			/*
			 * A new block is started.  If len is misaligned,
			 * write an extra MTT entry to cover the misaligned
			 * area (round up the division).
			 */
			err = mlx4_ib_umem_write_mtt_block(dev,
						mtt, mtt_size, mtt_shift,
						len, cur_start_addr,
						pages,
						&start_index,
						&npages);
			if (err)
				goto out;

			cur_start_addr =
				sg_dma_address(&chunk->page_list[j]);
			len = sg_dma_len(&chunk->page_list[j]);
		}

	/* Handle the last block */
	if (len > 0) {
		/*
		 * If len is misaligned, write an extra MTT entry to cover
		 * the misaligned area (round up the division).
		 */
		err = mlx4_ib_umem_write_mtt_block(dev,
						mtt, mtt_size, mtt_shift,
						len, cur_start_addr,
						pages,
						&start_index,
						&npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}

static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}
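
/*
 * Example (illustrative only): alignment_of() returns the bit position of
 * the lowest set bit of its argument, i.e. the largest power-of-two
 * alignment of an address:
 *
 *	alignment_of(0x3000)   == 12	(4 KB aligned)
 *	alignment_of(0x200000) == 21	(2 MB aligned)
 */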

static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
					u64 current_block_end,
					u64 block_shift)
{
	/*
	 * Check whether the new block is aligned as well as the previous
	 * block was: a block address must start with zeros up to the
	 * entity size.
	 */
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the previous block, so reduce
		 * the MTT size accordingly.  Here we take the lowest set bit
		 * of the address.
		 */
		block_shift = alignment_of(next_block_start);

	/*
	 * Check whether the end of the previous block is aligned as well as
	 * the start of this block.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the start of the block, so
		 * reduce the MTT size accordingly.
		 */
		block_shift = alignment_of(current_block_end);

	return block_shift;
}
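
/*
 * Example (illustrative numbers only): with a current block ending at
 * 0x2200000 and the next block starting at 0x2400000, an incoming
 * block_shift of 25 is first reduced to alignment_of(0x2400000) == 22 and
 * then to alignment_of(0x2200000) == 21, so subsequent MTT entries are at
 * most 2 MB (1 << 21) in size.
 */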

/*
 * Calculate the optimal MTT size based on contiguous pages.
 * The function returns the best page shift to use for the MTT entries and
 * sets *num_of_mtts to the number of entries needed.  Pages that are not
 * aligned to the calculated mtt_size are counted as well: the first and
 * last block lengths are checked, and any misaligned head or tail is
 * added to the total.  All blocks in the middle are already handled as
 * part of the MTT shift calculation, for both their start and end
 * addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
						u64 start_va,
						int *num_of_mtts)
{
	struct ib_umem_chunk *chunk;
	int j;
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 current_block_len = 0;
	u64 current_block_start = 0;
	u64 misalignment_bits;
	u64 first_block_start = 0;
	u64 last_block_end = 0;
	u64 total_len = 0;
	u64 last_block_aligned_end = 0;
	u64 min_shift = ilog2(umem->page_size);

	list_for_each_entry(chunk, &umem->chunk_list, list) {
		/*
		 * Initialization - save the first chunk start as the
		 * current_block_start; a block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			first_block_start = current_block_start =
				sg_dma_address(&chunk->page_list[0]);
			/*
			 * Find the bits that differ between the physical
			 * address and the virtual address for the start of
			 * the MR.
			 *
			 * umem_get aligned the start_va to a page boundary,
			 * therefore we need to align the start va to the
			 * same boundary.
			 *
			 * misalignment_bits is needed to handle the case of
			 * a single memory region.  In this case, the rest of
			 * the logic will not reduce the block size.  If we
			 * used a block size larger than the alignment of the
			 * misalignment bits, we might use the virtual page
			 * number instead of the physical page number,
			 * resulting in access to the wrong data.
			 */
			misalignment_bits =
				(start_va & (~(((u64)(umem->page_size)) - 1ULL)))
				^ current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}

		/*
		 * Go over the scatter entries in the current chunk, checking
		 * whether they continue the previous scatter entry.
		 */
		for (j = 0; j < chunk->nmap; ++j) {
			u64 next_block_start =
				sg_dma_address(&chunk->page_list[j]);
			u64 current_block_end = current_block_start
				+ current_block_len;
			/* If we have a split (non-contiguity) between two blocks */
			if (current_block_end != next_block_start) {
				block_shift = mlx4_ib_umem_calc_block_mtt(
						next_block_start,
						current_block_end,
						block_shift);

				/*
				 * If we reached the minimum shift for a 4k
				 * page, stop the loop.
				 */
				if (block_shift <= min_shift)
					goto end;

				/*
				 * Save the length of the block just ended so
				 * that the non-aligned pages can be counted
				 * at the end.
				 */
				total_len += current_block_len;

				/* Start a new block */
				current_block_start = next_block_start;
				current_block_len =
					sg_dma_len(&chunk->page_list[j]);
				continue;
			}
			/*
			 * The scatter entry is another part of the current
			 * block, so increase the block size.  An entry in
			 * the scatter list can be larger than 4k (a page)
			 * because the DMA mapping may merge some blocks
			 * together.
			 */
			current_block_len +=
				sg_dma_len(&chunk->page_list[j]);
		}
	}

	/* Account for the last block in the total len */
	total_len += current_block_len;
	/* Add to the first block the misalignment that it suffers from. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	WARN((total_len & ((1ULL << block_shift) - 1ULL)),
	     "misaligned total length detected (%llu, %llu)!",
	     total_len, block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * If the shift is less than the minimum, warn and return
		 * the minimum shift.
		 */
		WARN(1,
		     "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
		     block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}
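
/*
 * Worked example (hypothetical addresses): a umem describing two DMA
 * blocks, one starting at 0x2000000 with length 0x200000 and one starting
 * at 0x2400000 with length 0x100000, splits at 0x2200000/0x2400000.
 * Assuming the VA/PA misalignment check left block_shift above 22, the
 * split reduces it to 21, total_len is rounded up to 0x400000, and the
 * function returns 21 with *num_of_mtts == 2 (two 2 MB MTT entries).
 */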

#ifdef __linux__
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
	struct proc_dir_entry *mr_proc_entry;
	mode_t mode = S_IFREG;
	char name_buff[16];

	mode |= convert_shared_access(access_flags);
	sprintf(name_buff, "%X", mr_id);
	mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
	if (!mr->smr_info)
		return -ENOMEM;
	mr->smr_info->mr_id = mr_id;
	mr->smr_info->umem = mr->umem;

	mr_proc_entry = proc_create_data(name_buff, mode,
					 mlx4_mrs_dir_entry,
					 &shared_mr_proc_ops,
					 mr->smr_info);

	if (!mr_proc_entry) {
		pr_err("prepare_shared_mr failed via proc\n");
		kfree(mr->smr_info);
		return -ENODEV;
	}

	current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
	mr_proc_entry->size = mr->umem->length;
	return 0;
}

static int is_shared_mr(int access_flags)
{
	/*
	 * Check whether IB_ACCESS_SHARED_MR_USER_READ or any of the other
	 * shared access bits are turned on.
	 */
	return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
				IB_ACCESS_SHARED_MR_USER_WRITE |
				IB_ACCESS_SHARED_MR_GROUP_READ |
				IB_ACCESS_SHARED_MR_GROUP_WRITE |
				IB_ACCESS_SHARED_MR_OTHER_READ |
				IB_ACCESS_SHARED_MR_OTHER_WRITE));
}
#endif

struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get(pd->uobject->context, start, length,
			       access_flags, 0);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	n = ib_umem_page_count(mr->umem);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
#ifdef __linux__
	/* Check whether the MR should be shared */
	if (is_shared_mr(access_flags)) {
		/*
		 * The start address and length must be aligned to the page
		 * size in order to map a full page and to prevent leakage
		 * of data.
		 */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			err = -EINVAL;
			goto err_mr;
		}

		err = prepare_shared_mr(mr, access_flags, mr_id);
		if (err)
			goto err_mr;
	}
#endif
	return &mr->ibmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (mr->smr_info) {
		/*
		 * When the master/parent shared MR is deregistered, there is
		 * no longer any way to share this MR - its mr_id will be
		 * returned to the kernel as part of ib_uverbs_dereg_mr and
		 * may be allocated again as part of another reg_mr.
		 */
		char name_buff[16];

		sprintf(name_buff, "%X", mr->smr_info->mr_id);
		/*
		 * remove_proc_entry checks internally that no operation was
		 * started on that proc fs file; if one is in progress, the
		 * current process will wait until it completes.  That is why
		 * no extra synchronization is needed when we release the
		 * shared umem below.
		 */
#ifdef __linux__
		remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
		kfree(mr->smr_info);
#endif
	}

	if (mr->umem)
		ib_umem_release(mr->umem);

	kfree(mr);

	return 0;
}

struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_page_list_len, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(dev->dev, &mr->mmr);

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof (u64);

	if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	int size = page_list->max_page_list_len * sizeof (u64);

	dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
				 struct ib_fmr_attr *fmr_attr)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_fmr *fmr;
	int err = -ENOMEM;

	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
	if (!fmr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
			     fmr_attr->max_pages, fmr_attr->max_maps,
			     fmr_attr->page_shift, &fmr->mfmr);
	if (err)
		goto err_free;

	err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
	if (err)
		goto err_mr;

	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

	return &fmr->ibfmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
	kfree(fmr);

	return ERR_PTR(err);
}

int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

	return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
				 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}

int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
	struct ib_fmr *ibfmr;
	int err;
	struct mlx4_dev *mdev = NULL;

	list_for_each_entry(ibfmr, fmr_list, list) {
		if (mdev && to_mdev(ibfmr->device)->dev != mdev)
			return -EINVAL;
		mdev = to_mdev(ibfmr->device)->dev;
	}

	if (!mdev)
		return 0;

	list_for_each_entry(ibfmr, fmr_list, list) {
		struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

		mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
	}

	/*
	 * Make sure all MPT status updates are visible before issuing
	 * SYNC_TPT firmware command.
	 */
	wmb();

	err = mlx4_SYNC_TPT(mdev);
	if (err)
		pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

	return 0;
}

int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
	int err;

	err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);

	if (!err)
		kfree(ifmr);

	return err;
}