/* mlx4_ib_mr.c (FreeBSD SVN revision 255932) */
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sched.h>

#ifdef __linux__
#include <linux/proc_fs.h>
#include <linux/cred.h>
#endif

#include "mlx4_ib.h"

static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       MLX4_PERM_LOCAL_READ;
}
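
/*
 * Example: acc = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ converts to
 * MLX4_PERM_LOCAL_WRITE | MLX4_PERM_REMOTE_READ | MLX4_PERM_LOCAL_READ;
 * local read permission is always granted.
 */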
#ifdef __linux__
static ssize_t shared_mr_proc_read(struct file *file,
				   char __user *buffer,
				   size_t len,
				   loff_t *offset)
{
	return -ENOSYS;
}

static ssize_t shared_mr_proc_write(struct file *file,
				    const char __user *buffer,
				    size_t len,
				    loff_t *offset)
{
	return -ENOSYS;
}

static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
	struct mlx4_shared_mr_info *smr_info =
		(struct mlx4_shared_mr_info *)pde->data;

	/* Prevent any mapping not on the start of the area. */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return ib_umem_map_to_vma(smr_info->umem, vma);
}

static const struct file_operations shared_mr_proc_ops = {
	.owner	= THIS_MODULE,
	.read	= shared_mr_proc_read,
	.write	= shared_mr_proc_write,
	.mmap	= shared_mr_mmap
};

static mode_t convert_shared_access(int acc)
{
	return (acc & IB_ACCESS_SHARED_MR_USER_READ   ? S_IRUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_USER_WRITE  ? S_IWUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_READ  ? S_IRGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_READ  ? S_IROTH : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
}
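
/*
 * Example: IB_ACCESS_SHARED_MR_USER_READ | IB_ACCESS_SHARED_MR_GROUP_READ
 * converts to S_IRUSR | S_IRGRP (mode 0440), so the proc entry backing the
 * shared MR can be opened read-only by its owner and group.
 */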
#endif

struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	int k;
	int err = 0;
	u64 mtt_entries;
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		WARN(1,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     len, mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to the mtt_size.  This is required to
	 * handle cases when the MR starts in the middle of an MTT record.
	 * It was not required in the old code, since the physical addresses
	 * provided by the DMA subsystem were page aligned, which was also
	 * the MTT size.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);
	/* A new block is started... */
	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and pass it chunks of
		 * appropriate size.
		 */
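		/* With 4 KB pages this flushes every PAGE_SIZE / sizeof(u64)
		 * = 512 accumulated MTT entries. */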
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}

int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	struct ib_umem_chunk *chunk;
	int j;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (j = 0; j < chunk->nmap; ++j) {
			if (cur_start_addr + len ==
			    sg_dma_address(&chunk->page_list[j])) {
				/* Still the same block. */
				len += sg_dma_len(&chunk->page_list[j]);
				continue;
			}
			/*
			 * A new block is started.  If len is misaligned,
			 * write an extra MTT entry to cover the misaligned
			 * area (round up the division).
			 */
			err = mlx4_ib_umem_write_mtt_block(dev,
							   mtt, mtt_size,
							   mtt_shift,
							   len,
							   cur_start_addr,
							   pages,
							   &start_index,
							   &npages);
			if (err)
				goto out;

			cur_start_addr =
				sg_dma_address(&chunk->page_list[j]);
			len = sg_dma_len(&chunk->page_list[j]);
		}

	/*
	 * Handle the last block.  If len is misaligned, write an extra MTT
	 * entry to cover the misaligned area (round up the division).
	 */
	if (len > 0) {
		err = mlx4_ib_umem_write_mtt_block(dev,
						   mtt, mtt_size, mtt_shift,
						   len, cur_start_addr,
						   pages,
						   &start_index,
						   &npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}

/* Return the log2 of the lowest set bit of ptr, i.e. its alignment. */
static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}

static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	/*
	 * Check whether the new block is aligned as well as the previous
	 * block: a block address must have zeros in its low block_shift
	 * bits.
	 */
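	/* Example: alignment_of(0x6000) isolates the lowest set bit
	 * (0x2000) and returns 13, so a block starting at 0x6000 can be
	 * described by MTT entries of at most 1 << 13 bytes. */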
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * The new block is not as well aligned as the previous one;
		 * reduce the MTT size accordingly.  Here we take the lowest
		 * set bit of the address.
		 */
		block_shift = alignment_of(next_block_start);

	/*
	 * Check whether the end of the previous block is aligned as well as
	 * its start.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the start of the block;
		 * reduce the MTT size accordingly.
		 */
		block_shift = alignment_of(current_block_end);

	return block_shift;
}

/*
 * Calculate the optimal MTT size based on contiguous pages.
 * The function also returns, via *num_of_mtts, the number of pages that are
 * not aligned to the calculated mtt_size, to be added to the total number
 * of pages.  For that we check the first and last chunk lengths and, if
 * they are not aligned to mtt_size, increment the non_aligned_pages count.
 * All chunks in the middle are already handled as part of the MTT shift
 * calculation, for both their start and end addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
				       u64 start_va,
				       int *num_of_mtts)
{
	struct ib_umem_chunk *chunk;
	int j;
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 current_block_len = 0;
	u64 current_block_start = 0;
	u64 misalignment_bits;
	u64 first_block_start = 0;
	u64 last_block_end = 0;
	u64 total_len = 0;
	u64 last_block_aligned_end = 0;
	u64 min_shift = ilog2(umem->page_size);

	list_for_each_entry(chunk, &umem->chunk_list, list) {
		/*
		 * Initialization: save the first chunk start as
		 * current_block_start; a block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			first_block_start = current_block_start =
				sg_dma_address(&chunk->page_list[0]);
			/*
			 * Find the bits that differ between the physical
			 * address and the virtual address of the start of
			 * the MR.  umem_get aligned start_va to a page
			 * boundary, so we align start_va to the same
			 * boundary here.
			 *
			 * misalignment_bits is needed to handle the case of
			 * a single memory region, where the rest of the
			 * logic will not reduce the block size.  If we used
			 * a block size larger than the alignment of the
			 * misalignment bits, we might address by the
			 * virtual page number instead of the physical page
			 * number and access the wrong data.
			 */
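			/* Example: start_va = 0x10002000 mapping physical
			 * address 0x7f001000 gives an XOR of 0x6f003000,
			 * whose lowest set bit is 1 << 12; block_shift is
			 * then capped at 12 (one 4 KB page per MTT entry). */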
			misalignment_bits =
				(start_va & (~(((u64)(umem->page_size)) - 1ULL)))
				^ current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}

		/*
		 * Go over the scatter entries in the current chunk and check
		 * whether they continue the previous scatter entry.
		 */
		for (j = 0; j < chunk->nmap; ++j) {
			u64 next_block_start =
				sg_dma_address(&chunk->page_list[j]);
			u64 current_block_end = current_block_start
				+ current_block_len;
			/* Is there a split (non-contiguity) between two blocks? */
			if (current_block_end != next_block_start) {
				block_shift = mlx4_ib_umem_calc_block_mtt(
						next_block_start,
						current_block_end,
						block_shift);

				/*
				 * If we reached the minimum shift for a 4k
				 * page, stop the loop.
				 */
				if (block_shift <= min_shift)
					goto end;

				/*
				 * If not saved yet, we are in the first
				 * block; save its length to calculate the
				 * non_aligned_pages count at the end.
				 */
				total_len += current_block_len;

				/* Start a new block. */
				current_block_start = next_block_start;
				current_block_len =
					sg_dma_len(&chunk->page_list[j]);
				continue;
			}
			/*
			 * The scatter entry is another part of the current
			 * block: increase the block size.  An entry in the
			 * scatter list can be larger than 4k (a page) when
			 * the DMA mapping merges blocks together.
			 */
			current_block_len +=
				sg_dma_len(&chunk->page_list[j]);
		}
	}

	/* Account for the last block in the total length. */
	total_len += current_block_len;
	/* Add to the first block the misalignment that it suffers from. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	WARN((total_len & ((1ULL << block_shift) - 1ULL)),
	     "misaligned total length detected (%llu, %llu)!",
	     total_len, block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * If the shift is less than the minimum, warn and return
		 * the minimum shift.
		 */
		WARN(1,
		     "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %llu\n",
		     block_shift);

		block_shift = min_shift;
	}
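
	/*
	 * The returned shift is used by the caller as the MTT page shift
	 * passed to mlx4_mr_alloc(); *num_of_mtts holds the matching number
	 * of MTT entries.
	 */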
	return block_shift;
}

#ifdef __linux__
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
	struct proc_dir_entry *mr_proc_entry;
	mode_t mode = S_IFREG;
	char name_buff[16];

	mode |= convert_shared_access(access_flags);
	sprintf(name_buff, "%X", mr_id);
	mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
	if (!mr->smr_info)
		return -ENOMEM;
	mr->smr_info->mr_id = mr_id;
	mr->smr_info->umem = mr->umem;

	mr_proc_entry = proc_create_data(name_buff, mode,
					 mlx4_mrs_dir_entry,
					 &shared_mr_proc_ops,
					 mr->smr_info);

	if (!mr_proc_entry) {
		pr_err("prepare_shared_mr failed via proc\n");
		kfree(mr->smr_info);
		return -ENODEV;
	}

	current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
	mr_proc_entry->size = mr->umem->length;
	return 0;
}

static int is_shared_mr(int access_flags)
{
	/*
	 * Check whether IB_ACCESS_SHARED_MR_USER_READ or any other shared
	 * bit is turned on.
	 */
	return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
				  IB_ACCESS_SHARED_MR_USER_WRITE |
				  IB_ACCESS_SHARED_MR_GROUP_READ |
				  IB_ACCESS_SHARED_MR_GROUP_WRITE |
				  IB_ACCESS_SHARED_MR_OTHER_READ |
				  IB_ACCESS_SHARED_MR_OTHER_WRITE));
}
#endif

struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get(pd->uobject->context, start, length,
			       access_flags, 0);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	n = ib_umem_page_count(mr->umem);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;
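
	/* An mlx4 MR exposes a single HCA memory key, used below as both
	 * the lkey and the rkey. */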
	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
#ifdef __linux__
	/* Check whether the MR should be shared. */
	if (is_shared_mr(access_flags)) {
		/*
		 * The start address and the length must be aligned to the
		 * page size in order to map a full page and prevent leaking
		 * data.
		 */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			err = -EINVAL;
			goto err_mr;
		}

		err = prepare_shared_mr(mr, access_flags, mr_id);
		if (err)
			goto err_mr;
	}
#endif
	return &mr->ibmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (mr->smr_info) {
		/*
		 * When a master/parent shared MR is deregistered there is
		 * no way to share this MR any more: its mr_id is returned
		 * to the kernel as part of ib_uverbs_dereg_mr and may be
		 * allocated again by another reg_mr.
		 */
		char name_buff[16];

		sprintf(name_buff, "%X", mr->smr_info->mr_id);
		/*
		 * remove_proc_entry checks internally that no operation was
		 * started on that proc fs file; if one is in progress, the
		 * current process waits until it completes.  That is why no
		 * extra synchronization is needed when releasing the shared
		 * umem below.
		 */
#ifdef __linux__
		remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
		kfree(mr->smr_info);
#endif
	}

	if (mr->umem)
		ib_umem_release(mr->umem);

	kfree(mr);

	return 0;
}

struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_page_list_len, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	mlx4_mr_free(dev->dev, &mr->mmr);

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof (u64);

	if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	/* The HCA expects the mapped page list to be 64-byte aligned. */
	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	int size = page_list->max_page_list_len * sizeof (u64);

	dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
				 struct ib_fmr_attr *fmr_attr)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_fmr *fmr;
	int err = -ENOMEM;

	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
	if (!fmr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
			     fmr_attr->max_pages, fmr_attr->max_maps,
			     fmr_attr->page_shift, &fmr->mfmr);
	if (err)
		goto err_free;

	err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
	if (err)
		goto err_mr;

	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

	return &fmr->ibfmr;

err_mr:
	mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
	kfree(fmr);

	return ERR_PTR(err);
}

int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

	return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
				 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}

int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
	struct ib_fmr *ibfmr;
	int err;
	struct mlx4_dev *mdev = NULL;

	/* All FMRs in the list must belong to the same device. */
	list_for_each_entry(ibfmr, fmr_list, list) {
		if (mdev && to_mdev(ibfmr->device)->dev != mdev)
			return -EINVAL;
		mdev = to_mdev(ibfmr->device)->dev;
	}

	if (!mdev)
		return 0;

	list_for_each_entry(ibfmr, fmr_list, list) {
		struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

		mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
	}

	/*
	 * Make sure all MPT status updates are visible before issuing
	 * SYNC_TPT firmware command.
	 */
	wmb();

	err = mlx4_SYNC_TPT(mdev);
	if (err)
		pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

	return 0;
}

int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
	int err;

	err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);

	if (!err)
		kfree(ifmr);

	return err;
}