mlx4_ib_mr.c revision 296382
/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <linux/module.h>
#include <linux/sched.h>

#include "mlx4_ib.h"
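
/*
 * Translate IB verbs access flags into mlx4 MPT permission bits; local
 * read access is always granted.
 */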
static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
	       MLX4_PERM_LOCAL_READ;
}

/* No support for Shared MR feature */
#if 0
static ssize_t shared_mr_proc_read(struct file *file,
				   char __user *buffer,
				   size_t len,
				   loff_t *offset)
{
	return -ENOSYS;
}

static ssize_t shared_mr_proc_write(struct file *file,
				    const char __user *buffer,
				    size_t len,
				    loff_t *offset)
{
	return -ENOSYS;
}

static int shared_mr_mmap(struct file *filep, struct vm_area_struct *vma)
{
	struct proc_dir_entry *pde = PDE(filep->f_path.dentry->d_inode);
	struct mlx4_shared_mr_info *smr_info =
		(struct mlx4_shared_mr_info *)pde->data;

	/* Prevent any mapping not on start of area */
	if (vma->vm_pgoff != 0)
		return -EINVAL;

	return ib_umem_map_to_vma(smr_info->umem, vma);
}

static const struct file_operations shared_mr_proc_ops = {
	.owner	= THIS_MODULE,
	.read	= shared_mr_proc_read,
	.write	= shared_mr_proc_write,
	.mmap	= shared_mr_mmap
};

static mode_t convert_shared_access(int acc)
{
	return (acc & IB_ACCESS_SHARED_MR_USER_READ   ? S_IRUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_USER_WRITE  ? S_IWUSR : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_READ  ? S_IRGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_GROUP_WRITE ? S_IWGRP : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_READ  ? S_IROTH : 0) |
	       (acc & IB_ACCESS_SHARED_MR_OTHER_WRITE ? S_IWOTH : 0);
}
#endif

struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
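
/*
 * Write the MTT entries covering one contiguous DMA block.  The block is
 * padded at both ends to mtt_size alignment, split into mtt_size-sized
 * entries collected in the pages[] scratch array, and flushed to the
 * device with mlx4_write_mtt() one page worth of entries at a time.
 */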
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size,
					u64 mtt_shift,
					u64 len,
					u64 cur_start_addr,
					u64 *pages,
					int *start_index,
					int *npages)
{
	int k;
	int err = 0;
	u64 mtt_entries;
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		WARN(1,
		     "write_block: len %llx is not aligned to mtt_size %llx\n",
		     (unsigned long long)len, (unsigned long long)mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to the mtt_size.
	 * Required to handle cases when the MR starts in the middle of an
	 * MTT record.  This was not required in the old code since the
	 * physical addresses provided by the dma subsystem were page
	 * aligned, which was also the MTT size.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);
	/* A new block is started ... */
	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and
		 * pass it chunks of appropriate size.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev,
					     mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}

int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;
	struct scatterlist *sg;
	int i;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		if (cur_start_addr + len == sg_dma_address(sg)) {
			/* still the same block */
			len += sg_dma_len(sg);
			continue;
		}
		/*
		 * A new block is started ...
		 * If len is misaligned, write an extra mtt entry to cover
		 * the misaligned area (round up the division).
		 */
		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
						   mtt_shift, len,
						   cur_start_addr,
						   pages, &start_index,
						   &npages);
		if (err)
			goto out;

		cur_start_addr = sg_dma_address(sg);
		len = sg_dma_len(sg);
	}

	/*
	 * Handle the last block.
	 * If len is misaligned, write an extra mtt entry to cover the
	 * misaligned area (round up the division).
	 */
	if (len > 0) {
		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
						   mtt_shift, len,
						   cur_start_addr,
						   pages, &start_index,
						   &npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}

static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}
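
/*
 * Worked example for the helpers above and below: alignment_of(0x10003000)
 * returns 12, since the lowest set bit is 0x1000.  A candidate block_shift
 * of 20 (1 MB MTT entries) is therefore reduced to 12 (4 KB entries) as
 * soon as mlx4_ib_umem_calc_block_mtt() sees a block boundary with only
 * 4 KB alignment.
 */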
static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	/*
	 * Check whether the new block is aligned as well as the previous
	 * block.
	 * Block address must start with zeros till size of entity_size.
	 */
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the previous block - reduce
		 * the mtt size accordingly.  Here we take the last right
		 * bit which is 1.
		 */
		block_shift = alignment_of(next_block_start);

	/*
	 * Check whether the end of the previous block is aligned as well
	 * as the start of the block.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the start of the block -
		 * reduce the mtt size accordingly.
		 */
		block_shift = alignment_of(current_block_end);

	return block_shift;
}

/*
 * Calculate the optimal mtt size based on contiguous pages.
 * The function also returns the number of pages that are not aligned to
 * the calculated mtt_size, to be added to the total number of pages.  For
 * that we check the first chunk length and the last chunk length; if either
 * is not aligned to mtt_size we increment the non_aligned_pages number.
 * All chunks in the middle are already handled as part of the mtt shift
 * calculation for both their start and end addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem,
				       u64 start_va,
				       int *num_of_mtts)
{
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 current_block_len = 0;
	u64 current_block_start = 0;
	u64 misalignment_bits;
	u64 first_block_start = 0;
	u64 last_block_end = 0;
	u64 total_len = 0;
	u64 last_block_aligned_end = 0;
	u64 min_shift = ilog2(umem->page_size);
	struct scatterlist *sg;
	int i;
	u64 next_block_start;
	u64 current_block_end;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		/*
		 * Initialization - save the first chunk start as the
		 * current_block_start - block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			first_block_start = current_block_start =
				sg_dma_address(sg);
			/*
			 * Find the bits that are different between the
			 * physical address and the virtual address for the
			 * start of the MR.
			 */
			/*
			 * umem_get aligned the start_va to a page boundary.
			 * Therefore, we need to align the start va to the
			 * same boundary.
			 */
			/*
			 * misalignment_bits is needed to handle the case of
			 * a single memory region.  In this case, the rest
			 * of the logic will not reduce the block size.  If
			 * we use a block size which is bigger than the
			 * alignment of the misalignment bits, we might use
			 * the virtual page number instead of the physical
			 * page number, resulting in access to the wrong
			 * data.
			 */
			misalignment_bits =
				(start_va & (~(((u64)(umem->page_size)) - 1ULL)))
				^ current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}

		/*
		 * Go over the scatter entries and check whether they
		 * continue the previous scatter entry.
		 */
		next_block_start = sg_dma_address(sg);
		current_block_end = current_block_start + current_block_len;
		/* If we have a split (non-contiguous) between two blocks */
		if (current_block_end != next_block_start) {
			block_shift = mlx4_ib_umem_calc_block_mtt(
					next_block_start,
					current_block_end,
					block_shift);

			/*
			 * If we reached the minimum shift for a 4k page we
			 * stop the loop.
			 */
			if (block_shift <= min_shift)
				goto end;

			/*
			 * If not saved yet we are in the first block -
			 * we save the length of the first block to
			 * calculate the non_aligned_pages number at the
			 * end.
			 */
			total_len += current_block_len;

			/* Start a new block */
			current_block_start = next_block_start;
			current_block_len = sg_dma_len(sg);
			continue;
		}
		/*
		 * The scatter entry is another part of the current block:
		 * increase the block size.
		 * An entry in the scatterlist can be larger than 4k (page)
		 * because the dma mapping may merge some blocks together.
		 */
		current_block_len += sg_dma_len(sg);
	}

	/* Account for the last block in the total len */
	total_len += current_block_len;
	/* Add to the first block the misalignment that it suffers from. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1 << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	WARN((total_len & ((1ULL << block_shift) - 1ULL)),
	     " misaligned total length detected (%llu, %llu)!",
	     (unsigned long long)total_len, (unsigned long long)block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * If the shift is less than the min we set a WARN and
		 * return the min shift.
		 */
		WARN(1,
		     "mlx4_ib_umem_calc_optimal_mtt_size - unexpected shift %lld\n",
		     (unsigned long long)block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}
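
/*
 * Note: the function above returns the chosen page shift, while the number
 * of MTT entries is passed back through *num_of_mtts; mlx4_ib_reg_user_mr()
 * feeds both values into mlx4_mr_alloc().
 */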

/* No support for Shared MR */
#if 0
static int prepare_shared_mr(struct mlx4_ib_mr *mr, int access_flags, int mr_id)
{
	struct proc_dir_entry *mr_proc_entry;
	mode_t mode = S_IFREG;
	char name_buff[16];

	mode |= convert_shared_access(access_flags);
	sprintf(name_buff, "%X", mr_id);
	mr->smr_info = kmalloc(sizeof(struct mlx4_shared_mr_info), GFP_KERNEL);
	mr->smr_info->mr_id = mr_id;
	mr->smr_info->umem = mr->umem;

	mr_proc_entry = proc_create_data(name_buff, mode,
					 mlx4_mrs_dir_entry,
					 &shared_mr_proc_ops,
					 mr->smr_info);

	if (!mr_proc_entry) {
		pr_err("prepare_shared_mr failed via proc\n");
		kfree(mr->smr_info);
		return -ENODEV;
	}

	current_uid_gid(&(mr_proc_entry->uid), &(mr_proc_entry->gid));
	mr_proc_entry->size = mr->umem->length;
	return 0;
}

static int is_shared_mr(int access_flags)
{
	/*
	 * We should check whether IB_ACCESS_SHARED_MR_USER_READ or any
	 * other shared bit was turned on.
	 */
	return !!(access_flags & (IB_ACCESS_SHARED_MR_USER_READ |
				  IB_ACCESS_SHARED_MR_USER_WRITE |
				  IB_ACCESS_SHARED_MR_GROUP_READ |
				  IB_ACCESS_SHARED_MR_GROUP_WRITE |
				  IB_ACCESS_SHARED_MR_OTHER_READ |
				  IB_ACCESS_SHARED_MR_OTHER_WRITE));
}

static void free_smr_info(struct mlx4_ib_mr *mr)
{
	/*
	 * When the master/parent shared mr is deregistered there is no
	 * ability to share this mr any more; its mr_id will be returned
	 * to the kernel as part of ib_uverbs_dereg_mr and may be allocated
	 * again as part of another reg_mr.
	 */
	char name_buff[16];

	sprintf(name_buff, "%X", mr->smr_info->mr_id);
	/*
	 * remove_proc_entry checks internally that no operation was started
	 * on that proc fs file; if one is in progress, the current process
	 * waits until it ends.  That's why no sync mechanism is needed when
	 * we release the shared umem below.
	 */
	remove_proc_entry(name_buff, mlx4_mrs_dir_entry);
	kfree(mr->smr_info);
	mr->smr_info = NULL;
}
#endif
static void mlx4_invalidate_umem(void *invalidation_cookie,
				 struct ib_umem *umem,
				 unsigned long addr, size_t size)
{
	struct mlx4_ib_mr *mr = (struct mlx4_ib_mr *)invalidation_cookie;

	/*
	 * This function is called under the client peer lock, so its
	 * resources are race protected.
	 */
	if (atomic_inc_return(&mr->invalidated) > 1) {
		umem->invalidation_ctx->inflight_invalidation = 1;
		goto end;
	}

	umem->invalidation_ctx->peer_callback = 1;
	mlx4_mr_free(to_mdev(mr->ibmr.device)->dev, &mr->mmr);
	ib_umem_release(umem);
	complete(&mr->invalidation_comp);

end:
	return;
}

struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata,
				  int mr_id)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;
	struct ib_peer_memory_client *ib_peer_mem;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = ib_umem_get_ex(pd->uobject->context, start, length,
				  access_flags, 0, 1);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	ib_peer_mem = mr->umem->ib_peer_mem;
	n = ib_umem_page_count(mr->umem);
	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);
	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
/* No support for Shared MR */
#if 0
	/* Check whether the MR should be shared */
	if (is_shared_mr(access_flags)) {
		/*
		 * The start address and length must be aligned to the page
		 * size in order to map a full page and prevent leakage of
		 * data.
		 */
		if (mr->umem->offset || (length & ~PAGE_MASK)) {
			err = -EINVAL;
			goto err_mr;
		}

		err = prepare_shared_mr(mr, access_flags, mr_id);
		if (err)
			goto err_mr;
	}
#endif
	if (ib_peer_mem) {
		if (access_flags & IB_ACCESS_MW_BIND) {
			/*
			 * Prevent binding MW on peer clients.
			 * mlx4_invalidate_umem must be void, therefore,
			 * mlx4_mr_free should not fail when using peer
			 * clients.
			 */
			err = -ENOSYS;
			pr_err("MW is not supported with peer memory client\n");
			goto err_smr;
		}
		init_completion(&mr->invalidation_comp);
		ib_umem_activate_invalidation_notifier(mr->umem,
						       mlx4_invalidate_umem,
						       mr);
	}

	atomic_set(&mr->invalidated, 0);
	return &mr->ibmr;

err_smr:
/* No support for Shared MR */
#if 0
	if (mr->smr_info)
		free_smr_info(mr);
#endif
err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int ret;

/* No support for Shared MR */
#if 0
	if (mr->smr_info)
		free_smr_info(mr);
#endif

	if (atomic_inc_return(&mr->invalidated) > 1) {
		wait_for_completion(&mr->invalidation_comp);
		goto end;
	}

	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (ret) {
		/*
		 * An error is not expected here, except when memory windows
		 * are bound to the MR, which is not supported with peer
		 * memory clients.
		 */
		atomic_set(&mr->invalidated, 0);
		return ret;
	}

	if (!umem)
		goto end;

	ib_umem_release(mr->umem);
end:
	kfree(mr);

	return 0;
}
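
/*
 * Memory window support.  The bind operation is implemented by posting an
 * IB_WR_BIND_MW work request on the caller's QP; the window rkey is only
 * updated once the post succeeds.
 */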
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mw *mw;
	int err;

	mw = kmalloc(sizeof(*mw), GFP_KERNEL);
	if (!mw)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
			    (enum mlx4_mw_type)type, &mw->mmw);
	if (err)
		goto err_free;

	err = mlx4_mw_enable(dev->dev, &mw->mmw);
	if (err)
		goto err_mw;

	mw->ibmw.rkey = mw->mmw.key;

	return &mw->ibmw;

err_mw:
	mlx4_mw_free(dev->dev, &mw->mmw);

err_free:
	kfree(mw);

	return ERR_PTR(err);
}

int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
		    struct ib_mw_bind *mw_bind)
{
	struct ib_send_wr wr;
	struct ib_send_wr *bad_wr;
	int ret;

	memset(&wr, 0, sizeof(wr));
	wr.opcode = IB_WR_BIND_MW;
	wr.wr_id = mw_bind->wr_id;
	wr.send_flags = mw_bind->send_flags;
	wr.wr.bind_mw.mw = mw;
	wr.wr.bind_mw.bind_info = mw_bind->bind_info;
	wr.wr.bind_mw.rkey = ib_inc_rkey(mw->rkey);

	ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
	if (!ret)
		mw->rkey = wr.wr.bind_mw.rkey;

	return ret;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = to_mmw(ibmw);

	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
	kfree(mw);

	return 0;
}

struct ib_mr *mlx4_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_page_list_len, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);

err_free:
	kfree(mr);
	return ERR_PTR(err);
}
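
/*
 * Fast-register page lists keep two copies of the page array: the plain
 * kernel buffer exposed through ibfrpl.page_list and a DMA-coherent shadow
 * (mapped_page_list) for the device.  The WARN_ON below checks that the
 * DMA address is 64-byte aligned.
 */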
struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct mlx4_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof (u64);

	if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx4_ib_dev *dev = to_mdev(page_list->device);
	struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	int size = page_list->max_page_list_len * sizeof (u64);

	dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
				 struct ib_fmr_attr *fmr_attr)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_fmr *fmr;
	int err = -ENOMEM;

	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
	if (!fmr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
			     fmr_attr->max_pages, fmr_attr->max_maps,
			     fmr_attr->page_shift, &fmr->mfmr);
	if (err)
		goto err_free;

	err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
	if (err)
		goto err_mr;

	fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

	return &fmr->ibfmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
	kfree(fmr);

	return ERR_PTR(err);
}

int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			 int npages, u64 iova)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

	return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
				 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}

int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
	struct ib_fmr *ibfmr;
	int err;
	struct mlx4_dev *mdev = NULL;

	list_for_each_entry(ibfmr, fmr_list, list) {
		if (mdev && to_mdev(ibfmr->device)->dev != mdev)
			return -EINVAL;
		mdev = to_mdev(ibfmr->device)->dev;
	}

	if (!mdev)
		return 0;

	list_for_each_entry(ibfmr, fmr_list, list) {
		struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

		mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
	}

	/*
	 * Make sure all MPT status updates are visible before issuing
	 * SYNC_TPT firmware command.
	 */
	wmb();

	err = mlx4_SYNC_TPT(mdev);
	if (err)
		pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

	return 0;
}

int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
	struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
	struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
	int err;

	err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);

	if (!err)
		kfree(ifmr);

	return err;
}