/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c 331769 2018-03-30 18:06:29Z hselasky $
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR, before reg_umr finished, to ensure that the MR
		 * initialization has finished before starting to
		 * handle invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * there can be a fault handling and an invalidation
		 * before umem->odp_data->private == mr is visible to
		 * the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif

static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
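		/*
		 * Back off further cache fills: delay_time_func() clears
		 * fill_delay when the one-second timer fires, and
		 * __cache_work_func() will not call add_keys() until then.
		 */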
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	spin_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	spin_unlock_irqrestore(&table->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
		MLX5_SET(mkc, mkc, log_page_size, 12);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey,
					    (struct mlx5_create_mkey_mbox_in *)in,
					    inlen, reg_mr_callback, mr,
					    (struct mlx5_create_mkey_mbox_out *)mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage collection
		 * task. Such a task is intended to run when no other active
		 * processes are running.
		 *
		 * The need_resched() will return TRUE if there are user tasks
		 * to be activated in the near future.
		 *
		 * In such a case, we don't execute remove_keys() and postpone
		 * the garbage collection work to the next cycle, in order to
		 * free CPU resources to other tasks.
291331769Shselasky */ 292331769Shselasky if (!need_resched() && !someone_adding(cache) && 293331769Shselasky time_after(jiffies, cache->last_add + 300 * HZ)) { 294322810Shselasky remove_keys(dev, i, 1); 295322810Shselasky if (ent->cur > ent->limit) 296331769Shselasky queue_work(cache->wq, &ent->work); 297331769Shselasky } else { 298331769Shselasky queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 299322810Shselasky } 300322810Shselasky } 301322810Shselasky} 302322810Shselasky 303322810Shselaskystatic void delayed_cache_work_func(struct work_struct *work) 304322810Shselasky{ 305322810Shselasky struct mlx5_cache_ent *ent; 306322810Shselasky 307322810Shselasky ent = container_of(work, struct mlx5_cache_ent, dwork.work); 308322810Shselasky __cache_work_func(ent); 309322810Shselasky} 310322810Shselasky 311322810Shselaskystatic void cache_work_func(struct work_struct *work) 312322810Shselasky{ 313322810Shselasky struct mlx5_cache_ent *ent; 314322810Shselasky 315322810Shselasky ent = container_of(work, struct mlx5_cache_ent, work); 316322810Shselasky __cache_work_func(ent); 317322810Shselasky} 318322810Shselasky 319331769Shselaskystatic struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) 320331769Shselasky{ 321331769Shselasky struct mlx5_mr_cache *cache = &dev->cache; 322331769Shselasky struct mlx5_ib_mr *mr = NULL; 323331769Shselasky struct mlx5_cache_ent *ent; 324331769Shselasky int c; 325331769Shselasky int i; 326331769Shselasky 327331769Shselasky c = order2idx(dev, order); 328331769Shselasky if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { 329331769Shselasky mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); 330331769Shselasky return NULL; 331331769Shselasky } 332331769Shselasky 333331769Shselasky for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) { 334331769Shselasky ent = &cache->ent[i]; 335331769Shselasky 336331769Shselasky mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); 337331769Shselasky 338331769Shselasky spin_lock_irq(&ent->lock); 339331769Shselasky if (!list_empty(&ent->head)) { 340331769Shselasky mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 341331769Shselasky list); 342331769Shselasky list_del(&mr->list); 343331769Shselasky ent->cur--; 344331769Shselasky spin_unlock_irq(&ent->lock); 345331769Shselasky if (ent->cur < ent->limit) 346331769Shselasky queue_work(cache->wq, &ent->work); 347331769Shselasky break; 348331769Shselasky } 349331769Shselasky spin_unlock_irq(&ent->lock); 350331769Shselasky 351331769Shselasky queue_work(cache->wq, &ent->work); 352331769Shselasky } 353331769Shselasky 354331769Shselasky if (!mr) 355331769Shselasky cache->ent[c].miss++; 356331769Shselasky 357331769Shselasky return mr; 358331769Shselasky} 359331769Shselasky 360322810Shselaskystatic void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 361322810Shselasky{ 362322810Shselasky struct mlx5_mr_cache *cache = &dev->cache; 363322810Shselasky struct mlx5_cache_ent *ent; 364322810Shselasky int shrink = 0; 365322810Shselasky int c; 366322810Shselasky 367322810Shselasky c = order2idx(dev, mr->order); 368322810Shselasky if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) { 369322810Shselasky mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c); 370322810Shselasky return; 371322810Shselasky } 372322810Shselasky ent = &cache->ent[c]; 373322810Shselasky spin_lock_irq(&ent->lock); 374322810Shselasky list_add_tail(&mr->list, &ent->head); 375322810Shselasky ent->cur++; 376322810Shselasky if (ent->cur > 2 * ent->limit) 377322810Shselasky shrink = 1; 378322810Shselasky 
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

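	/* Every entry has been drained and no new work can be queued. */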
	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey,
				    (struct mlx5_create_mkey_mbox_in *)in,
				    inlen, NULL, NULL, NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int npages, int page_shift, int *size,
			  __be64 **mr_pas, dma_addr_t *dma)
{
	__be64 *pas;
	struct device *ddev = dev->ib_dev.dma_device;

	/*
	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more.
	 */
	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!(*mr_pas))
		return -ENOMEM;

	pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, *size - npages * sizeof(u64));

	*dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, *dma)) {
		kfree(*mr_pas);
		return -ENOMEM;
	}

	return 0;
}

static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
				struct ib_sge *sg, u64 dma, int n, u32 key,
				int page_shift)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	wr->next = NULL;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);

	wr->send_flags = 0;

	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
				   int access_flags, int *npages,
				   int *page_shift, int *ncont, int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
					   access_flags, 0);
	if (IS_ERR(umem)) {
		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(umem);
		return ERR_PTR(-EINVAL);
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return umem;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}

static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	/* Single-shot loop: try the cache once, topping it up on a miss. */
	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
			     &dma);
	if (err)
		goto free_mr;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			 page_shift, virt_addr, len, access_flags);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB.
	 */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		mlx5_ib_init_umr_context(&umr_context);

		memset(&wr, 0, sizeof(wr));
		wr.wr.wr_cqe = &umr_context.cqe;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				  MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.pd->local_dma_lkey;

		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				   MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.wr.sg_list = &sg;
		wr.wr.num_sge = 1;
		wr.wr.opcode = MLX5_IB_WR_UMR;
		wr.npages = sg.length / sizeof(u64);
		wr.page_shift = PAGE_SHIFT;
		wr.mkey = mr->mmkey.key;
		wr.target.offset = start_page_index;

		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr.wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif

/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The pg_access bit allows setting the access flags
	 * in the page list submitted with the command. */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey,
				    (struct mlx5_create_mkey_mbox_in *)in,
				    inlen, NULL, NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}

static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);
	umem = mr_umem_get(pd, start, length, access_flags, &npages,
			   &page_shift, &ncont, &order);

	if (IS_ERR(umem))
		return (void *)umem;

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
		goto error;
	}

	if (!mr) {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fields(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

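/*
 * Invalidate a cache-allocated mkey by posting a UMR WQE with the unreg
 * flag set. If the device is in internal error state the UMR QP cannot
 * complete new work requests, so skip the post and report success.
 */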
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	int err;

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
		     u64 length, int npages, int page_shift, int order,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr umrwr = {};
	struct ib_sge sg;
	struct umr_common *umrc = &dev->umrc;
	dma_addr_t dma = 0;
	__be64 *mr_pas = NULL;
	int size;
	int err;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	if (flags & IB_MR_REREG_TRANS) {
		err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
				     &mr_pas, &dma);
		if (err)
			return err;

		umrwr.target.virt_addr = virt_addr;
		umrwr.length = length;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	}

	prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			    page_shift);

	if (flags & IB_MR_REREG_PD) {
		umrwr.pd = pd;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		umrwr.access_flags = access_flags;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
	}

	/* post send request to UMR QP */
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);

	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}

	up(&umrc->sem);
	if (flags & IB_MR_REREG_TRANS) {
		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
		kfree(mr_pas);
	}
	return err;
}

int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
	int page_shift = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
				       &page_shift, &ncont, &order);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
			return err;
		}
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
1170331769Shselasky */ 1171331769Shselasky if (mr->umred) { 1172331769Shselasky err = unreg_umr(dev, mr); 1173331769Shselasky if (err) 1174331769Shselasky mlx5_ib_warn(dev, "Failed to unregister MR\n"); 1175331769Shselasky } else { 1176331769Shselasky err = destroy_mkey(dev, mr); 1177331769Shselasky if (err) 1178331769Shselasky mlx5_ib_warn(dev, "Failed to destroy MKey\n"); 1179331769Shselasky } 1180331769Shselasky if (err) 1181331769Shselasky return err; 1182331769Shselasky 1183331769Shselasky mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 1184331769Shselasky page_shift, access_flags); 1185331769Shselasky 1186331769Shselasky if (IS_ERR(mr)) 1187331769Shselasky return PTR_ERR(mr); 1188331769Shselasky 1189331769Shselasky mr->umred = 0; 1190331769Shselasky } else { 1191331769Shselasky /* 1192331769Shselasky * Send a UMR WQE 1193331769Shselasky */ 1194331769Shselasky err = rereg_umr(pd, mr, addr, len, npages, page_shift, 1195331769Shselasky order, access_flags, flags); 1196331769Shselasky if (err) { 1197331769Shselasky mlx5_ib_warn(dev, "Failed to rereg UMR\n"); 1198331769Shselasky return err; 1199331769Shselasky } 1200331769Shselasky } 1201331769Shselasky 1202331769Shselasky if (flags & IB_MR_REREG_PD) { 1203331769Shselasky ib_mr->pd = pd; 1204331769Shselasky mr->mmkey.pd = to_mpd(pd)->pdn; 1205331769Shselasky } 1206331769Shselasky 1207331769Shselasky if (flags & IB_MR_REREG_ACCESS) 1208331769Shselasky mr->access_flags = access_flags; 1209331769Shselasky 1210331769Shselasky if (flags & IB_MR_REREG_TRANS) { 1211331769Shselasky atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); 1212331769Shselasky set_mr_fileds(dev, mr, npages, len, access_flags); 1213331769Shselasky mr->mmkey.iova = addr; 1214331769Shselasky mr->mmkey.size = len; 1215331769Shselasky } 1216331769Shselasky#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1217331769Shselasky update_odp_mr(mr); 1218331769Shselasky#endif 1219331769Shselasky 1220331769Shselasky return 0; 1221331769Shselasky} 1222331769Shselasky 1223331769Shselaskystatic int 1224331769Shselaskymlx5_alloc_priv_descs(struct ib_device *device, 1225331769Shselasky struct mlx5_ib_mr *mr, 1226331769Shselasky int ndescs, 1227331769Shselasky int desc_size) 1228331769Shselasky{ 1229331769Shselasky int size = ndescs * desc_size; 1230331769Shselasky int add_size; 1231331769Shselasky int ret; 1232331769Shselasky 1233331769Shselasky add_size = max_t(int, MLX5_UMR_ALIGN - 1, 0); 1234331769Shselasky 1235331769Shselasky mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1236331769Shselasky if (!mr->descs_alloc) 1237331769Shselasky return -ENOMEM; 1238331769Shselasky 1239331769Shselasky mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1240331769Shselasky 1241331769Shselasky mr->desc_map = dma_map_single(device->dma_device, mr->descs, 1242331769Shselasky size, DMA_TO_DEVICE); 1243331769Shselasky if (dma_mapping_error(device->dma_device, mr->desc_map)) { 1244331769Shselasky ret = -ENOMEM; 1245331769Shselasky goto err; 1246331769Shselasky } 1247331769Shselasky 1248331769Shselasky return 0; 1249331769Shselaskyerr: 1250331769Shselasky kfree(mr->descs_alloc); 1251331769Shselasky 1252331769Shselasky return ret; 1253331769Shselasky} 1254331769Shselasky 1255331769Shselaskystatic void 1256331769Shselaskymlx5_free_priv_descs(struct mlx5_ib_mr *mr) 1257331769Shselasky{ 1258331769Shselasky if (mr->descs) { 1259331769Shselasky struct ib_device *device = mr->ibmr.device; 1260331769Shselasky int size = mr->max_descs * mr->desc_size; 1261331769Shselasky 1262331769Shselasky 
		dma_unmap_single(device->dma_device, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	mlx5_free_priv_descs(mr);

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmkey.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

CTASSERT(sizeof(((struct ib_phys_buf *)0)->size) == 8);

struct ib_mr *
mlx5_ib_reg_phys_mr(struct ib_pd *pd,
		    struct ib_phys_buf *buffer_list,
		    int num_phys_buf,
		    int access_flags,
		    u64 *virt_addr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	u32 *in;
	u64 total_size;
	u32 octo_len;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
	unsigned long mask;
	int shift;
	int npages;
	int inlen;
	int err;
	int i, j, n;

	mask = buffer_list[0].addr ^ *virt_addr;
	total_size = 0;
	for (i = 0; i < num_phys_buf; ++i) {
		if (i != 0)
			mask |= buffer_list[i].addr;
		if (i != num_phys_buf - 1)
			mask |= buffer_list[i].addr + buffer_list[i].size;

		total_size += buffer_list[i].size;
	}

	if (mask & ~PAGE_MASK)
		return ERR_PTR(-EINVAL);

	shift = __ffs(mask | 1 << 31);

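	/* Round the first buffer down to a (1 << shift) boundary, growing
	 * its size by the same offset so the end address is unchanged. */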
	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
	buffer_list[0].addr &= ~0ULL << shift;

	npages = 0;
	for (i = 0; i < num_phys_buf; ++i)
		npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	octo_len = get_octo_len(*virt_addr, total_size, 1ULL << shift);
	octo_len = ALIGN(octo_len, 4);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + (octo_len * 16);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		kfree(mr);
		return ERR_PTR(-ENOMEM);
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	n = 0;
	for (i = 0; i < num_phys_buf; ++i) {
		for (j = 0;
		     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
		     ++j) {
			u64 temp = buffer_list[i].addr + ((u64) j << shift);

			if (pg_cap)
				temp |= MLX5_IB_MTT_PRESENT;
			pas[n++] = cpu_to_be64(temp);
		}
	}

	/*
	 * The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access
	 * flags in the page list submitted with the command.
	 */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, start_addr, *virt_addr);
	MLX5_SET64(mkc, mkc, len, total_size);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size, octo_len);
	MLX5_SET(mkc, mkc, log_page_size, shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 octo_len);
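	/*
	 * Hand the prepared mailbox to firmware; on success mr->mmkey.key
	 * carries the new memory key, exposed as both lkey and rkey below.
	 */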
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey,
				    (struct mlx5_create_mkey_mbox_in *)in,
				    inlen, NULL, NULL, NULL);
	mr->umem = NULL;
	mr->dev = dev;
	mr->live = 1;
	mr->npages = npages;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = total_size;
	mr->access_flags = access_flags;

	kvfree(in);

	if (err) {
		kfree(mr);
		return ERR_PTR(err);
	}
	return &mr->ibmr;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding. */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings. */
		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
					 ib_umem_end(umem));
		/*
		 * For ODP, release the umem before the MR so that no
		 * invalidation can still be in flight and looking at
		 * the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}

struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
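	/*
	 * The descriptor layout depends on the MR type: MEM_REG uses a
	 * plain MTT page list, SG_GAPS uses KLMs so that each entry can
	 * carry an arbitrary byte offset and length, and SIGNATURE
	 * additionally requires a pair of PSVs for the memory and wire
	 * domains.
	 */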
	if (mr_type == IB_MR_TYPE_MEM_REG) {
		mr->access_mode = MLX5_ACCESS_MODE_MTT;
		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(u64));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(u64);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
		mr->access_mode = MLX5_ACCESS_MODE_KLM;

		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_klm));
		if (err)
			goto err_free_in;
		mr->desc_size = sizeof(struct mlx5_klm);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
		u32 psv_index[2];

		MLX5_SET(mkc, mkc, bsf_en, 1);
		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* Create the memory and wire PSVs. */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		mr->access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Arm SIGERR on the next UMR. */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
	MLX5_SET(mkc, mkc, umr_en, 1);

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey,
				    (struct mlx5_create_mkey_mbox_in *)in,
				    inlen, NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}
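/*
 * Allocate a type 1 or type 2 memory window.  The window is just an
 * mkey created in the free state with KLM access mode; the actual
 * translation is filled in later, when the window is bound to an MR.
 */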
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
			       struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = NULL;
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32 comp_mask;
		__u32 response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return ERR_PTR(-EOPNOTSUPP);

	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!mw || !in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_KLM);
	MLX5_SET(mkc, mkc, en_rinval, !!(type == IB_MW_TYPE_2));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey,
				    (struct mlx5_create_mkey_mbox_in *)in,
				    inlen, NULL, NULL, NULL);
	if (err)
		goto free;

	mw->ibmw.rkey = mw->mmkey.key;

	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err) {
			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
			goto free;
		}
	}

	kfree(in);
	return &mw->ibmw;

free:
	kfree(mw);
	kfree(in);
	return ERR_PTR(err);
}

int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
	struct mlx5_ib_mw *mmw = to_mmw(mw);
	int err;

	err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
				     &mmw->mmkey);
	if (!err)
		kfree(mmw);
	return err;
}
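/*
 * Report and clear a pending signature error on a signature-enabled
 * MR.  Only the IB_MR_CHECK_SIG_STATUS flag is supported; the error
 * details recorded when the device reported the error are copied out
 * once, and sig_err_exists is then cleared.
 */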
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key) {
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		} else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}
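/*
 * Translate a scatterlist directly into KLM descriptors.  Unlike the
 * MTT path, each KLM carries its own base address and byte count, so
 * the SG entries need not be page aligned; only the first entry
 * honours *sg_offset_p, and the offset actually consumed is written
 * back on return.
 */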
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;
	mr->ndescs = sg_nents;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg);

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	return i;
}

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}
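/*
 * Illustrative sketch only, not part of this driver: how a kernel ULP
 * would typically drive the mapping path above, assuming a 4.9-era
 * ibcore with ib_alloc_mr()/ib_map_mr_sg() and an already DMA-mapped
 * scatterlist "sgl" with "nents" entries.  Error handling is omitted.
 *
 *	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *	int n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *
 * If n == nents, mr->lkey and mr->rkey now describe the list; the ULP
 * then posts an IB_WR_REG_MR work request on its QP before using the
 * key in RDMA operations, and invalidates the key when it is done.
 */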