/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_mr.c 368226 2020-12-01 13:10:25Z hselasky $
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define	MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR before reg_umr has finished, to ensure that the
		 * MR initialization has finished before we start
		 * handling invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * a page fault could be handled and an invalidation
		 * could run before umem->odp_data->private == mr is
		 * visible to the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif

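/*
 * Completion callback for the asynchronous CREATE_MKEY commands issued
 * by add_keys(). On success the new mkey gets a fresh variable-part key
 * byte, is added to its cache bucket and inserted into the device-wide
 * mkey radix tree; on failure the MR is freed and cache refilling is
 * throttled for one second via the delay timer.
 */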
static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5_ib_mr *mr =
		container_of(context, struct mlx5_ib_mr, cb_work);
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	spin_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_mkey_to_idx(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	spin_unlock_irqrestore(&table->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
		MLX5_SET(mkc, mkc, log_page_size, 12);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       &dev->async_ctx, in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, &mr->cb_work);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

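/*
 * Trim up to "num" unused cached MRs from bucket "c", destroying their
 * mkeys in the device. Stops early when the bucket runs empty.
 */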
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

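/*
 * Cache maintenance: refill a bucket that has dropped below twice its
 * limit, or garbage-collect one that has grown beyond that (see the
 * comment inside for why shrinking is deferred while the CPU is busy).
 */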
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage
		 * collection task. Such a task is intended to run when
		 * no other active processes are running.
		 *
		 * need_resched() returns TRUE if there are user tasks
		 * to be activated in the near future.
		 *
		 * In such a case, we do not execute remove_keys() and
		 * postpone the garbage collection work to the next
		 * cycle, in order to free CPU resources for other
		 * tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

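/*
 * Drain bucket "c" completely; used at cache teardown, after the
 * cache has been stopped and the workqueue flushed.
 */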
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);
	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

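/*
 * Allocate a DMA MR: a physical-address mkey spanning the whole address
 * space (length64 set, start_addr 0), with access rights taken from "acc".
 */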
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
			  int npages, int page_shift, int *size,
			  __be64 **mr_pas, dma_addr_t *dma)
{
	__be64 *pas;
	struct device *ddev = dev->ib_dev.dma_device;

	/*
	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more.
	 */
	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!(*mr_pas))
		return -ENOMEM;

	pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, *size - npages * sizeof(u64));

	*dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, *dma)) {
		kfree(*mr_pas);
		return -ENOMEM;
	}

	return 0;
}

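/*
 * The helpers below build the UMR work requests: a common part carrying
 * the scatter entry for the translation table, plus variants for the
 * register and unregister operations.
 */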
static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
				struct ib_sge *sg, u64 dma, int n, u32 key,
				int page_shift)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	wr->next = NULL;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);

	wr->send_flags = 0;

	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
				   int access_flags, int *npages,
				   int *page_shift, int *ncont, int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
					   access_flags, 0);
	if (IS_ERR(umem)) {
		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(umem);
		return ERR_PTR(-EINVAL);
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return umem;
}

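/*
 * UMR completions are reported through a regular CQE; the context below
 * captures the WC status and wakes up the posting thread.
 */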
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}

static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
			     &dma);
	if (err)
		goto free_mr;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			 page_shift, virt_addr, len, access_flags);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

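/*
 * For ODP, mlx5_ib_update_mtt() below rewrites a range of MTT entries
 * of a registered MR through UMR, either populating them from the umem
 * or zapping them when pages are invalidated.
 */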
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB. */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		mlx5_ib_init_umr_context(&umr_context);

		memset(&wr, 0, sizeof(wr));
		wr.wr.wr_cqe = &umr_context.cqe;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				  MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.pd->local_dma_lkey;

		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				   MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.wr.sg_list = &sg;
		wr.wr.num_sge = 1;
		wr.wr.opcode = MLX5_IB_WR_UMR;
		wr.npages = sg.length / sizeof(u64);
		wr.page_shift = PAGE_SHIFT;
		wr.mkey = mr->mmkey.key;
		wr.target.offset = start_page_index;

		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr.wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif

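/*
 * reg_create() is the slow path: it allocates the full translation
 * table inline in the CREATE_MKEY command instead of going through the
 * UMR QP. mlx5_ib_reg_user_mr() serializes it with dev->slow_path_mutex.
 */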
/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
		sizeof(*pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The pg_access bit allows setting the access flags
	 * in the page list submitted with the command. */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
		 get_octo_len(virt_addr, length, 1 << page_shift));

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}

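/* Fill in the ib_mr fields and account the pinned pages. */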
static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);
	umem = mr_umem_get(pd, start, length, access_flags, &npages,
			   &page_shift, &ncont, &order);

	if (IS_ERR(umem))
		return (void *)umem;

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
		goto error;
	}

	if (!mr) {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fields(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

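/*
 * Release an mkey back to the free state through a UMR unregister WQE.
 * Reported as success without posting anything when the device is in
 * the internal-error state.
 */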
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	int err;

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

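/*
 * Modify an existing mkey in place through UMR: translation, PD and/or
 * access flags, selected by the IB_MR_REREG_* bits in "flags".
 */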
static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
		     u64 length, int npages, int page_shift, int order,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr umrwr = {};
	struct ib_sge sg;
	struct umr_common *umrc = &dev->umrc;
	dma_addr_t dma = 0;
	__be64 *mr_pas = NULL;
	int size;
	int err;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	if (flags & IB_MR_REREG_TRANS) {
		err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
				     &mr_pas, &dma);
		if (err)
			return err;

		umrwr.target.virt_addr = virt_addr;
		umrwr.length = length;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
	}

	prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
			    page_shift);

	if (flags & IB_MR_REREG_PD) {
		umrwr.pd = pd;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		umrwr.access_flags = access_flags;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
	}

	/* post send request to UMR QP */
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);

	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}

	up(&umrc->sem);
	if (flags & IB_MR_REREG_TRANS) {
		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
		kfree(mr_pas);
	}
	return err;
}

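/*
 * Re-registration entry point: replaces the umem unless only the PD
 * changes, then either patches the mkey via UMR or, when the new range
 * does not fit the existing translation table, destroys and recreates it.
 */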
int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
	int page_shift = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    (long long)start, (long long)virt_addr, (long long)length, access_flags);

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
				       &page_shift, &ncont, &order);
		if (IS_ERR(mr->umem)) {
			err = PTR_ERR(mr->umem);
			mr->umem = NULL;
			return err;
		}
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
		 */
		if (mr->umred) {
			err = unreg_umr(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to unregister MR\n");
		} else {
			err = destroy_mkey(dev, mr);
			if (err)
				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
		}
		if (err)
			return err;

		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags);

		if (IS_ERR(mr))
			return PTR_ERR(mr);

		mr->umred = 0;
	} else {
		/*
		 * Send a UMR WQE
		 */
		err = rereg_umr(pd, mr, addr, len, npages, page_shift,
				order, access_flags, flags);
		if (err) {
			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
			return err;
		}
	}

	if (flags & IB_MR_REREG_PD) {
		ib_mr->pd = pd;
		mr->mmkey.pd = to_mpd(pd)->pdn;
	}

	if (flags & IB_MR_REREG_ACCESS)
		mr->access_flags = access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
		set_mr_fields(dev, mr, npages, len, access_flags);
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
	}
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	return 0;
}

static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	add_size = max_t(int, MLX5_UMR_ALIGN - 1, 0);

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(device->dma_device, mr->descs,
				      size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dma_device, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;

		dma_unmap_single(device->dma_device, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

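/*
 * Common MR teardown: destroy signature PSVs and private descriptors,
 * then either destroy the mkey or, for cache-born (umred) MRs,
 * unregister it via UMR and return it to the cache.
 */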
static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	mlx5_free_priv_descs(mr);

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmkey.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

CTASSERT(sizeof(((struct ib_phys_buf *)0)->size) == 8);

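/*
 * Register a set of physically contiguous buffers. The common alignment
 * of all buffer boundaries picks the largest usable page size (shift),
 * and the page list is built inline in the CREATE_MKEY command.
 */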
	shift = __ffs(mask | 1 << 31);

	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
	buffer_list[0].addr &= ~0ULL << shift;

	npages = 0;
	for (i = 0; i < num_phys_buf; ++i)
		npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;

	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof *mr, GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	octo_len = get_octo_len(*virt_addr, total_size, 1ULL << shift);
	octo_len = ALIGN(octo_len, 4);

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + (octo_len * 16);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		kfree(mr);
		return ERR_PTR(-ENOMEM);
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);

	n = 0;
	for (i = 0; i < num_phys_buf; ++i) {
		for (j = 0;
		     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
		     ++j) {
			u64 temp = buffer_list[i].addr + ((u64) j << shift);

			if (pg_cap)
				temp |= MLX5_IB_MTT_PRESENT;
			pas[n++] = cpu_to_be64(temp);
		}
	}

	/*
	 * The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access
	 * flags in the page list submitted with the command:
	 */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, start_addr, *virt_addr);
	MLX5_SET64(mkc, mkc, len, total_size);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size, octo_len);
	MLX5_SET(mkc, mkc, log_page_size, shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octo_len);

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);

	mr->umem = NULL;
	mr->dev = dev;
	mr->live = 1;
	mr->npages = npages;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = total_size;
	mr->access_flags = access_flags;

	kvfree(in);
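	/*
	 * Note: the create_mkey status is only inspected after the input
	 * mailbox has been freed and the mr fields filled in; on failure
	 * the mr itself is freed below, so the stale key values are never
	 * exposed to the caller.
	 */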
	if (err) {
		kfree(mr);
		return ERR_PTR(err);
	}
	return &mr->ibmr;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
					 ib_umem_end(umem));
		/*
		 * For ODP we release the umem before destroying the MR,
		 * so that no invalidation still in flight can be looking
		 * at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}

struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
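	/*
	 * Note: the three supported MR types differ mainly in their
	 * translation format. IB_MR_TYPE_MEM_REG uses MTT entries (one
	 * u64 per page), IB_MR_TYPE_SG_GAPS uses KLMs (which also carry
	 * a byte count, so arbitrary gaps are representable), and
	 * IB_MR_TYPE_SIGNATURE layers a BSF and a pair of PSVs on top
	 * of a KLM-based mkey for signature offload.
	 */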
	if (mr_type == IB_MR_TYPE_MEM_REG) {
		mr->access_mode = MLX5_ACCESS_MODE_MTT;
		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(u64));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(u64);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
		mr->access_mode = MLX5_ACCESS_MODE_KLM;

		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_klm));
		if (err)
			goto err_free_in;
		mr->desc_size = sizeof(struct mlx5_klm);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
		u32 psv_index[2];

		MLX5_SET(mkc, mkc, bsf_en, 1);
		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		mr->access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	MLX5_SET(mkc, mkc, access_mode, mr->access_mode);
	MLX5_SET(mkc, mkc, umr_en, 1);

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}
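/*
 * Note: memory windows are implemented here as umr-enabled KLM mkeys that
 * are created in the "free" state; the actual binding to a range happens
 * later via a work request. en_rinval is set for type 2 windows so that
 * they can be torn down by remote invalidate operations.
 */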
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
			       struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = NULL;
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32 comp_mask;
		__u32 response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return ERR_PTR(-EOPNOTSUPP);

	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!mw || !in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_KLM);
	MLX5_SET(mkc, mkc, en_rinval, !!(type == IB_MW_TYPE_2));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
	if (err)
		goto free;

	mw->ibmw.rkey = mw->mmkey.key;

	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err) {
			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
			goto free;
		}
	}

	kfree(in);
	return &mw->ibmw;

free:
	kfree(mw);
	kfree(in);
	return ERR_PTR(err);
}

int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
	struct mlx5_ib_mw *mmw = to_mmw(mw);
	int err;

	err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
				     &mmw->mmkey);
	if (!err)
		kfree(mmw);
	return err;
}
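/*
 * Note: a signature error latched by the hardware is reported exactly once.
 * Querying the status marks it checked (sig_status_checked) and clears
 * sig_err_exists, so a second query returns a clean status until the next
 * error is recorded.
 */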
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key) {
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		} else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}
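/*
 * Note: each KLM describes one scatterlist element verbatim (address,
 * byte count, lkey), so unlike the MTT path no page alignment of the
 * interior elements is required; only the first element honours the
 * caller's sg_offset.
 */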
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;
	mr->ndescs = sg_nents;

	for_each_sg(sgl, sg, sg_nents, i) {
		/* Never write past the end of the descriptor array. */
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg);

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	return i;
}

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}
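/*
 * Usage sketch (not part of this driver): a ULP normally reaches
 * mlx5_ib_alloc_mr()/mlx5_ib_map_mr_sg() through the generic verbs entry
 * points. Assuming "pd" is an allocated PD and "sgl"/"nents" describe an
 * already DMA-mapped scatterlist (both hypothetical caller state):
 *
 *	struct ib_mr *mr;
 *	int n;
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 32);
 *	if (IS_ERR(mr))
 *		return PTR_ERR(mr);
 *	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *	if (n < nents)
 *		;	// not all elements fit; map fewer or grow the MR
 *	// then post an IB_WR_REG_MR work request to activate the mapping
 */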