/* mlx5_pagealloc.c revision 331807 */
/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
24 * 25 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_core/mlx5_pagealloc.c 331807 2018-03-30 19:13:17Z hselasky $ 26 */ 27 28#include <linux/kernel.h> 29#include <linux/module.h> 30#include <linux/delay.h> 31#include <dev/mlx5/driver.h> 32#include "mlx5_core.h" 33 34CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE); 35 36struct mlx5_pages_req { 37 struct mlx5_core_dev *dev; 38 u16 func_id; 39 s32 npages; 40 struct work_struct work; 41}; 42 43 44enum { 45 MAX_RECLAIM_TIME_MSECS = 5000, 46}; 47 48static void 49mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 50{ 51 struct mlx5_fw_page *fwp; 52 uint8_t owned; 53 54 fwp = (struct mlx5_fw_page *)arg; 55 owned = MLX5_DMA_OWNED(fwp->dev); 56 57 if (!owned) 58 MLX5_DMA_LOCK(fwp->dev); 59 60 if (error == 0) { 61 KASSERT(nseg == 1, ("Number of segments is different from 1")); 62 fwp->dma_addr = segs->ds_addr; 63 fwp->load_done = MLX5_LOAD_ST_SUCCESS; 64 } else { 65 fwp->load_done = MLX5_LOAD_ST_FAILURE; 66 } 67 MLX5_DMA_DONE(fwp->dev); 68 69 if (!owned) 70 MLX5_DMA_UNLOCK(fwp->dev); 71} 72 73void 74mlx5_fwp_flush(struct mlx5_fw_page *fwp) 75{ 76 unsigned num = fwp->numpages; 77 78 while (num--) 79 bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREWRITE); 80} 81 82void 83mlx5_fwp_invalidate(struct mlx5_fw_page *fwp) 84{ 85 unsigned num = fwp->numpages; 86 87 while (num--) { 88 bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_POSTREAD); 89 bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREREAD); 90 } 91} 92 93struct mlx5_fw_page * 94mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num) 95{ 96 struct mlx5_fw_page *fwp; 97 unsigned x; 98 int err; 99 100 /* check for special case */ 101 if (num == 0) { 102 fwp = kzalloc(sizeof(*fwp), flags); 103 if (fwp != NULL) 104 fwp->dev = dev; 105 return (fwp); 106 } 107 108 /* we need sleeping context for this function */ 109 if (flags & M_NOWAIT) 110 return (NULL); 111 112 
fwp = kzalloc(sizeof(*fwp) * num, flags); 113 114 /* serialize loading the DMA map(s) */ 115 sx_xlock(&dev->cmd.dma_sx); 116 117 for (x = 0; x != num; x++) { 118 /* store pointer to MLX5 core device */ 119 fwp[x].dev = dev; 120 /* store number of pages left from the array */ 121 fwp[x].numpages = num - x; 122 123 /* allocate memory */ 124 err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr, 125 BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map); 126 if (err != 0) 127 goto failure; 128 129 /* load memory into DMA */ 130 MLX5_DMA_LOCK(dev); 131 err = bus_dmamap_load( 132 dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr, 133 MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb, 134 fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT); 135 136 while (fwp[x].load_done == MLX5_LOAD_ST_NONE) 137 MLX5_DMA_WAIT(dev); 138 MLX5_DMA_UNLOCK(dev); 139 140 /* check for error */ 141 if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) { 142 bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr, 143 fwp[x].dma_map); 144 goto failure; 145 } 146 } 147 sx_xunlock(&dev->cmd.dma_sx); 148 return (fwp); 149 150failure: 151 while (x--) { 152 bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map); 153 bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr, fwp[x].dma_map); 154 } 155 sx_xunlock(&dev->cmd.dma_sx); 156 return (NULL); 157} 158 159void 160mlx5_fwp_free(struct mlx5_fw_page *fwp) 161{ 162 struct mlx5_core_dev *dev; 163 unsigned num; 164 165 /* be NULL safe */ 166 if (fwp == NULL) 167 return; 168 169 /* check for special case */ 170 if (fwp->numpages == 0) { 171 kfree(fwp); 172 return; 173 } 174 175 num = fwp->numpages; 176 dev = fwp->dev; 177 178 while (num--) { 179 bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map); 180 bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr, fwp[num].dma_map); 181 } 182 183 kfree(fwp); 184} 185 186u64 187mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset) 188{ 189 size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE); 190 KASSERT(index < fwp->numpages, ("Invalid 
offset: %lld", (long long)offset)); 191 192 return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE)); 193} 194 195void * 196mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset) 197{ 198 size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE); 199 KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset)); 200 201 return ((char *)(fwp + index)->virt_addr + (offset % MLX5_ADAPTER_PAGE_SIZE)); 202} 203 204static int 205mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp) 206{ 207 struct rb_root *root = &dev->priv.page_root; 208 struct rb_node **new = &root->rb_node; 209 struct rb_node *parent = NULL; 210 struct mlx5_fw_page *tfp; 211 212 while (*new) { 213 parent = *new; 214 tfp = rb_entry(parent, struct mlx5_fw_page, rb_node); 215 if (tfp->dma_addr < nfp->dma_addr) 216 new = &parent->rb_left; 217 else if (tfp->dma_addr > nfp->dma_addr) 218 new = &parent->rb_right; 219 else 220 return (-EEXIST); 221 } 222 223 rb_link_node(&nfp->rb_node, parent, new); 224 rb_insert_color(&nfp->rb_node, root); 225 return (0); 226} 227 228static struct mlx5_fw_page * 229mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr) 230{ 231 struct rb_root *root = &dev->priv.page_root; 232 struct rb_node *tmp = root->rb_node; 233 struct mlx5_fw_page *result = NULL; 234 struct mlx5_fw_page *tfp; 235 236 while (tmp) { 237 tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node); 238 if (tfp->dma_addr < addr) { 239 tmp = tmp->rb_left; 240 } else if (tfp->dma_addr > addr) { 241 tmp = tmp->rb_right; 242 } else { 243 rb_erase(&tfp->rb_node, &dev->priv.page_root); 244 result = tfp; 245 break; 246 } 247 } 248 return (result); 249} 250 251static int 252alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id) 253{ 254 struct mlx5_fw_page *fwp; 255 int err; 256 257 fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1); 258 if (fwp == NULL) 259 return (-ENOMEM); 260 261 fwp->func_id = func_id; 262 263 MLX5_DMA_LOCK(dev); 264 err = 
mlx5_insert_fw_page_locked(dev, fwp); 265 MLX5_DMA_UNLOCK(dev); 266 267 if (err != 0) { 268 mlx5_fwp_free(fwp); 269 } else { 270 /* make sure cached data is cleaned */ 271 mlx5_fwp_invalidate(fwp); 272 273 /* store DMA address */ 274 *addr = fwp->dma_addr; 275 } 276 return (err); 277} 278 279static void 280free_4k(struct mlx5_core_dev *dev, u64 addr) 281{ 282 struct mlx5_fw_page *fwp; 283 284 MLX5_DMA_LOCK(dev); 285 fwp = mlx5_remove_fw_page_locked(dev, addr); 286 MLX5_DMA_UNLOCK(dev); 287 288 if (fwp == NULL) { 289 mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n", (long long)addr); 290 return; 291 } 292 mlx5_fwp_free(fwp); 293} 294 295static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, 296 s32 *npages, int boot) 297{ 298 u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0}; 299 u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0}; 300 int err; 301 302 MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES); 303 MLX5_SET(query_pages_in, in, op_mod, boot ? 304 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : 305 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); 306 307 err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); 308 if (err) 309 return err; 310 311 *npages = MLX5_GET(query_pages_out, out, num_pages); 312 *func_id = MLX5_GET(query_pages_out, out, function_id); 313 314 return 0; 315} 316 317static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, 318 int notify_fail) 319{ 320 u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; 321 int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); 322 u64 addr; 323 int err; 324 u32 *in, *nin; 325 int i = 0; 326 327 inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); 328 in = mlx5_vzalloc(inlen); 329 if (!in) { 330 mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); 331 err = -ENOMEM; 332 goto out_alloc; 333 } 334 335 for (i = 0; i < npages; i++) { 336 err = alloc_4k(dev, &addr, func_id); 337 if (err) 338 goto out_alloc; 339 MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr); 340 } 341 342 
MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); 343 MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); 344 MLX5_SET(manage_pages_in, in, function_id, func_id); 345 MLX5_SET(manage_pages_in, in, input_num_entries, npages); 346 347 err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); 348 if (err) { 349 mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", 350 func_id, npages, err); 351 goto out_alloc; 352 } 353 dev->priv.fw_pages += npages; 354 dev->priv.pages_per_func[func_id] += npages; 355 356 mlx5_core_dbg(dev, "err %d\n", err); 357 358 goto out_free; 359 360out_alloc: 361 if (notify_fail) { 362 nin = mlx5_vzalloc(inlen); 363 if (!nin) 364 goto out_4k; 365 366 memset(&out, 0, sizeof(out)); 367 MLX5_SET(manage_pages_in, nin, opcode, MLX5_CMD_OP_MANAGE_PAGES); 368 MLX5_SET(manage_pages_in, nin, op_mod, MLX5_PAGES_CANT_GIVE); 369 MLX5_SET(manage_pages_in, nin, function_id, func_id); 370 if (mlx5_cmd_exec(dev, nin, inlen, out, sizeof(out))) 371 mlx5_core_warn(dev, "page notify failed\n"); 372 kvfree(nin); 373 } 374 375out_4k: 376 for (i--; i >= 0; i--) 377 free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i])); 378out_free: 379 kvfree(in); 380 return err; 381} 382 383static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, 384 int *nclaimed) 385{ 386 int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); 387 u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; 388 int num_claimed; 389 u32 *out; 390 int err; 391 int i; 392 393 if (nclaimed) 394 *nclaimed = 0; 395 396 outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); 397 out = mlx5_vzalloc(outlen); 398 if (!out) 399 return -ENOMEM; 400 401 MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); 402 MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); 403 MLX5_SET(manage_pages_in, in, function_id, func_id); 404 MLX5_SET(manage_pages_in, in, input_num_entries, npages); 405 406 mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); 407 err = 
mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); 408 if (err) { 409 mlx5_core_err(dev, "failed reclaiming pages\n"); 410 goto out_free; 411 } 412 413 num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries); 414 if (nclaimed) 415 *nclaimed = num_claimed; 416 417 dev->priv.fw_pages -= num_claimed; 418 dev->priv.pages_per_func[func_id] -= num_claimed; 419 for (i = 0; i < num_claimed; i++) 420 free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); 421 422out_free: 423 kvfree(out); 424 return err; 425} 426 427static void pages_work_handler(struct work_struct *work) 428{ 429 struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work); 430 struct mlx5_core_dev *dev = req->dev; 431 int err = 0; 432 433 if (req->npages < 0) 434 err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL); 435 else if (req->npages > 0) 436 err = give_pages(dev, req->func_id, req->npages, 1); 437 438 if (err) 439 mlx5_core_warn(dev, "%s fail %d\n", 440 req->npages < 0 ? "reclaim" : "give", err); 441 442 kfree(req); 443} 444 445void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, 446 s32 npages) 447{ 448 struct mlx5_pages_req *req; 449 450 req = kzalloc(sizeof(*req), GFP_ATOMIC); 451 if (!req) { 452 mlx5_core_warn(dev, "failed to allocate pages request\n"); 453 return; 454 } 455 456 req->dev = dev; 457 req->func_id = func_id; 458 req->npages = npages; 459 INIT_WORK(&req->work, pages_work_handler); 460 if (!queue_work(dev->priv.pg_wq, &req->work)) 461 mlx5_core_warn(dev, "failed to queue pages handler work\n"); 462} 463 464int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot) 465{ 466 u16 uninitialized_var(func_id); 467 s32 uninitialized_var(npages); 468 int err; 469 470 err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot); 471 if (err) 472 return err; 473 474 mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n", 475 npages, boot ? 
"boot" : "init", func_id); 476 477 return give_pages(dev, func_id, npages, 0); 478} 479 480enum { 481 MLX5_BLKS_FOR_RECLAIM_PAGES = 12 482}; 483 484s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev) 485{ 486 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 487 s64 prevpages = 0; 488 s64 npages = 0; 489 490 while (!time_after(jiffies, end)) { 491 /* exclude own function, VFs only */ 492 npages = dev->priv.fw_pages - dev->priv.pages_per_func[0]; 493 if (!npages) 494 break; 495 496 if (npages != prevpages) 497 end = end + msecs_to_jiffies(100); 498 499 prevpages = npages; 500 msleep(1); 501 } 502 503 if (npages) 504 mlx5_core_warn(dev, "FW did not return all VFs pages, will cause to memory leak\n"); 505 506 return -npages; 507} 508 509static int optimal_reclaimed_pages(void) 510{ 511 struct mlx5_cmd_prot_block *block; 512 struct mlx5_cmd_layout *lay; 513 int ret; 514 515 ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - 516 MLX5_ST_SZ_BYTES(manage_pages_out)) / 517 MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); 518 519 return ret; 520} 521 522int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) 523{ 524 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 525 struct mlx5_fw_page *fwp; 526 struct rb_node *p; 527 int nclaimed = 0; 528 int err; 529 530 do { 531 p = rb_first(&dev->priv.page_root); 532 if (p) { 533 fwp = rb_entry(p, struct mlx5_fw_page, rb_node); 534 if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 535 --dev->priv.fw_pages; 536 free_4k(dev, fwp->dma_addr); 537 nclaimed = 1; 538 } else { 539 err = reclaim_pages(dev, fwp->func_id, 540 optimal_reclaimed_pages(), 541 &nclaimed); 542 if (err) { 543 mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", 544 err); 545 return err; 546 } 547 } 548 549 if (nclaimed) 550 end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 551 } 552 if (time_after(jiffies, end)) { 553 mlx5_core_warn(dev, "FW did not return all pages. 
giving up...\n"); 554 break; 555 } 556 } while (p); 557 558 return 0; 559} 560 561void mlx5_pagealloc_init(struct mlx5_core_dev *dev) 562{ 563 564 dev->priv.page_root = RB_ROOT; 565} 566 567void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) 568{ 569 /* nothing */ 570} 571 572int mlx5_pagealloc_start(struct mlx5_core_dev *dev) 573{ 574 dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); 575 if (!dev->priv.pg_wq) 576 return -ENOMEM; 577 578 return 0; 579} 580 581void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) 582{ 583 destroy_workqueue(dev->priv.pg_wq); 584} 585