/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_core/mlx5_pagealloc.c 322151 2017-08-07 12:49:30Z hselasky $
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <dev/mlx5/driver.h>
#include "mlx5_core.h"

CTASSERT((uintptr_t)PAGE_MASK > (uintptr_t)PAGE_SIZE);

struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16	func_id;
	s32	npages;
	struct work_struct work;
};

struct mlx5_manage_pages_inbox {
	struct mlx5_inbox_hdr	hdr;
	__be16			rsvd;
	__be16			func_id;
	__be32			num_entries;
	__be64			pas[0];
};

struct mlx5_manage_pages_outbox {
	struct mlx5_outbox_hdr	hdr;
	__be32			num_entries;
	u8			rsvd[4];
	__be64			pas[0];
};

enum {
	MAX_RECLAIM_TIME_MSECS	= 5000,
};

static void
mlx5_fwp_load_mem_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mlx5_fw_page *fwp;
	uint8_t owned;

	fwp = (struct mlx5_fw_page *)arg;
	owned = MLX5_DMA_OWNED(fwp->dev);

	if (!owned)
		MLX5_DMA_LOCK(fwp->dev);

	if (error == 0) {
		KASSERT(nseg == 1, ("Number of segments is different from 1"));
		fwp->dma_addr = segs->ds_addr;
		fwp->load_done = MLX5_LOAD_ST_SUCCESS;
	} else {
		fwp->load_done = MLX5_LOAD_ST_FAILURE;
	}
	MLX5_DMA_DONE(fwp->dev);

	if (!owned)
		MLX5_DMA_UNLOCK(fwp->dev);
}

void
mlx5_fwp_flush(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--)
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREWRITE);
}

void
mlx5_fwp_invalidate(struct mlx5_fw_page *fwp)
{
	unsigned num = fwp->numpages;

	while (num--) {
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_sync(fwp[num].dev->cmd.dma_tag, fwp[num].dma_map, BUS_DMASYNC_PREREAD);
	}
}
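
/*
 * Illustrative usage sketch, not part of the driver: a consumer
 * allocates an array of firmware pages, makes its CPU writes visible
 * to the device with mlx5_fwp_flush(), and invalidates the pages with
 * mlx5_fwp_invalidate() before parsing anything the device wrote
 * back. Here "hw_post" stands in for a hypothetical routine that
 * hands the bus address to the hardware; the mlx5_fwp_*() helpers
 * are the ones defined in this file:
 *
 *	struct mlx5_fw_page *fwp;
 *
 *	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 2);
 *	if (fwp == NULL)
 *		return (-ENOMEM);
 *	memset(mlx5_fwp_get_virt(fwp, 0), 0, MLX5_ADAPTER_PAGE_SIZE);
 *	mlx5_fwp_flush(fwp);
 *	hw_post(mlx5_fwp_get_dma(fwp, 0));
 *	mlx5_fwp_invalidate(fwp);
 *	mlx5_fwp_free(fwp);
 */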

struct mlx5_fw_page *
mlx5_fwp_alloc(struct mlx5_core_dev *dev, gfp_t flags, unsigned num)
{
	struct mlx5_fw_page *fwp;
	unsigned x;
	int err;

	/* check for special case */
	if (num == 0) {
		fwp = kzalloc(sizeof(*fwp), flags);
		if (fwp != NULL)
			fwp->dev = dev;
		return (fwp);
	}

	/* we need sleeping context for this function */
	if (flags & M_NOWAIT)
		return (NULL);

	/* malloc(9) with M_WAITOK cannot fail, hence no NULL check */
	fwp = kzalloc(sizeof(*fwp) * num, flags);

	/* serialize loading the DMA map(s) */
	sx_xlock(&dev->cmd.dma_sx);

	for (x = 0; x != num; x++) {
		/* store pointer to MLX5 core device */
		fwp[x].dev = dev;
		/* store number of pages remaining in the array */
		fwp[x].numpages = num - x;

		/* allocate memory */
		err = bus_dmamem_alloc(dev->cmd.dma_tag, &fwp[x].virt_addr,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT, &fwp[x].dma_map);
		if (err != 0)
			goto failure;

		/* load memory into DMA */
		MLX5_DMA_LOCK(dev);
		err = bus_dmamap_load(
		    dev->cmd.dma_tag, fwp[x].dma_map, fwp[x].virt_addr,
		    MLX5_ADAPTER_PAGE_SIZE, &mlx5_fwp_load_mem_cb,
		    fwp + x, BUS_DMA_WAITOK | BUS_DMA_COHERENT);

		while (fwp[x].load_done == MLX5_LOAD_ST_NONE)
			MLX5_DMA_WAIT(dev);
		MLX5_DMA_UNLOCK(dev);

		/* check for error */
		if (fwp[x].load_done != MLX5_LOAD_ST_SUCCESS) {
			bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr,
			    fwp[x].dma_map);
			goto failure;
		}
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (fwp);

failure:
	while (x--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[x].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[x].virt_addr, fwp[x].dma_map);
	}
	sx_xunlock(&dev->cmd.dma_sx);
	return (NULL);
}

void
mlx5_fwp_free(struct mlx5_fw_page *fwp)
{
	struct mlx5_core_dev *dev;
	unsigned num;

	/* be NULL safe */
	if (fwp == NULL)
		return;

	/* check for special case */
	if (fwp->numpages == 0) {
		kfree(fwp);
		return;
	}

	num = fwp->numpages;
	dev = fwp->dev;

	while (num--) {
		bus_dmamap_unload(dev->cmd.dma_tag, fwp[num].dma_map);
		bus_dmamem_free(dev->cmd.dma_tag, fwp[num].virt_addr, fwp[num].dma_map);
	}

	kfree(fwp);
}

u64
mlx5_fwp_get_dma(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((fwp + index)->dma_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}

void *
mlx5_fwp_get_virt(struct mlx5_fw_page *fwp, size_t offset)
{
	size_t index = (offset / MLX5_ADAPTER_PAGE_SIZE);
	KASSERT(index < fwp->numpages, ("Invalid offset: %lld", (long long)offset));

	return ((char *)(fwp + index)->virt_addr + (offset % MLX5_ADAPTER_PAGE_SIZE));
}
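
/*
 * Note on the accessors above: each entry in the page array is a
 * separate MLX5_ADAPTER_PAGE_SIZE DMA allocation, so the pages are
 * not virtually contiguous and a byte offset must be split into a
 * page index plus an in-page remainder instead of being added to a
 * single base address. Assuming the usual 4K adapter page size,
 * offset 0x1010 into a three-page array resolves as:
 *
 *	index = 0x1010 / MLX5_ADAPTER_PAGE_SIZE;	index == 1
 *	rem   = 0x1010 % MLX5_ADAPTER_PAGE_SIZE;	rem == 0x10
 */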

static int
mlx5_insert_fw_page_locked(struct mlx5_core_dev *dev, struct mlx5_fw_page *nfp)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct mlx5_fw_page *tfp;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < nfp->dma_addr)
			new = &parent->rb_left;
		else if (tfp->dma_addr > nfp->dma_addr)
			new = &parent->rb_right;
		else
			return (-EEXIST);
	}

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	return (0);
}

static struct mlx5_fw_page *
mlx5_remove_fw_page_locked(struct mlx5_core_dev *dev, bus_addr_t addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct mlx5_fw_page *result = NULL;
	struct mlx5_fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct mlx5_fw_page, rb_node);
		if (tfp->dma_addr < addr) {
			tmp = tmp->rb_left;
		} else if (tfp->dma_addr > addr) {
			tmp = tmp->rb_right;
		} else {
			rb_erase(&tfp->rb_node, &dev->priv.page_root);
			result = tfp;
			break;
		}
	}
	return (result);
}

static int
alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
{
	struct mlx5_fw_page *fwp;
	int err;

	fwp = mlx5_fwp_alloc(dev, GFP_KERNEL, 1);
	if (fwp == NULL)
		return (-ENOMEM);

	fwp->func_id = func_id;

	MLX5_DMA_LOCK(dev);
	err = mlx5_insert_fw_page_locked(dev, fwp);
	MLX5_DMA_UNLOCK(dev);

	if (err != 0) {
		mlx5_fwp_free(fwp);
	} else {
		/* make sure cached data is cleaned */
		mlx5_fwp_invalidate(fwp);

		/* store DMA address */
		*addr = fwp->dma_addr;
	}
	return (err);
}

static void
free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct mlx5_fw_page *fwp;

	MLX5_DMA_LOCK(dev);
	fwp = mlx5_remove_fw_page_locked(dev, addr);
	MLX5_DMA_UNLOCK(dev);

	if (fwp == NULL) {
		mlx5_core_warn(dev, "Cannot free 4K page at 0x%llx\n", (long long)addr);
		return;
	}
	mlx5_fwp_free(fwp);
}

static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 in[MLX5_ST_SZ_DW(query_pages_in)];
	u32 out[MLX5_ST_SZ_DW(query_pages_out)];
	int err;

	memset(in, 0, sizeof(in));

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod,
		 boot ? MLX5_BOOT_PAGES : MLX5_INIT_PAGES);

	memset(out, 0, sizeof(out));
	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return 0;
}
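
/*
 * The MANAGE_PAGES command built by give_pages() below moves 4K
 * pages to the firmware. As a sketch, give_pages(dev, func_id, 2,
 * notify_fail) fills the inbox like this (all fields big endian on
 * the wire):
 *
 *	in->hdr.opcode  = MLX5_CMD_OP_MANAGE_PAGES;
 *	in->hdr.opmod   = MLX5_PAGES_GIVE;
 *	in->func_id     = func_id;
 *	in->num_entries = 2;
 *	in->pas[0]      = bus address of the first 4K page;
 *	in->pas[1]      = bus address of the second 4K page;
 *
 * reclaim_pages() sends the same layout with MLX5_PAGES_TAKE and no
 * "pas" entries; the firmware returns the reclaimed addresses in the
 * outbox "pas" array instead.
 */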

static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	struct mlx5_manage_pages_inbox *in;
	struct mlx5_manage_pages_outbox out;
	struct mlx5_manage_pages_inbox *nin;
	int inlen;
	u64 addr;
	int err;
	int i = 0;

	inlen = sizeof(*in) + npages * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		err = -ENOMEM;
		goto out_alloc;
	}
	memset(&out, 0, sizeof(out));

	for (i = 0; i < npages; i++) {
		err = alloc_4k(dev, &addr, func_id);
		if (err)
			goto out_alloc;
		in->pas[i] = cpu_to_be64(addr);
	}

	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
	in->func_id = cpu_to_be16(func_id);
	in->num_entries = cpu_to_be32(npages);
	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_alloc;
	}
	dev->priv.fw_pages += npages;
	dev->priv.pages_per_func[func_id] += npages;

	if (out.hdr.status) {
		err = mlx5_cmd_status_to_err(&out.hdr);
		if (err) {
			mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n",
				       func_id, npages, out.hdr.status);
			goto out_alloc;
		}
	}

	mlx5_core_dbg(dev, "err %d\n", err);

	goto out_free;

out_alloc:
	if (notify_fail) {
		nin = kzalloc(sizeof(*nin), GFP_KERNEL);
		if (!nin)
			goto out_4k;

		memset(&out, 0, sizeof(out));
		nin->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
		nin->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE);
		nin->func_id = cpu_to_be16(func_id);
		if (mlx5_cmd_exec(dev, nin, sizeof(*nin), &out, sizeof(out)))
			mlx5_core_warn(dev, "page notify failed\n");
		kfree(nin);
	}

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, be64_to_cpu(in->pas[i]));
out_free:
	kvfree(in);
	return err;
}

static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	struct mlx5_manage_pages_inbox in;
	struct mlx5_manage_pages_outbox *out;
	int num_claimed;
	int outlen;
	u64 addr;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	memset(&in, 0, sizeof(in));
	outlen = sizeof(*out) + npages * sizeof(out->pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
	in.func_id = cpu_to_be16(func_id);
	in.num_entries = cpu_to_be32(npages);
	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages\n");
		goto out_free;
	}

	if (out->hdr.status) {
		err = mlx5_cmd_status_to_err(&out->hdr);
		goto out_free;
	}

	num_claimed = be32_to_cpu(out->num_entries);
	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	dev->priv.pages_per_func[func_id] -= num_claimed;
	for (i = 0; i < num_claimed; i++) {
		addr = be64_to_cpu(out->pas[i]);
		free_4k(dev, addr);
	}

out_free:
	kvfree(out);
	return err;
}

static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
			       req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	if (!queue_work(dev->priv.pg_wq, &req->work))
		mlx5_core_warn(dev, "failed to queue pages handler work\n");
}
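
/*
 * Page requests arrive asynchronously from firmware events, so
 * mlx5_core_req_pages_handler() above only allocates (GFP_ATOMIC)
 * and queues a request; the sleeping work happens later on the
 * "mlx5_page_allocator" workqueue. A positive npages means the
 * firmware wants more pages, a negative one that it is returning
 * pages:
 *
 *	firmware event (func_id, npages)
 *	  -> mlx5_core_req_pages_handler()	no sleeping here
 *	  -> pages_work_handler()		may sleep
 *	  -> give_pages() or reclaim_pages()
 */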
"boot" : "init", func_id); 511 512 return give_pages(dev, func_id, npages, 0); 513} 514 515enum { 516 MLX5_BLKS_FOR_RECLAIM_PAGES = 12 517}; 518 519s64 mlx5_wait_for_reclaim_vfs_pages(struct mlx5_core_dev *dev) 520{ 521 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 522 s64 prevpages = 0; 523 s64 npages = 0; 524 525 while (!time_after(jiffies, end)) { 526 /* exclude own function, VFs only */ 527 npages = dev->priv.fw_pages - dev->priv.pages_per_func[0]; 528 if (!npages) 529 break; 530 531 if (npages != prevpages) 532 end = end + msecs_to_jiffies(100); 533 534 prevpages = npages; 535 msleep(1); 536 } 537 538 if (npages) 539 mlx5_core_warn(dev, "FW did not return all VFs pages, will cause to memory leak\n"); 540 541 return -npages; 542} 543 544static int optimal_reclaimed_pages(void) 545{ 546 struct mlx5_cmd_prot_block *block; 547 struct mlx5_cmd_layout *lay; 548 int ret; 549 550 ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - 551 sizeof(struct mlx5_manage_pages_outbox)) / 552 FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]); 553 554 return ret; 555} 556 557int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) 558{ 559 int end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 560 struct mlx5_fw_page *fwp; 561 struct rb_node *p; 562 int nclaimed = 0; 563 int err; 564 565 do { 566 p = rb_first(&dev->priv.page_root); 567 if (p) { 568 fwp = rb_entry(p, struct mlx5_fw_page, rb_node); 569 if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 570 --dev->priv.fw_pages; 571 free_4k(dev, fwp->dma_addr); 572 nclaimed = 1; 573 } else { 574 err = reclaim_pages(dev, fwp->func_id, 575 optimal_reclaimed_pages(), 576 &nclaimed); 577 if (err) { 578 mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", 579 err); 580 return err; 581 } 582 } 583 584 if (nclaimed) 585 end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS); 586 } 587 if (time_after(jiffies, end)) { 588 mlx5_core_warn(dev, "FW did not return all pages. giving up...\n"); 589 break; 590 } 591 } while (p); 592 593 return 0; 594} 595 596void mlx5_pagealloc_init(struct mlx5_core_dev *dev) 597{ 598 599 dev->priv.page_root = RB_ROOT; 600} 601 602void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev) 603{ 604 /* nothing */ 605} 606 607int mlx5_pagealloc_start(struct mlx5_core_dev *dev) 608{ 609 dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator"); 610 if (!dev->priv.pg_wq) 611 return -ENOMEM; 612 613 return 0; 614} 615 616void mlx5_pagealloc_stop(struct mlx5_core_dev *dev) 617{ 618 destroy_workqueue(dev->priv.pg_wq); 619} 620