1/****************************************************************************** 2 3Copyright (c) 2006-2013, Myricom Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD$"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50#include <net/if.h> 51#include <net/if_arp.h> 52#include <net/ethernet.h> 53#include <net/if_dl.h> 54#include <net/if_media.h> 55 56#include <net/bpf.h> 57 58#include <net/if_types.h> 59#include <net/if_vlan_var.h> 60#include <net/zlib.h> 61 62#include <netinet/in_systm.h> 63#include <netinet/in.h> 64#include <netinet/ip.h> 65#include <netinet/ip6.h> 66#include <netinet/tcp.h> 67#include <netinet/tcp_lro.h> 68#include <netinet6/ip6_var.h> 69 70#include <machine/bus.h> 71#include <machine/in_cksum.h> 72#include <machine/resource.h> 73#include <sys/bus.h> 74#include <sys/rman.h> 75#include <sys/smp.h> 76 77#include <dev/pci/pcireg.h> 78#include <dev/pci/pcivar.h> 79#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 80 81#include <vm/vm.h> /* for pmap_mapdev() */ 82#include <vm/pmap.h> 83 84#if defined(__i386) || defined(__amd64) 85#include <machine/specialreg.h> 86#endif 87 88#include <dev/mxge/mxge_mcp.h> 89#include <dev/mxge/mcp_gen_header.h> 90/*#define MXGE_FAKE_IFP*/ 91#include <dev/mxge/if_mxge_var.h> 92#ifdef IFNET_BUF_RING 93#include <sys/buf_ring.h> 94#endif 95 96#include "opt_inet.h" 97#include "opt_inet6.h" 98 99/* tunable params */ 100static int mxge_nvidia_ecrc_enable = 1; 101static int mxge_force_firmware = 0; 102static int mxge_intr_coal_delay = 30; 103static int mxge_deassert_wait = 1; 104static int mxge_flow_control = 1; 105static int mxge_verbose = 0; 106static int mxge_ticks; 107static int mxge_max_slices = 1; 108static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 109static int mxge_always_promisc = 0; 110static int 
mxge_initial_mtu = ETHERMTU_JUMBO; 111static int mxge_throttle = 0; 112static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 113static char *mxge_fw_aligned = "mxge_eth_z8e"; 114static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 115static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 116 117static int mxge_probe(device_t dev); 118static int mxge_attach(device_t dev); 119static int mxge_detach(device_t dev); 120static int mxge_shutdown(device_t dev); 121static void mxge_intr(void *arg); 122 123static device_method_t mxge_methods[] = 124{ 125 /* Device interface */ 126 DEVMETHOD(device_probe, mxge_probe), 127 DEVMETHOD(device_attach, mxge_attach), 128 DEVMETHOD(device_detach, mxge_detach), 129 DEVMETHOD(device_shutdown, mxge_shutdown), 130 {0, 0} 131}; 132 133static driver_t mxge_driver = 134{ 135 "mxge", 136 mxge_methods, 137 sizeof(mxge_softc_t), 138}; 139 140static devclass_t mxge_devclass; 141 142/* Declare ourselves to be a child of the PCI bus.*/ 143DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 144MODULE_DEPEND(mxge, firmware, 1, 1, 1); 145MODULE_DEPEND(mxge, zlib, 1, 1, 1); 146 147static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 148static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 149static int mxge_close(mxge_softc_t *sc, int down); 150static int mxge_open(mxge_softc_t *sc); 151static void mxge_tick(void *arg); 152 153static int 154mxge_probe(device_t dev) 155{ 156 int rev; 157 158 159 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 160 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 161 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 162 rev = pci_get_revid(dev); 163 switch (rev) { 164 case MXGE_PCI_REV_Z8E: 165 device_set_desc(dev, "Myri10G-PCIE-8A"); 166 break; 167 case MXGE_PCI_REV_Z8ES: 168 device_set_desc(dev, "Myri10G-PCIE-8B"); 169 break; 170 default: 171 device_set_desc(dev, "Myri10G-PCIE-8??"); 172 device_printf(dev, "Unrecognized rev %d NIC\n", 173 rev); 174 break; 175 } 176 return 0; 177 } 178 return ENXIO; 179} 180 181static void 182mxge_enable_wc(mxge_softc_t *sc) 183{ 184#if defined(__i386) || defined(__amd64) 185 vm_offset_t len; 186 int err; 187 188 sc->wc = 1; 189 len = rman_get_size(sc->mem_res); 190 err = pmap_change_attr((vm_offset_t) sc->sram, 191 len, PAT_WRITE_COMBINING); 192 if (err != 0) { 193 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 194 err); 195 sc->wc = 0; 196 } 197#endif 198} 199 200 201/* callback to get our DMA address */ 202static void 203mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 204 int error) 205{ 206 if (error == 0) { 207 *(bus_addr_t *) arg = segs->ds_addr; 208 } 209} 210 211static int 212mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 213 bus_size_t alignment) 214{ 215 int err; 216 device_t dev = sc->dev; 217 bus_size_t boundary, maxsegsize; 218 219 if (bytes > 4096 && alignment == 4096) { 220 boundary = 0; 221 maxsegsize = bytes; 222 } else { 223 boundary = 4096; 224 maxsegsize = 4096; 225 } 226 227 /* allocate DMAable memory tags */ 228 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 229 alignment, /* alignment */ 230 boundary, /* boundary */ 231 BUS_SPACE_MAXADDR, /* low */ 232 BUS_SPACE_MAXADDR, /* high */ 233 NULL, NULL, /* filter */ 234 bytes, /* maxsize */ 235 1, /* num segs */ 236 maxsegsize, /* maxsegsize */ 237 BUS_DMA_COHERENT, /* flags */ 238 NULL, NULL, /* lock */ 239 &dma->dmat); /* tag */ 240 if (err != 0) { 241 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 242 return err; 243 } 244 245 
/* allocate DMAable memory & map */ 246 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 247 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 248 | BUS_DMA_ZERO), &dma->map); 249 if (err != 0) { 250 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 251 goto abort_with_dmat; 252 } 253 254 /* load the memory */ 255 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 256 mxge_dmamap_callback, 257 (void *)&dma->bus_addr, 0); 258 if (err != 0) { 259 device_printf(dev, "couldn't load map (err = %d)\n", err); 260 goto abort_with_mem; 261 } 262 return 0; 263 264abort_with_mem: 265 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 266abort_with_dmat: 267 (void)bus_dma_tag_destroy(dma->dmat); 268 return err; 269} 270 271 272static void 273mxge_dma_free(mxge_dma_t *dma) 274{ 275 bus_dmamap_unload(dma->dmat, dma->map); 276 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 277 (void)bus_dma_tag_destroy(dma->dmat); 278} 279 280/* 281 * The eeprom strings on the lanaiX have the format 282 * SN=x\0 283 * MAC=x:x:x:x:x:x\0 284 * PC=text\0 285 */ 286 287static int 288mxge_parse_strings(mxge_softc_t *sc) 289{ 290 char *ptr; 291 int i, found_mac, found_sn2; 292 char *endptr; 293 294 ptr = sc->eeprom_strings; 295 found_mac = 0; 296 found_sn2 = 0; 297 while (*ptr != '\0') { 298 if (strncmp(ptr, "MAC=", 4) == 0) { 299 ptr += 4; 300 for (i = 0;;) { 301 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 302 if (endptr - ptr != 2) 303 goto abort; 304 ptr = endptr; 305 if (++i == 6) 306 break; 307 if (*ptr++ != ':') 308 goto abort; 309 } 310 found_mac = 1; 311 } else if (strncmp(ptr, "PC=", 3) == 0) { 312 ptr += 3; 313 strlcpy(sc->product_code_string, ptr, 314 sizeof(sc->product_code_string)); 315 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 316 ptr += 3; 317 strlcpy(sc->serial_number_string, ptr, 318 sizeof(sc->serial_number_string)); 319 } else if (strncmp(ptr, "SN2=", 4) == 0) { 320 /* SN2 takes precedence over SN */ 321 ptr += 4; 322 found_sn2 = 1; 323 strlcpy(sc->serial_number_string, ptr, 324 sizeof(sc->serial_number_string)); 325 } 326 while (*ptr++ != '\0') {} 327 } 328 329 if (found_mac) 330 return 0; 331 332 abort: 333 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 334 335 return ENXIO; 336} 337 338#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 339static void 340mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 341{ 342 uint32_t val; 343 unsigned long base, off; 344 char *va, *cfgptr; 345 device_t pdev, mcp55; 346 uint16_t vendor_id, device_id, word; 347 uintptr_t bus, slot, func, ivend, idev; 348 uint32_t *ptr32; 349 350 351 if (!mxge_nvidia_ecrc_enable) 352 return; 353 354 pdev = device_get_parent(device_get_parent(sc->dev)); 355 if (pdev == NULL) { 356 device_printf(sc->dev, "could not find parent?\n"); 357 return; 358 } 359 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 360 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 361 362 if (vendor_id != 0x10de) 363 return; 364 365 base = 0; 366 367 if (device_id == 0x005d) { 368 /* ck804, base address is magic */ 369 base = 0xe0000000UL; 370 } else if (device_id >= 0x0374 && device_id <= 0x378) { 371 /* mcp55, base address stored in chipset */ 372 mcp55 = pci_find_bsf(0, 0, 0); 373 if (mcp55 && 374 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 375 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 376 word = pci_read_config(mcp55, 0x90, 2); 377 base = ((unsigned long)word & 0x7ffeU) << 25; 378 } 379 } 380 if (!base) 381 return; 382 383 /* XXXX 384 Test below is commented because it is believed that doing 
385 config read/write beyond 0xff will access the config space 386 for the next larger function. Uncomment this and remove 387 the hacky pmap_mapdev() way of accessing config space when 388 FreeBSD grows support for extended pcie config space access 389 */ 390#if 0 391 /* See if we can, by some miracle, access the extended 392 config space */ 393 val = pci_read_config(pdev, 0x178, 4); 394 if (val != 0xffffffff) { 395 val |= 0x40; 396 pci_write_config(pdev, 0x178, val, 4); 397 return; 398 } 399#endif 400 /* Rather than using normal pci config space writes, we must 401 * map the Nvidia config space ourselves. This is because on 402 * opteron/nvidia class machine the 0xe000000 mapping is 403 * handled by the nvidia chipset, that means the internal PCI 404 * device (the on-chip northbridge), or the amd-8131 bridge 405 * and things behind them are not visible by this method. 406 */ 407 408 BUS_READ_IVAR(device_get_parent(pdev), pdev, 409 PCI_IVAR_BUS, &bus); 410 BUS_READ_IVAR(device_get_parent(pdev), pdev, 411 PCI_IVAR_SLOT, &slot); 412 BUS_READ_IVAR(device_get_parent(pdev), pdev, 413 PCI_IVAR_FUNCTION, &func); 414 BUS_READ_IVAR(device_get_parent(pdev), pdev, 415 PCI_IVAR_VENDOR, &ivend); 416 BUS_READ_IVAR(device_get_parent(pdev), pdev, 417 PCI_IVAR_DEVICE, &idev); 418 419 off = base 420 + 0x00100000UL * (unsigned long)bus 421 + 0x00001000UL * (unsigned long)(func 422 + 8 * slot); 423 424 /* map it into the kernel */ 425 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 426 427 428 if (va == NULL) { 429 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 430 return; 431 } 432 /* get a pointer to the config space mapped into the kernel */ 433 cfgptr = va + (off & PAGE_MASK); 434 435 /* make sure that we can really access it */ 436 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 437 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 438 if (! (vendor_id == ivend && device_id == idev)) { 439 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 440 vendor_id, device_id); 441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 442 return; 443 } 444 445 ptr32 = (uint32_t*)(cfgptr + 0x178); 446 val = *ptr32; 447 448 if (val == 0xffffffff) { 449 device_printf(sc->dev, "extended mapping failed\n"); 450 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 451 return; 452 } 453 *ptr32 = val | 0x40; 454 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 455 if (mxge_verbose) 456 device_printf(sc->dev, 457 "Enabled ECRC on upstream Nvidia bridge " 458 "at %d:%d:%d\n", 459 (int)bus, (int)slot, (int)func); 460 return; 461} 462#else 463static void 464mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 465{ 466 device_printf(sc->dev, 467 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 468 return; 469} 470#endif 471 472 473static int 474mxge_dma_test(mxge_softc_t *sc, int test_type) 475{ 476 mxge_cmd_t cmd; 477 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 478 int status; 479 uint32_t len; 480 char *test = " "; 481 482 483 /* Run a small DMA test. 484 * The magic multipliers to the length tell the firmware 485 * to do DMA read, write, or read+write tests. The 486 * results are returned in cmd.data0. The upper 16 487 * bits of the return is the number of transfers completed. 488 * The lower 16 bits is the time in 0.5us ticks that the 489 * transfers took to complete. 
490 */ 491 492 len = sc->tx_boundary; 493 494 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 495 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 496 cmd.data2 = len * 0x10000; 497 status = mxge_send_cmd(sc, test_type, &cmd); 498 if (status != 0) { 499 test = "read"; 500 goto abort; 501 } 502 sc->read_dma = ((cmd.data0>>16) * len * 2) / 503 (cmd.data0 & 0xffff); 504 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 505 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 506 cmd.data2 = len * 0x1; 507 status = mxge_send_cmd(sc, test_type, &cmd); 508 if (status != 0) { 509 test = "write"; 510 goto abort; 511 } 512 sc->write_dma = ((cmd.data0>>16) * len * 2) / 513 (cmd.data0 & 0xffff); 514 515 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 516 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 517 cmd.data2 = len * 0x10001; 518 status = mxge_send_cmd(sc, test_type, &cmd); 519 if (status != 0) { 520 test = "read/write"; 521 goto abort; 522 } 523 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 524 (cmd.data0 & 0xffff); 525 526abort: 527 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 528 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 529 test, status); 530 531 return status; 532} 533 534/* 535 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 536 * when the PCI-E Completion packets are aligned on an 8-byte 537 * boundary. Some PCI-E chip sets always align Completion packets; on 538 * the ones that do not, the alignment can be enforced by enabling 539 * ECRC generation (if supported). 540 * 541 * When PCI-E Completion packets are not aligned, it is actually more 542 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 543 * 544 * If the driver can neither enable ECRC nor verify that it has 545 * already been enabled, then it must use a firmware image which works 546 * around unaligned completion packets (ethp_z8e.dat), and it should 547 * also ensure that it never gives the device a Read-DMA which is 548 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 549 * enabled, then the driver should use the aligned (eth_z8e.dat) 550 * firmware image, and set tx_boundary to 4KB. 551 */ 552 553static int 554mxge_firmware_probe(mxge_softc_t *sc) 555{ 556 device_t dev = sc->dev; 557 int reg, status; 558 uint16_t pectl; 559 560 sc->tx_boundary = 4096; 561 /* 562 * Verify the max read request size was set to 4KB 563 * before trying the test with 4KB. 564 */ 565 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 566 pectl = pci_read_config(dev, reg + 0x8, 2); 567 if ((pectl & (5 << 12)) != (5 << 12)) { 568 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 569 pectl); 570 sc->tx_boundary = 2048; 571 } 572 } 573 574 /* 575 * load the optimized firmware (which assumes aligned PCIe 576 * completions) in order to see if it works on this host. 577 */ 578 sc->fw_name = mxge_fw_aligned; 579 status = mxge_load_firmware(sc, 1); 580 if (status != 0) { 581 return status; 582 } 583 584 /* 585 * Enable ECRC if possible 586 */ 587 mxge_enable_nvidia_ecrc(sc); 588 589 /* 590 * Run a DMA test which watches for unaligned completions and 591 * aborts on the first one seen. Not required on Z8ES or newer. 592 */ 593 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 594 return 0; 595 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 596 if (status == 0) 597 return 0; /* keep the aligned firmware */ 598 599 if (status != E2BIG) 600 device_printf(dev, "DMA test failed: %d\n", status); 601 if (status == ENOSYS) 602 device_printf(dev, "Falling back to ethp! 
" 603 "Please install up to date fw\n"); 604 return status; 605} 606 607static int 608mxge_select_firmware(mxge_softc_t *sc) 609{ 610 int aligned = 0; 611 int force_firmware = mxge_force_firmware; 612 613 if (sc->throttle) 614 force_firmware = sc->throttle; 615 616 if (force_firmware != 0) { 617 if (force_firmware == 1) 618 aligned = 1; 619 else 620 aligned = 0; 621 if (mxge_verbose) 622 device_printf(sc->dev, 623 "Assuming %s completions (forced)\n", 624 aligned ? "aligned" : "unaligned"); 625 goto abort; 626 } 627 628 /* if the PCIe link width is 4 or less, we can use the aligned 629 firmware and skip any checks */ 630 if (sc->link_width != 0 && sc->link_width <= 4) { 631 device_printf(sc->dev, 632 "PCIe x%d Link, expect reduced performance\n", 633 sc->link_width); 634 aligned = 1; 635 goto abort; 636 } 637 638 if (0 == mxge_firmware_probe(sc)) 639 return 0; 640 641abort: 642 if (aligned) { 643 sc->fw_name = mxge_fw_aligned; 644 sc->tx_boundary = 4096; 645 } else { 646 sc->fw_name = mxge_fw_unaligned; 647 sc->tx_boundary = 2048; 648 } 649 return (mxge_load_firmware(sc, 0)); 650} 651 652static int 653mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 654{ 655 656 657 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 658 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 659 be32toh(hdr->mcp_type)); 660 return EIO; 661 } 662 663 /* save firmware version for sysctl */ 664 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 665 if (mxge_verbose) 666 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 667 668 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 669 &sc->fw_ver_minor, &sc->fw_ver_tiny); 670 671 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 672 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 673 device_printf(sc->dev, "Found firmware version %s\n", 674 sc->fw_version); 675 device_printf(sc->dev, "Driver needs %d.%d\n", 676 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 677 return EINVAL; 678 } 679 return 0; 680 681} 682 683static void * 684z_alloc(void *nil, u_int items, u_int size) 685{ 686 void *ptr; 687 688 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 689 return ptr; 690} 691 692static void 693z_free(void *nil, void *ptr) 694{ 695 free(ptr, M_TEMP); 696} 697 698 699static int 700mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 701{ 702 z_stream zs; 703 char *inflate_buffer; 704 const struct firmware *fw; 705 const mcp_gen_header_t *hdr; 706 unsigned hdr_offset; 707 int status; 708 unsigned int i; 709 char dummy; 710 size_t fw_len; 711 712 fw = firmware_get(sc->fw_name); 713 if (fw == NULL) { 714 device_printf(sc->dev, "Could not find firmware image %s\n", 715 sc->fw_name); 716 return ENOENT; 717 } 718 719 720 721 /* setup zlib and decompress f/w */ 722 bzero(&zs, sizeof (zs)); 723 zs.zalloc = z_alloc; 724 zs.zfree = z_free; 725 status = inflateInit(&zs); 726 if (status != Z_OK) { 727 status = EIO; 728 goto abort_with_fw; 729 } 730 731 /* the uncompressed size is stored as the firmware version, 732 which would otherwise go unused */ 733 fw_len = (size_t) fw->version; 734 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 735 if (inflate_buffer == NULL) 736 goto abort_with_zs; 737 zs.avail_in = fw->datasize; 738 zs.next_in = __DECONST(char *, fw->data); 739 zs.avail_out = fw_len; 740 zs.next_out = inflate_buffer; 741 status = inflate(&zs, Z_FINISH); 742 if (status != Z_STREAM_END) { 743 device_printf(sc->dev, "zlib %d\n", status); 744 status = EIO; 745 goto abort_with_buffer; 746 } 747 748 /* check id */ 749 hdr_offset = 
htobe32(*(const uint32_t *) 750 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 751 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 752 device_printf(sc->dev, "Bad firmware file"); 753 status = EIO; 754 goto abort_with_buffer; 755 } 756 hdr = (const void*)(inflate_buffer + hdr_offset); 757 758 status = mxge_validate_firmware(sc, hdr); 759 if (status != 0) 760 goto abort_with_buffer; 761 762 /* Copy the inflated firmware to NIC SRAM. */ 763 for (i = 0; i < fw_len; i += 256) { 764 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 765 inflate_buffer + i, 766 min(256U, (unsigned)(fw_len - i))); 767 wmb(); 768 dummy = *sc->sram; 769 wmb(); 770 } 771 772 *limit = fw_len; 773 status = 0; 774abort_with_buffer: 775 free(inflate_buffer, M_TEMP); 776abort_with_zs: 777 inflateEnd(&zs); 778abort_with_fw: 779 firmware_put(fw, FIRMWARE_UNLOAD); 780 return status; 781} 782 783/* 784 * Enable or disable periodic RDMAs from the host to make certain 785 * chipsets resend dropped PCIe messages 786 */ 787 788static void 789mxge_dummy_rdma(mxge_softc_t *sc, int enable) 790{ 791 char buf_bytes[72]; 792 volatile uint32_t *confirm; 793 volatile char *submit; 794 uint32_t *buf, dma_low, dma_high; 795 int i; 796 797 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 798 799 /* clear confirmation addr */ 800 confirm = (volatile uint32_t *)sc->cmd; 801 *confirm = 0; 802 wmb(); 803 804 /* send an rdma command to the PCIe engine, and wait for the 805 response in the confirmation address. The firmware should 806 write a -1 there to indicate it is alive and well 807 */ 808 809 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 810 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 811 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 812 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 813 buf[2] = htobe32(0xffffffff); /* confirm data */ 814 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 815 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 816 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 817 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 818 buf[5] = htobe32(enable); /* enable? */ 819 820 821 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 822 823 mxge_pio_copy(submit, buf, 64); 824 wmb(); 825 DELAY(1000); 826 wmb(); 827 i = 0; 828 while (*confirm != 0xffffffff && i < 20) { 829 DELAY(1000); 830 i++; 831 } 832 if (*confirm != 0xffffffff) { 833 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 834 (enable ? 
"enable" : "disable"), confirm, 835 *confirm); 836 } 837 return; 838} 839 840static int 841mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 842{ 843 mcp_cmd_t *buf; 844 char buf_bytes[sizeof(*buf) + 8]; 845 volatile mcp_cmd_response_t *response = sc->cmd; 846 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 847 uint32_t dma_low, dma_high; 848 int err, sleep_total = 0; 849 850 /* ensure buf is aligned to 8 bytes */ 851 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 852 853 buf->data0 = htobe32(data->data0); 854 buf->data1 = htobe32(data->data1); 855 buf->data2 = htobe32(data->data2); 856 buf->cmd = htobe32(cmd); 857 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 858 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 859 860 buf->response_addr.low = htobe32(dma_low); 861 buf->response_addr.high = htobe32(dma_high); 862 mtx_lock(&sc->cmd_mtx); 863 response->result = 0xffffffff; 864 wmb(); 865 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 866 867 /* wait up to 20ms */ 868 err = EAGAIN; 869 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 870 bus_dmamap_sync(sc->cmd_dma.dmat, 871 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 872 wmb(); 873 switch (be32toh(response->result)) { 874 case 0: 875 data->data0 = be32toh(response->data); 876 err = 0; 877 break; 878 case 0xffffffff: 879 DELAY(1000); 880 break; 881 case MXGEFW_CMD_UNKNOWN: 882 err = ENOSYS; 883 break; 884 case MXGEFW_CMD_ERROR_UNALIGNED: 885 err = E2BIG; 886 break; 887 case MXGEFW_CMD_ERROR_BUSY: 888 err = EBUSY; 889 break; 890 case MXGEFW_CMD_ERROR_I2C_ABSENT: 891 err = ENXIO; 892 break; 893 default: 894 device_printf(sc->dev, 895 "mxge: command %d " 896 "failed, result = %d\n", 897 cmd, be32toh(response->result)); 898 err = ENXIO; 899 break; 900 } 901 if (err != EAGAIN) 902 break; 903 } 904 if (err == EAGAIN) 905 device_printf(sc->dev, "mxge: command %d timed out" 906 "result = %d\n", 907 cmd, be32toh(response->result)); 908 mtx_unlock(&sc->cmd_mtx); 909 return err; 910} 911 912static int 913mxge_adopt_running_firmware(mxge_softc_t *sc) 914{ 915 struct mcp_gen_header *hdr; 916 const size_t bytes = sizeof (struct mcp_gen_header); 917 size_t hdr_offset; 918 int status; 919 920 /* find running firmware header */ 921 hdr_offset = htobe32(*(volatile uint32_t *) 922 (sc->sram + MCP_HEADER_PTR_OFFSET)); 923 924 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 925 device_printf(sc->dev, 926 "Running firmware has bad header offset (%d)\n", 927 (int)hdr_offset); 928 return EIO; 929 } 930 931 /* copy header of running firmware from SRAM to host memory to 932 * validate firmware */ 933 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 934 if (hdr == NULL) { 935 device_printf(sc->dev, "could not malloc firmware hdr\n"); 936 return ENOMEM; 937 } 938 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 939 rman_get_bushandle(sc->mem_res), 940 hdr_offset, (char *)hdr, bytes); 941 status = mxge_validate_firmware(sc, hdr); 942 free(hdr, M_DEVBUF); 943 944 /* 945 * check to see if adopted firmware has bug where adopting 946 * it will cause broadcasts to be filtered unless the NIC 947 * is kept in ALLMULTI mode 948 */ 949 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 950 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 951 sc->adopted_rx_filter_bug = 1; 952 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 953 "working around rx filter bug\n", 954 sc->fw_ver_major, sc->fw_ver_minor, 955 sc->fw_ver_tiny); 956 } 957 958 return status; 959} 960 961 962static int 
963mxge_load_firmware(mxge_softc_t *sc, int adopt) 964{ 965 volatile uint32_t *confirm; 966 volatile char *submit; 967 char buf_bytes[72]; 968 uint32_t *buf, size, dma_low, dma_high; 969 int status, i; 970 971 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 972 973 size = sc->sram_size; 974 status = mxge_load_firmware_helper(sc, &size); 975 if (status) { 976 if (!adopt) 977 return status; 978 /* Try to use the currently running firmware, if 979 it is new enough */ 980 status = mxge_adopt_running_firmware(sc); 981 if (status) { 982 device_printf(sc->dev, 983 "failed to adopt running firmware\n"); 984 return status; 985 } 986 device_printf(sc->dev, 987 "Successfully adopted running firmware\n"); 988 if (sc->tx_boundary == 4096) { 989 device_printf(sc->dev, 990 "Using firmware currently running on NIC" 991 ". For optimal\n"); 992 device_printf(sc->dev, 993 "performance consider loading optimized " 994 "firmware\n"); 995 } 996 sc->fw_name = mxge_fw_unaligned; 997 sc->tx_boundary = 2048; 998 return 0; 999 } 1000 /* clear confirmation addr */ 1001 confirm = (volatile uint32_t *)sc->cmd; 1002 *confirm = 0; 1003 wmb(); 1004 /* send a reload command to the bootstrap MCP, and wait for the 1005 response in the confirmation address. The firmware should 1006 write a -1 there to indicate it is alive and well 1007 */ 1008 1009 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1010 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1011 1012 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1013 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1014 buf[2] = htobe32(0xffffffff); /* confirm data */ 1015 1016 /* FIX: All newest firmware should un-protect the bottom of 1017 the sram before handoff. However, the very first interfaces 1018 do not. Therefore the handoff copy must skip the first 8 bytes 1019 */ 1020 /* where the code starts*/ 1021 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1022 buf[4] = htobe32(size - 8); /* length of code */ 1023 buf[5] = htobe32(8); /* where to copy to */ 1024 buf[6] = htobe32(0); /* where to jump to */ 1025 1026 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1027 mxge_pio_copy(submit, buf, 64); 1028 wmb(); 1029 DELAY(1000); 1030 wmb(); 1031 i = 0; 1032 while (*confirm != 0xffffffff && i < 20) { 1033 DELAY(1000*10); 1034 i++; 1035 bus_dmamap_sync(sc->cmd_dma.dmat, 1036 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1037 } 1038 if (*confirm != 0xffffffff) { 1039 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1040 confirm, *confirm); 1041 1042 return ENXIO; 1043 } 1044 return 0; 1045} 1046 1047static int 1048mxge_update_mac_address(mxge_softc_t *sc) 1049{ 1050 mxge_cmd_t cmd; 1051 uint8_t *addr = sc->mac_addr; 1052 int status; 1053 1054 1055 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1056 | (addr[2] << 8) | addr[3]); 1057 1058 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1059 1060 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1061 return status; 1062} 1063 1064static int 1065mxge_change_pause(mxge_softc_t *sc, int pause) 1066{ 1067 mxge_cmd_t cmd; 1068 int status; 1069 1070 if (pause) 1071 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1072 &cmd); 1073 else 1074 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1075 &cmd); 1076 1077 if (status) { 1078 device_printf(sc->dev, "Failed to set flow control mode\n"); 1079 return ENXIO; 1080 } 1081 sc->pause = pause; 1082 return 0; 1083} 1084 1085static void 1086mxge_change_promisc(mxge_softc_t *sc, int promisc) 1087{ 1088 mxge_cmd_t cmd; 1089 int status; 1090 1091 if 
(mxge_always_promisc) 1092 promisc = 1; 1093 1094 if (promisc) 1095 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1096 &cmd); 1097 else 1098 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1099 &cmd); 1100 1101 if (status) { 1102 device_printf(sc->dev, "Failed to set promisc mode\n"); 1103 } 1104} 1105 1106static void 1107mxge_set_multicast_list(mxge_softc_t *sc) 1108{ 1109 mxge_cmd_t cmd; 1110 struct ifmultiaddr *ifma; 1111 struct ifnet *ifp = sc->ifp; 1112 int err; 1113 1114 /* This firmware is known to not support multicast */ 1115 if (!sc->fw_multicast_support) 1116 return; 1117 1118 /* Disable multicast filtering while we play with the lists*/ 1119 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1120 if (err != 0) { 1121 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1122 " error status: %d\n", err); 1123 return; 1124 } 1125 1126 if (sc->adopted_rx_filter_bug) 1127 return; 1128 1129 if (ifp->if_flags & IFF_ALLMULTI) 1130 /* request to disable multicast filtering, so quit here */ 1131 return; 1132 1133 /* Flush all the filters */ 1134 1135 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1136 if (err != 0) { 1137 device_printf(sc->dev, 1138 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1139 ", error status: %d\n", err); 1140 return; 1141 } 1142 1143 /* Walk the multicast list, and add each address */ 1144 1145 if_maddr_rlock(ifp); 1146 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1147 if (ifma->ifma_addr->sa_family != AF_LINK) 1148 continue; 1149 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1150 &cmd.data0, 4); 1151 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1152 &cmd.data1, 2); 1153 cmd.data0 = htonl(cmd.data0); 1154 cmd.data1 = htonl(cmd.data1); 1155 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1156 if (err != 0) { 1157 device_printf(sc->dev, "Failed " 1158 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1159 "%d\t", err); 1160 /* abort, leaving multicast filtering off */ 1161 if_maddr_runlock(ifp); 1162 return; 1163 } 1164 } 1165 if_maddr_runlock(ifp); 1166 /* Enable multicast filtering */ 1167 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1168 if (err != 0) { 1169 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1170 ", error status: %d\n", err); 1171 } 1172} 1173 1174static int 1175mxge_max_mtu(mxge_softc_t *sc) 1176{ 1177 mxge_cmd_t cmd; 1178 int status; 1179 1180 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1181 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1182 1183 /* try to set nbufs to see if it we can 1184 use virtually contiguous jumbos */ 1185 cmd.data0 = 0; 1186 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1187 &cmd); 1188 if (status == 0) 1189 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1190 1191 /* otherwise, we're limited to MJUMPAGESIZE */ 1192 return MJUMPAGESIZE - MXGEFW_PAD; 1193} 1194 1195static int 1196mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1197{ 1198 struct mxge_slice_state *ss; 1199 mxge_rx_done_t *rx_done; 1200 volatile uint32_t *irq_claim; 1201 mxge_cmd_t cmd; 1202 int slice, status; 1203 1204 /* try to send a reset command to the card to see if it 1205 is alive */ 1206 memset(&cmd, 0, sizeof (cmd)); 1207 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1208 if (status != 0) { 1209 device_printf(sc->dev, "failed reset\n"); 1210 return ENXIO; 1211 } 1212 1213 mxge_dummy_rdma(sc, 1); 1214 1215 1216 /* set the intrq size */ 1217 cmd.data0 = sc->rx_ring_size; 1218 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1219 1220 /* 1221 * Even 
though we already know how many slices are supported 1222 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1223 * has magic side effects, and must be called after a reset. 1224 * It must be called prior to calling any RSS related cmds, 1225 * including assigning an interrupt queue for anything but 1226 * slice 0. It must also be called *after* 1227 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1228 * the firmware to compute offsets. 1229 */ 1230 1231 if (sc->num_slices > 1) { 1232 /* ask the maximum number of slices it supports */ 1233 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1234 &cmd); 1235 if (status != 0) { 1236 device_printf(sc->dev, 1237 "failed to get number of slices\n"); 1238 return status; 1239 } 1240 /* 1241 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1242 * to setting up the interrupt queue DMA 1243 */ 1244 cmd.data0 = sc->num_slices; 1245 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1246#ifdef IFNET_BUF_RING 1247 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1248#endif 1249 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1250 &cmd); 1251 if (status != 0) { 1252 device_printf(sc->dev, 1253 "failed to set number of slices\n"); 1254 return status; 1255 } 1256 } 1257 1258 1259 if (interrupts_setup) { 1260 /* Now exchange information about interrupts */ 1261 for (slice = 0; slice < sc->num_slices; slice++) { 1262 rx_done = &sc->ss[slice].rx_done; 1263 memset(rx_done->entry, 0, sc->rx_ring_size); 1264 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1265 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1266 cmd.data2 = slice; 1267 status |= mxge_send_cmd(sc, 1268 MXGEFW_CMD_SET_INTRQ_DMA, 1269 &cmd); 1270 } 1271 } 1272 1273 status |= mxge_send_cmd(sc, 1274 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1275 1276 1277 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1278 1279 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1280 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1281 1282 1283 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1284 &cmd); 1285 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1286 if (status != 0) { 1287 device_printf(sc->dev, "failed set interrupt parameters\n"); 1288 return status; 1289 } 1290 1291 1292 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1293 1294 1295 /* run a DMA benchmark */ 1296 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1297 1298 for (slice = 0; slice < sc->num_slices; slice++) { 1299 ss = &sc->ss[slice]; 1300 1301 ss->irq_claim = irq_claim + (2 * slice); 1302 /* reset mcp/driver shared state back to 0 */ 1303 ss->rx_done.idx = 0; 1304 ss->rx_done.cnt = 0; 1305 ss->tx.req = 0; 1306 ss->tx.done = 0; 1307 ss->tx.pkt_done = 0; 1308 ss->tx.queue_active = 0; 1309 ss->tx.activate = 0; 1310 ss->tx.deactivate = 0; 1311 ss->tx.wake = 0; 1312 ss->tx.defrag = 0; 1313 ss->tx.stall = 0; 1314 ss->rx_big.cnt = 0; 1315 ss->rx_small.cnt = 0; 1316 ss->lc.lro_bad_csum = 0; 1317 ss->lc.lro_queued = 0; 1318 ss->lc.lro_flushed = 0; 1319 if (ss->fw_stats != NULL) { 1320 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1321 } 1322 } 1323 sc->rdma_tags_available = 15; 1324 status = mxge_update_mac_address(sc); 1325 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1326 mxge_change_pause(sc, sc->pause); 1327 mxge_set_multicast_list(sc); 1328 if (sc->throttle) { 1329 cmd.data0 = sc->throttle; 1330 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1331 &cmd)) { 1332 device_printf(sc->dev, 
1333 "can't enable throttle\n"); 1334 } 1335 } 1336 return status; 1337} 1338 1339static int 1340mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1341{ 1342 mxge_cmd_t cmd; 1343 mxge_softc_t *sc; 1344 int err; 1345 unsigned int throttle; 1346 1347 sc = arg1; 1348 throttle = sc->throttle; 1349 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1350 if (err != 0) { 1351 return err; 1352 } 1353 1354 if (throttle == sc->throttle) 1355 return 0; 1356 1357 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1358 return EINVAL; 1359 1360 mtx_lock(&sc->driver_mtx); 1361 cmd.data0 = throttle; 1362 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1363 if (err == 0) 1364 sc->throttle = throttle; 1365 mtx_unlock(&sc->driver_mtx); 1366 return err; 1367} 1368 1369static int 1370mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1371{ 1372 mxge_softc_t *sc; 1373 unsigned int intr_coal_delay; 1374 int err; 1375 1376 sc = arg1; 1377 intr_coal_delay = sc->intr_coal_delay; 1378 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1379 if (err != 0) { 1380 return err; 1381 } 1382 if (intr_coal_delay == sc->intr_coal_delay) 1383 return 0; 1384 1385 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1386 return EINVAL; 1387 1388 mtx_lock(&sc->driver_mtx); 1389 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1390 sc->intr_coal_delay = intr_coal_delay; 1391 1392 mtx_unlock(&sc->driver_mtx); 1393 return err; 1394} 1395 1396static int 1397mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1398{ 1399 mxge_softc_t *sc; 1400 unsigned int enabled; 1401 int err; 1402 1403 sc = arg1; 1404 enabled = sc->pause; 1405 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1406 if (err != 0) { 1407 return err; 1408 } 1409 if (enabled == sc->pause) 1410 return 0; 1411 1412 mtx_lock(&sc->driver_mtx); 1413 err = mxge_change_pause(sc, enabled); 1414 mtx_unlock(&sc->driver_mtx); 1415 return err; 1416} 1417 1418static int 1419mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1420{ 1421 int err; 1422 1423 if (arg1 == NULL) 1424 return EFAULT; 1425 arg2 = be32toh(*(int *)arg1); 1426 arg1 = NULL; 1427 err = sysctl_handle_int(oidp, arg1, arg2, req); 1428 1429 return err; 1430} 1431 1432static void 1433mxge_rem_sysctls(mxge_softc_t *sc) 1434{ 1435 struct mxge_slice_state *ss; 1436 int slice; 1437 1438 if (sc->slice_sysctl_tree == NULL) 1439 return; 1440 1441 for (slice = 0; slice < sc->num_slices; slice++) { 1442 ss = &sc->ss[slice]; 1443 if (ss == NULL || ss->sysctl_tree == NULL) 1444 continue; 1445 sysctl_ctx_free(&ss->sysctl_ctx); 1446 ss->sysctl_tree = NULL; 1447 } 1448 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1449 sc->slice_sysctl_tree = NULL; 1450} 1451 1452static void 1453mxge_add_sysctls(mxge_softc_t *sc) 1454{ 1455 struct sysctl_ctx_list *ctx; 1456 struct sysctl_oid_list *children; 1457 mcp_irq_data_t *fw; 1458 struct mxge_slice_state *ss; 1459 int slice; 1460 char slice_num[8]; 1461 1462 ctx = device_get_sysctl_ctx(sc->dev); 1463 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1464 fw = sc->ss[0].fw_stats; 1465 1466 /* random information */ 1467 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1468 "firmware_version", 1469 CTLFLAG_RD, &sc->fw_version, 1470 0, "firmware version"); 1471 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1472 "serial_number", 1473 CTLFLAG_RD, &sc->serial_number_string, 1474 0, "serial number"); 1475 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1476 "product_code", 1477 CTLFLAG_RD, &sc->product_code_string, 1478 0, "product_code"); 1479 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1480 
"pcie_link_width", 1481 CTLFLAG_RD, &sc->link_width, 1482 0, "tx_boundary"); 1483 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1484 "tx_boundary", 1485 CTLFLAG_RD, &sc->tx_boundary, 1486 0, "tx_boundary"); 1487 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1488 "write_combine", 1489 CTLFLAG_RD, &sc->wc, 1490 0, "write combining PIO?"); 1491 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1492 "read_dma_MBs", 1493 CTLFLAG_RD, &sc->read_dma, 1494 0, "DMA Read speed in MB/s"); 1495 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1496 "write_dma_MBs", 1497 CTLFLAG_RD, &sc->write_dma, 1498 0, "DMA Write speed in MB/s"); 1499 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1500 "read_write_dma_MBs", 1501 CTLFLAG_RD, &sc->read_write_dma, 1502 0, "DMA concurrent Read/Write speed in MB/s"); 1503 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1504 "watchdog_resets", 1505 CTLFLAG_RD, &sc->watchdog_resets, 1506 0, "Number of times NIC was reset"); 1507 1508 1509 /* performance related tunables */ 1510 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1511 "intr_coal_delay", 1512 CTLTYPE_INT|CTLFLAG_RW, sc, 1513 0, mxge_change_intr_coal, 1514 "I", "interrupt coalescing delay in usecs"); 1515 1516 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1517 "throttle", 1518 CTLTYPE_INT|CTLFLAG_RW, sc, 1519 0, mxge_change_throttle, 1520 "I", "transmit throttling"); 1521 1522 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1523 "flow_control_enabled", 1524 CTLTYPE_INT|CTLFLAG_RW, sc, 1525 0, mxge_change_flow_control, 1526 "I", "interrupt coalescing delay in usecs"); 1527 1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1529 "deassert_wait", 1530 CTLFLAG_RW, &mxge_deassert_wait, 1531 0, "Wait for IRQ line to go low in ihandler"); 1532 1533 /* stats block from firmware is in network byte order. 1534 Need to swap it */ 1535 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1536 "link_up", 1537 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1538 0, mxge_handle_be32, 1539 "I", "link up"); 1540 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1541 "rdma_tags_available", 1542 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1543 0, mxge_handle_be32, 1544 "I", "rdma_tags_available"); 1545 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1546 "dropped_bad_crc32", 1547 CTLTYPE_INT|CTLFLAG_RD, 1548 &fw->dropped_bad_crc32, 1549 0, mxge_handle_be32, 1550 "I", "dropped_bad_crc32"); 1551 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1552 "dropped_bad_phy", 1553 CTLTYPE_INT|CTLFLAG_RD, 1554 &fw->dropped_bad_phy, 1555 0, mxge_handle_be32, 1556 "I", "dropped_bad_phy"); 1557 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1558 "dropped_link_error_or_filtered", 1559 CTLTYPE_INT|CTLFLAG_RD, 1560 &fw->dropped_link_error_or_filtered, 1561 0, mxge_handle_be32, 1562 "I", "dropped_link_error_or_filtered"); 1563 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1564 "dropped_link_overflow", 1565 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1566 0, mxge_handle_be32, 1567 "I", "dropped_link_overflow"); 1568 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1569 "dropped_multicast_filtered", 1570 CTLTYPE_INT|CTLFLAG_RD, 1571 &fw->dropped_multicast_filtered, 1572 0, mxge_handle_be32, 1573 "I", "dropped_multicast_filtered"); 1574 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1575 "dropped_no_big_buffer", 1576 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1577 0, mxge_handle_be32, 1578 "I", "dropped_no_big_buffer"); 1579 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1580 "dropped_no_small_buffer", 1581 CTLTYPE_INT|CTLFLAG_RD, 1582 &fw->dropped_no_small_buffer, 1583 0, mxge_handle_be32, 1584 "I", "dropped_no_small_buffer"); 1585 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
1586 "dropped_overrun", 1587 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1588 0, mxge_handle_be32, 1589 "I", "dropped_overrun"); 1590 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1591 "dropped_pause", 1592 CTLTYPE_INT|CTLFLAG_RD, 1593 &fw->dropped_pause, 1594 0, mxge_handle_be32, 1595 "I", "dropped_pause"); 1596 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1597 "dropped_runt", 1598 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1599 0, mxge_handle_be32, 1600 "I", "dropped_runt"); 1601 1602 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1603 "dropped_unicast_filtered", 1604 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1605 0, mxge_handle_be32, 1606 "I", "dropped_unicast_filtered"); 1607 1608 /* verbose printing? */ 1609 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1610 "verbose", 1611 CTLFLAG_RW, &mxge_verbose, 1612 0, "verbose printing"); 1613 1614 /* add counters exported for debugging from all slices */ 1615 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1616 sc->slice_sysctl_tree = 1617 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1618 "slice", CTLFLAG_RD, 0, ""); 1619 1620 for (slice = 0; slice < sc->num_slices; slice++) { 1621 ss = &sc->ss[slice]; 1622 sysctl_ctx_init(&ss->sysctl_ctx); 1623 ctx = &ss->sysctl_ctx; 1624 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1625 sprintf(slice_num, "%d", slice); 1626 ss->sysctl_tree = 1627 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1628 CTLFLAG_RD, 0, ""); 1629 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1630 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1631 "rx_small_cnt", 1632 CTLFLAG_RD, &ss->rx_small.cnt, 1633 0, "rx_small_cnt"); 1634 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1635 "rx_big_cnt", 1636 CTLFLAG_RD, &ss->rx_big.cnt, 1637 0, "rx_small_cnt"); 1638 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1639 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1640 0, "number of lro merge queues flushed"); 1641 1642 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1643 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1644 0, "number of bad csums preventing LRO"); 1645 1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1647 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1648 0, "number of frames appended to lro merge" 1649 "queues"); 1650 1651#ifndef IFNET_BUF_RING 1652 /* only transmit from slice 0 for now */ 1653 if (slice > 0) 1654 continue; 1655#endif 1656 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1657 "tx_req", 1658 CTLFLAG_RD, &ss->tx.req, 1659 0, "tx_req"); 1660 1661 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1662 "tx_done", 1663 CTLFLAG_RD, &ss->tx.done, 1664 0, "tx_done"); 1665 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1666 "tx_pkt_done", 1667 CTLFLAG_RD, &ss->tx.pkt_done, 1668 0, "tx_done"); 1669 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1670 "tx_stall", 1671 CTLFLAG_RD, &ss->tx.stall, 1672 0, "tx_stall"); 1673 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1674 "tx_wake", 1675 CTLFLAG_RD, &ss->tx.wake, 1676 0, "tx_wake"); 1677 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1678 "tx_defrag", 1679 CTLFLAG_RD, &ss->tx.defrag, 1680 0, "tx_defrag"); 1681 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1682 "tx_queue_active", 1683 CTLFLAG_RD, &ss->tx.queue_active, 1684 0, "tx_queue_active"); 1685 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1686 "tx_activate", 1687 CTLFLAG_RD, &ss->tx.activate, 1688 0, "tx_activate"); 1689 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1690 "tx_deactivate", 1691 CTLFLAG_RD, &ss->tx.deactivate, 1692 0, "tx_deactivate"); 1693 } 1694} 1695 1696/* copy an array of mcp_kreq_ether_send_t's to the mcp. 
Copy 1697 backwards one at a time and handle ring wraps */ 1698 1699static inline void 1700mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1701 mcp_kreq_ether_send_t *src, int cnt) 1702{ 1703 int idx, starting_slot; 1704 starting_slot = tx->req; 1705 while (cnt > 1) { 1706 cnt--; 1707 idx = (starting_slot + cnt) & tx->mask; 1708 mxge_pio_copy(&tx->lanai[idx], 1709 &src[cnt], sizeof(*src)); 1710 wmb(); 1711 } 1712} 1713 1714/* 1715 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1716 * at most 32 bytes at a time, so as to avoid involving the software 1717 * pio handler in the nic. We re-write the first segment's flags 1718 * to mark them valid only after writing the entire chain 1719 */ 1720 1721static inline void 1722mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1723 int cnt) 1724{ 1725 int idx, i; 1726 uint32_t *src_ints; 1727 volatile uint32_t *dst_ints; 1728 mcp_kreq_ether_send_t *srcp; 1729 volatile mcp_kreq_ether_send_t *dstp, *dst; 1730 uint8_t last_flags; 1731 1732 idx = tx->req & tx->mask; 1733 1734 last_flags = src->flags; 1735 src->flags = 0; 1736 wmb(); 1737 dst = dstp = &tx->lanai[idx]; 1738 srcp = src; 1739 1740 if ((idx + cnt) < tx->mask) { 1741 for (i = 0; i < (cnt - 1); i += 2) { 1742 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1743 wmb(); /* force write every 32 bytes */ 1744 srcp += 2; 1745 dstp += 2; 1746 } 1747 } else { 1748 /* submit all but the first request, and ensure 1749 that it is submitted below */ 1750 mxge_submit_req_backwards(tx, src, cnt); 1751 i = 0; 1752 } 1753 if (i < cnt) { 1754 /* submit the first request */ 1755 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1756 wmb(); /* barrier before setting valid flag */ 1757 } 1758 1759 /* re-write the last 32-bits with the valid flags */ 1760 src->flags = last_flags; 1761 src_ints = (uint32_t *)src; 1762 src_ints+=3; 1763 dst_ints = (volatile uint32_t *)dst; 1764 dst_ints+=3; 1765 *dst_ints = *src_ints; 1766 tx->req += cnt; 1767 wmb(); 1768} 1769 1770static int 1771mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1772 struct mxge_pkt_info *pi) 1773{ 1774 struct ether_vlan_header *eh; 1775 uint16_t etype; 1776 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1777#if IFCAP_TSO6 && defined(INET6) 1778 int nxt; 1779#endif 1780 1781 eh = mtod(m, struct ether_vlan_header *); 1782 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1783 etype = ntohs(eh->evl_proto); 1784 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1785 } else { 1786 etype = ntohs(eh->evl_encap_proto); 1787 pi->ip_off = ETHER_HDR_LEN; 1788 } 1789 1790 switch (etype) { 1791 case ETHERTYPE_IP: 1792 /* 1793 * ensure ip header is in first mbuf, copy it to a 1794 * scratch buffer if not 1795 */ 1796 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1797 pi->ip6 = NULL; 1798 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1799 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1800 ss->scratch); 1801 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1802 } 1803 pi->ip_hlen = pi->ip->ip_hl << 2; 1804 if (!tso) 1805 return 0; 1806 1807 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1808 sizeof(struct tcphdr))) { 1809 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1810 sizeof(struct tcphdr), ss->scratch); 1811 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1812 } 1813 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1814 break; 1815#if IFCAP_TSO6 && defined(INET6) 1816 case ETHERTYPE_IPV6: 1817 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1818 if (__predict_false(m->m_len < pi->ip_off 
+ sizeof(*pi->ip6))) { 1819 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1820 ss->scratch); 1821 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1822 } 1823 nxt = 0; 1824 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1825 pi->ip_hlen -= pi->ip_off; 1826 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1827 return EINVAL; 1828 1829 if (!tso) 1830 return 0; 1831 1832 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1833 return EINVAL; 1834 1835 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1836 sizeof(struct tcphdr))) { 1837 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1838 sizeof(struct tcphdr), ss->scratch); 1839 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1840 } 1841 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1842 break; 1843#endif 1844 default: 1845 return EINVAL; 1846 } 1847 return 0; 1848} 1849 1850#if IFCAP_TSO4 1851 1852static void 1853mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1854 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1855{ 1856 mxge_tx_ring_t *tx; 1857 mcp_kreq_ether_send_t *req; 1858 bus_dma_segment_t *seg; 1859 uint32_t low, high_swapped; 1860 int len, seglen, cum_len, cum_len_next; 1861 int next_is_first, chop, cnt, rdma_count, small; 1862 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1863 uint8_t flags, flags_next; 1864 static int once; 1865 1866 mss = m->m_pkthdr.tso_segsz; 1867 1868 /* negative cum_len signifies to the 1869 * send loop that we are still in the 1870 * header portion of the TSO packet. 1871 */ 1872 1873 cksum_offset = pi->ip_off + pi->ip_hlen; 1874 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1875 1876 /* TSO implies checksum offload on this hardware */ 1877 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1878 /* 1879 * If packet has full TCP csum, replace it with pseudo hdr 1880 * sum that the NIC expects, otherwise the NIC will emit 1881 * packets with bad TCP checksums. 1882 */ 1883 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1884 if (pi->ip6) { 1885#if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1886 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1887 sum = in6_cksum_pseudo(pi->ip6, 1888 m->m_pkthdr.len - cksum_offset, 1889 IPPROTO_TCP, 0); 1890#endif 1891 } else { 1892#ifdef INET 1893 m->m_pkthdr.csum_flags |= CSUM_TCP; 1894 sum = in_pseudo(pi->ip->ip_src.s_addr, 1895 pi->ip->ip_dst.s_addr, 1896 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1897 cksum_offset))); 1898#endif 1899 } 1900 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1901 cksum_offset, sizeof(sum), (caddr_t)&sum); 1902 } 1903 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1904 1905 1906 /* for TSO, pseudo_hdr_offset holds mss. 1907 * The firmware figures out where to put 1908 * the checksum by parsing the header. */ 1909 pseudo_hdr_offset = htobe16(mss); 1910 1911 if (pi->ip6) { 1912 /* 1913 * for IPv6 TSO, the "checksum offset" is re-purposed 1914 * to store the TCP header len 1915 */ 1916 cksum_offset = (pi->tcp->th_off << 2); 1917 } 1918 1919 tx = &ss->tx; 1920 req = tx->req_list; 1921 seg = tx->seg_list; 1922 cnt = 0; 1923 rdma_count = 0; 1924 /* "rdma_count" is the number of RDMAs belonging to the 1925 * current packet BEFORE the current send request. For 1926 * non-TSO packets, this is equal to "count". 1927 * For TSO packets, rdma_count needs to be reset 1928 * to 0 after a segment cut. 1929 * 1930 * The rdma_count field of the send request is 1931 * the number of RDMAs of the packet starting at 1932 * that request. 
For TSO send requests with one ore more cuts 1933 * in the middle, this is the number of RDMAs starting 1934 * after the last cut in the request. All previous 1935 * segments before the last cut implicitly have 1 RDMA. 1936 * 1937 * Since the number of RDMAs is not known beforehand, 1938 * it must be filled-in retroactively - after each 1939 * segmentation cut or at the end of the entire packet. 1940 */ 1941 1942 while (busdma_seg_cnt) { 1943 /* Break the busdma segment up into pieces*/ 1944 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1945 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1946 len = seg->ds_len; 1947 1948 while (len) { 1949 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1950 seglen = len; 1951 cum_len_next = cum_len + seglen; 1952 (req-rdma_count)->rdma_count = rdma_count + 1; 1953 if (__predict_true(cum_len >= 0)) { 1954 /* payload */ 1955 chop = (cum_len_next > mss); 1956 cum_len_next = cum_len_next % mss; 1957 next_is_first = (cum_len_next == 0); 1958 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1959 flags_next |= next_is_first * 1960 MXGEFW_FLAGS_FIRST; 1961 rdma_count |= -(chop | next_is_first); 1962 rdma_count += chop & !next_is_first; 1963 } else if (cum_len_next >= 0) { 1964 /* header ends */ 1965 rdma_count = -1; 1966 cum_len_next = 0; 1967 seglen = -cum_len; 1968 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1969 flags_next = MXGEFW_FLAGS_TSO_PLD | 1970 MXGEFW_FLAGS_FIRST | 1971 (small * MXGEFW_FLAGS_SMALL); 1972 } 1973 1974 req->addr_high = high_swapped; 1975 req->addr_low = htobe32(low); 1976 req->pseudo_hdr_offset = pseudo_hdr_offset; 1977 req->pad = 0; 1978 req->rdma_count = 1; 1979 req->length = htobe16(seglen); 1980 req->cksum_offset = cksum_offset; 1981 req->flags = flags | ((cum_len & 1) * 1982 MXGEFW_FLAGS_ALIGN_ODD); 1983 low += seglen; 1984 len -= seglen; 1985 cum_len = cum_len_next; 1986 flags = flags_next; 1987 req++; 1988 cnt++; 1989 rdma_count++; 1990 if (cksum_offset != 0 && !pi->ip6) { 1991 if (__predict_false(cksum_offset > seglen)) 1992 cksum_offset -= seglen; 1993 else 1994 cksum_offset = 0; 1995 } 1996 if (__predict_false(cnt > tx->max_desc)) 1997 goto drop; 1998 } 1999 busdma_seg_cnt--; 2000 seg++; 2001 } 2002 (req-rdma_count)->rdma_count = rdma_count; 2003 2004 do { 2005 req--; 2006 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2007 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 2008 2009 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2010 mxge_submit_req(tx, tx->req_list, cnt); 2011#ifdef IFNET_BUF_RING 2012 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2013 /* tell the NIC to start polling this slice */ 2014 *tx->send_go = 1; 2015 tx->queue_active = 1; 2016 tx->activate++; 2017 wmb(); 2018 } 2019#endif 2020 return; 2021 2022drop: 2023 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 2024 m_freem(m); 2025 ss->oerrors++; 2026 if (!once) { 2027 printf("tx->max_desc exceeded via TSO!\n"); 2028 printf("mss = %d, %ld, %d!\n", mss, 2029 (long)seg - (long)tx->seg_list, tx->max_desc); 2030 once = 1; 2031 } 2032 return; 2033 2034} 2035 2036#endif /* IFCAP_TSO4 */ 2037 2038#ifdef MXGE_NEW_VLAN_API 2039/* 2040 * We reproduce the software vlan tag insertion from 2041 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2042 * vlan tag insertion. We need to advertise this in order to have the 2043 * vlan interface respect our csum offload flags. 
2044 */ 2045static struct mbuf * 2046mxge_vlan_tag_insert(struct mbuf *m) 2047{ 2048 struct ether_vlan_header *evl; 2049 2050 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2051 if (__predict_false(m == NULL)) 2052 return NULL; 2053 if (m->m_len < sizeof(*evl)) { 2054 m = m_pullup(m, sizeof(*evl)); 2055 if (__predict_false(m == NULL)) 2056 return NULL; 2057 } 2058 /* 2059 * Transform the Ethernet header into an Ethernet header 2060 * with 802.1Q encapsulation. 2061 */ 2062 evl = mtod(m, struct ether_vlan_header *); 2063 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2064 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2065 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2066 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2067 m->m_flags &= ~M_VLANTAG; 2068 return m; 2069} 2070#endif /* MXGE_NEW_VLAN_API */ 2071 2072static void 2073mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2074{ 2075 struct mxge_pkt_info pi = {0,0,0,0}; 2076 mxge_softc_t *sc; 2077 mcp_kreq_ether_send_t *req; 2078 bus_dma_segment_t *seg; 2079 struct mbuf *m_tmp; 2080 struct ifnet *ifp; 2081 mxge_tx_ring_t *tx; 2082 int cnt, cum_len, err, i, idx, odd_flag; 2083 uint16_t pseudo_hdr_offset; 2084 uint8_t flags, cksum_offset; 2085 2086 2087 sc = ss->sc; 2088 ifp = sc->ifp; 2089 tx = &ss->tx; 2090 2091#ifdef MXGE_NEW_VLAN_API 2092 if (m->m_flags & M_VLANTAG) { 2093 m = mxge_vlan_tag_insert(m); 2094 if (__predict_false(m == NULL)) 2095 goto drop_without_m; 2096 } 2097#endif 2098 if (m->m_pkthdr.csum_flags & 2099 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2100 if (mxge_parse_tx(ss, m, &pi)) 2101 goto drop; 2102 } 2103 2104 /* (try to) map the frame for DMA */ 2105 idx = tx->req & tx->mask; 2106 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2107 m, tx->seg_list, &cnt, 2108 BUS_DMA_NOWAIT); 2109 if (__predict_false(err == EFBIG)) { 2110 /* Too many segments in the chain. Try 2111 to defrag */ 2112 m_tmp = m_defrag(m, M_NOWAIT); 2113 if (m_tmp == NULL) { 2114 goto drop; 2115 } 2116 ss->tx.defrag++; 2117 m = m_tmp; 2118 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2119 tx->info[idx].map, 2120 m, tx->seg_list, &cnt, 2121 BUS_DMA_NOWAIT); 2122 } 2123 if (__predict_false(err != 0)) { 2124 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2125 " packet len = %d\n", err, m->m_pkthdr.len); 2126 goto drop; 2127 } 2128 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2129 BUS_DMASYNC_PREWRITE); 2130 tx->info[idx].m = m; 2131 2132#if IFCAP_TSO4 2133 /* TSO is different enough, we handle it in another routine */ 2134 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2135 mxge_encap_tso(ss, m, cnt, &pi); 2136 return; 2137 } 2138#endif 2139 2140 req = tx->req_list; 2141 cksum_offset = 0; 2142 pseudo_hdr_offset = 0; 2143 flags = MXGEFW_FLAGS_NO_TSO; 2144 2145 /* checksum offloading? 
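	   (judging from the assignments below: cksum_offset is where the
	   firmware's 16-bit ones-complement sum starts, i.e. the first byte
	   of the L4 header, and pseudo_hdr_offset, byte-swapped to
	   big-endian, is cksum_offset + csum_data, i.e. the absolute offset
	   of the TCP/UDP checksum field that the stack has already seeded
	   with the pseudo-header sum)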
*/ 2146 if (m->m_pkthdr.csum_flags & 2147 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2148 /* ensure ip header is in first mbuf, copy 2149 it to a scratch buffer if not */ 2150 cksum_offset = pi.ip_off + pi.ip_hlen; 2151 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2152 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2153 req->cksum_offset = cksum_offset; 2154 flags |= MXGEFW_FLAGS_CKSUM; 2155 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2156 } else { 2157 odd_flag = 0; 2158 } 2159 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2160 flags |= MXGEFW_FLAGS_SMALL; 2161 2162 /* convert segments into a request list */ 2163 cum_len = 0; 2164 seg = tx->seg_list; 2165 req->flags = MXGEFW_FLAGS_FIRST; 2166 for (i = 0; i < cnt; i++) { 2167 req->addr_low = 2168 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2169 req->addr_high = 2170 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2171 req->length = htobe16(seg->ds_len); 2172 req->cksum_offset = cksum_offset; 2173 if (cksum_offset > seg->ds_len) 2174 cksum_offset -= seg->ds_len; 2175 else 2176 cksum_offset = 0; 2177 req->pseudo_hdr_offset = pseudo_hdr_offset; 2178 req->pad = 0; /* complete solid 16-byte block */ 2179 req->rdma_count = 1; 2180 req->flags |= flags | ((cum_len & 1) * odd_flag); 2181 cum_len += seg->ds_len; 2182 seg++; 2183 req++; 2184 req->flags = 0; 2185 } 2186 req--; 2187 /* pad runts to 60 bytes */ 2188 if (cum_len < 60) { 2189 req++; 2190 req->addr_low = 2191 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2192 req->addr_high = 2193 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2194 req->length = htobe16(60 - cum_len); 2195 req->cksum_offset = 0; 2196 req->pseudo_hdr_offset = pseudo_hdr_offset; 2197 req->pad = 0; /* complete solid 16-byte block */ 2198 req->rdma_count = 1; 2199 req->flags |= flags | ((cum_len & 1) * odd_flag); 2200 cnt++; 2201 } 2202 2203 tx->req_list[0].rdma_count = cnt; 2204#if 0 2205 /* print what the firmware will see */ 2206 for (i = 0; i < cnt; i++) { 2207 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2208 "cso:%d, flags:0x%x, rdma:%d\n", 2209 i, (int)ntohl(tx->req_list[i].addr_high), 2210 (int)ntohl(tx->req_list[i].addr_low), 2211 (int)ntohs(tx->req_list[i].length), 2212 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2213 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2214 tx->req_list[i].rdma_count); 2215 } 2216 printf("--------------\n"); 2217#endif 2218 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2219 mxge_submit_req(tx, tx->req_list, cnt); 2220#ifdef IFNET_BUF_RING 2221 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2222 /* tell the NIC to start polling this slice */ 2223 *tx->send_go = 1; 2224 tx->queue_active = 1; 2225 tx->activate++; 2226 wmb(); 2227 } 2228#endif 2229 return; 2230 2231drop: 2232 m_freem(m); 2233drop_without_m: 2234 ss->oerrors++; 2235 return; 2236} 2237 2238#ifdef IFNET_BUF_RING 2239static void 2240mxge_qflush(struct ifnet *ifp) 2241{ 2242 mxge_softc_t *sc = ifp->if_softc; 2243 mxge_tx_ring_t *tx; 2244 struct mbuf *m; 2245 int slice; 2246 2247 for (slice = 0; slice < sc->num_slices; slice++) { 2248 tx = &sc->ss[slice].tx; 2249 mtx_lock(&tx->mtx); 2250 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2251 m_freem(m); 2252 mtx_unlock(&tx->mtx); 2253 } 2254 if_qflush(ifp); 2255} 2256 2257static inline void 2258mxge_start_locked(struct mxge_slice_state *ss) 2259{ 2260 mxge_softc_t *sc; 2261 struct mbuf *m; 2262 struct ifnet *ifp; 2263 mxge_tx_ring_t *tx; 2264 2265 sc = ss->sc; 2266 ifp = sc->ifp; 2267 tx = &ss->tx; 2268 2269 while ((tx->mask - (tx->req 
- tx->done)) > tx->max_desc) { 2270 m = drbr_dequeue(ifp, tx->br); 2271 if (m == NULL) { 2272 return; 2273 } 2274 /* let BPF see it */ 2275 BPF_MTAP(ifp, m); 2276 2277 /* give it to the nic */ 2278 mxge_encap(ss, m); 2279 } 2280 /* ran out of transmit slots */ 2281 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2282 && (!drbr_empty(ifp, tx->br))) { 2283 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2284 tx->stall++; 2285 } 2286} 2287 2288static int 2289mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2290{ 2291 mxge_softc_t *sc; 2292 struct ifnet *ifp; 2293 mxge_tx_ring_t *tx; 2294 int err; 2295 2296 sc = ss->sc; 2297 ifp = sc->ifp; 2298 tx = &ss->tx; 2299 2300 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2301 IFF_DRV_RUNNING) { 2302 err = drbr_enqueue(ifp, tx->br, m); 2303 return (err); 2304 } 2305 2306 if (!drbr_needs_enqueue(ifp, tx->br) && 2307 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2308 /* let BPF see it */ 2309 BPF_MTAP(ifp, m); 2310 /* give it to the nic */ 2311 mxge_encap(ss, m); 2312 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2313 return (err); 2314 } 2315 if (!drbr_empty(ifp, tx->br)) 2316 mxge_start_locked(ss); 2317 return (0); 2318} 2319 2320static int 2321mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2322{ 2323 mxge_softc_t *sc = ifp->if_softc; 2324 struct mxge_slice_state *ss; 2325 mxge_tx_ring_t *tx; 2326 int err = 0; 2327 int slice; 2328 2329 slice = m->m_pkthdr.flowid; 2330 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2331 2332 ss = &sc->ss[slice]; 2333 tx = &ss->tx; 2334 2335 if (mtx_trylock(&tx->mtx)) { 2336 err = mxge_transmit_locked(ss, m); 2337 mtx_unlock(&tx->mtx); 2338 } else { 2339 err = drbr_enqueue(ifp, tx->br, m); 2340 } 2341 2342 return (err); 2343} 2344 2345#else 2346 2347static inline void 2348mxge_start_locked(struct mxge_slice_state *ss) 2349{ 2350 mxge_softc_t *sc; 2351 struct mbuf *m; 2352 struct ifnet *ifp; 2353 mxge_tx_ring_t *tx; 2354 2355 sc = ss->sc; 2356 ifp = sc->ifp; 2357 tx = &ss->tx; 2358 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2359 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2360 if (m == NULL) { 2361 return; 2362 } 2363 /* let BPF see it */ 2364 BPF_MTAP(ifp, m); 2365 2366 /* give it to the nic */ 2367 mxge_encap(ss, m); 2368 } 2369 /* ran out of transmit slots */ 2370 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2371 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2372 tx->stall++; 2373 } 2374} 2375#endif 2376static void 2377mxge_start(struct ifnet *ifp) 2378{ 2379 mxge_softc_t *sc = ifp->if_softc; 2380 struct mxge_slice_state *ss; 2381 2382 /* only use the first slice for now */ 2383 ss = &sc->ss[0]; 2384 mtx_lock(&ss->tx.mtx); 2385 mxge_start_locked(ss); 2386 mtx_unlock(&ss->tx.mtx); 2387} 2388 2389/* 2390 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2391 * at most 32 bytes at a time, so as to avoid involving the software 2392 * pio handler in the nic. 
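 * Each mcp_kreq_ether_recv_t carries a 64-bit bus address split into
 * addr_high/addr_low (8 bytes), so the two 32-byte copies below push
 * one chunk of eight ring entries per call.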
We re-write the first segment's low 2393 * DMA address to mark it valid only after we write the entire chunk 2394 * in a burst 2395 */ 2396static inline void 2397mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2398 mcp_kreq_ether_recv_t *src) 2399{ 2400 uint32_t low; 2401 2402 low = src->addr_low; 2403 src->addr_low = 0xffffffff; 2404 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2405 wmb(); 2406 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2407 wmb(); 2408 src->addr_low = low; 2409 dst->addr_low = low; 2410 wmb(); 2411} 2412 2413static int 2414mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2415{ 2416 bus_dma_segment_t seg; 2417 struct mbuf *m; 2418 mxge_rx_ring_t *rx = &ss->rx_small; 2419 int cnt, err; 2420 2421 m = m_gethdr(M_NOWAIT, MT_DATA); 2422 if (m == NULL) { 2423 rx->alloc_fail++; 2424 err = ENOBUFS; 2425 goto done; 2426 } 2427 m->m_len = MHLEN; 2428 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2429 &seg, &cnt, BUS_DMA_NOWAIT); 2430 if (err != 0) { 2431 m_free(m); 2432 goto done; 2433 } 2434 rx->info[idx].m = m; 2435 rx->shadow[idx].addr_low = 2436 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2437 rx->shadow[idx].addr_high = 2438 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2439 2440done: 2441 if ((idx & 7) == 7) 2442 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2443 return err; 2444} 2445 2446static int 2447mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2448{ 2449 bus_dma_segment_t seg[3]; 2450 struct mbuf *m; 2451 mxge_rx_ring_t *rx = &ss->rx_big; 2452 int cnt, err, i; 2453 2454 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2455 if (m == NULL) { 2456 rx->alloc_fail++; 2457 err = ENOBUFS; 2458 goto done; 2459 } 2460 m->m_len = rx->mlen; 2461 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2462 seg, &cnt, BUS_DMA_NOWAIT); 2463 if (err != 0) { 2464 m_free(m); 2465 goto done; 2466 } 2467 rx->info[idx].m = m; 2468 rx->shadow[idx].addr_low = 2469 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2470 rx->shadow[idx].addr_high = 2471 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2472 2473#if MXGE_VIRT_JUMBOS 2474 for (i = 1; i < cnt; i++) { 2475 rx->shadow[idx + i].addr_low = 2476 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2477 rx->shadow[idx + i].addr_high = 2478 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2479 } 2480#endif 2481 2482done: 2483 for (i = 0; i < rx->nbufs; i++) { 2484 if ((idx & 7) == 7) { 2485 mxge_submit_8rx(&rx->lanai[idx - 7], 2486 &rx->shadow[idx - 7]); 2487 } 2488 idx++; 2489 } 2490 return err; 2491} 2492 2493#ifdef INET6 2494 2495static uint16_t 2496mxge_csum_generic(uint16_t *raw, int len) 2497{ 2498 uint32_t csum; 2499 2500 2501 csum = 0; 2502 while (len > 0) { 2503 csum += *raw; 2504 raw++; 2505 len -= 2; 2506 } 2507 csum = (csum >> 16) + (csum & 0xffff); 2508 csum = (csum >> 16) + (csum & 0xffff); 2509 return (uint16_t)csum; 2510} 2511 2512static inline uint16_t 2513mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2514{ 2515 uint32_t partial; 2516 int nxt, cksum_offset; 2517 struct ip6_hdr *ip6 = p; 2518 uint16_t c; 2519 2520 nxt = ip6->ip6_nxt; 2521 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2522 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2523 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2524 IPPROTO_IPV6, &nxt); 2525 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2526 return (1); 2527 } 2528 2529 /* 2530 * IPv6 headers do not contain a checksum, and hence 2531 * do not checksum to zero, so they don't "fall out" 2532 * of the partial checksum calculation like 
IPv4 2533 * headers do. We need to fix the partial checksum by 2534 * subtracting the checksum of the IPv6 header. 2535 */ 2536 2537 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2538 ETHER_HDR_LEN); 2539 csum += ~partial; 2540 csum += (csum < ~partial); 2541 csum = (csum >> 16) + (csum & 0xFFFF); 2542 csum = (csum >> 16) + (csum & 0xFFFF); 2543 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2544 csum); 2545 c ^= 0xffff; 2546 return (c); 2547} 2548#endif /* INET6 */ 2549/* 2550 * Myri10GE hardware checksums are not valid if the sender 2551 * padded the frame with non-zero padding. This is because 2552 * the firmware just does a simple 16-bit 1s complement 2553 * checksum across the entire frame, excluding the first 14 2554 * bytes. It is best to simply to check the checksum and 2555 * tell the stack about it only if the checksum is good 2556 */ 2557 2558static inline uint16_t 2559mxge_rx_csum(struct mbuf *m, int csum) 2560{ 2561 struct ether_header *eh; 2562#ifdef INET 2563 struct ip *ip; 2564#endif 2565#if defined(INET) || defined(INET6) 2566 int cap = m->m_pkthdr.rcvif->if_capenable; 2567#endif 2568 uint16_t c, etype; 2569 2570 2571 eh = mtod(m, struct ether_header *); 2572 etype = ntohs(eh->ether_type); 2573 switch (etype) { 2574#ifdef INET 2575 case ETHERTYPE_IP: 2576 if ((cap & IFCAP_RXCSUM) == 0) 2577 return (1); 2578 ip = (struct ip *)(eh + 1); 2579 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2580 return (1); 2581 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2582 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2583 (ip->ip_hl << 2) + ip->ip_p)); 2584 c ^= 0xffff; 2585 break; 2586#endif 2587#ifdef INET6 2588 case ETHERTYPE_IPV6: 2589 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2590 return (1); 2591 c = mxge_rx_csum6((eh + 1), m, csum); 2592 break; 2593#endif 2594 default: 2595 c = 1; 2596 } 2597 return (c); 2598} 2599 2600static void 2601mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2602{ 2603 struct ether_vlan_header *evl; 2604 struct ether_header *eh; 2605 uint32_t partial; 2606 2607 evl = mtod(m, struct ether_vlan_header *); 2608 eh = mtod(m, struct ether_header *); 2609 2610 /* 2611 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2612 * after what the firmware thought was the end of the ethernet 2613 * header. 2614 */ 2615 2616 /* put checksum into host byte order */ 2617 *csum = ntohs(*csum); 2618 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2619 (*csum) += ~partial; 2620 (*csum) += ((*csum) < ~partial); 2621 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2622 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2623 2624 /* restore checksum to network byte order; 2625 later consumers expect this */ 2626 *csum = htons(*csum); 2627 2628 /* save the tag */ 2629#ifdef MXGE_NEW_VLAN_API 2630 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2631#else 2632 { 2633 struct m_tag *mtag; 2634 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2635 M_NOWAIT); 2636 if (mtag == NULL) 2637 return; 2638 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2639 m_tag_prepend(m, mtag); 2640 } 2641 2642#endif 2643 m->m_flags |= M_VLANTAG; 2644 2645 /* 2646 * Remove the 802.1q header by copying the Ethernet 2647 * addresses over it and adjusting the beginning of 2648 * the data in the mbuf. The encapsulated Ethernet 2649 * type field is already in place. 
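 * The bcopy()/m_adj() pair below achieves this by sliding the
 * destination and source MAC addresses up ETHER_VLAN_ENCAP_LEN (4)
 * bytes, overwriting the tag, and then trimming the 4 now-unused
 * bytes off the front of the mbuf.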
2650 */ 2651 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2652 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2653 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2654} 2655 2656 2657static inline void 2658mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2659 uint32_t csum, int lro) 2660{ 2661 mxge_softc_t *sc; 2662 struct ifnet *ifp; 2663 struct mbuf *m; 2664 struct ether_header *eh; 2665 mxge_rx_ring_t *rx; 2666 bus_dmamap_t old_map; 2667 int idx; 2668 2669 sc = ss->sc; 2670 ifp = sc->ifp; 2671 rx = &ss->rx_big; 2672 idx = rx->cnt & rx->mask; 2673 rx->cnt += rx->nbufs; 2674 /* save a pointer to the received mbuf */ 2675 m = rx->info[idx].m; 2676 /* try to replace the received mbuf */ 2677 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2678 /* drop the frame -- the old mbuf is re-cycled */ 2679 ifp->if_ierrors++; 2680 return; 2681 } 2682 2683 /* unmap the received buffer */ 2684 old_map = rx->info[idx].map; 2685 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2686 bus_dmamap_unload(rx->dmat, old_map); 2687 2688 /* swap the bus_dmamap_t's */ 2689 rx->info[idx].map = rx->extra_map; 2690 rx->extra_map = old_map; 2691 2692 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2693 * aligned */ 2694 m->m_data += MXGEFW_PAD; 2695 2696 m->m_pkthdr.rcvif = ifp; 2697 m->m_len = m->m_pkthdr.len = len; 2698 ss->ipackets++; 2699 eh = mtod(m, struct ether_header *); 2700 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2701 mxge_vlan_tag_remove(m, &csum); 2702 } 2703 /* if the checksum is valid, mark it in the mbuf header */ 2704 2705 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2706 (0 == mxge_rx_csum(m, csum))) { 2707 /* Tell the stack that the checksum is good */ 2708 m->m_pkthdr.csum_data = 0xffff; 2709 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2710 CSUM_DATA_VALID; 2711 2712#if defined(INET) || defined (INET6) 2713 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2714 return; 2715#endif 2716 } 2717 /* flowid only valid if RSS hashing is enabled */ 2718 if (sc->num_slices > 1) { 2719 m->m_pkthdr.flowid = (ss - sc->ss); 2720 m->m_flags |= M_FLOWID; 2721 } 2722 /* pass the frame up the stack */ 2723 (*ifp->if_input)(ifp, m); 2724} 2725 2726static inline void 2727mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2728 uint32_t csum, int lro) 2729{ 2730 mxge_softc_t *sc; 2731 struct ifnet *ifp; 2732 struct ether_header *eh; 2733 struct mbuf *m; 2734 mxge_rx_ring_t *rx; 2735 bus_dmamap_t old_map; 2736 int idx; 2737 2738 sc = ss->sc; 2739 ifp = sc->ifp; 2740 rx = &ss->rx_small; 2741 idx = rx->cnt & rx->mask; 2742 rx->cnt++; 2743 /* save a pointer to the received mbuf */ 2744 m = rx->info[idx].m; 2745 /* try to replace the received mbuf */ 2746 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2747 /* drop the frame -- the old mbuf is re-cycled */ 2748 ifp->if_ierrors++; 2749 return; 2750 } 2751 2752 /* unmap the received buffer */ 2753 old_map = rx->info[idx].map; 2754 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2755 bus_dmamap_unload(rx->dmat, old_map); 2756 2757 /* swap the bus_dmamap_t's */ 2758 rx->info[idx].map = rx->extra_map; 2759 rx->extra_map = old_map; 2760 2761 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2762 * aligned */ 2763 m->m_data += MXGEFW_PAD; 2764 2765 m->m_pkthdr.rcvif = ifp; 2766 m->m_len = m->m_pkthdr.len = len; 2767 ss->ipackets++; 2768 eh = mtod(m, struct ether_header *); 2769 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2770 mxge_vlan_tag_remove(m, &csum); 2771 } 2772 /* if the checksum is valid, mark it in 
the mbuf header */ 2773 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2774 (0 == mxge_rx_csum(m, csum))) { 2775 /* Tell the stack that the checksum is good */ 2776 m->m_pkthdr.csum_data = 0xffff; 2777 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2778 CSUM_DATA_VALID; 2779 2780#if defined(INET) || defined (INET6) 2781 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2782 return; 2783#endif 2784 } 2785 /* flowid only valid if RSS hashing is enabled */ 2786 if (sc->num_slices > 1) { 2787 m->m_pkthdr.flowid = (ss - sc->ss); 2788 m->m_flags |= M_FLOWID; 2789 } 2790 /* pass the frame up the stack */ 2791 (*ifp->if_input)(ifp, m); 2792} 2793 2794static inline void 2795mxge_clean_rx_done(struct mxge_slice_state *ss) 2796{ 2797 mxge_rx_done_t *rx_done = &ss->rx_done; 2798 int limit = 0; 2799 uint16_t length; 2800 uint16_t checksum; 2801 int lro; 2802 2803 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2804 while (rx_done->entry[rx_done->idx].length != 0) { 2805 length = ntohs(rx_done->entry[rx_done->idx].length); 2806 rx_done->entry[rx_done->idx].length = 0; 2807 checksum = rx_done->entry[rx_done->idx].checksum; 2808 if (length <= (MHLEN - MXGEFW_PAD)) 2809 mxge_rx_done_small(ss, length, checksum, lro); 2810 else 2811 mxge_rx_done_big(ss, length, checksum, lro); 2812 rx_done->cnt++; 2813 rx_done->idx = rx_done->cnt & rx_done->mask; 2814 2815 /* limit potential for livelock */ 2816 if (__predict_false(++limit > rx_done->mask / 2)) 2817 break; 2818 } 2819#if defined(INET) || defined (INET6) 2820 while (!SLIST_EMPTY(&ss->lc.lro_active)) { 2821 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active); 2822 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next); 2823 tcp_lro_flush(&ss->lc, lro); 2824 } 2825#endif 2826} 2827 2828 2829static inline void 2830mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2831{ 2832 struct ifnet *ifp; 2833 mxge_tx_ring_t *tx; 2834 struct mbuf *m; 2835 bus_dmamap_t map; 2836 int idx; 2837 int *flags; 2838 2839 tx = &ss->tx; 2840 ifp = ss->sc->ifp; 2841 while (tx->pkt_done != mcp_idx) { 2842 idx = tx->done & tx->mask; 2843 tx->done++; 2844 m = tx->info[idx].m; 2845 /* mbuf and DMA map only attached to the first 2846 segment per-mbuf */ 2847 if (m != NULL) { 2848 ss->obytes += m->m_pkthdr.len; 2849 if (m->m_flags & M_MCAST) 2850 ss->omcasts++; 2851 ss->opackets++; 2852 tx->info[idx].m = NULL; 2853 map = tx->info[idx].map; 2854 bus_dmamap_unload(tx->dmat, map); 2855 m_freem(m); 2856 } 2857 if (tx->info[idx].flag) { 2858 tx->info[idx].flag = 0; 2859 tx->pkt_done++; 2860 } 2861 } 2862 2863 /* If we have space, clear IFF_OACTIVE to tell the stack that 2864 its OK to send packets */ 2865#ifdef IFNET_BUF_RING 2866 flags = &ss->if_drv_flags; 2867#else 2868 flags = &ifp->if_drv_flags; 2869#endif 2870 mtx_lock(&ss->tx.mtx); 2871 if ((*flags) & IFF_DRV_OACTIVE && 2872 tx->req - tx->done < (tx->mask + 1)/4) { 2873 *(flags) &= ~IFF_DRV_OACTIVE; 2874 ss->tx.wake++; 2875 mxge_start_locked(ss); 2876 } 2877#ifdef IFNET_BUF_RING 2878 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2879 /* let the NIC stop polling this queue, since there 2880 * are no more transmits pending */ 2881 if (tx->req == tx->done) { 2882 *tx->send_stop = 1; 2883 tx->queue_active = 0; 2884 tx->deactivate++; 2885 wmb(); 2886 } 2887 } 2888#endif 2889 mtx_unlock(&ss->tx.mtx); 2890 2891} 2892 2893static struct mxge_media_type mxge_xfp_media_types[] = 2894{ 2895 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2896 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2897 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2898 {0, (1 
<< 5), "10GBASE-ER"}, 2899 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2900 {0, (1 << 3), "10GBASE-SW"}, 2901 {0, (1 << 2), "10GBASE-LW"}, 2902 {0, (1 << 1), "10GBASE-EW"}, 2903 {0, (1 << 0), "Reserved"} 2904}; 2905static struct mxge_media_type mxge_sfp_media_types[] = 2906{ 2907 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2908 {0, (1 << 7), "Reserved"}, 2909 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2910 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2911 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2912 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2913}; 2914 2915static void 2916mxge_media_set(mxge_softc_t *sc, int media_type) 2917{ 2918 2919 2920 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2921 0, NULL); 2922 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2923 sc->current_media = media_type; 2924 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2925} 2926 2927static void 2928mxge_media_init(mxge_softc_t *sc) 2929{ 2930 char *ptr; 2931 int i; 2932 2933 ifmedia_removeall(&sc->media); 2934 mxge_media_set(sc, IFM_AUTO); 2935 2936 /* 2937 * parse the product code to deterimine the interface type 2938 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2939 * after the 3rd dash in the driver's cached copy of the 2940 * EEPROM's product code string. 2941 */ 2942 ptr = sc->product_code_string; 2943 if (ptr == NULL) { 2944 device_printf(sc->dev, "Missing product code\n"); 2945 return; 2946 } 2947 2948 for (i = 0; i < 3; i++, ptr++) { 2949 ptr = index(ptr, '-'); 2950 if (ptr == NULL) { 2951 device_printf(sc->dev, 2952 "only %d dashes in PC?!?\n", i); 2953 return; 2954 } 2955 } 2956 if (*ptr == 'C' || *(ptr +1) == 'C') { 2957 /* -C is CX4 */ 2958 sc->connector = MXGE_CX4; 2959 mxge_media_set(sc, IFM_10G_CX4); 2960 } else if (*ptr == 'Q') { 2961 /* -Q is Quad Ribbon Fiber */ 2962 sc->connector = MXGE_QRF; 2963 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2964 /* FreeBSD has no media type for Quad ribbon fiber */ 2965 } else if (*ptr == 'R') { 2966 /* -R is XFP */ 2967 sc->connector = MXGE_XFP; 2968 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2969 /* -S or -2S is SFP+ */ 2970 sc->connector = MXGE_SFP; 2971 } else { 2972 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2973 } 2974} 2975 2976/* 2977 * Determine the media type for a NIC. Some XFPs will identify 2978 * themselves only when their link is up, so this is initiated via a 2979 * link up interrupt. However, this can potentially take up to 2980 * several milliseconds, so it is run via the watchdog routine, rather 2981 * than in the interrupt handler itself. 
2982 */ 2983static void 2984mxge_media_probe(mxge_softc_t *sc) 2985{ 2986 mxge_cmd_t cmd; 2987 char *cage_type; 2988 2989 struct mxge_media_type *mxge_media_types = NULL; 2990 int i, err, ms, mxge_media_type_entries; 2991 uint32_t byte; 2992 2993 sc->need_media_probe = 0; 2994 2995 if (sc->connector == MXGE_XFP) { 2996 /* -R is XFP */ 2997 mxge_media_types = mxge_xfp_media_types; 2998 mxge_media_type_entries = 2999 sizeof (mxge_xfp_media_types) / 3000 sizeof (mxge_xfp_media_types[0]); 3001 byte = MXGE_XFP_COMPLIANCE_BYTE; 3002 cage_type = "XFP"; 3003 } else if (sc->connector == MXGE_SFP) { 3004 /* -S or -2S is SFP+ */ 3005 mxge_media_types = mxge_sfp_media_types; 3006 mxge_media_type_entries = 3007 sizeof (mxge_sfp_media_types) / 3008 sizeof (mxge_sfp_media_types[0]); 3009 cage_type = "SFP+"; 3010 byte = 3; 3011 } else { 3012 /* nothing to do; media type cannot change */ 3013 return; 3014 } 3015 3016 /* 3017 * At this point we know the NIC has an XFP cage, so now we 3018 * try to determine what is in the cage by using the 3019 * firmware's XFP I2C commands to read the XFP 10GbE compilance 3020 * register. We read just one byte, which may take over 3021 * a millisecond 3022 */ 3023 3024 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3025 cmd.data1 = byte; 3026 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3027 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 3028 device_printf(sc->dev, "failed to read XFP\n"); 3029 } 3030 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 3031 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 3032 } 3033 if (err != MXGEFW_CMD_OK) { 3034 return; 3035 } 3036 3037 /* now we wait for the data to be cached */ 3038 cmd.data0 = byte; 3039 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3040 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3041 DELAY(1000); 3042 cmd.data0 = byte; 3043 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3044 } 3045 if (err != MXGEFW_CMD_OK) { 3046 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 3047 cage_type, err, ms); 3048 return; 3049 } 3050 3051 if (cmd.data0 == mxge_media_types[0].bitmask) { 3052 if (mxge_verbose) 3053 device_printf(sc->dev, "%s:%s\n", cage_type, 3054 mxge_media_types[0].name); 3055 if (sc->current_media != mxge_media_types[0].flag) { 3056 mxge_media_init(sc); 3057 mxge_media_set(sc, mxge_media_types[0].flag); 3058 } 3059 return; 3060 } 3061 for (i = 1; i < mxge_media_type_entries; i++) { 3062 if (cmd.data0 & mxge_media_types[i].bitmask) { 3063 if (mxge_verbose) 3064 device_printf(sc->dev, "%s:%s\n", 3065 cage_type, 3066 mxge_media_types[i].name); 3067 3068 if (sc->current_media != mxge_media_types[i].flag) { 3069 mxge_media_init(sc); 3070 mxge_media_set(sc, mxge_media_types[i].flag); 3071 } 3072 return; 3073 } 3074 } 3075 if (mxge_verbose) 3076 device_printf(sc->dev, "%s media 0x%x unknown\n", 3077 cage_type, cmd.data0); 3078 3079 return; 3080} 3081 3082static void 3083mxge_intr(void *arg) 3084{ 3085 struct mxge_slice_state *ss = arg; 3086 mxge_softc_t *sc = ss->sc; 3087 mcp_irq_data_t *stats = ss->fw_stats; 3088 mxge_tx_ring_t *tx = &ss->tx; 3089 mxge_rx_done_t *rx_done = &ss->rx_done; 3090 uint32_t send_done_count; 3091 uint8_t valid; 3092 3093 3094#ifndef IFNET_BUF_RING 3095 /* an interrupt on a non-zero slice is implicitly valid 3096 since MSI-X irqs are not shared */ 3097 if (ss != sc->ss) { 3098 mxge_clean_rx_done(ss); 3099 *ss->irq_claim = be32toh(3); 3100 return; 3101 } 3102#endif 3103 3104 /* make sure the DMA has finished */ 3105 if (!stats->valid) { 3106 return; 3107 } 3108 valid = 
stats->valid; 3109 3110 if (sc->legacy_irq) { 3111 /* lower legacy IRQ */ 3112 *sc->irq_deassert = 0; 3113 if (!mxge_deassert_wait) 3114 /* don't wait for conf. that irq is low */ 3115 stats->valid = 0; 3116 } else { 3117 stats->valid = 0; 3118 } 3119 3120 /* loop while waiting for legacy irq deassertion */ 3121 do { 3122 /* check for transmit completes and receives */ 3123 send_done_count = be32toh(stats->send_done_count); 3124 while ((send_done_count != tx->pkt_done) || 3125 (rx_done->entry[rx_done->idx].length != 0)) { 3126 if (send_done_count != tx->pkt_done) 3127 mxge_tx_done(ss, (int)send_done_count); 3128 mxge_clean_rx_done(ss); 3129 send_done_count = be32toh(stats->send_done_count); 3130 } 3131 if (sc->legacy_irq && mxge_deassert_wait) 3132 wmb(); 3133 } while (*((volatile uint8_t *) &stats->valid)); 3134 3135 /* fw link & error stats meaningful only on the first slice */ 3136 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3137 if (sc->link_state != stats->link_up) { 3138 sc->link_state = stats->link_up; 3139 if (sc->link_state) { 3140 if_link_state_change(sc->ifp, LINK_STATE_UP); 3141 sc->ifp->if_baudrate = IF_Gbps(10UL); 3142 if (mxge_verbose) 3143 device_printf(sc->dev, "link up\n"); 3144 } else { 3145 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3146 sc->ifp->if_baudrate = 0; 3147 if (mxge_verbose) 3148 device_printf(sc->dev, "link down\n"); 3149 } 3150 sc->need_media_probe = 1; 3151 } 3152 if (sc->rdma_tags_available != 3153 be32toh(stats->rdma_tags_available)) { 3154 sc->rdma_tags_available = 3155 be32toh(stats->rdma_tags_available); 3156 device_printf(sc->dev, "RDMA timed out! %d tags " 3157 "left\n", sc->rdma_tags_available); 3158 } 3159 3160 if (stats->link_down) { 3161 sc->down_cnt += stats->link_down; 3162 sc->link_state = 0; 3163 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3164 } 3165 } 3166 3167 /* check to see if we have rx token to pass back */ 3168 if (valid & 0x1) 3169 *ss->irq_claim = be32toh(3); 3170 *(ss->irq_claim + 1) = be32toh(3); 3171} 3172 3173static void 3174mxge_init(void *arg) 3175{ 3176 mxge_softc_t *sc = arg; 3177 struct ifnet *ifp = sc->ifp; 3178 3179 3180 mtx_lock(&sc->driver_mtx); 3181 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3182 (void) mxge_open(sc); 3183 mtx_unlock(&sc->driver_mtx); 3184} 3185 3186 3187 3188static void 3189mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3190{ 3191 int i; 3192 3193#if defined(INET) || defined(INET6) 3194 tcp_lro_free(&ss->lc); 3195#endif 3196 for (i = 0; i <= ss->rx_big.mask; i++) { 3197 if (ss->rx_big.info[i].m == NULL) 3198 continue; 3199 bus_dmamap_unload(ss->rx_big.dmat, 3200 ss->rx_big.info[i].map); 3201 m_freem(ss->rx_big.info[i].m); 3202 ss->rx_big.info[i].m = NULL; 3203 } 3204 3205 for (i = 0; i <= ss->rx_small.mask; i++) { 3206 if (ss->rx_small.info[i].m == NULL) 3207 continue; 3208 bus_dmamap_unload(ss->rx_small.dmat, 3209 ss->rx_small.info[i].map); 3210 m_freem(ss->rx_small.info[i].m); 3211 ss->rx_small.info[i].m = NULL; 3212 } 3213 3214 /* transmit ring used only on the first slice */ 3215 if (ss->tx.info == NULL) 3216 return; 3217 3218 for (i = 0; i <= ss->tx.mask; i++) { 3219 ss->tx.info[i].flag = 0; 3220 if (ss->tx.info[i].m == NULL) 3221 continue; 3222 bus_dmamap_unload(ss->tx.dmat, 3223 ss->tx.info[i].map); 3224 m_freem(ss->tx.info[i].m); 3225 ss->tx.info[i].m = NULL; 3226 } 3227} 3228 3229static void 3230mxge_free_mbufs(mxge_softc_t *sc) 3231{ 3232 int slice; 3233 3234 for (slice = 0; slice < sc->num_slices; slice++) 3235 mxge_free_slice_mbufs(&sc->ss[slice]); 
3236} 3237 3238static void 3239mxge_free_slice_rings(struct mxge_slice_state *ss) 3240{ 3241 int i; 3242 3243 3244 if (ss->rx_done.entry != NULL) 3245 mxge_dma_free(&ss->rx_done.dma); 3246 ss->rx_done.entry = NULL; 3247 3248 if (ss->tx.req_bytes != NULL) 3249 free(ss->tx.req_bytes, M_DEVBUF); 3250 ss->tx.req_bytes = NULL; 3251 3252 if (ss->tx.seg_list != NULL) 3253 free(ss->tx.seg_list, M_DEVBUF); 3254 ss->tx.seg_list = NULL; 3255 3256 if (ss->rx_small.shadow != NULL) 3257 free(ss->rx_small.shadow, M_DEVBUF); 3258 ss->rx_small.shadow = NULL; 3259 3260 if (ss->rx_big.shadow != NULL) 3261 free(ss->rx_big.shadow, M_DEVBUF); 3262 ss->rx_big.shadow = NULL; 3263 3264 if (ss->tx.info != NULL) { 3265 if (ss->tx.dmat != NULL) { 3266 for (i = 0; i <= ss->tx.mask; i++) { 3267 bus_dmamap_destroy(ss->tx.dmat, 3268 ss->tx.info[i].map); 3269 } 3270 bus_dma_tag_destroy(ss->tx.dmat); 3271 } 3272 free(ss->tx.info, M_DEVBUF); 3273 } 3274 ss->tx.info = NULL; 3275 3276 if (ss->rx_small.info != NULL) { 3277 if (ss->rx_small.dmat != NULL) { 3278 for (i = 0; i <= ss->rx_small.mask; i++) { 3279 bus_dmamap_destroy(ss->rx_small.dmat, 3280 ss->rx_small.info[i].map); 3281 } 3282 bus_dmamap_destroy(ss->rx_small.dmat, 3283 ss->rx_small.extra_map); 3284 bus_dma_tag_destroy(ss->rx_small.dmat); 3285 } 3286 free(ss->rx_small.info, M_DEVBUF); 3287 } 3288 ss->rx_small.info = NULL; 3289 3290 if (ss->rx_big.info != NULL) { 3291 if (ss->rx_big.dmat != NULL) { 3292 for (i = 0; i <= ss->rx_big.mask; i++) { 3293 bus_dmamap_destroy(ss->rx_big.dmat, 3294 ss->rx_big.info[i].map); 3295 } 3296 bus_dmamap_destroy(ss->rx_big.dmat, 3297 ss->rx_big.extra_map); 3298 bus_dma_tag_destroy(ss->rx_big.dmat); 3299 } 3300 free(ss->rx_big.info, M_DEVBUF); 3301 } 3302 ss->rx_big.info = NULL; 3303} 3304 3305static void 3306mxge_free_rings(mxge_softc_t *sc) 3307{ 3308 int slice; 3309 3310 for (slice = 0; slice < sc->num_slices; slice++) 3311 mxge_free_slice_rings(&sc->ss[slice]); 3312} 3313 3314static int 3315mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3316 int tx_ring_entries) 3317{ 3318 mxge_softc_t *sc = ss->sc; 3319 size_t bytes; 3320 int err, i; 3321 3322 /* allocate per-slice receive resources */ 3323 3324 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3325 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3326 3327 /* allocate the rx shadow rings */ 3328 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3329 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3330 3331 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3332 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3333 3334 /* allocate the rx host info rings */ 3335 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3336 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3337 3338 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3339 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3340 3341 /* allocate the rx busdma resources */ 3342 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3343 1, /* alignment */ 3344 4096, /* boundary */ 3345 BUS_SPACE_MAXADDR, /* low */ 3346 BUS_SPACE_MAXADDR, /* high */ 3347 NULL, NULL, /* filter */ 3348 MHLEN, /* maxsize */ 3349 1, /* num segs */ 3350 MHLEN, /* maxsegsize */ 3351 BUS_DMA_ALLOCNOW, /* flags */ 3352 NULL, NULL, /* lock */ 3353 &ss->rx_small.dmat); /* tag */ 3354 if (err != 0) { 3355 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3356 err); 3357 return err; 3358 } 3359 3360 err = bus_dma_tag_create(sc->parent_dmat, /* 
parent */ 3361 1, /* alignment */ 3362#if MXGE_VIRT_JUMBOS 3363 4096, /* boundary */ 3364#else 3365 0, /* boundary */ 3366#endif 3367 BUS_SPACE_MAXADDR, /* low */ 3368 BUS_SPACE_MAXADDR, /* high */ 3369 NULL, NULL, /* filter */ 3370 3*4096, /* maxsize */ 3371#if MXGE_VIRT_JUMBOS 3372 3, /* num segs */ 3373 4096, /* maxsegsize*/ 3374#else 3375 1, /* num segs */ 3376 MJUM9BYTES, /* maxsegsize*/ 3377#endif 3378 BUS_DMA_ALLOCNOW, /* flags */ 3379 NULL, NULL, /* lock */ 3380 &ss->rx_big.dmat); /* tag */ 3381 if (err != 0) { 3382 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3383 err); 3384 return err; 3385 } 3386 for (i = 0; i <= ss->rx_small.mask; i++) { 3387 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3388 &ss->rx_small.info[i].map); 3389 if (err != 0) { 3390 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3391 err); 3392 return err; 3393 } 3394 } 3395 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3396 &ss->rx_small.extra_map); 3397 if (err != 0) { 3398 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3399 err); 3400 return err; 3401 } 3402 3403 for (i = 0; i <= ss->rx_big.mask; i++) { 3404 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3405 &ss->rx_big.info[i].map); 3406 if (err != 0) { 3407 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3408 err); 3409 return err; 3410 } 3411 } 3412 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3413 &ss->rx_big.extra_map); 3414 if (err != 0) { 3415 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3416 err); 3417 return err; 3418 } 3419 3420 /* now allocate TX resouces */ 3421 3422#ifndef IFNET_BUF_RING 3423 /* only use a single TX ring for now */ 3424 if (ss != ss->sc->ss) 3425 return 0; 3426#endif 3427 3428 ss->tx.mask = tx_ring_entries - 1; 3429 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3430 3431 3432 /* allocate the tx request copy block */ 3433 bytes = 8 + 3434 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3435 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3436 /* ensure req_list entries are aligned to 8 bytes */ 3437 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3438 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3439 3440 /* allocate the tx busdma segment list */ 3441 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3442 ss->tx.seg_list = (bus_dma_segment_t *) 3443 malloc(bytes, M_DEVBUF, M_WAITOK); 3444 3445 /* allocate the tx host info ring */ 3446 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3447 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3448 3449 /* allocate the tx busdma resources */ 3450 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3451 1, /* alignment */ 3452 sc->tx_boundary, /* boundary */ 3453 BUS_SPACE_MAXADDR, /* low */ 3454 BUS_SPACE_MAXADDR, /* high */ 3455 NULL, NULL, /* filter */ 3456 65536 + 256, /* maxsize */ 3457 ss->tx.max_desc - 2, /* num segs */ 3458 sc->tx_boundary, /* maxsegsz */ 3459 BUS_DMA_ALLOCNOW, /* flags */ 3460 NULL, NULL, /* lock */ 3461 &ss->tx.dmat); /* tag */ 3462 3463 if (err != 0) { 3464 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3465 err); 3466 return err; 3467 } 3468 3469 /* now use these tags to setup dmamaps for each slot 3470 in the ring */ 3471 for (i = 0; i <= ss->tx.mask; i++) { 3472 err = bus_dmamap_create(ss->tx.dmat, 0, 3473 &ss->tx.info[i].map); 3474 if (err != 0) { 3475 device_printf(sc->dev, "Err %d tx dmamap\n", 3476 err); 3477 return err; 3478 } 3479 } 3480 return 0; 3481 3482} 3483 3484static int 3485mxge_alloc_rings(mxge_softc_t *sc) 3486{ 3487 mxge_cmd_t cmd; 3488 int tx_ring_size; 3489 int 
tx_ring_entries, rx_ring_entries; 3490 int err, slice; 3491 3492 /* get ring sizes */ 3493 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3494 tx_ring_size = cmd.data0; 3495 if (err != 0) { 3496 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3497 goto abort; 3498 } 3499 3500 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3501 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3502 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3503 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3504 IFQ_SET_READY(&sc->ifp->if_snd); 3505 3506 for (slice = 0; slice < sc->num_slices; slice++) { 3507 err = mxge_alloc_slice_rings(&sc->ss[slice], 3508 rx_ring_entries, 3509 tx_ring_entries); 3510 if (err != 0) 3511 goto abort; 3512 } 3513 return 0; 3514 3515abort: 3516 mxge_free_rings(sc); 3517 return err; 3518 3519} 3520 3521 3522static void 3523mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3524{ 3525 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3526 3527 if (bufsize < MCLBYTES) { 3528 /* easy, everything fits in a single buffer */ 3529 *big_buf_size = MCLBYTES; 3530 *cl_size = MCLBYTES; 3531 *nbufs = 1; 3532 return; 3533 } 3534 3535 if (bufsize < MJUMPAGESIZE) { 3536 /* still easy, everything still fits in a single buffer */ 3537 *big_buf_size = MJUMPAGESIZE; 3538 *cl_size = MJUMPAGESIZE; 3539 *nbufs = 1; 3540 return; 3541 } 3542#if MXGE_VIRT_JUMBOS 3543 /* now we need to use virtually contiguous buffers */ 3544 *cl_size = MJUM9BYTES; 3545 *big_buf_size = 4096; 3546 *nbufs = mtu / 4096 + 1; 3547 /* needs to be a power of two, so round up */ 3548 if (*nbufs == 3) 3549 *nbufs = 4; 3550#else 3551 *cl_size = MJUM9BYTES; 3552 *big_buf_size = MJUM9BYTES; 3553 *nbufs = 1; 3554#endif 3555} 3556 3557static int 3558mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3559{ 3560 mxge_softc_t *sc; 3561 mxge_cmd_t cmd; 3562 bus_dmamap_t map; 3563 int err, i, slice; 3564 3565 3566 sc = ss->sc; 3567 slice = ss - sc->ss; 3568 3569#if defined(INET) || defined(INET6) 3570 (void)tcp_lro_init(&ss->lc); 3571#endif 3572 ss->lc.ifp = sc->ifp; 3573 3574 /* get the lanai pointers to the send and receive rings */ 3575 3576 err = 0; 3577#ifndef IFNET_BUF_RING 3578 /* We currently only send from the first slice */ 3579 if (slice == 0) { 3580#endif 3581 cmd.data0 = slice; 3582 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3583 ss->tx.lanai = 3584 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3585 ss->tx.send_go = (volatile uint32_t *) 3586 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3587 ss->tx.send_stop = (volatile uint32_t *) 3588 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3589#ifndef IFNET_BUF_RING 3590 } 3591#endif 3592 cmd.data0 = slice; 3593 err |= mxge_send_cmd(sc, 3594 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3595 ss->rx_small.lanai = 3596 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3597 cmd.data0 = slice; 3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3599 ss->rx_big.lanai = 3600 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3601 3602 if (err != 0) { 3603 device_printf(sc->dev, 3604 "failed to get ring sizes or locations\n"); 3605 return EIO; 3606 } 3607 3608 /* stock receive rings */ 3609 for (i = 0; i <= ss->rx_small.mask; i++) { 3610 map = ss->rx_small.info[i].map; 3611 err = mxge_get_buf_small(ss, map, i); 3612 if (err) { 3613 device_printf(sc->dev, "alloced %d/%d smalls\n", 3614 i, ss->rx_small.mask + 1); 
3615 return ENOMEM; 3616 } 3617 } 3618 for (i = 0; i <= ss->rx_big.mask; i++) { 3619 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3620 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3621 } 3622 ss->rx_big.nbufs = nbufs; 3623 ss->rx_big.cl_size = cl_size; 3624 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3625 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3626 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3627 map = ss->rx_big.info[i].map; 3628 err = mxge_get_buf_big(ss, map, i); 3629 if (err) { 3630 device_printf(sc->dev, "alloced %d/%d bigs\n", 3631 i, ss->rx_big.mask + 1); 3632 return ENOMEM; 3633 } 3634 } 3635 return 0; 3636} 3637 3638static int 3639mxge_open(mxge_softc_t *sc) 3640{ 3641 mxge_cmd_t cmd; 3642 int err, big_bytes, nbufs, slice, cl_size, i; 3643 bus_addr_t bus; 3644 volatile uint8_t *itable; 3645 struct mxge_slice_state *ss; 3646 3647 /* Copy the MAC address in case it was overridden */ 3648 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3649 3650 err = mxge_reset(sc, 1); 3651 if (err != 0) { 3652 device_printf(sc->dev, "failed to reset\n"); 3653 return EIO; 3654 } 3655 3656 if (sc->num_slices > 1) { 3657 /* setup the indirection table */ 3658 cmd.data0 = sc->num_slices; 3659 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3660 &cmd); 3661 3662 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3663 &cmd); 3664 if (err != 0) { 3665 device_printf(sc->dev, 3666 "failed to setup rss tables\n"); 3667 return err; 3668 } 3669 3670 /* just enable an identity mapping */ 3671 itable = sc->sram + cmd.data0; 3672 for (i = 0; i < sc->num_slices; i++) 3673 itable[i] = (uint8_t)i; 3674 3675 cmd.data0 = 1; 3676 cmd.data1 = mxge_rss_hash_type; 3677 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3678 if (err != 0) { 3679 device_printf(sc->dev, "failed to enable slices\n"); 3680 return err; 3681 } 3682 } 3683 3684 3685 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3686 3687 cmd.data0 = nbufs; 3688 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3689 &cmd); 3690 /* error is only meaningful if we're trying to set 3691 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3692 if (err && nbufs > 1) { 3693 device_printf(sc->dev, 3694 "Failed to set alway-use-n to %d\n", 3695 nbufs); 3696 return EIO; 3697 } 3698 /* Give the firmware the mtu and the big and small buffer 3699 sizes. The firmware wants the big buf size to be a power 3700 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3701 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3702 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3703 cmd.data0 = MHLEN - MXGEFW_PAD; 3704 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3705 &cmd); 3706 cmd.data0 = big_bytes; 3707 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3708 3709 if (err != 0) { 3710 device_printf(sc->dev, "failed to setup params\n"); 3711 goto abort; 3712 } 3713 3714 /* Now give him the pointer to the stats block */ 3715 for (slice = 0; 3716#ifdef IFNET_BUF_RING 3717 slice < sc->num_slices; 3718#else 3719 slice < 1; 3720#endif 3721 slice++) { 3722 ss = &sc->ss[slice]; 3723 cmd.data0 = 3724 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3725 cmd.data1 = 3726 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3727 cmd.data2 = sizeof(struct mcp_irq_data); 3728 cmd.data2 |= (slice << 16); 3729 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3730 } 3731 3732 if (err != 0) { 3733 bus = sc->ss->fw_stats_dma.bus_addr; 3734 bus += offsetof(struct mcp_irq_data, send_done_count); 3735 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3736 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3737 err = mxge_send_cmd(sc, 3738 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3739 &cmd); 3740 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3741 sc->fw_multicast_support = 0; 3742 } else { 3743 sc->fw_multicast_support = 1; 3744 } 3745 3746 if (err != 0) { 3747 device_printf(sc->dev, "failed to setup params\n"); 3748 goto abort; 3749 } 3750 3751 for (slice = 0; slice < sc->num_slices; slice++) { 3752 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3753 if (err != 0) { 3754 device_printf(sc->dev, "couldn't open slice %d\n", 3755 slice); 3756 goto abort; 3757 } 3758 } 3759 3760 /* Finally, start the firmware running */ 3761 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3762 if (err) { 3763 device_printf(sc->dev, "Couldn't bring up link\n"); 3764 goto abort; 3765 } 3766#ifdef IFNET_BUF_RING 3767 for (slice = 0; slice < sc->num_slices; slice++) { 3768 ss = &sc->ss[slice]; 3769 ss->if_drv_flags |= IFF_DRV_RUNNING; 3770 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3771 } 3772#endif 3773 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3774 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3775 3776 return 0; 3777 3778 3779abort: 3780 mxge_free_mbufs(sc); 3781 3782 return err; 3783} 3784 3785static int 3786mxge_close(mxge_softc_t *sc, int down) 3787{ 3788 mxge_cmd_t cmd; 3789 int err, old_down_cnt; 3790#ifdef IFNET_BUF_RING 3791 struct mxge_slice_state *ss; 3792 int slice; 3793#endif 3794 3795#ifdef IFNET_BUF_RING 3796 for (slice = 0; slice < sc->num_slices; slice++) { 3797 ss = &sc->ss[slice]; 3798 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3799 } 3800#endif 3801 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3802 if (!down) { 3803 old_down_cnt = sc->down_cnt; 3804 wmb(); 3805 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3806 if (err) { 3807 device_printf(sc->dev, 3808 "Couldn't bring down link\n"); 3809 } 3810 if (old_down_cnt == sc->down_cnt) { 3811 /* wait for down irq */ 3812 DELAY(10 * sc->intr_coal_delay); 3813 } 3814 wmb(); 3815 if (old_down_cnt == sc->down_cnt) { 3816 device_printf(sc->dev, "never got down irq\n"); 3817 } 3818 } 3819 mxge_free_mbufs(sc); 3820 3821 return 0; 3822} 3823 3824static void 3825mxge_setup_cfg_space(mxge_softc_t *sc) 3826{ 3827 device_t dev = sc->dev; 3828 int reg; 3829 uint16_t lnk, pectl; 3830 3831 /* find the PCIe link width and set max read request to 4KB*/ 
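	/*
	 * For reference (standard PCIe capability layout): reg + 0x12 is
	 * the Link Status register, whose bits 9:4 hold the negotiated
	 * link width, hence the (lnk >> 4) & 0x3f below; reg + 0x8 is the
	 * Device Control register, whose bits 14:12 encode the max read
	 * request size, with the value 5 written below selecting 4KB.
	 */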
3832 if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) { 3833 lnk = pci_read_config(dev, reg + 0x12, 2); 3834 sc->link_width = (lnk >> 4) & 0x3f; 3835 3836 if (sc->pectl == 0) { 3837 pectl = pci_read_config(dev, reg + 0x8, 2); 3838 pectl = (pectl & ~0x7000) | (5 << 12); 3839 pci_write_config(dev, reg + 0x8, pectl, 2); 3840 sc->pectl = pectl; 3841 } else { 3842 /* restore saved pectl after watchdog reset */ 3843 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3844 } 3845 } 3846 3847 /* Enable DMA and Memory space access */ 3848 pci_enable_busmaster(dev); 3849} 3850 3851static uint32_t 3852mxge_read_reboot(mxge_softc_t *sc) 3853{ 3854 device_t dev = sc->dev; 3855 uint32_t vs; 3856 3857 /* find the vendor specific offset */ 3858 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3859 device_printf(sc->dev, 3860 "could not find vendor specific offset\n"); 3861 return (uint32_t)-1; 3862 } 3863 /* enable read32 mode */ 3864 pci_write_config(dev, vs + 0x10, 0x3, 1); 3865 /* tell NIC which register to read */ 3866 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3867 return (pci_read_config(dev, vs + 0x14, 4)); 3868} 3869 3870static void 3871mxge_watchdog_reset(mxge_softc_t *sc) 3872{ 3873 struct pci_devinfo *dinfo; 3874 struct mxge_slice_state *ss; 3875 int err, running, s, num_tx_slices = 1; 3876 uint32_t reboot; 3877 uint16_t cmd; 3878 3879 err = ENXIO; 3880 3881 device_printf(sc->dev, "Watchdog reset!\n"); 3882 3883 /* 3884 * check to see if the NIC rebooted. If it did, then all of 3885 * PCI config space has been reset, and things like the 3886 * busmaster bit will be zero. If this is the case, then we 3887 * must restore PCI config space before the NIC can be used 3888 * again 3889 */ 3890 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3891 if (cmd == 0xffff) { 3892 /* 3893 * maybe the watchdog caught the NIC rebooting; wait 3894 * up to 100ms for it to finish.
If it does not come 3895 * back, then give up 3896 */ 3897 DELAY(1000*100); 3898 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3899 if (cmd == 0xffff) { 3900 device_printf(sc->dev, "NIC disappeared!\n"); 3901 } 3902 } 3903 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3904 /* print the reboot status */ 3905 reboot = mxge_read_reboot(sc); 3906 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3907 reboot); 3908 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3909 if (running) { 3910 3911 /* 3912 * quiesce NIC so that TX routines will not try to 3913 * xmit after restoration of BAR 3914 */ 3915 3916 /* Mark the link as down */ 3917 if (sc->link_state) { 3918 sc->link_state = 0; 3919 if_link_state_change(sc->ifp, 3920 LINK_STATE_DOWN); 3921 } 3922#ifdef IFNET_BUF_RING 3923 num_tx_slices = sc->num_slices; 3924#endif 3925 /* grab all TX locks to ensure no tx */ 3926 for (s = 0; s < num_tx_slices; s++) { 3927 ss = &sc->ss[s]; 3928 mtx_lock(&ss->tx.mtx); 3929 } 3930 mxge_close(sc, 1); 3931 } 3932 /* restore PCI configuration space */ 3933 dinfo = device_get_ivars(sc->dev); 3934 pci_cfg_restore(sc->dev, dinfo); 3935 3936 /* and redo any changes we made to our config space */ 3937 mxge_setup_cfg_space(sc); 3938 3939 /* reload f/w */ 3940 err = mxge_load_firmware(sc, 0); 3941 if (err) { 3942 device_printf(sc->dev, 3943 "Unable to re-load f/w\n"); 3944 } 3945 if (running) { 3946 if (!err) 3947 err = mxge_open(sc); 3948 /* release all TX locks */ 3949 for (s = 0; s < num_tx_slices; s++) { 3950 ss = &sc->ss[s]; 3951#ifdef IFNET_BUF_RING 3952 mxge_start_locked(ss); 3953#endif 3954 mtx_unlock(&ss->tx.mtx); 3955 } 3956 } 3957 sc->watchdog_resets++; 3958 } else { 3959 device_printf(sc->dev, 3960 "NIC did not reboot, not resetting\n"); 3961 err = 0; 3962 } 3963 if (err) { 3964 device_printf(sc->dev, "watchdog reset failed\n"); 3965 } else { 3966 if (sc->dying == 2) 3967 sc->dying = 0; 3968 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3969 } 3970} 3971 3972static void 3973mxge_watchdog_task(void *arg, int pending) 3974{ 3975 mxge_softc_t *sc = arg; 3976 3977 3978 mtx_lock(&sc->driver_mtx); 3979 mxge_watchdog_reset(sc); 3980 mtx_unlock(&sc->driver_mtx); 3981} 3982 3983static void 3984mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3985{ 3986 tx = &sc->ss[slice].tx; 3987 device_printf(sc->dev, "slice %d struck? 
ring state:\n", slice); 3988 device_printf(sc->dev, 3989 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3990 tx->req, tx->done, tx->queue_active); 3991 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3992 tx->activate, tx->deactivate); 3993 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3994 tx->pkt_done, 3995 be32toh(sc->ss->fw_stats->send_done_count)); 3996} 3997 3998static int 3999mxge_watchdog(mxge_softc_t *sc) 4000{ 4001 mxge_tx_ring_t *tx; 4002 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 4003 int i, err = 0; 4004 4005 /* see if we have outstanding transmits, which 4006 have been pending for more than mxge_ticks */ 4007 for (i = 0; 4008#ifdef IFNET_BUF_RING 4009 (i < sc->num_slices) && (err == 0); 4010#else 4011 (i < 1) && (err == 0); 4012#endif 4013 i++) { 4014 tx = &sc->ss[i].tx; 4015 if (tx->req != tx->done && 4016 tx->watchdog_req != tx->watchdog_done && 4017 tx->done == tx->watchdog_done) { 4018 /* check for pause blocking before resetting */ 4019 if (tx->watchdog_rx_pause == rx_pause) { 4020 mxge_warn_stuck(sc, tx, i); 4021 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4022 return (ENXIO); 4023 } 4024 else 4025 device_printf(sc->dev, "Flow control blocking " 4026 "xmits, check link partner\n"); 4027 } 4028 4029 tx->watchdog_req = tx->req; 4030 tx->watchdog_done = tx->done; 4031 tx->watchdog_rx_pause = rx_pause; 4032 } 4033 4034 if (sc->need_media_probe) 4035 mxge_media_probe(sc); 4036 return (err); 4037} 4038 4039static u_long 4040mxge_update_stats(mxge_softc_t *sc) 4041{ 4042 struct mxge_slice_state *ss; 4043 u_long pkts = 0; 4044 u_long ipackets = 0; 4045 u_long opackets = 0; 4046#ifdef IFNET_BUF_RING 4047 u_long obytes = 0; 4048 u_long omcasts = 0; 4049 u_long odrops = 0; 4050#endif 4051 u_long oerrors = 0; 4052 int slice; 4053 4054 for (slice = 0; slice < sc->num_slices; slice++) { 4055 ss = &sc->ss[slice]; 4056 ipackets += ss->ipackets; 4057 opackets += ss->opackets; 4058#ifdef IFNET_BUF_RING 4059 obytes += ss->obytes; 4060 omcasts += ss->omcasts; 4061 odrops += ss->tx.br->br_drops; 4062#endif 4063 oerrors += ss->oerrors; 4064 } 4065 pkts = (ipackets - sc->ifp->if_ipackets); 4066 pkts += (opackets - sc->ifp->if_opackets); 4067 sc->ifp->if_ipackets = ipackets; 4068 sc->ifp->if_opackets = opackets; 4069#ifdef IFNET_BUF_RING 4070 sc->ifp->if_obytes = obytes; 4071 sc->ifp->if_omcasts = omcasts; 4072 sc->ifp->if_snd.ifq_drops = odrops; 4073#endif 4074 sc->ifp->if_oerrors = oerrors; 4075 return pkts; 4076} 4077 4078static void 4079mxge_tick(void *arg) 4080{ 4081 mxge_softc_t *sc = arg; 4082 u_long pkts = 0; 4083 int err = 0; 4084 int running, ticks; 4085 uint16_t cmd; 4086 4087 ticks = mxge_ticks; 4088 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4089 if (running) { 4090 /* aggregate stats from different slices */ 4091 pkts = mxge_update_stats(sc); 4092 if (!sc->watchdog_countdown) { 4093 err = mxge_watchdog(sc); 4094 sc->watchdog_countdown = 4; 4095 } 4096 sc->watchdog_countdown--; 4097 } 4098 if (pkts == 0) { 4099 /* ensure NIC did not suffer h/w fault while idle */ 4100 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4101 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4102 sc->dying = 2; 4103 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4104 err = ENXIO; 4105 } 4106 /* look less often if NIC is idle */ 4107 ticks *= 4; 4108 } 4109 4110 if (err == 0) 4111 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4112 4113} 4114 4115static int 4116mxge_media_change(struct ifnet *ifp) 4117{ 4118 return EINVAL; 4119} 4120 4121static int 
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;

	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;

	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active |= sc->current_media;
}

static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
#if IFCAP_TSO6
		if (mask & IFCAP_TXCSUM_IPV6) {
			if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) {
				ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6
						       | IFCAP_TSO6);
				ifp->if_hwassist &= ~(CSUM_TCP_IPV6
						      | CSUM_UDP);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM_IPV6;
				ifp->if_hwassist |= (CSUM_TCP_IPV6
						     | CSUM_UDP_IPV6);
			}
		} else if (mask & IFCAP_RXCSUM_IPV6) {
			if (IFCAP_RXCSUM_IPV6 &
ifp->if_capenable) { 4255 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4256 } else { 4257 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4258 } 4259 } 4260 if (mask & IFCAP_TSO6) { 4261 if (IFCAP_TSO6 & ifp->if_capenable) { 4262 ifp->if_capenable &= ~IFCAP_TSO6; 4263 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4264 ifp->if_capenable |= IFCAP_TSO6; 4265 ifp->if_hwassist |= CSUM_TSO; 4266 } else { 4267 printf("mxge requires tx checksum offload" 4268 " be enabled to use TSO\n"); 4269 err = EINVAL; 4270 } 4271 } 4272#endif /*IFCAP_TSO6 */ 4273 4274 if (mask & IFCAP_LRO) 4275 ifp->if_capenable ^= IFCAP_LRO; 4276 if (mask & IFCAP_VLAN_HWTAGGING) 4277 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4278 if (mask & IFCAP_VLAN_HWTSO) 4279 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4280 4281 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4282 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4283 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4284 4285 mtx_unlock(&sc->driver_mtx); 4286 VLAN_CAPABILITIES(ifp); 4287 4288 break; 4289 4290 case SIOCGIFMEDIA: 4291 mtx_lock(&sc->driver_mtx); 4292 mxge_media_probe(sc); 4293 mtx_unlock(&sc->driver_mtx); 4294 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4295 &sc->media, command); 4296 break; 4297 4298 default: 4299 err = ENOTTY; 4300 } 4301 return err; 4302} 4303 4304static void 4305mxge_fetch_tunables(mxge_softc_t *sc) 4306{ 4307 4308 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4309 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4310 &mxge_flow_control); 4311 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4312 &mxge_intr_coal_delay); 4313 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4314 &mxge_nvidia_ecrc_enable); 4315 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4316 &mxge_force_firmware); 4317 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4318 &mxge_deassert_wait); 4319 TUNABLE_INT_FETCH("hw.mxge.verbose", 4320 &mxge_verbose); 4321 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4322 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4323 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4324 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4325 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4326 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4327 4328 if (bootverbose) 4329 mxge_verbose = 1; 4330 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4331 mxge_intr_coal_delay = 30; 4332 if (mxge_ticks == 0) 4333 mxge_ticks = hz / 2; 4334 sc->pause = mxge_flow_control; 4335 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4336 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4337 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4338 } 4339 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4340 mxge_initial_mtu < ETHER_MIN_LEN) 4341 mxge_initial_mtu = ETHERMTU_JUMBO; 4342 4343 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4344 mxge_throttle = MXGE_MAX_THROTTLE; 4345 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4346 mxge_throttle = MXGE_MIN_THROTTLE; 4347 sc->throttle = mxge_throttle; 4348} 4349 4350 4351static void 4352mxge_free_slices(mxge_softc_t *sc) 4353{ 4354 struct mxge_slice_state *ss; 4355 int i; 4356 4357 4358 if (sc->ss == NULL) 4359 return; 4360 4361 for (i = 0; i < sc->num_slices; i++) { 4362 ss = &sc->ss[i]; 4363 if (ss->fw_stats != NULL) { 4364 mxge_dma_free(&ss->fw_stats_dma); 4365 ss->fw_stats = NULL; 4366#ifdef IFNET_BUF_RING 4367 if (ss->tx.br != NULL) { 4368 drbr_free(ss->tx.br, M_DEVBUF); 4369 ss->tx.br = NULL; 4370 } 4371#endif 4372 
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are disabled by the
	 * mxge_max_slices tunable, or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware and see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask for the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d, "
			      "err = %d\n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      sc->num_slices, count);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++)
{ 4622 if (sc->msix_ih[i] != NULL) { 4623 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4624 sc->msix_ih[i]); 4625 sc->msix_ih[i] = NULL; 4626 } 4627 } 4628 free(sc->msix_ih, M_DEVBUF); 4629 4630 4631abort_with_res: 4632 for (i = 0; i < sc->num_slices; i++) { 4633 rid = i + 1; 4634 if (sc->msix_irq_res[i] != NULL) 4635 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4636 sc->msix_irq_res[i]); 4637 sc->msix_irq_res[i] = NULL; 4638 } 4639 free(sc->msix_irq_res, M_DEVBUF); 4640 4641 4642abort_with_msix: 4643 pci_release_msi(sc->dev); 4644 4645abort_with_msix_table: 4646 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4647 sc->msix_table_res); 4648 4649 return err; 4650} 4651 4652static int 4653mxge_add_single_irq(mxge_softc_t *sc) 4654{ 4655 int count, err, rid; 4656 4657 count = pci_msi_count(sc->dev); 4658 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4659 rid = 1; 4660 } else { 4661 rid = 0; 4662 sc->legacy_irq = 1; 4663 } 4664 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4665 1, RF_SHAREABLE | RF_ACTIVE); 4666 if (sc->irq_res == NULL) { 4667 device_printf(sc->dev, "could not alloc interrupt\n"); 4668 return ENXIO; 4669 } 4670 if (mxge_verbose) 4671 device_printf(sc->dev, "using %s irq %ld\n", 4672 sc->legacy_irq ? "INTx" : "MSI", 4673 rman_get_start(sc->irq_res)); 4674 err = bus_setup_intr(sc->dev, sc->irq_res, 4675 INTR_TYPE_NET | INTR_MPSAFE, 4676#if __FreeBSD_version > 700030 4677 NULL, 4678#endif 4679 mxge_intr, &sc->ss[0], &sc->ih); 4680 if (err != 0) { 4681 bus_release_resource(sc->dev, SYS_RES_IRQ, 4682 sc->legacy_irq ? 0 : 1, sc->irq_res); 4683 if (!sc->legacy_irq) 4684 pci_release_msi(sc->dev); 4685 } 4686 return err; 4687} 4688 4689static void 4690mxge_rem_msix_irqs(mxge_softc_t *sc) 4691{ 4692 int i, rid; 4693 4694 for (i = 0; i < sc->num_slices; i++) { 4695 if (sc->msix_ih[i] != NULL) { 4696 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4697 sc->msix_ih[i]); 4698 sc->msix_ih[i] = NULL; 4699 } 4700 } 4701 free(sc->msix_ih, M_DEVBUF); 4702 4703 for (i = 0; i < sc->num_slices; i++) { 4704 rid = i + 1; 4705 if (sc->msix_irq_res[i] != NULL) 4706 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4707 sc->msix_irq_res[i]); 4708 sc->msix_irq_res[i] = NULL; 4709 } 4710 free(sc->msix_irq_res, M_DEVBUF); 4711 4712 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4713 sc->msix_table_res); 4714 4715 pci_release_msi(sc->dev); 4716 return; 4717} 4718 4719static void 4720mxge_rem_single_irq(mxge_softc_t *sc) 4721{ 4722 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4723 bus_release_resource(sc->dev, SYS_RES_IRQ, 4724 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4725 if (!sc->legacy_irq) 4726 pci_release_msi(sc->dev); 4727} 4728 4729static void 4730mxge_rem_irq(mxge_softc_t *sc) 4731{ 4732 if (sc->num_slices > 1) 4733 mxge_rem_msix_irqs(sc); 4734 else 4735 mxge_rem_single_irq(sc); 4736} 4737 4738static int 4739mxge_add_irq(mxge_softc_t *sc) 4740{ 4741 int err; 4742 4743 if (sc->num_slices > 1) 4744 err = mxge_add_msix_irqs(sc); 4745 else 4746 err = mxge_add_single_irq(sc); 4747 4748 if (0 && err == 0 && sc->num_slices > 1) { 4749 mxge_rem_msix_irqs(sc); 4750 err = mxge_add_msix_irqs(sc); 4751 } 4752 return err; 4753} 4754 4755 4756static int 4757mxge_attach(device_t dev) 4758{ 4759 mxge_cmd_t cmd; 4760 mxge_softc_t *sc = device_get_softc(dev); 4761 struct ifnet *ifp; 4762 int err, rid; 4763 4764 sc->dev = dev; 4765 mxge_fetch_tunables(sc); 4766 4767 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4768 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4769 taskqueue_thread_enqueue, &sc->tq); 4770 if (sc->tq == NULL) { 4771 err = ENOMEM; 4772 goto abort_with_nothing; 4773 } 4774 4775 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4776 1, /* alignment */ 4777 0, /* boundary */ 4778 BUS_SPACE_MAXADDR, /* low */ 4779 BUS_SPACE_MAXADDR, /* high */ 4780 NULL, NULL, /* filter */ 4781 65536 + 256, /* maxsize */ 4782 MXGE_MAX_SEND_DESC, /* num segs */ 4783 65536, /* maxsegsize */ 4784 0, /* flags */ 4785 NULL, NULL, /* lock */ 4786 &sc->parent_dmat); /* tag */ 4787 4788 if (err != 0) { 4789 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4790 err); 4791 goto abort_with_tq; 4792 } 4793 4794 ifp = sc->ifp = if_alloc(IFT_ETHER); 4795 if (ifp == NULL) { 4796 device_printf(dev, "can not if_alloc()\n"); 4797 err = ENOSPC; 4798 goto abort_with_parent_dmat; 4799 } 4800 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4801 4802 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4803 device_get_nameunit(dev)); 4804 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4805 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4806 "%s:drv", device_get_nameunit(dev)); 4807 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4808 MTX_NETWORK_LOCK, MTX_DEF); 4809 4810 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4811 4812 mxge_setup_cfg_space(sc); 4813 4814 /* Map the board into the kernel */ 4815 rid = PCIR_BARS; 4816 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4817 ~0, 1, RF_ACTIVE); 4818 if (sc->mem_res == NULL) { 4819 device_printf(dev, "could not map memory\n"); 4820 err = ENXIO; 4821 goto abort_with_lock; 4822 } 4823 sc->sram = rman_get_virtual(sc->mem_res); 4824 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4825 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4826 device_printf(dev, "impossible memory region size %ld\n", 4827 rman_get_size(sc->mem_res)); 4828 err = ENXIO; 4829 goto abort_with_mem_res; 4830 } 4831 4832 /* make NULL terminated copy of the EEPROM strings section of 4833 lanai SRAM */ 4834 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4835 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4836 rman_get_bushandle(sc->mem_res), 4837 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4838 sc->eeprom_strings, 4839 MXGE_EEPROM_STRINGS_SIZE - 2); 4840 err = mxge_parse_strings(sc); 4841 if (err != 0) 4842 goto abort_with_mem_res; 4843 4844 /* Enable write combining for efficient use of PCIe bus */ 4845 mxge_enable_wc(sc); 4846 4847 /* Allocate the out of band dma memory */ 4848 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4849 sizeof 
(mxge_cmd_t), 64); 4850 if (err != 0) 4851 goto abort_with_mem_res; 4852 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4853 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4854 if (err != 0) 4855 goto abort_with_cmd_dma; 4856 4857 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4858 if (err != 0) 4859 goto abort_with_zeropad_dma; 4860 4861 /* select & load the firmware */ 4862 err = mxge_select_firmware(sc); 4863 if (err != 0) 4864 goto abort_with_dmabench; 4865 sc->intr_coal_delay = mxge_intr_coal_delay; 4866 4867 mxge_slice_probe(sc); 4868 err = mxge_alloc_slices(sc); 4869 if (err != 0) 4870 goto abort_with_dmabench; 4871 4872 err = mxge_reset(sc, 0); 4873 if (err != 0) 4874 goto abort_with_slices; 4875 4876 err = mxge_alloc_rings(sc); 4877 if (err != 0) { 4878 device_printf(sc->dev, "failed to allocate rings\n"); 4879 goto abort_with_slices; 4880 } 4881 4882 err = mxge_add_irq(sc); 4883 if (err != 0) { 4884 device_printf(sc->dev, "failed to add irq\n"); 4885 goto abort_with_rings; 4886 } 4887 4888 ifp->if_baudrate = IF_Gbps(10UL); 4889 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4890 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4891 IFCAP_RXCSUM_IPV6; 4892#if defined(INET) || defined(INET6) 4893 ifp->if_capabilities |= IFCAP_LRO; 4894#endif 4895 4896#ifdef MXGE_NEW_VLAN_API 4897 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4898 4899 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4900 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4901 sc->fw_ver_tiny >= 32) 4902 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4903#endif 4904 sc->max_mtu = mxge_max_mtu(sc); 4905 if (sc->max_mtu >= 9000) 4906 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4907 else 4908 device_printf(dev, "MTU limited to %d. 
Install " 4909 "latest firmware for 9000 byte jumbo support\n", 4910 sc->max_mtu - ETHER_HDR_LEN); 4911 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4912 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4913 /* check to see if f/w supports TSO for IPv6 */ 4914 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4915 if (CSUM_TCP_IPV6) 4916 ifp->if_capabilities |= IFCAP_TSO6; 4917 sc->max_tso6_hlen = min(cmd.data0, 4918 sizeof (sc->ss[0].scratch)); 4919 } 4920 ifp->if_capenable = ifp->if_capabilities; 4921 if (sc->lro_cnt == 0) 4922 ifp->if_capenable &= ~IFCAP_LRO; 4923 ifp->if_init = mxge_init; 4924 ifp->if_softc = sc; 4925 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4926 ifp->if_ioctl = mxge_ioctl; 4927 ifp->if_start = mxge_start; 4928 /* Initialise the ifmedia structure */ 4929 ifmedia_init(&sc->media, 0, mxge_media_change, 4930 mxge_media_status); 4931 mxge_media_init(sc); 4932 mxge_media_probe(sc); 4933 sc->dying = 0; 4934 ether_ifattach(ifp, sc->mac_addr); 4935 /* ether_ifattach sets mtu to ETHERMTU */ 4936 if (mxge_initial_mtu != ETHERMTU) 4937 mxge_change_mtu(sc, mxge_initial_mtu); 4938 4939 mxge_add_sysctls(sc); 4940#ifdef IFNET_BUF_RING 4941 ifp->if_transmit = mxge_transmit; 4942 ifp->if_qflush = mxge_qflush; 4943#endif 4944 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4945 device_get_nameunit(sc->dev)); 4946 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4947 return 0; 4948 4949abort_with_rings: 4950 mxge_free_rings(sc); 4951abort_with_slices: 4952 mxge_free_slices(sc); 4953abort_with_dmabench: 4954 mxge_dma_free(&sc->dmabench_dma); 4955abort_with_zeropad_dma: 4956 mxge_dma_free(&sc->zeropad_dma); 4957abort_with_cmd_dma: 4958 mxge_dma_free(&sc->cmd_dma); 4959abort_with_mem_res: 4960 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4961abort_with_lock: 4962 pci_disable_busmaster(dev); 4963 mtx_destroy(&sc->cmd_mtx); 4964 mtx_destroy(&sc->driver_mtx); 4965 if_free(ifp); 4966abort_with_parent_dmat: 4967 bus_dma_tag_destroy(sc->parent_dmat); 4968abort_with_tq: 4969 if (sc->tq != NULL) { 4970 taskqueue_drain(sc->tq, &sc->watchdog_task); 4971 taskqueue_free(sc->tq); 4972 sc->tq = NULL; 4973 } 4974abort_with_nothing: 4975 return err; 4976} 4977 4978static int 4979mxge_detach(device_t dev) 4980{ 4981 mxge_softc_t *sc = device_get_softc(dev); 4982 4983 if (mxge_vlans_active(sc)) { 4984 device_printf(sc->dev, 4985 "Detach vlans before removing module\n"); 4986 return EBUSY; 4987 } 4988 mtx_lock(&sc->driver_mtx); 4989 sc->dying = 1; 4990 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4991 mxge_close(sc, 0); 4992 mtx_unlock(&sc->driver_mtx); 4993 ether_ifdetach(sc->ifp); 4994 if (sc->tq != NULL) { 4995 taskqueue_drain(sc->tq, &sc->watchdog_task); 4996 taskqueue_free(sc->tq); 4997 sc->tq = NULL; 4998 } 4999 callout_drain(&sc->co_hdl); 5000 ifmedia_removeall(&sc->media); 5001 mxge_dummy_rdma(sc, 0); 5002 mxge_rem_sysctls(sc); 5003 mxge_rem_irq(sc); 5004 mxge_free_rings(sc); 5005 mxge_free_slices(sc); 5006 mxge_dma_free(&sc->dmabench_dma); 5007 mxge_dma_free(&sc->zeropad_dma); 5008 mxge_dma_free(&sc->cmd_dma); 5009 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5010 pci_disable_busmaster(dev); 5011 mtx_destroy(&sc->cmd_mtx); 5012 mtx_destroy(&sc->driver_mtx); 5013 if_free(sc->ifp); 5014 bus_dma_tag_destroy(sc->parent_dmat); 5015 return 0; 5016} 5017 5018static int 5019mxge_shutdown(device_t dev) 5020{ 5021 return 0; 5022} 5023 5024/* 5025 This file uses Myri10GE driver indentation. 

   Local Variables:
   c-file-style:"linux"
   tab-width:8
   End:
*/