32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50#include <net/if.h> 51#include <net/if_arp.h> 52#include <net/ethernet.h> 53#include <net/if_dl.h> 54#include <net/if_media.h> 55 56#include <net/bpf.h> 57 58#include <net/if_types.h> 59#include <net/if_vlan_var.h> 60#include <net/zlib.h> 61 62#include <netinet/in_systm.h> 63#include <netinet/in.h> 64#include <netinet/ip.h> 65#include <netinet/ip6.h> 66#include <netinet/tcp.h> 67#include <netinet/tcp_lro.h> 68#include <netinet6/ip6_var.h> 69 70#include <machine/bus.h> 71#include <machine/in_cksum.h> 72#include <machine/resource.h> 73#include <sys/bus.h> 74#include <sys/rman.h> 75#include <sys/smp.h> 76 77#include <dev/pci/pcireg.h> 78#include <dev/pci/pcivar.h> 79#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 80 81#include <vm/vm.h> /* for pmap_mapdev() */ 82#include <vm/pmap.h> 83 84#if defined(__i386) || defined(__amd64) 85#include <machine/specialreg.h> 86#endif 87 88#include <dev/mxge/mxge_mcp.h> 89#include <dev/mxge/mcp_gen_header.h> 90/*#define MXGE_FAKE_IFP*/ 91#include <dev/mxge/if_mxge_var.h> 92#ifdef IFNET_BUF_RING 93#include <sys/buf_ring.h> 94#endif 95 96#include "opt_inet.h" 97#include "opt_inet6.h" 98 99/* tunable params */ 100static int mxge_nvidia_ecrc_enable = 1; 101static int mxge_force_firmware = 0; 102static int mxge_intr_coal_delay = 30; 103static int mxge_deassert_wait = 1; 104static int mxge_flow_control = 1; 105static int mxge_verbose = 0; 106static int mxge_ticks; 107static int mxge_max_slices = 1; 108static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 109static int mxge_always_promisc = 0; 110static 
int mxge_initial_mtu = ETHERMTU_JUMBO; 111static int mxge_throttle = 0; 112static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 113static char *mxge_fw_aligned = "mxge_eth_z8e"; 114static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 115static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 116 117static int mxge_probe(device_t dev); 118static int mxge_attach(device_t dev); 119static int mxge_detach(device_t dev); 120static int mxge_shutdown(device_t dev); 121static void mxge_intr(void *arg); 122 123static device_method_t mxge_methods[] = 124{ 125 /* Device interface */ 126 DEVMETHOD(device_probe, mxge_probe), 127 DEVMETHOD(device_attach, mxge_attach), 128 DEVMETHOD(device_detach, mxge_detach), 129 DEVMETHOD(device_shutdown, mxge_shutdown), 130 131 DEVMETHOD_END 132}; 133 134static driver_t mxge_driver = 135{ 136 "mxge", 137 mxge_methods, 138 sizeof(mxge_softc_t), 139}; 140 141static devclass_t mxge_devclass; 142 143/* Declare ourselves to be a child of the PCI bus.*/ 144DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 145MODULE_DEPEND(mxge, firmware, 1, 1, 1); 146MODULE_DEPEND(mxge, zlib, 1, 1, 1); 147 148static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 149static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 150static int mxge_close(mxge_softc_t *sc, int down); 151static int mxge_open(mxge_softc_t *sc); 152static void mxge_tick(void *arg); 153 154static int 155mxge_probe(device_t dev) 156{ 157 int rev; 158 159 160 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 161 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 162 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 163 rev = pci_get_revid(dev); 164 switch (rev) { 165 case MXGE_PCI_REV_Z8E: 166 device_set_desc(dev, "Myri10G-PCIE-8A"); 167 break; 168 case MXGE_PCI_REV_Z8ES: 169 device_set_desc(dev, "Myri10G-PCIE-8B"); 170 break; 171 default: 172 device_set_desc(dev, "Myri10G-PCIE-8??"); 173 device_printf(dev, "Unrecognized rev %d NIC\n", 174 rev); 175 break; 
176 } 177 return 0; 178 } 179 return ENXIO; 180} 181 182static void 183mxge_enable_wc(mxge_softc_t *sc) 184{ 185#if defined(__i386) || defined(__amd64) 186 vm_offset_t len; 187 int err; 188 189 sc->wc = 1; 190 len = rman_get_size(sc->mem_res); 191 err = pmap_change_attr((vm_offset_t) sc->sram, 192 len, PAT_WRITE_COMBINING); 193 if (err != 0) { 194 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 195 err); 196 sc->wc = 0; 197 } 198#endif 199} 200 201 202/* callback to get our DMA address */ 203static void 204mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 205 int error) 206{ 207 if (error == 0) { 208 *(bus_addr_t *) arg = segs->ds_addr; 209 } 210} 211 212static int 213mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 214 bus_size_t alignment) 215{ 216 int err; 217 device_t dev = sc->dev; 218 bus_size_t boundary, maxsegsize; 219 220 if (bytes > 4096 && alignment == 4096) { 221 boundary = 0; 222 maxsegsize = bytes; 223 } else { 224 boundary = 4096; 225 maxsegsize = 4096; 226 } 227 228 /* allocate DMAable memory tags */ 229 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 230 alignment, /* alignment */ 231 boundary, /* boundary */ 232 BUS_SPACE_MAXADDR, /* low */ 233 BUS_SPACE_MAXADDR, /* high */ 234 NULL, NULL, /* filter */ 235 bytes, /* maxsize */ 236 1, /* num segs */ 237 maxsegsize, /* maxsegsize */ 238 BUS_DMA_COHERENT, /* flags */ 239 NULL, NULL, /* lock */ 240 &dma->dmat); /* tag */ 241 if (err != 0) { 242 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 243 return err; 244 } 245 246 /* allocate DMAable memory & map */ 247 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 248 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 249 | BUS_DMA_ZERO), &dma->map); 250 if (err != 0) { 251 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 252 goto abort_with_dmat; 253 } 254 255 /* load the memory */ 256 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 257 mxge_dmamap_callback, 258 (void *)&dma->bus_addr, 0); 
259 if (err != 0) { 260 device_printf(dev, "couldn't load map (err = %d)\n", err); 261 goto abort_with_mem; 262 } 263 return 0; 264 265abort_with_mem: 266 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 267abort_with_dmat: 268 (void)bus_dma_tag_destroy(dma->dmat); 269 return err; 270} 271 272 273static void 274mxge_dma_free(mxge_dma_t *dma) 275{ 276 bus_dmamap_unload(dma->dmat, dma->map); 277 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 278 (void)bus_dma_tag_destroy(dma->dmat); 279} 280 281/* 282 * The eeprom strings on the lanaiX have the format 283 * SN=x\0 284 * MAC=x:x:x:x:x:x\0 285 * PC=text\0 286 */ 287 288static int 289mxge_parse_strings(mxge_softc_t *sc) 290{ 291 char *ptr; 292 int i, found_mac, found_sn2; 293 char *endptr; 294 295 ptr = sc->eeprom_strings; 296 found_mac = 0; 297 found_sn2 = 0; 298 while (*ptr != '\0') { 299 if (strncmp(ptr, "MAC=", 4) == 0) { 300 ptr += 4; 301 for (i = 0;;) { 302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 303 if (endptr - ptr != 2) 304 goto abort; 305 ptr = endptr; 306 if (++i == 6) 307 break; 308 if (*ptr++ != ':') 309 goto abort; 310 } 311 found_mac = 1; 312 } else if (strncmp(ptr, "PC=", 3) == 0) { 313 ptr += 3; 314 strlcpy(sc->product_code_string, ptr, 315 sizeof(sc->product_code_string)); 316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 317 ptr += 3; 318 strlcpy(sc->serial_number_string, ptr, 319 sizeof(sc->serial_number_string)); 320 } else if (strncmp(ptr, "SN2=", 4) == 0) { 321 /* SN2 takes precedence over SN */ 322 ptr += 4; 323 found_sn2 = 1; 324 strlcpy(sc->serial_number_string, ptr, 325 sizeof(sc->serial_number_string)); 326 } 327 while (*ptr++ != '\0') {} 328 } 329 330 if (found_mac) 331 return 0; 332 333 abort: 334 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 335 336 return ENXIO; 337} 338 339#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 340static void 341mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 342{ 343 uint32_t val; 344 unsigned long base, 
off; 345 char *va, *cfgptr; 346 device_t pdev, mcp55; 347 uint16_t vendor_id, device_id, word; 348 uintptr_t bus, slot, func, ivend, idev; 349 uint32_t *ptr32; 350 351 352 if (!mxge_nvidia_ecrc_enable) 353 return; 354 355 pdev = device_get_parent(device_get_parent(sc->dev)); 356 if (pdev == NULL) { 357 device_printf(sc->dev, "could not find parent?\n"); 358 return; 359 } 360 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 361 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 362 363 if (vendor_id != 0x10de) 364 return; 365 366 base = 0; 367 368 if (device_id == 0x005d) { 369 /* ck804, base address is magic */ 370 base = 0xe0000000UL; 371 } else if (device_id >= 0x0374 && device_id <= 0x378) { 372 /* mcp55, base address stored in chipset */ 373 mcp55 = pci_find_bsf(0, 0, 0); 374 if (mcp55 && 375 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 376 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 377 word = pci_read_config(mcp55, 0x90, 2); 378 base = ((unsigned long)word & 0x7ffeU) << 25; 379 } 380 } 381 if (!base) 382 return; 383 384 /* XXXX 385 Test below is commented because it is believed that doing 386 config read/write beyond 0xff will access the config space 387 for the next larger function. Uncomment this and remove 388 the hacky pmap_mapdev() way of accessing config space when 389 FreeBSD grows support for extended pcie config space access 390 */ 391#if 0 392 /* See if we can, by some miracle, access the extended 393 config space */ 394 val = pci_read_config(pdev, 0x178, 4); 395 if (val != 0xffffffff) { 396 val |= 0x40; 397 pci_write_config(pdev, 0x178, val, 4); 398 return; 399 } 400#endif 401 /* Rather than using normal pci config space writes, we must 402 * map the Nvidia config space ourselves. 
This is because on 403 * opteron/nvidia class machine the 0xe000000 mapping is 404 * handled by the nvidia chipset, that means the internal PCI 405 * device (the on-chip northbridge), or the amd-8131 bridge 406 * and things behind them are not visible by this method. 407 */ 408 409 BUS_READ_IVAR(device_get_parent(pdev), pdev, 410 PCI_IVAR_BUS, &bus); 411 BUS_READ_IVAR(device_get_parent(pdev), pdev, 412 PCI_IVAR_SLOT, &slot); 413 BUS_READ_IVAR(device_get_parent(pdev), pdev, 414 PCI_IVAR_FUNCTION, &func); 415 BUS_READ_IVAR(device_get_parent(pdev), pdev, 416 PCI_IVAR_VENDOR, &ivend); 417 BUS_READ_IVAR(device_get_parent(pdev), pdev, 418 PCI_IVAR_DEVICE, &idev); 419 420 off = base 421 + 0x00100000UL * (unsigned long)bus 422 + 0x00001000UL * (unsigned long)(func 423 + 8 * slot); 424 425 /* map it into the kernel */ 426 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 427 428 429 if (va == NULL) { 430 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 431 return; 432 } 433 /* get a pointer to the config space mapped into the kernel */ 434 cfgptr = va + (off & PAGE_MASK); 435 436 /* make sure that we can really access it */ 437 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 438 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 439 if (! 
(vendor_id == ivend && device_id == idev)) { 440 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 441 vendor_id, device_id); 442 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 443 return; 444 } 445 446 ptr32 = (uint32_t*)(cfgptr + 0x178); 447 val = *ptr32; 448 449 if (val == 0xffffffff) { 450 device_printf(sc->dev, "extended mapping failed\n"); 451 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 452 return; 453 } 454 *ptr32 = val | 0x40; 455 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 456 if (mxge_verbose) 457 device_printf(sc->dev, 458 "Enabled ECRC on upstream Nvidia bridge " 459 "at %d:%d:%d\n", 460 (int)bus, (int)slot, (int)func); 461 return; 462} 463#else 464static void 465mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 466{ 467 device_printf(sc->dev, 468 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 469 return; 470} 471#endif 472 473 474static int 475mxge_dma_test(mxge_softc_t *sc, int test_type) 476{ 477 mxge_cmd_t cmd; 478 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 479 int status; 480 uint32_t len; 481 char *test = " "; 482 483 484 /* Run a small DMA test. 485 * The magic multipliers to the length tell the firmware 486 * to do DMA read, write, or read+write tests. The 487 * results are returned in cmd.data0. The upper 16 488 * bits of the return is the number of transfers completed. 489 * The lower 16 bits is the time in 0.5us ticks that the 490 * transfers took to complete. 
491 */ 492 493 len = sc->tx_boundary; 494 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x10000; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "read"; 501 goto abort; 502 } 503 sc->read_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 507 cmd.data2 = len * 0x1; 508 status = mxge_send_cmd(sc, test_type, &cmd); 509 if (status != 0) { 510 test = "write"; 511 goto abort; 512 } 513 sc->write_dma = ((cmd.data0>>16) * len * 2) / 514 (cmd.data0 & 0xffff); 515 516 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 517 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 518 cmd.data2 = len * 0x10001; 519 status = mxge_send_cmd(sc, test_type, &cmd); 520 if (status != 0) { 521 test = "read/write"; 522 goto abort; 523 } 524 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 525 (cmd.data0 & 0xffff); 526 527abort: 528 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 529 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 530 test, status); 531 532 return status; 533} 534 535/* 536 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 537 * when the PCI-E Completion packets are aligned on an 8-byte 538 * boundary. Some PCI-E chip sets always align Completion packets; on 539 * the ones that do not, the alignment can be enforced by enabling 540 * ECRC generation (if supported). 541 * 542 * When PCI-E Completion packets are not aligned, it is actually more 543 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 
544 * 545 * If the driver can neither enable ECRC nor verify that it has 546 * already been enabled, then it must use a firmware image which works 547 * around unaligned completion packets (ethp_z8e.dat), and it should 548 * also ensure that it never gives the device a Read-DMA which is 549 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 550 * enabled, then the driver should use the aligned (eth_z8e.dat) 551 * firmware image, and set tx_boundary to 4KB. 552 */ 553 554static int 555mxge_firmware_probe(mxge_softc_t *sc) 556{ 557 device_t dev = sc->dev; 558 int reg, status; 559 uint16_t pectl; 560 561 sc->tx_boundary = 4096; 562 /* 563 * Verify the max read request size was set to 4KB 564 * before trying the test with 4KB. 565 */ 566 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 567 pectl = pci_read_config(dev, reg + 0x8, 2); 568 if ((pectl & (5 << 12)) != (5 << 12)) { 569 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 570 pectl); 571 sc->tx_boundary = 2048; 572 } 573 } 574 575 /* 576 * load the optimized firmware (which assumes aligned PCIe 577 * completions) in order to see if it works on this host. 578 */ 579 sc->fw_name = mxge_fw_aligned; 580 status = mxge_load_firmware(sc, 1); 581 if (status != 0) { 582 return status; 583 } 584 585 /* 586 * Enable ECRC if possible 587 */ 588 mxge_enable_nvidia_ecrc(sc); 589 590 /* 591 * Run a DMA test which watches for unaligned completions and 592 * aborts on the first one seen. Not required on Z8ES or newer. 593 */ 594 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 595 return 0; 596 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 597 if (status == 0) 598 return 0; /* keep the aligned firmware */ 599 600 if (status != E2BIG) 601 device_printf(dev, "DMA test failed: %d\n", status); 602 if (status == ENOSYS) 603 device_printf(dev, "Falling back to ethp! 
" 604 "Please install up to date fw\n"); 605 return status; 606} 607 608static int 609mxge_select_firmware(mxge_softc_t *sc) 610{ 611 int aligned = 0; 612 int force_firmware = mxge_force_firmware; 613 614 if (sc->throttle) 615 force_firmware = sc->throttle; 616 617 if (force_firmware != 0) { 618 if (force_firmware == 1) 619 aligned = 1; 620 else 621 aligned = 0; 622 if (mxge_verbose) 623 device_printf(sc->dev, 624 "Assuming %s completions (forced)\n", 625 aligned ? "aligned" : "unaligned"); 626 goto abort; 627 } 628 629 /* if the PCIe link width is 4 or less, we can use the aligned 630 firmware and skip any checks */ 631 if (sc->link_width != 0 && sc->link_width <= 4) { 632 device_printf(sc->dev, 633 "PCIe x%d Link, expect reduced performance\n", 634 sc->link_width); 635 aligned = 1; 636 goto abort; 637 } 638 639 if (0 == mxge_firmware_probe(sc)) 640 return 0; 641 642abort: 643 if (aligned) { 644 sc->fw_name = mxge_fw_aligned; 645 sc->tx_boundary = 4096; 646 } else { 647 sc->fw_name = mxge_fw_unaligned; 648 sc->tx_boundary = 2048; 649 } 650 return (mxge_load_firmware(sc, 0)); 651} 652 653static int 654mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 655{ 656 657 658 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 659 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 660 be32toh(hdr->mcp_type)); 661 return EIO; 662 } 663 664 /* save firmware version for sysctl */ 665 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 666 if (mxge_verbose) 667 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 668 669 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 670 &sc->fw_ver_minor, &sc->fw_ver_tiny); 671 672 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 673 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 674 device_printf(sc->dev, "Found firmware version %s\n", 675 sc->fw_version); 676 device_printf(sc->dev, "Driver needs %d.%d\n", 677 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 678 return EINVAL; 679 } 680 return 0; 681 682} 683 
684static void * 685z_alloc(void *nil, u_int items, u_int size) 686{ 687 void *ptr; 688 689 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 690 return ptr; 691} 692 693static void 694z_free(void *nil, void *ptr) 695{ 696 free(ptr, M_TEMP); 697} 698 699 700static int 701mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 702{ 703 z_stream zs; 704 char *inflate_buffer; 705 const struct firmware *fw; 706 const mcp_gen_header_t *hdr; 707 unsigned hdr_offset; 708 int status; 709 unsigned int i; 710 char dummy; 711 size_t fw_len; 712 713 fw = firmware_get(sc->fw_name); 714 if (fw == NULL) { 715 device_printf(sc->dev, "Could not find firmware image %s\n", 716 sc->fw_name); 717 return ENOENT; 718 } 719 720 721 722 /* setup zlib and decompress f/w */ 723 bzero(&zs, sizeof (zs)); 724 zs.zalloc = z_alloc; 725 zs.zfree = z_free; 726 status = inflateInit(&zs); 727 if (status != Z_OK) { 728 status = EIO; 729 goto abort_with_fw; 730 } 731 732 /* the uncompressed size is stored as the firmware version, 733 which would otherwise go unused */ 734 fw_len = (size_t) fw->version; 735 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 736 if (inflate_buffer == NULL) 737 goto abort_with_zs; 738 zs.avail_in = fw->datasize; 739 zs.next_in = __DECONST(char *, fw->data); 740 zs.avail_out = fw_len; 741 zs.next_out = inflate_buffer; 742 status = inflate(&zs, Z_FINISH); 743 if (status != Z_STREAM_END) { 744 device_printf(sc->dev, "zlib %d\n", status); 745 status = EIO; 746 goto abort_with_buffer; 747 } 748 749 /* check id */ 750 hdr_offset = htobe32(*(const uint32_t *) 751 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 752 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 753 device_printf(sc->dev, "Bad firmware file"); 754 status = EIO; 755 goto abort_with_buffer; 756 } 757 hdr = (const void*)(inflate_buffer + hdr_offset); 758 759 status = mxge_validate_firmware(sc, hdr); 760 if (status != 0) 761 goto abort_with_buffer; 762 763 /* Copy the inflated firmware to NIC SRAM. 
*/ 764 for (i = 0; i < fw_len; i += 256) { 765 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 766 inflate_buffer + i, 767 min(256U, (unsigned)(fw_len - i))); 768 wmb(); 769 dummy = *sc->sram; 770 wmb(); 771 } 772 773 *limit = fw_len; 774 status = 0; 775abort_with_buffer: 776 free(inflate_buffer, M_TEMP); 777abort_with_zs: 778 inflateEnd(&zs); 779abort_with_fw: 780 firmware_put(fw, FIRMWARE_UNLOAD); 781 return status; 782} 783 784/* 785 * Enable or disable periodic RDMAs from the host to make certain 786 * chipsets resend dropped PCIe messages 787 */ 788 789static void 790mxge_dummy_rdma(mxge_softc_t *sc, int enable) 791{ 792 char buf_bytes[72]; 793 volatile uint32_t *confirm; 794 volatile char *submit; 795 uint32_t *buf, dma_low, dma_high; 796 int i; 797 798 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 799 800 /* clear confirmation addr */ 801 confirm = (volatile uint32_t *)sc->cmd; 802 *confirm = 0; 803 wmb(); 804 805 /* send an rdma command to the PCIe engine, and wait for the 806 response in the confirmation address. The firmware should 807 write a -1 there to indicate it is alive and well 808 */ 809 810 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 812 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 813 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 814 buf[2] = htobe32(0xffffffff); /* confirm data */ 815 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 816 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 817 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 818 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 819 buf[5] = htobe32(enable); /* enable? 
*/ 820 821 822 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 823 824 mxge_pio_copy(submit, buf, 64); 825 wmb(); 826 DELAY(1000); 827 wmb(); 828 i = 0; 829 while (*confirm != 0xffffffff && i < 20) { 830 DELAY(1000); 831 i++; 832 } 833 if (*confirm != 0xffffffff) { 834 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 835 (enable ? "enable" : "disable"), confirm, 836 *confirm); 837 } 838 return; 839} 840 841static int 842mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 843{ 844 mcp_cmd_t *buf; 845 char buf_bytes[sizeof(*buf) + 8]; 846 volatile mcp_cmd_response_t *response = sc->cmd; 847 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 848 uint32_t dma_low, dma_high; 849 int err, sleep_total = 0; 850 851 /* ensure buf is aligned to 8 bytes */ 852 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 853 854 buf->data0 = htobe32(data->data0); 855 buf->data1 = htobe32(data->data1); 856 buf->data2 = htobe32(data->data2); 857 buf->cmd = htobe32(cmd); 858 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 859 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 860 861 buf->response_addr.low = htobe32(dma_low); 862 buf->response_addr.high = htobe32(dma_high); 863 mtx_lock(&sc->cmd_mtx); 864 response->result = 0xffffffff; 865 wmb(); 866 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 867 868 /* wait up to 20ms */ 869 err = EAGAIN; 870 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 871 bus_dmamap_sync(sc->cmd_dma.dmat, 872 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 873 wmb(); 874 switch (be32toh(response->result)) { 875 case 0: 876 data->data0 = be32toh(response->data); 877 err = 0; 878 break; 879 case 0xffffffff: 880 DELAY(1000); 881 break; 882 case MXGEFW_CMD_UNKNOWN: 883 err = ENOSYS; 884 break; 885 case MXGEFW_CMD_ERROR_UNALIGNED: 886 err = E2BIG; 887 break; 888 case MXGEFW_CMD_ERROR_BUSY: 889 err = EBUSY; 890 break; 891 case MXGEFW_CMD_ERROR_I2C_ABSENT: 892 err = ENXIO; 893 break; 894 default: 
895 device_printf(sc->dev, 896 "mxge: command %d " 897 "failed, result = %d\n", 898 cmd, be32toh(response->result)); 899 err = ENXIO; 900 break; 901 } 902 if (err != EAGAIN) 903 break; 904 } 905 if (err == EAGAIN) 906 device_printf(sc->dev, "mxge: command %d timed out" 907 "result = %d\n", 908 cmd, be32toh(response->result)); 909 mtx_unlock(&sc->cmd_mtx); 910 return err; 911} 912 913static int 914mxge_adopt_running_firmware(mxge_softc_t *sc) 915{ 916 struct mcp_gen_header *hdr; 917 const size_t bytes = sizeof (struct mcp_gen_header); 918 size_t hdr_offset; 919 int status; 920 921 /* find running firmware header */ 922 hdr_offset = htobe32(*(volatile uint32_t *) 923 (sc->sram + MCP_HEADER_PTR_OFFSET)); 924 925 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 926 device_printf(sc->dev, 927 "Running firmware has bad header offset (%d)\n", 928 (int)hdr_offset); 929 return EIO; 930 } 931 932 /* copy header of running firmware from SRAM to host memory to 933 * validate firmware */ 934 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 935 if (hdr == NULL) { 936 device_printf(sc->dev, "could not malloc firmware hdr\n"); 937 return ENOMEM; 938 } 939 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 940 rman_get_bushandle(sc->mem_res), 941 hdr_offset, (char *)hdr, bytes); 942 status = mxge_validate_firmware(sc, hdr); 943 free(hdr, M_DEVBUF); 944 945 /* 946 * check to see if adopted firmware has bug where adopting 947 * it will cause broadcasts to be filtered unless the NIC 948 * is kept in ALLMULTI mode 949 */ 950 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 951 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 952 sc->adopted_rx_filter_bug = 1; 953 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 954 "working around rx filter bug\n", 955 sc->fw_ver_major, sc->fw_ver_minor, 956 sc->fw_ver_tiny); 957 } 958 959 return status; 960} 961 962 963static int 964mxge_load_firmware(mxge_softc_t *sc, int adopt) 965{ 966 volatile uint32_t *confirm; 967 volatile 
char *submit; 968 char buf_bytes[72]; 969 uint32_t *buf, size, dma_low, dma_high; 970 int status, i; 971 972 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 973 974 size = sc->sram_size; 975 status = mxge_load_firmware_helper(sc, &size); 976 if (status) { 977 if (!adopt) 978 return status; 979 /* Try to use the currently running firmware, if 980 it is new enough */ 981 status = mxge_adopt_running_firmware(sc); 982 if (status) { 983 device_printf(sc->dev, 984 "failed to adopt running firmware\n"); 985 return status; 986 } 987 device_printf(sc->dev, 988 "Successfully adopted running firmware\n"); 989 if (sc->tx_boundary == 4096) { 990 device_printf(sc->dev, 991 "Using firmware currently running on NIC" 992 ". For optimal\n"); 993 device_printf(sc->dev, 994 "performance consider loading optimized " 995 "firmware\n"); 996 } 997 sc->fw_name = mxge_fw_unaligned; 998 sc->tx_boundary = 2048; 999 return 0; 1000 } 1001 /* clear confirmation addr */ 1002 confirm = (volatile uint32_t *)sc->cmd; 1003 *confirm = 0; 1004 wmb(); 1005 /* send a reload command to the bootstrap MCP, and wait for the 1006 response in the confirmation address. The firmware should 1007 write a -1 there to indicate it is alive and well 1008 */ 1009 1010 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1011 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1012 1013 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1014 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1015 buf[2] = htobe32(0xffffffff); /* confirm data */ 1016 1017 /* FIX: All newest firmware should un-protect the bottom of 1018 the sram before handoff. However, the very first interfaces 1019 do not. 
Therefore the handoff copy must skip the first 8 bytes 1020 */ 1021 /* where the code starts*/ 1022 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1023 buf[4] = htobe32(size - 8); /* length of code */ 1024 buf[5] = htobe32(8); /* where to copy to */ 1025 buf[6] = htobe32(0); /* where to jump to */ 1026 1027 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1028 mxge_pio_copy(submit, buf, 64); 1029 wmb(); 1030 DELAY(1000); 1031 wmb(); 1032 i = 0; 1033 while (*confirm != 0xffffffff && i < 20) { 1034 DELAY(1000*10); 1035 i++; 1036 bus_dmamap_sync(sc->cmd_dma.dmat, 1037 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1038 } 1039 if (*confirm != 0xffffffff) { 1040 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1041 confirm, *confirm); 1042 1043 return ENXIO; 1044 } 1045 return 0; 1046} 1047 1048static int 1049mxge_update_mac_address(mxge_softc_t *sc) 1050{ 1051 mxge_cmd_t cmd; 1052 uint8_t *addr = sc->mac_addr; 1053 int status; 1054 1055 1056 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1057 | (addr[2] << 8) | addr[3]); 1058 1059 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1060 1061 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1062 return status; 1063} 1064 1065static int 1066mxge_change_pause(mxge_softc_t *sc, int pause) 1067{ 1068 mxge_cmd_t cmd; 1069 int status; 1070 1071 if (pause) 1072 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1073 &cmd); 1074 else 1075 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1076 &cmd); 1077 1078 if (status) { 1079 device_printf(sc->dev, "Failed to set flow control mode\n"); 1080 return ENXIO; 1081 } 1082 sc->pause = pause; 1083 return 0; 1084} 1085 1086static void 1087mxge_change_promisc(mxge_softc_t *sc, int promisc) 1088{ 1089 mxge_cmd_t cmd; 1090 int status; 1091 1092 if (mxge_always_promisc) 1093 promisc = 1; 1094 1095 if (promisc) 1096 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1097 &cmd); 1098 else 1099 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1100 &cmd); 1101 1102 if (status) { 1103 
device_printf(sc->dev, "Failed to set promisc mode\n"); 1104 } 1105} 1106 1107static void 1108mxge_set_multicast_list(mxge_softc_t *sc) 1109{ 1110 mxge_cmd_t cmd; 1111 struct ifmultiaddr *ifma; 1112 struct ifnet *ifp = sc->ifp; 1113 int err; 1114 1115 /* This firmware is known to not support multicast */ 1116 if (!sc->fw_multicast_support) 1117 return; 1118 1119 /* Disable multicast filtering while we play with the lists*/ 1120 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1121 if (err != 0) { 1122 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1123 " error status: %d\n", err); 1124 return; 1125 } 1126 1127 if (sc->adopted_rx_filter_bug) 1128 return; 1129 1130 if (ifp->if_flags & IFF_ALLMULTI) 1131 /* request to disable multicast filtering, so quit here */ 1132 return; 1133 1134 /* Flush all the filters */ 1135 1136 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1137 if (err != 0) { 1138 device_printf(sc->dev, 1139 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1140 ", error status: %d\n", err); 1141 return; 1142 } 1143 1144 /* Walk the multicast list, and add each address */ 1145 1146 if_maddr_rlock(ifp); 1147 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1148 if (ifma->ifma_addr->sa_family != AF_LINK) 1149 continue; 1150 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1151 &cmd.data0, 4); 1152 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1153 &cmd.data1, 2); 1154 cmd.data0 = htonl(cmd.data0); 1155 cmd.data1 = htonl(cmd.data1); 1156 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1157 if (err != 0) { 1158 device_printf(sc->dev, "Failed " 1159 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1160 "%d\t", err); 1161 /* abort, leaving multicast filtering off */ 1162 if_maddr_runlock(ifp); 1163 return; 1164 } 1165 } 1166 if_maddr_runlock(ifp); 1167 /* Enable multicast filtering */ 1168 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1169 if (err != 0) { 1170 device_printf(sc->dev, "Failed 
MXGEFW_DISABLE_ALLMULTI" 1171 ", error status: %d\n", err); 1172 } 1173} 1174 1175static int 1176mxge_max_mtu(mxge_softc_t *sc) 1177{ 1178 mxge_cmd_t cmd; 1179 int status; 1180 1181 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1182 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1183 1184 /* try to set nbufs to see if it we can 1185 use virtually contiguous jumbos */ 1186 cmd.data0 = 0; 1187 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1188 &cmd); 1189 if (status == 0) 1190 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1191 1192 /* otherwise, we're limited to MJUMPAGESIZE */ 1193 return MJUMPAGESIZE - MXGEFW_PAD; 1194} 1195 1196static int 1197mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1198{ 1199 struct mxge_slice_state *ss; 1200 mxge_rx_done_t *rx_done; 1201 volatile uint32_t *irq_claim; 1202 mxge_cmd_t cmd; 1203 int slice, status; 1204 1205 /* try to send a reset command to the card to see if it 1206 is alive */ 1207 memset(&cmd, 0, sizeof (cmd)); 1208 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1209 if (status != 0) { 1210 device_printf(sc->dev, "failed reset\n"); 1211 return ENXIO; 1212 } 1213 1214 mxge_dummy_rdma(sc, 1); 1215 1216 1217 /* set the intrq size */ 1218 cmd.data0 = sc->rx_ring_size; 1219 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1220 1221 /* 1222 * Even though we already know how many slices are supported 1223 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1224 * has magic side effects, and must be called after a reset. 1225 * It must be called prior to calling any RSS related cmds, 1226 * including assigning an interrupt queue for anything but 1227 * slice 0. It must also be called *after* 1228 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1229 * the firmware to compute offsets. 
1230 */ 1231 1232 if (sc->num_slices > 1) { 1233 /* ask the maximum number of slices it supports */ 1234 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1235 &cmd); 1236 if (status != 0) { 1237 device_printf(sc->dev, 1238 "failed to get number of slices\n"); 1239 return status; 1240 } 1241 /* 1242 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1243 * to setting up the interrupt queue DMA 1244 */ 1245 cmd.data0 = sc->num_slices; 1246 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1247#ifdef IFNET_BUF_RING 1248 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1249#endif 1250 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1251 &cmd); 1252 if (status != 0) { 1253 device_printf(sc->dev, 1254 "failed to set number of slices\n"); 1255 return status; 1256 } 1257 } 1258 1259 1260 if (interrupts_setup) { 1261 /* Now exchange information about interrupts */ 1262 for (slice = 0; slice < sc->num_slices; slice++) { 1263 rx_done = &sc->ss[slice].rx_done; 1264 memset(rx_done->entry, 0, sc->rx_ring_size); 1265 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1266 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1267 cmd.data2 = slice; 1268 status |= mxge_send_cmd(sc, 1269 MXGEFW_CMD_SET_INTRQ_DMA, 1270 &cmd); 1271 } 1272 } 1273 1274 status |= mxge_send_cmd(sc, 1275 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1276 1277 1278 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1279 1280 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1281 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1282 1283 1284 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1285 &cmd); 1286 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1287 if (status != 0) { 1288 device_printf(sc->dev, "failed set interrupt parameters\n"); 1289 return status; 1290 } 1291 1292 1293 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1294 1295 1296 /* run a DMA benchmark */ 1297 (void) 
mxge_dma_test(sc, MXGEFW_DMA_TEST); 1298 1299 for (slice = 0; slice < sc->num_slices; slice++) { 1300 ss = &sc->ss[slice]; 1301 1302 ss->irq_claim = irq_claim + (2 * slice); 1303 /* reset mcp/driver shared state back to 0 */ 1304 ss->rx_done.idx = 0; 1305 ss->rx_done.cnt = 0; 1306 ss->tx.req = 0; 1307 ss->tx.done = 0; 1308 ss->tx.pkt_done = 0; 1309 ss->tx.queue_active = 0; 1310 ss->tx.activate = 0; 1311 ss->tx.deactivate = 0; 1312 ss->tx.wake = 0; 1313 ss->tx.defrag = 0; 1314 ss->tx.stall = 0; 1315 ss->rx_big.cnt = 0; 1316 ss->rx_small.cnt = 0; 1317 ss->lc.lro_bad_csum = 0; 1318 ss->lc.lro_queued = 0; 1319 ss->lc.lro_flushed = 0; 1320 if (ss->fw_stats != NULL) { 1321 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1322 } 1323 } 1324 sc->rdma_tags_available = 15; 1325 status = mxge_update_mac_address(sc); 1326 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1327 mxge_change_pause(sc, sc->pause); 1328 mxge_set_multicast_list(sc); 1329 if (sc->throttle) { 1330 cmd.data0 = sc->throttle; 1331 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1332 &cmd)) { 1333 device_printf(sc->dev, 1334 "can't enable throttle\n"); 1335 } 1336 } 1337 return status; 1338} 1339 1340static int 1341mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1342{ 1343 mxge_cmd_t cmd; 1344 mxge_softc_t *sc; 1345 int err; 1346 unsigned int throttle; 1347 1348 sc = arg1; 1349 throttle = sc->throttle; 1350 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1351 if (err != 0) { 1352 return err; 1353 } 1354 1355 if (throttle == sc->throttle) 1356 return 0; 1357 1358 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1359 return EINVAL; 1360 1361 mtx_lock(&sc->driver_mtx); 1362 cmd.data0 = throttle; 1363 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1364 if (err == 0) 1365 sc->throttle = throttle; 1366 mtx_unlock(&sc->driver_mtx); 1367 return err; 1368} 1369 1370static int 1371mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1372{ 1373 mxge_softc_t *sc; 1374 unsigned int 
intr_coal_delay; 1375 int err; 1376 1377 sc = arg1; 1378 intr_coal_delay = sc->intr_coal_delay; 1379 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1380 if (err != 0) { 1381 return err; 1382 } 1383 if (intr_coal_delay == sc->intr_coal_delay) 1384 return 0; 1385 1386 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1387 return EINVAL; 1388 1389 mtx_lock(&sc->driver_mtx); 1390 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1391 sc->intr_coal_delay = intr_coal_delay; 1392 1393 mtx_unlock(&sc->driver_mtx); 1394 return err; 1395} 1396 1397static int 1398mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1399{ 1400 mxge_softc_t *sc; 1401 unsigned int enabled; 1402 int err; 1403 1404 sc = arg1; 1405 enabled = sc->pause; 1406 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1407 if (err != 0) { 1408 return err; 1409 } 1410 if (enabled == sc->pause) 1411 return 0; 1412 1413 mtx_lock(&sc->driver_mtx); 1414 err = mxge_change_pause(sc, enabled); 1415 mtx_unlock(&sc->driver_mtx); 1416 return err; 1417} 1418 1419static int 1420mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1421{ 1422 int err; 1423 1424 if (arg1 == NULL) 1425 return EFAULT; 1426 arg2 = be32toh(*(int *)arg1); 1427 arg1 = NULL; 1428 err = sysctl_handle_int(oidp, arg1, arg2, req); 1429 1430 return err; 1431} 1432 1433static void 1434mxge_rem_sysctls(mxge_softc_t *sc) 1435{ 1436 struct mxge_slice_state *ss; 1437 int slice; 1438 1439 if (sc->slice_sysctl_tree == NULL) 1440 return; 1441 1442 for (slice = 0; slice < sc->num_slices; slice++) { 1443 ss = &sc->ss[slice]; 1444 if (ss == NULL || ss->sysctl_tree == NULL) 1445 continue; 1446 sysctl_ctx_free(&ss->sysctl_ctx); 1447 ss->sysctl_tree = NULL; 1448 } 1449 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1450 sc->slice_sysctl_tree = NULL; 1451} 1452 1453static void 1454mxge_add_sysctls(mxge_softc_t *sc) 1455{ 1456 struct sysctl_ctx_list *ctx; 1457 struct sysctl_oid_list *children; 1458 mcp_irq_data_t *fw; 1459 struct mxge_slice_state *ss; 1460 int slice; 1461 
char slice_num[8]; 1462 1463 ctx = device_get_sysctl_ctx(sc->dev); 1464 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1465 fw = sc->ss[0].fw_stats; 1466 1467 /* random information */ 1468 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1469 "firmware_version", 1470 CTLFLAG_RD, &sc->fw_version, 1471 0, "firmware version"); 1472 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1473 "serial_number", 1474 CTLFLAG_RD, &sc->serial_number_string, 1475 0, "serial number"); 1476 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1477 "product_code", 1478 CTLFLAG_RD, &sc->product_code_string, 1479 0, "product_code"); 1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1481 "pcie_link_width", 1482 CTLFLAG_RD, &sc->link_width, 1483 0, "tx_boundary"); 1484 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1485 "tx_boundary", 1486 CTLFLAG_RD, &sc->tx_boundary, 1487 0, "tx_boundary"); 1488 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1489 "write_combine", 1490 CTLFLAG_RD, &sc->wc, 1491 0, "write combining PIO?"); 1492 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1493 "read_dma_MBs", 1494 CTLFLAG_RD, &sc->read_dma, 1495 0, "DMA Read speed in MB/s"); 1496 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1497 "write_dma_MBs", 1498 CTLFLAG_RD, &sc->write_dma, 1499 0, "DMA Write speed in MB/s"); 1500 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1501 "read_write_dma_MBs", 1502 CTLFLAG_RD, &sc->read_write_dma, 1503 0, "DMA concurrent Read/Write speed in MB/s"); 1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1505 "watchdog_resets", 1506 CTLFLAG_RD, &sc->watchdog_resets, 1507 0, "Number of times NIC was reset"); 1508 1509 1510 /* performance related tunables */ 1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1512 "intr_coal_delay", 1513 CTLTYPE_INT|CTLFLAG_RW, sc, 1514 0, mxge_change_intr_coal, 1515 "I", "interrupt coalescing delay in usecs"); 1516 1517 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1518 "throttle", 1519 CTLTYPE_INT|CTLFLAG_RW, sc, 1520 0, mxge_change_throttle, 1521 "I", "transmit throttling"); 1522 1523 SYSCTL_ADD_PROC(ctx, 
children, OID_AUTO, 1524 "flow_control_enabled", 1525 CTLTYPE_INT|CTLFLAG_RW, sc, 1526 0, mxge_change_flow_control, 1527 "I", "interrupt coalescing delay in usecs"); 1528 1529 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1530 "deassert_wait", 1531 CTLFLAG_RW, &mxge_deassert_wait, 1532 0, "Wait for IRQ line to go low in ihandler"); 1533 1534 /* stats block from firmware is in network byte order. 1535 Need to swap it */ 1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1537 "link_up", 1538 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1539 0, mxge_handle_be32, 1540 "I", "link up"); 1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1542 "rdma_tags_available", 1543 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1544 0, mxge_handle_be32, 1545 "I", "rdma_tags_available"); 1546 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1547 "dropped_bad_crc32", 1548 CTLTYPE_INT|CTLFLAG_RD, 1549 &fw->dropped_bad_crc32, 1550 0, mxge_handle_be32, 1551 "I", "dropped_bad_crc32"); 1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1553 "dropped_bad_phy", 1554 CTLTYPE_INT|CTLFLAG_RD, 1555 &fw->dropped_bad_phy, 1556 0, mxge_handle_be32, 1557 "I", "dropped_bad_phy"); 1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1559 "dropped_link_error_or_filtered", 1560 CTLTYPE_INT|CTLFLAG_RD, 1561 &fw->dropped_link_error_or_filtered, 1562 0, mxge_handle_be32, 1563 "I", "dropped_link_error_or_filtered"); 1564 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1565 "dropped_link_overflow", 1566 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1567 0, mxge_handle_be32, 1568 "I", "dropped_link_overflow"); 1569 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1570 "dropped_multicast_filtered", 1571 CTLTYPE_INT|CTLFLAG_RD, 1572 &fw->dropped_multicast_filtered, 1573 0, mxge_handle_be32, 1574 "I", "dropped_multicast_filtered"); 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "dropped_no_big_buffer", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1578 0, mxge_handle_be32, 1579 "I", "dropped_no_big_buffer"); 1580 SYSCTL_ADD_PROC(ctx, children, 
OID_AUTO, 1581 "dropped_no_small_buffer", 1582 CTLTYPE_INT|CTLFLAG_RD, 1583 &fw->dropped_no_small_buffer, 1584 0, mxge_handle_be32, 1585 "I", "dropped_no_small_buffer"); 1586 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1587 "dropped_overrun", 1588 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1589 0, mxge_handle_be32, 1590 "I", "dropped_overrun"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_pause", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_pause, 1595 0, mxge_handle_be32, 1596 "I", "dropped_pause"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 "dropped_runt", 1599 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1600 0, mxge_handle_be32, 1601 "I", "dropped_runt"); 1602 1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1604 "dropped_unicast_filtered", 1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1606 0, mxge_handle_be32, 1607 "I", "dropped_unicast_filtered"); 1608 1609 /* verbose printing? */ 1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1611 "verbose", 1612 CTLFLAG_RW, &mxge_verbose, 1613 0, "verbose printing"); 1614 1615 /* add counters exported for debugging from all slices */ 1616 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1617 sc->slice_sysctl_tree = 1618 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1619 "slice", CTLFLAG_RD, 0, ""); 1620 1621 for (slice = 0; slice < sc->num_slices; slice++) { 1622 ss = &sc->ss[slice]; 1623 sysctl_ctx_init(&ss->sysctl_ctx); 1624 ctx = &ss->sysctl_ctx; 1625 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1626 sprintf(slice_num, "%d", slice); 1627 ss->sysctl_tree = 1628 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1629 CTLFLAG_RD, 0, ""); 1630 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1631 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1632 "rx_small_cnt", 1633 CTLFLAG_RD, &ss->rx_small.cnt, 1634 0, "rx_small_cnt"); 1635 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1636 "rx_big_cnt", 1637 CTLFLAG_RD, &ss->rx_big.cnt, 1638 0, "rx_small_cnt"); 1639 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1640 
"lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1641 0, "number of lro merge queues flushed"); 1642 1643 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1644 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1645 0, "number of bad csums preventing LRO"); 1646 1647 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1648 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1649 0, "number of frames appended to lro merge" 1650 "queues"); 1651 1652#ifndef IFNET_BUF_RING 1653 /* only transmit from slice 0 for now */ 1654 if (slice > 0) 1655 continue; 1656#endif 1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1658 "tx_req", 1659 CTLFLAG_RD, &ss->tx.req, 1660 0, "tx_req"); 1661 1662 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1663 "tx_done", 1664 CTLFLAG_RD, &ss->tx.done, 1665 0, "tx_done"); 1666 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1667 "tx_pkt_done", 1668 CTLFLAG_RD, &ss->tx.pkt_done, 1669 0, "tx_done"); 1670 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1671 "tx_stall", 1672 CTLFLAG_RD, &ss->tx.stall, 1673 0, "tx_stall"); 1674 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1675 "tx_wake", 1676 CTLFLAG_RD, &ss->tx.wake, 1677 0, "tx_wake"); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "tx_defrag", 1680 CTLFLAG_RD, &ss->tx.defrag, 1681 0, "tx_defrag"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "tx_queue_active", 1684 CTLFLAG_RD, &ss->tx.queue_active, 1685 0, "tx_queue_active"); 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "tx_activate", 1688 CTLFLAG_RD, &ss->tx.activate, 1689 0, "tx_activate"); 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "tx_deactivate", 1692 CTLFLAG_RD, &ss->tx.deactivate, 1693 0, "tx_deactivate"); 1694 } 1695} 1696 1697/* copy an array of mcp_kreq_ether_send_t's to the mcp. 
   Copy
   backwards one at a time and handle ring wraps */

static inline void
mxge_submit_req_backwards(mxge_tx_ring_t *tx,
			  mcp_kreq_ether_send_t *src, int cnt)
{
	int idx, starting_slot;
	/* copy entries [cnt-1 .. 1] into the NIC ring; entry 0 is
	   intentionally left for the caller (mxge_submit_req) to
	   write last, so the firmware never sees a partial chain */
	starting_slot = tx->req;
	while (cnt > 1) {
		cnt--;
		idx = (starting_slot + cnt) & tx->mask;
		mxge_pio_copy(&tx->lanai[idx],
			      &src[cnt], sizeof(*src));
		wmb();	/* order each PIO copy before the next */
	}
}

/*
 * copy an array of mcp_kreq_ether_send_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.  We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* clear the first request's flags so the firmware ignores
	   the chain until the valid flags are re-written below */
	last_flags = src->flags;
	src->flags = 0;
	wmb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy forward, two requests (32 bytes)
		   per burst */
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			wmb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		wmb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints+=3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints+=3;
	*dst_ints =  *src_ints;
	tx->req += cnt;
	wmb();
}

/*
 * Parse the Ethernet/IP/TCP headers of mbuf m into pi: fills in
 * ip_off, ip/ip6 pointer, ip_hlen and (for TSO frames) the tcp
 * header pointer.  Headers that are not contiguous in the first
 * mbuf are copied into the slice's scratch buffer first.
 * Returns 0 on success or EINVAL for unsupported protocols.
 */
static int
mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m,
	      struct mxge_pkt_info *pi)
{
	struct ether_vlan_header *eh;
	uint16_t etype;
	int tso = m->m_pkthdr.csum_flags & (CSUM_TSO);
#if IFCAP_TSO6 && defined(INET6)
	int nxt;
#endif

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		pi->ip_off = ETHER_HDR_LEN;
	}

	switch (etype) {
	case ETHERTYPE_IP:
		/*
		 * ensure ip header is in first mbuf, copy it to a
		 * scratch buffer if not
		 */
		pi->ip = (struct ip *)(m->m_data + pi->ip_off);
		pi->ip6 = NULL;
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip),
			    ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->ip_hlen = pi->ip->ip_hl << 2;
		if (!tso)
			return 0;

		/* for TSO the TCP header must be readable too */
		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip = (struct ip *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen);
		break;
#if IFCAP_TSO6 && defined(INET6)
	case ETHERTYPE_IPV6:
		pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off);
		if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) {
			m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6),
			    ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		/* walk the v6 extension-header chain to the L4 header */
		nxt = 0;
		pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt);
		pi->ip_hlen -= pi->ip_off;
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return EINVAL;

		if (!tso)
			return 0;

		if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen)
			return EINVAL;

		if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen +
		    sizeof(struct tcphdr))) {
			m_copydata(m, 0, pi->ip_off + pi->ip_hlen +
			    sizeof(struct tcphdr), ss->scratch);
			pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off);
		}
		pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen);
		break;
#endif
	default:
		return EINVAL;
	}
	return 0;
}

#if IFCAP_TSO4

/*
 * Build and submit the send-request chain for a TSO packet whose
 * busdma segment list is already in ss->tx.seg_list.  Negative
 * cum_len marks the header portion; segmentation cuts are made at
 * mss boundaries and rdma_count is patched retroactively.
 */
static void
mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m,
	       int busdma_seg_cnt, struct mxge_pkt_info *pi)
{
	mxge_tx_ring_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss, sum;
	uint8_t flags, flags_next;
	static int once;

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	cksum_offset = pi->ip_off + pi->ip_hlen;
	cum_len = -(cksum_offset + (pi->tcp->th_off << 2));

	/* TSO implies checksum offload on this hardware */
	if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) {
		/*
		 * If packet has full TCP csum, replace it with pseudo hdr
		 * sum that the NIC expects, otherwise the NIC will emit
		 * packets with bad TCP checksums.
		 */
		m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
		if (pi->ip6) {
#if (CSUM_TCP_IPV6 != 0) && defined(INET6)
			m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
			sum = in6_cksum_pseudo(pi->ip6,
			    m->m_pkthdr.len - cksum_offset,
			    IPPROTO_TCP, 0);
#endif
		} else {
#ifdef INET
			m->m_pkthdr.csum_flags |= CSUM_TCP;
			sum = in_pseudo(pi->ip->ip_src.s_addr,
			    pi->ip->ip_dst.s_addr,
			    htons(IPPROTO_TCP + (m->m_pkthdr.len -
				    cksum_offset)));
#endif
		}
		/* store the pseudo-header sum in the TCP checksum field */
		m_copyback(m, offsetof(struct tcphdr, th_sum) +
		    cksum_offset, sizeof(sum), (caddr_t)&sum);
	}
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;

	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	if (pi->ip6) {
		/*
		 * for IPv6 TSO, the "checksum offset" is re-purposed
		 * to store the TCP header len
		 */
		cksum_offset = (pi->tcp->th_off << 2);
	}

	tx = &ss->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one ore more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			seglen = len;
			cum_len_next = cum_len + seglen;
			/* back-patch rdma_count into the request that
			   started the current RDMA run */
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			    }

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (cksum_offset != 0 && !pi->ip6) {
				if (__predict_false(cksum_offset > seglen))
					cksum_offset -= seglen;
				else
					cksum_offset = 0;
			}
			if (__predict_false(cnt > tx->max_desc))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	/* final retroactive rdma_count patch for the last run */
	(req-rdma_count)->rdma_count = rdma_count;

	/* mark every trailing request of the last segment as LAST */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map);
	m_freem(m);
	ss->oerrors++;
	if (!once) {
		printf("tx->max_desc exceeded via TSO!\n");
		printf("mss = %d, %ld, %d!\n", mss,
		       (long)seg - (long)tx->seg_list, tx->max_desc);
		once = 1;
	}
	return;

}

#endif /* IFCAP_TSO4 */

#ifdef MXGE_NEW_VLAN_API
/*
 * We reproduce the software vlan tag insertion from
 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware"
 * vlan tag insertion.  We need to advertise this in order to have the
 * vlan interface respect our csum offload flags.
 */
static struct mbuf *
mxge_vlan_tag_insert(struct mbuf *m)
{
	struct ether_vlan_header *evl;

	M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT);
	if (__predict_false(m == NULL))
		return NULL;
	if (m->m_len < sizeof(*evl)) {
		m = m_pullup(m, sizeof(*evl));
		if (__predict_false(m == NULL))
			return NULL;
	}
	/*
	 * Transform the Ethernet header into an Ethernet header
	 * with 802.1Q encapsulation.
	 */
	evl = mtod(m, struct ether_vlan_header *);
	bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN,
	      (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN);
	evl->evl_encap_proto = htons(ETHERTYPE_VLAN);
	evl->evl_tag = htons(m->m_pkthdr.ether_vtag);
	m->m_flags &= ~M_VLANTAG;
	return m;
}
#endif /* MXGE_NEW_VLAN_API */

/*
 * Map mbuf m for DMA and hand it to the NIC on slice ss: inserts a
 * software VLAN tag if needed, parses headers for checksum/TSO
 * offload, defrags overly-fragmented chains, builds the send-request
 * list (padding runts to 60 bytes) and submits it.  TSO frames are
 * delegated to mxge_encap_tso().  On failure the mbuf is freed and
 * ss->oerrors is bumped.
 */
static void
mxge_encap(struct mxge_slice_state *ss, struct mbuf *m)
{
	struct mxge_pkt_info pi = {0,0,0,0};
	mxge_softc_t *sc;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

#ifdef MXGE_NEW_VLAN_API
	if (m->m_flags & M_VLANTAG) {
		m = mxge_vlan_tag_insert(m);
		if (__predict_false(m == NULL))
			goto drop_without_m;
	}
#endif
	if (m->m_pkthdr.csum_flags &
	    (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		if (mxge_parse_tx(ss, m, &pi))
			goto drop;
	}

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (__predict_false(err == EFBIG)) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		ss->tx.defrag++;
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (__predict_false(err != 0)) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;

#if IFCAP_TSO4
	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(ss, m, cnt, &pi);
		return;
	}
#endif

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags &
	    (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		cksum_offset = pi.ip_off + pi.ip_hlen;
		pseudo_hdr_offset = cksum_offset +  m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	mxge_submit_req(tx, tx->req_list, cnt);
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && tx->queue_active == 0) {
		/* tell the NIC to start polling this slice */
		*tx->send_go = 1;
		tx->queue_active = 1;
		tx->activate++;
		wmb();
	}
#endif
	return;

drop:
	m_freem(m);
drop_without_m:
	ss->oerrors++;
	return;
}

#ifdef IFNET_BUF_RING
/*
 * if_qflush method: drain and free every per-slice buf_ring, then
 * flush the legacy interface queue.
 */
static void
mxge_qflush(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	int slice;

	for (slice = 0; slice <
	     sc->num_slices; slice++) {
		tx = &sc->ss[slice].tx;
		mtx_lock(&tx->mtx);
		while ((m = buf_ring_dequeue_sc(tx->br)) != NULL)
			m_freem(m);
		mtx_unlock(&tx->mtx);
	}
	if_qflush(ifp);
}

/*
 * Drain the slice's buf_ring into the NIC while transmit descriptors
 * remain; sets IFF_DRV_OACTIVE on the slice when the ring fills.
 * Caller must hold tx->mtx.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		m = drbr_dequeue(ifp, tx->br);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0)
	    && (!drbr_empty(ifp, tx->br))) {
		ss->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}

/*
 * Transmit one mbuf on slice ss with tx->mtx held: bypass the
 * buf_ring when it is empty and descriptors are free, otherwise
 * enqueue and drain via mxge_start_locked().
 */
static int
mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	int err;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;

	if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING) {
		err = drbr_enqueue(ifp, tx->br, m);
		return (err);
	}

	if (!drbr_needs_enqueue(ifp, tx->br) &&
	    ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) {
		/* let BPF see it */
		BPF_MTAP(ifp, m);
		/* give it to the nic */
		mxge_encap(ss, m);
	} else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) {
		return (err);
	}
	if (!drbr_empty(ifp, tx->br))
		mxge_start_locked(ss);
	return (0);
}

/*
 * if_transmit method: pick a slice from the mbuf's flowid, try to
 * take that slice's tx lock; if it is contended just enqueue on the
 * slice's buf_ring and let the current holder drain it.
 */
static int
mxge_transmit(struct ifnet *ifp, struct mbuf *m)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;
	mxge_tx_ring_t *tx;
	int err = 0;
	int slice;

	slice = m->m_pkthdr.flowid;
	slice &= (sc->num_slices - 1);  /* num_slices always power of 2 */

	ss = &sc->ss[slice];
	tx = &ss->tx;

	if (mtx_trylock(&tx->mtx)) {
		err = mxge_transmit_locked(ss, m);
		mtx_unlock(&tx->mtx);
	} else {
		err = drbr_enqueue(ifp, tx->br, m);
	}

	return (err);
}

#else

/*
 * Non-buf_ring variant: drain the legacy if_snd queue into the NIC
 * while descriptors remain.  Caller must hold tx->mtx.
 */
static inline void
mxge_start_locked(struct mxge_slice_state *ss)
{
	mxge_softc_t *sc;
	struct mbuf *m;
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;

	sc = ss->sc;
	ifp = sc->ifp;
	tx = &ss->tx;
	while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(ss, m);
	}
	/* ran out of transmit slots */
	if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
		sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		tx->stall++;
	}
}
#endif
/* if_start method: transmit via slice 0 only */
static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct mxge_slice_state *ss;

	/* only use the first slice for now */
	ss = &sc->ss[0];
	mtx_lock(&ss->tx.mtx);
	mxge_start_locked(ss);
	mtx_unlock(&ss->tx.mtx);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.
We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* poison addr_low so the NIC ignores the chunk until it is complete */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/* now publish the real first address, making all 8 entries valid */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

/*
 * Re-stock slot idx of the small rx ring with a freshly allocated
 * mbuf header and record its DMA address in the shadow ring.  Every
 * 8th slot, the accumulated 8 shadow entries are pushed to the NIC
 * (even on allocation failure, via the done: path, so the NIC sees
 * a consistent ring).  Returns 0 or ENOBUFS / busdma error.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* flush a completed group of 8 descriptors to the NIC */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Re-stock slot idx of the big rx ring with a jumbo cluster mbuf.
 * With MXGE_VIRT_JUMBOS a single cluster may span several 4K DMA
 * segments, each taking its own descriptor slot.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	/* additional 4K segments of the same cluster fill the next slots */
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

done:
	/* advance over all slots this buffer consumed, flushing each
	 * completed group of 8 descriptors to the NIC */
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

#ifdef INET6

/*
 * Plain 16-bit ones-complement sum over len bytes (folded twice).
 * Note: assumes len is even and the buffer is 16-bit aligned —
 * callers pass IPv6 header lengths, which satisfy both.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t csum;

	csum = 0;
	while (len > 0) {
		csum += *raw;
		raw++;
		len -= 2;
	}
	/* fold carries back in; two folds are enough for a 32-bit sum */
	csum = (csum >> 16) + (csum & 0xffff);
	csum = (csum >> 16) + (csum & 0xffff);
	return (uint16_t)csum;
}

/*
 * Validate the hardware checksum for an IPv6 frame.  Returns 0 when
 * the TCP/UDP checksum verifies, non-zero otherwise (including for
 * protocols other than TCP/UDP).
 */
static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	uint32_t partial;
	int nxt, cksum_offset;
	struct ip6_hdr *ip6 = p;
	uint16_t c;

	nxt = ip6->ip6_nxt;
	cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN;
	if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) {
		/* walk extension headers to find the transport header */
		cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN,
					   IPPROTO_IPV6, &nxt);
		if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP)
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence
	 * do not checksum to zero, so they don't "fall out"
	 * of the partial checksum calculation like IPv4
	 * headers do.  We need to fix the partial checksum by
	 * subtracting the checksum of the IPv6 header.
2536 */ 2537 2538 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2539 ETHER_HDR_LEN); 2540 csum += ~partial; 2541 csum += (csum < ~partial); 2542 csum = (csum >> 16) + (csum & 0xFFFF); 2543 csum = (csum >> 16) + (csum & 0xFFFF); 2544 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2545 csum); 2546 c ^= 0xffff; 2547 return (c); 2548} 2549#endif /* INET6 */ 2550/* 2551 * Myri10GE hardware checksums are not valid if the sender 2552 * padded the frame with non-zero padding. This is because 2553 * the firmware just does a simple 16-bit 1s complement 2554 * checksum across the entire frame, excluding the first 14 2555 * bytes. It is best to simply to check the checksum and 2556 * tell the stack about it only if the checksum is good 2557 */ 2558 2559static inline uint16_t 2560mxge_rx_csum(struct mbuf *m, int csum) 2561{ 2562 struct ether_header *eh; 2563#ifdef INET 2564 struct ip *ip; 2565#endif 2566#if defined(INET) || defined(INET6) 2567 int cap = m->m_pkthdr.rcvif->if_capenable; 2568#endif 2569 uint16_t c, etype; 2570 2571 2572 eh = mtod(m, struct ether_header *); 2573 etype = ntohs(eh->ether_type); 2574 switch (etype) { 2575#ifdef INET 2576 case ETHERTYPE_IP: 2577 if ((cap & IFCAP_RXCSUM) == 0) 2578 return (1); 2579 ip = (struct ip *)(eh + 1); 2580 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2581 return (1); 2582 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2583 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2584 (ip->ip_hl << 2) + ip->ip_p)); 2585 c ^= 0xffff; 2586 break; 2587#endif 2588#ifdef INET6 2589 case ETHERTYPE_IPV6: 2590 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2591 return (1); 2592 c = mxge_rx_csum6((eh + 1), m, csum); 2593 break; 2594#endif 2595 default: 2596 c = 1; 2597 } 2598 return (c); 2599} 2600 2601static void 2602mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2603{ 2604 struct ether_vlan_header *evl; 2605 struct ether_header *eh; 2606 uint32_t partial; 2607 2608 evl = mtod(m, struct ether_vlan_header *); 2609 
eh = mtod(m, struct ether_header *); 2610 2611 /* 2612 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2613 * after what the firmware thought was the end of the ethernet 2614 * header. 2615 */ 2616 2617 /* put checksum into host byte order */ 2618 *csum = ntohs(*csum); 2619 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2620 (*csum) += ~partial; 2621 (*csum) += ((*csum) < ~partial); 2622 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2624 2625 /* restore checksum to network byte order; 2626 later consumers expect this */ 2627 *csum = htons(*csum); 2628 2629 /* save the tag */ 2630#ifdef MXGE_NEW_VLAN_API 2631 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2632#else 2633 { 2634 struct m_tag *mtag; 2635 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2636 M_NOWAIT); 2637 if (mtag == NULL) 2638 return; 2639 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2640 m_tag_prepend(m, mtag); 2641 } 2642 2643#endif 2644 m->m_flags |= M_VLANTAG; 2645 2646 /* 2647 * Remove the 802.1q header by copying the Ethernet 2648 * addresses over it and adjusting the beginning of 2649 * the data in the mbuf. The encapsulated Ethernet 2650 * type field is already in place. 
2651 */ 2652 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2653 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2654 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2655} 2656 2657 2658static inline void 2659mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2660 uint32_t csum, int lro) 2661{ 2662 mxge_softc_t *sc; 2663 struct ifnet *ifp; 2664 struct mbuf *m; 2665 struct ether_header *eh; 2666 mxge_rx_ring_t *rx; 2667 bus_dmamap_t old_map; 2668 int idx; 2669 2670 sc = ss->sc; 2671 ifp = sc->ifp; 2672 rx = &ss->rx_big; 2673 idx = rx->cnt & rx->mask; 2674 rx->cnt += rx->nbufs; 2675 /* save a pointer to the received mbuf */ 2676 m = rx->info[idx].m; 2677 /* try to replace the received mbuf */ 2678 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2679 /* drop the frame -- the old mbuf is re-cycled */ 2680 ifp->if_ierrors++; 2681 return; 2682 } 2683 2684 /* unmap the received buffer */ 2685 old_map = rx->info[idx].map; 2686 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2687 bus_dmamap_unload(rx->dmat, old_map); 2688 2689 /* swap the bus_dmamap_t's */ 2690 rx->info[idx].map = rx->extra_map; 2691 rx->extra_map = old_map; 2692 2693 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2694 * aligned */ 2695 m->m_data += MXGEFW_PAD; 2696 2697 m->m_pkthdr.rcvif = ifp; 2698 m->m_len = m->m_pkthdr.len = len; 2699 ss->ipackets++; 2700 eh = mtod(m, struct ether_header *); 2701 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2702 mxge_vlan_tag_remove(m, &csum); 2703 } 2704 /* if the checksum is valid, mark it in the mbuf header */ 2705 2706 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2707 (0 == mxge_rx_csum(m, csum))) { 2708 /* Tell the stack that the checksum is good */ 2709 m->m_pkthdr.csum_data = 0xffff; 2710 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2711 CSUM_DATA_VALID; 2712 2713#if defined(INET) || defined (INET6) 2714 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2715 return; 2716#endif 2717 } 2718 /* flowid only valid if RSS hashing is enabled */ 
2719 if (sc->num_slices > 1) { 2720 m->m_pkthdr.flowid = (ss - sc->ss); 2721 m->m_flags |= M_FLOWID; 2722 } 2723 /* pass the frame up the stack */ 2724 (*ifp->if_input)(ifp, m); 2725} 2726 2727static inline void 2728mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2729 uint32_t csum, int lro) 2730{ 2731 mxge_softc_t *sc; 2732 struct ifnet *ifp; 2733 struct ether_header *eh; 2734 struct mbuf *m; 2735 mxge_rx_ring_t *rx; 2736 bus_dmamap_t old_map; 2737 int idx; 2738 2739 sc = ss->sc; 2740 ifp = sc->ifp; 2741 rx = &ss->rx_small; 2742 idx = rx->cnt & rx->mask; 2743 rx->cnt++; 2744 /* save a pointer to the received mbuf */ 2745 m = rx->info[idx].m; 2746 /* try to replace the received mbuf */ 2747 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2748 /* drop the frame -- the old mbuf is re-cycled */ 2749 ifp->if_ierrors++; 2750 return; 2751 } 2752 2753 /* unmap the received buffer */ 2754 old_map = rx->info[idx].map; 2755 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2756 bus_dmamap_unload(rx->dmat, old_map); 2757 2758 /* swap the bus_dmamap_t's */ 2759 rx->info[idx].map = rx->extra_map; 2760 rx->extra_map = old_map; 2761 2762 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2763 * aligned */ 2764 m->m_data += MXGEFW_PAD; 2765 2766 m->m_pkthdr.rcvif = ifp; 2767 m->m_len = m->m_pkthdr.len = len; 2768 ss->ipackets++; 2769 eh = mtod(m, struct ether_header *); 2770 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2771 mxge_vlan_tag_remove(m, &csum); 2772 } 2773 /* if the checksum is valid, mark it in the mbuf header */ 2774 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2775 (0 == mxge_rx_csum(m, csum))) { 2776 /* Tell the stack that the checksum is good */ 2777 m->m_pkthdr.csum_data = 0xffff; 2778 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2779 CSUM_DATA_VALID; 2780 2781#if defined(INET) || defined (INET6) 2782 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2783 return; 2784#endif 2785 } 2786 /* flowid only valid if 
RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Process pending receive completions for this slice, dispatching
 * each to the small or big handler by length, then flush any LRO
 * state that accumulated.  Bounded to half the ring per call to
 * limit the potential for livelock.
 */
static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;
	int lro;

	lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
	/* a non-zero length marks a valid completion entry */
	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum, lro);
		else
			mxge_rx_done_big(ss, length, checksum, lro);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
#if defined(INET) || defined (INET6)
	/* push any LRO-merged segments up the stack */
	while (!SLIST_EMPTY(&ss->lc.lro_active)) {
		struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
		SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
		tcp_lro_flush(&ss->lc, lro);
	}
#endif
}

/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * mcp_idx: free the mbufs and DMA maps, update stats, and restart
 * transmission if the ring was stalled.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ss->obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				ss->omcasts++;
			ss->opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
m_freem(m); 2857 } 2858 if (tx->info[idx].flag) { 2859 tx->info[idx].flag = 0; 2860 tx->pkt_done++; 2861 } 2862 } 2863 2864 /* If we have space, clear IFF_OACTIVE to tell the stack that 2865 its OK to send packets */ 2866#ifdef IFNET_BUF_RING 2867 flags = &ss->if_drv_flags; 2868#else 2869 flags = &ifp->if_drv_flags; 2870#endif 2871 mtx_lock(&ss->tx.mtx); 2872 if ((*flags) & IFF_DRV_OACTIVE && 2873 tx->req - tx->done < (tx->mask + 1)/4) { 2874 *(flags) &= ~IFF_DRV_OACTIVE; 2875 ss->tx.wake++; 2876 mxge_start_locked(ss); 2877 } 2878#ifdef IFNET_BUF_RING 2879 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2880 /* let the NIC stop polling this queue, since there 2881 * are no more transmits pending */ 2882 if (tx->req == tx->done) { 2883 *tx->send_stop = 1; 2884 tx->queue_active = 0; 2885 tx->deactivate++; 2886 wmb(); 2887 } 2888 } 2889#endif 2890 mtx_unlock(&ss->tx.mtx); 2891 2892} 2893 2894static struct mxge_media_type mxge_xfp_media_types[] = 2895{ 2896 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2897 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2898 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2899 {0, (1 << 5), "10GBASE-ER"}, 2900 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2901 {0, (1 << 3), "10GBASE-SW"}, 2902 {0, (1 << 2), "10GBASE-LW"}, 2903 {0, (1 << 1), "10GBASE-EW"}, 2904 {0, (1 << 0), "Reserved"} 2905}; 2906static struct mxge_media_type mxge_sfp_media_types[] = 2907{ 2908 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2909 {0, (1 << 7), "Reserved"}, 2910 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2911 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2912 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2913 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2914}; 2915 2916static void 2917mxge_media_set(mxge_softc_t *sc, int media_type) 2918{ 2919 2920 2921 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2922 0, NULL); 2923 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2924 sc->current_media = media_type; 2925 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2926} 2927 
/*
 * Derive the connector type (CX4 / XFP / SFP+ / Quad Ribbon Fiber)
 * from the EEPROM product code and register a corresponding default
 * media type.  The actual module in an XFP/SFP+ cage is identified
 * later by mxge_media_probe().
 */
static void
mxge_media_init(mxge_softc_t *sc)
{
	char *ptr;
	int i;

	ifmedia_removeall(&sc->media);
	mxge_media_set(sc, IFM_AUTO);

	/*
	 * parse the product code to determine the interface type
	 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character
	 * after the 3rd dash in the driver's cached copy of the
	 * EEPROM's product code string.
	 */
	ptr = sc->product_code_string;
	if (ptr == NULL) {
		device_printf(sc->dev, "Missing product code\n");
		return;
	}

	/* advance ptr just past the third '-' */
	for (i = 0; i < 3; i++, ptr++) {
		ptr = strchr(ptr, '-');
		if (ptr == NULL) {
			device_printf(sc->dev,
				      "only %d dashes in PC?!?\n", i);
			return;
		}
	}
	if (*ptr == 'C' || *(ptr +1) == 'C') {
		/* -C is CX4 */
		sc->connector = MXGE_CX4;
		mxge_media_set(sc, IFM_10G_CX4);
	} else if (*ptr == 'Q') {
		/* -Q is Quad Ribbon Fiber */
		sc->connector = MXGE_QRF;
		device_printf(sc->dev, "Quad Ribbon Fiber Media\n");
		/* FreeBSD has no media type for Quad ribbon fiber */
	} else if (*ptr == 'R') {
		/* -R is XFP */
		sc->connector = MXGE_XFP;
	} else if (*ptr == 'S' || *(ptr +1) == 'S') {
		/* -S or -2S is SFP+ */
		sc->connector = MXGE_SFP;
	} else {
		device_printf(sc->dev, "Unknown media type: %c\n", *ptr);
	}
}

/*
 * Determine the media type for a NIC.  Some XFPs will identify
 * themselves only when their link is up, so this is initiated via a
 * link up interrupt.  However, this can potentially take up to
 * several milliseconds, so it is run via the watchdog routine, rather
 * than in the interrupt handler itself.
 */
static void
mxge_media_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *cage_type;

	struct mxge_media_type *mxge_media_types = NULL;
	int i, err, ms, mxge_media_type_entries;
	uint32_t byte;

	sc->need_media_probe = 0;

	/* pick the compliance table and I2C byte offset for the cage type */
	if (sc->connector == MXGE_XFP) {
		/* -R is XFP */
		mxge_media_types = mxge_xfp_media_types;
		mxge_media_type_entries =
			sizeof (mxge_xfp_media_types) /
			sizeof (mxge_xfp_media_types[0]);
		byte = MXGE_XFP_COMPLIANCE_BYTE;
		cage_type = "XFP";
	} else if (sc->connector == MXGE_SFP) {
		/* -S or -2S is SFP+ */
		mxge_media_types = mxge_sfp_media_types;
		mxge_media_type_entries =
			sizeof (mxge_sfp_media_types) /
			sizeof (mxge_sfp_media_types[0]);
		cage_type = "SFP+";
		byte = 3;
	} else {
		/* nothing to do; media type cannot change */
		return;
	}

	/*
	 * At this point we know the NIC has an XFP cage, so now we
	 * try to determine what is in the cage by using the
	 * firmware's XFP I2C commands to read the XFP 10GbE compliance
	 * register.  We read just one byte, which may take over
	 * a millisecond
	 */

	cmd.data0 = 0;	 /* just fetch 1 byte, not all 256 */
	cmd.data1 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd);
	if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) {
		device_printf(sc->dev, "failed to read XFP\n");
	}
	if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) {
		device_printf(sc->dev, "Type R/S with no XFP!?!?\n");
	}
	if (err != MXGEFW_CMD_OK) {
		return;
	}

	/* now we wait for the data to be cached */
	cmd.data0 = byte;
	err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	for (ms = 0; (err == EBUSY) && (ms < 50); ms++) {
		DELAY(1000);
		cmd.data0 = byte;
		err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd);
	}
	if (err != MXGEFW_CMD_OK) {
		device_printf(sc->dev, "failed to read %s (%d, %dms)\n",
			      cage_type, err, ms);
		return;
	}

	/* first table entry is matched by equality, the rest by bit test */
	if (cmd.data0 == mxge_media_types[0].bitmask) {
		if (mxge_verbose)
			device_printf(sc->dev, "%s:%s\n", cage_type,
				      mxge_media_types[0].name);
		if (sc->current_media != mxge_media_types[0].flag) {
			mxge_media_init(sc);
			mxge_media_set(sc, mxge_media_types[0].flag);
		}
		return;
	}
	for (i = 1; i < mxge_media_type_entries; i++) {
		if (cmd.data0 & mxge_media_types[i].bitmask) {
			if (mxge_verbose)
				device_printf(sc->dev, "%s:%s\n",
					      cage_type,
					      mxge_media_types[i].name);

			if (sc->current_media != mxge_media_types[i].flag) {
				mxge_media_init(sc);
				mxge_media_set(sc, mxge_media_types[i].flag);
			}
			return;
		}
	}
	if (mxge_verbose)
		device_printf(sc->dev, "%s media 0x%x unknown\n",
			      cage_type, cmd.data0);

	return;
}

/*
 * Per-slice interrupt handler: reclaims tx completions, processes rx
 * completions, and (slice 0 only) handles link-state and firmware
 * error statistics.  For legacy IRQs it optionally spins until the
 * firmware confirms the line was deasserted.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;

#ifndef IFNET_BUF_RING
	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}
#endif

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if_initbaudrate(sc->ifp, IF_Gbps(10));
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp, LINK_STATE_DOWN);
				sc->ifp->if_baudrate = 0;
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* module may identify itself only now; probe later */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/* if_init entry point: bring the interface up if it isn't running */
static void
mxge_init(void *arg)
{
	mxge_softc_t *sc = arg;
	struct ifnet *ifp = sc->ifp;

	mtx_lock(&sc->driver_mtx);
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		(void) mxge_open(sc);
	mtx_unlock(&sc->driver_mtx);
}

/*
 * Release every mbuf still attached to this slice's rx and tx rings
 * (and its LRO state), unloading the DMA maps first.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	int i;

#if defined(INET) || defined(INET6)
	tcp_lro_free(&ss->lc);
#endif
	for (i = 0; i <= ss->rx_big.mask; i++) {
		if (ss->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_big.dmat,
				  ss->rx_big.info[i].map);
		m_freem(ss->rx_big.info[i].m);
		ss->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_small.mask; i++) {
		if (ss->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_small.dmat,
				  ss->rx_small.info[i].map);
		m_freem(ss->rx_small.info[i].m);
		ss->rx_small.info[i].m = NULL;
	}

	/* transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		ss->tx.info[i].flag = 0;
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat,
				  ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

/* free ring mbufs on every slice */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Tear down one slice's ring data structures: completion-ring DMA
 * memory, host-side shadow/info arrays, per-slot DMA maps, and the
 * busdma tags.  Safe to call on partially-initialized slices (every
 * pointer is NULL-checked and cleared afterwards).
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;

	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

/* free ring resources on every slice */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate one slice's host-side ring state: shadow and info arrays,
 * busdma tags, and a DMA map per rx slot (plus an "extra" map used
 * for buffer replacement).  Returns 0 or a busdma error; on failure
 * the caller is expected to run mxge_free_rings().
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}
3422 3423#ifndef IFNET_BUF_RING 3424 /* only use a single TX ring for now */ 3425 if (ss != ss->sc->ss) 3426 return 0; 3427#endif 3428 3429 ss->tx.mask = tx_ring_entries - 1; 3430 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3431 3432 3433 /* allocate the tx request copy block */ 3434 bytes = 8 + 3435 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3436 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3437 /* ensure req_list entries are aligned to 8 bytes */ 3438 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3439 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3440 3441 /* allocate the tx busdma segment list */ 3442 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3443 ss->tx.seg_list = (bus_dma_segment_t *) 3444 malloc(bytes, M_DEVBUF, M_WAITOK); 3445 3446 /* allocate the tx host info ring */ 3447 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3448 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3449 3450 /* allocate the tx busdma resources */ 3451 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3452 1, /* alignment */ 3453 sc->tx_boundary, /* boundary */ 3454 BUS_SPACE_MAXADDR, /* low */ 3455 BUS_SPACE_MAXADDR, /* high */ 3456 NULL, NULL, /* filter */ 3457 65536 + 256, /* maxsize */ 3458 ss->tx.max_desc - 2, /* num segs */ 3459 sc->tx_boundary, /* maxsegsz */ 3460 BUS_DMA_ALLOCNOW, /* flags */ 3461 NULL, NULL, /* lock */ 3462 &ss->tx.dmat); /* tag */ 3463 3464 if (err != 0) { 3465 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3466 err); 3467 return err; 3468 } 3469 3470 /* now use these tags to setup dmamaps for each slot 3471 in the ring */ 3472 for (i = 0; i <= ss->tx.mask; i++) { 3473 err = bus_dmamap_create(ss->tx.dmat, 0, 3474 &ss->tx.info[i].map); 3475 if (err != 0) { 3476 device_printf(sc->dev, "Err %d tx dmamap\n", 3477 err); 3478 return err; 3479 } 3480 } 3481 return 0; 3482 3483} 3484 3485static int 3486mxge_alloc_rings(mxge_softc_t *sc) 3487{ 3488 mxge_cmd_t cmd; 3489 int tx_ring_size; 3490 int 
 tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		goto abort;
	}

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
	/* size the software send queue to match the hardware ring */
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
					     rx_ring_entries,
					     tx_ring_entries);
		if (err != 0)
			goto abort;
	}
	return 0;

abort:
	mxge_free_rings(sc);
	return err;

}


/*
 * Pick the receive-buffer strategy for a given MTU: the cluster size
 * handed to the mbuf allocator, the buffer size advertised to the
 * firmware, and how many firmware buffers each cluster is split into.
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
#if MXGE_VIRT_JUMBOS
	/* now we need to use virtually contiguous buffers */
	*cl_size = MJUM9BYTES;
	*big_buf_size = 4096;
	*nbufs = mtu / 4096 + 1;
	/* needs to be a power of two, so round up */
	if (*nbufs == 3)
		*nbufs = 4;
#else
	*cl_size = MJUM9BYTES;
	*big_buf_size = MJUM9BYTES;
	*nbufs = 1;
#endif
}

/*
 * Bring one slice up: fetch the NIC-resident ring pointers from the
 * firmware and pre-fill the small and big receive rings.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	int
 err, i, slice;


	sc = ss->sc;
	slice = ss - sc->ss;	/* slice index by pointer arithmetic */

#if defined(INET) || defined(INET6)
	(void)tcp_lro_init(&ss->lc);
#endif
	ss->lc.ifp = sc->ifp;

	/* get the lanai pointers to the send and receive rings */

	err = 0;
#ifndef IFNET_BUF_RING
	/* We currently only send from the first slice */
	if (slice == 0) {
#endif
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
		ss->tx.send_go = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
	}
#endif
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* poison the big-ring shadow so unfilled slots are obvious */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	/* one cluster covers nbufs firmware buffers, so stride by nbufs */
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

/*
 * Bring the interface up: reset the NIC, program RSS, tell the
 * firmware the buffer geometry, open every slice and start ethernet.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.
	   The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		/* slice index is carried in the upper 16 bits of data2 */
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		/* fall back to the legacy stats-DMA interface */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags |= IFF_DRV_RUNNING;
		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
#endif
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);

	return err;
}

/*
 * Bring the interface down.  If "down" is zero, ask the firmware to
 * stop ethernet and wait (via down_cnt) for the confirming interrupt;
 * if "down" is non-zero the NIC is assumed dead and only the host-side
 * mbufs are reclaimed.
 */
static int
mxge_close(mxge_softc_t *sc, int down)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;
#ifdef IFNET_BUF_RING
	struct mxge_slice_state *ss;
	int slice;
#endif

#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
#endif
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	if (!down) {
		old_down_cnt = sc->down_cnt;
		wmb();
		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
		if (err) {
			device_printf(sc->dev,
				      "Couldn't bring down link\n");
		}
		if (old_down_cnt == sc->down_cnt) {
			/* wait for down irq */
			DELAY(10 * sc->intr_coal_delay);
		}
		wmb();
		if (old_down_cnt == sc->down_cnt) {
			device_printf(sc->dev, "never got down irq\n");
		}
	}
	mxge_free_mbufs(sc);

	return 0;
}

/*
 * Program the PCI config space bits the driver cares about: PCIe max
 * read request size, bus mastering and memory-space decoding.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			pectl = pci_read_config(dev, reg + 0x8, 2);
			/* max read request size field: 5 -> 4KB */
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved
			   pectl after watchdog reset */
			pci_write_config(dev, reg + 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}

/*
 * Read the firmware reboot-status register through the vendor-specific
 * capability window.  Returns (uint32_t)-1 if the capability is absent.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;

	/* find the vendor specific offset */
	if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

/*
 * Recover from a firmware hang/reboot detected by the watchdog: restore
 * PCI config space, reload the firmware and reopen the interface.
 * Runs from the watchdog task with driver_mtx held.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted. If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero. If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.
		 * If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
		if (running) {

			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}
#ifdef IFNET_BUF_RING
			num_tx_slices = sc->num_slices;
#endif
			/* grab all TX locks to ensure no tx  */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			if (!err)
				err = mxge_open(sc);
			/* release all TX locks */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
				mxge_start_locked(ss);
#endif
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		/* recovery succeeded; re-arm the periodic tick */
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

static
void 3977mxge_watchdog_task(void *arg, int pending) 3978{ 3979 mxge_softc_t *sc = arg; 3980 3981 3982 mtx_lock(&sc->driver_mtx); 3983 mxge_watchdog_reset(sc); 3984 mtx_unlock(&sc->driver_mtx); 3985} 3986 3987static void 3988mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3989{ 3990 tx = &sc->ss[slice].tx; 3991 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3992 device_printf(sc->dev, 3993 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3994 tx->req, tx->done, tx->queue_active); 3995 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3996 tx->activate, tx->deactivate); 3997 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3998 tx->pkt_done, 3999 be32toh(sc->ss->fw_stats->send_done_count)); 4000} 4001 4002static int 4003mxge_watchdog(mxge_softc_t *sc) 4004{ 4005 mxge_tx_ring_t *tx; 4006 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 4007 int i, err = 0; 4008 4009 /* see if we have outstanding transmits, which 4010 have been pending for more than mxge_ticks */ 4011 for (i = 0; 4012#ifdef IFNET_BUF_RING 4013 (i < sc->num_slices) && (err == 0); 4014#else 4015 (i < 1) && (err == 0); 4016#endif 4017 i++) { 4018 tx = &sc->ss[i].tx; 4019 if (tx->req != tx->done && 4020 tx->watchdog_req != tx->watchdog_done && 4021 tx->done == tx->watchdog_done) { 4022 /* check for pause blocking before resetting */ 4023 if (tx->watchdog_rx_pause == rx_pause) { 4024 mxge_warn_stuck(sc, tx, i); 4025 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4026 return (ENXIO); 4027 } 4028 else 4029 device_printf(sc->dev, "Flow control blocking " 4030 "xmits, check link partner\n"); 4031 } 4032 4033 tx->watchdog_req = tx->req; 4034 tx->watchdog_done = tx->done; 4035 tx->watchdog_rx_pause = rx_pause; 4036 } 4037 4038 if (sc->need_media_probe) 4039 mxge_media_probe(sc); 4040 return (err); 4041} 4042 4043static u_long 4044mxge_update_stats(mxge_softc_t *sc) 4045{ 4046 struct mxge_slice_state *ss; 4047 u_long pkts = 0; 4048 u_long ipackets = 0; 4049 
u_long opackets = 0; 4050#ifdef IFNET_BUF_RING 4051 u_long obytes = 0; 4052 u_long omcasts = 0; 4053 u_long odrops = 0; 4054#endif 4055 u_long oerrors = 0; 4056 int slice; 4057 4058 for (slice = 0; slice < sc->num_slices; slice++) { 4059 ss = &sc->ss[slice]; 4060 ipackets += ss->ipackets; 4061 opackets += ss->opackets; 4062#ifdef IFNET_BUF_RING 4063 obytes += ss->obytes; 4064 omcasts += ss->omcasts; 4065 odrops += ss->tx.br->br_drops; 4066#endif 4067 oerrors += ss->oerrors; 4068 } 4069 pkts = (ipackets - sc->ifp->if_ipackets); 4070 pkts += (opackets - sc->ifp->if_opackets); 4071 sc->ifp->if_ipackets = ipackets; 4072 sc->ifp->if_opackets = opackets; 4073#ifdef IFNET_BUF_RING 4074 sc->ifp->if_obytes = obytes; 4075 sc->ifp->if_omcasts = omcasts; 4076 sc->ifp->if_snd.ifq_drops = odrops; 4077#endif 4078 sc->ifp->if_oerrors = oerrors; 4079 return pkts; 4080} 4081 4082static void 4083mxge_tick(void *arg) 4084{ 4085 mxge_softc_t *sc = arg; 4086 u_long pkts = 0; 4087 int err = 0; 4088 int running, ticks; 4089 uint16_t cmd; 4090 4091 ticks = mxge_ticks; 4092 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4093 if (running) { 4094 /* aggregate stats from different slices */ 4095 pkts = mxge_update_stats(sc); 4096 if (!sc->watchdog_countdown) { 4097 err = mxge_watchdog(sc); 4098 sc->watchdog_countdown = 4; 4099 } 4100 sc->watchdog_countdown--; 4101 } 4102 if (pkts == 0) { 4103 /* ensure NIC did not suffer h/w fault while idle */ 4104 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4105 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4106 sc->dying = 2; 4107 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4108 err = ENXIO; 4109 } 4110 /* look less often if NIC is idle */ 4111 ticks *= 4; 4112 } 4113 4114 if (err == 0) 4115 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4116 4117} 4118 4119static int 4120mxge_media_change(struct ifnet *ifp) 4121{ 4122 return EINVAL; 4123} 4124 4125static int 4126mxge_change_mtu(mxge_softc_t *sc, int mtu) 4127{ 4128 struct ifnet *ifp = sc->ifp; 
4129 int real_mtu, old_mtu; 4130 int err = 0; 4131 4132 4133 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4134 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4135 return EINVAL; 4136 mtx_lock(&sc->driver_mtx); 4137 old_mtu = ifp->if_mtu; 4138 ifp->if_mtu = mtu; 4139 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4140 mxge_close(sc, 0); 4141 err = mxge_open(sc); 4142 if (err != 0) { 4143 ifp->if_mtu = old_mtu; 4144 mxge_close(sc, 0); 4145 (void) mxge_open(sc); 4146 } 4147 } 4148 mtx_unlock(&sc->driver_mtx); 4149 return err; 4150} 4151 4152static void 4153mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4154{ 4155 mxge_softc_t *sc = ifp->if_softc; 4156 4157 4158 if (sc == NULL) 4159 return; 4160 ifmr->ifm_status = IFM_AVALID; 4161 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4162 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4163 ifmr->ifm_active |= sc->current_media; 4164} 4165 4166static int 4167mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4168{ 4169 mxge_softc_t *sc = ifp->if_softc; 4170 struct ifreq *ifr = (struct ifreq *)data; 4171 int err, mask; 4172 4173 err = 0; 4174 switch (command) { 4175 case SIOCSIFADDR: 4176 case SIOCGIFADDR: 4177 err = ether_ioctl(ifp, command, data); 4178 break; 4179 4180 case SIOCSIFMTU: 4181 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4182 break; 4183 4184 case SIOCSIFFLAGS: 4185 mtx_lock(&sc->driver_mtx); 4186 if (sc->dying) { 4187 mtx_unlock(&sc->driver_mtx); 4188 return EINVAL; 4189 } 4190 if (ifp->if_flags & IFF_UP) { 4191 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4192 err = mxge_open(sc); 4193 } else { 4194 /* take care of promis can allmulti 4195 flag chages */ 4196 mxge_change_promisc(sc, 4197 ifp->if_flags & IFF_PROMISC); 4198 mxge_set_multicast_list(sc); 4199 } 4200 } else { 4201 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4202 mxge_close(sc, 0); 4203 } 4204 } 4205 mtx_unlock(&sc->driver_mtx); 4206 break; 4207 4208 case SIOCADDMULTI: 4209 case SIOCDELMULTI: 4210 mtx_lock(&sc->driver_mtx); 
4211 mxge_set_multicast_list(sc); 4212 mtx_unlock(&sc->driver_mtx); 4213 break; 4214 4215 case SIOCSIFCAP: 4216 mtx_lock(&sc->driver_mtx); 4217 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4218 if (mask & IFCAP_TXCSUM) { 4219 if (IFCAP_TXCSUM & ifp->if_capenable) { 4220 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4221 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4222 } else { 4223 ifp->if_capenable |= IFCAP_TXCSUM; 4224 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4225 } 4226 } else if (mask & IFCAP_RXCSUM) { 4227 if (IFCAP_RXCSUM & ifp->if_capenable) { 4228 ifp->if_capenable &= ~IFCAP_RXCSUM; 4229 } else { 4230 ifp->if_capenable |= IFCAP_RXCSUM; 4231 } 4232 } 4233 if (mask & IFCAP_TSO4) { 4234 if (IFCAP_TSO4 & ifp->if_capenable) { 4235 ifp->if_capenable &= ~IFCAP_TSO4; 4236 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4237 ifp->if_capenable |= IFCAP_TSO4; 4238 ifp->if_hwassist |= CSUM_TSO; 4239 } else { 4240 printf("mxge requires tx checksum offload" 4241 " be enabled to use TSO\n"); 4242 err = EINVAL; 4243 } 4244 } 4245#if IFCAP_TSO6 4246 if (mask & IFCAP_TXCSUM_IPV6) { 4247 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4248 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4249 | IFCAP_TSO6); 4250 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4251 | CSUM_UDP); 4252 } else { 4253 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4254 ifp->if_hwassist |= (CSUM_TCP_IPV6 4255 | CSUM_UDP_IPV6); 4256 } 4257 } else if (mask & IFCAP_RXCSUM_IPV6) { 4258 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4259 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4260 } else { 4261 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4262 } 4263 } 4264 if (mask & IFCAP_TSO6) { 4265 if (IFCAP_TSO6 & ifp->if_capenable) { 4266 ifp->if_capenable &= ~IFCAP_TSO6; 4267 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4268 ifp->if_capenable |= IFCAP_TSO6; 4269 ifp->if_hwassist |= CSUM_TSO; 4270 } else { 4271 printf("mxge requires tx checksum offload" 4272 " be enabled to use TSO\n"); 4273 err = EINVAL; 4274 } 4275 } 4276#endif 
/*IFCAP_TSO6 */ 4277 4278 if (mask & IFCAP_LRO) 4279 ifp->if_capenable ^= IFCAP_LRO; 4280 if (mask & IFCAP_VLAN_HWTAGGING) 4281 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4282 if (mask & IFCAP_VLAN_HWTSO) 4283 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4284 4285 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4286 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4287 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4288 4289 mtx_unlock(&sc->driver_mtx); 4290 VLAN_CAPABILITIES(ifp); 4291 4292 break; 4293 4294 case SIOCGIFMEDIA: 4295 mtx_lock(&sc->driver_mtx); 4296 mxge_media_probe(sc); 4297 mtx_unlock(&sc->driver_mtx); 4298 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4299 &sc->media, command); 4300 break; 4301 4302 default: 4303 err = ENOTTY; 4304 } 4305 return err; 4306} 4307 4308static void 4309mxge_fetch_tunables(mxge_softc_t *sc) 4310{ 4311 4312 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4313 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4314 &mxge_flow_control); 4315 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4316 &mxge_intr_coal_delay); 4317 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4318 &mxge_nvidia_ecrc_enable); 4319 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4320 &mxge_force_firmware); 4321 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4322 &mxge_deassert_wait); 4323 TUNABLE_INT_FETCH("hw.mxge.verbose", 4324 &mxge_verbose); 4325 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4326 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4327 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4328 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4329 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4330 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4331 4332 if (bootverbose) 4333 mxge_verbose = 1; 4334 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4335 mxge_intr_coal_delay = 30; 4336 if (mxge_ticks == 0) 4337 mxge_ticks = hz / 2; 4338 sc->pause = mxge_flow_control; 
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	/* clamp throttle into the supported range (0 = disabled) */
	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}


/*
 * Free everything allocated by mxge_alloc_slices().  Safe to call on a
 * partially constructed slice array.
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;


	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

/*
 * Allocate the per-slice state array plus each slice's interrupt queue,
 * firmware stats block, tx mutex and (optionally) buf_ring.
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

/*
 * Decide how many slices (RSS queues) to use: requires the tunable to
 * allow it, an SMP machine, enough MSI-X vectors, and the RSS-capable
 * firmware to load and respond.  Falls back to one slice otherwise.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are not enabled,
	 * or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset
	   command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	/* any failure: restore the previously loaded firmware */
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

/*
 * Allocate the MSI-X table BAR, one MSI-X vector per slice, and hook
 * up one interrupt handler per slice.  Unwinds fully on failure.
 */
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
			      "err = %d \n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      count, sc->num_slices);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	/* MSI-X IRQ resource IDs are 1-based */
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	sc->msix_ih =  malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
		bus_describe_intr(sc->dev, sc->msix_irq_res[i],
				  sc->msix_ih[i], "s%d", i);
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);


abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);


abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}

/*
 * Single-vector setup: prefer MSI (rid 1); fall back to a shared
 * legacy INTx interrupt (rid 0).
 */
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;
	} else {
		rid = 0;
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ?
0 : 1, sc->irq_res); 4687 if (!sc->legacy_irq) 4688 pci_release_msi(sc->dev); 4689 } 4690 return err; 4691} 4692 4693static void 4694mxge_rem_msix_irqs(mxge_softc_t *sc) 4695{ 4696 int i, rid; 4697 4698 for (i = 0; i < sc->num_slices; i++) { 4699 if (sc->msix_ih[i] != NULL) { 4700 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4701 sc->msix_ih[i]); 4702 sc->msix_ih[i] = NULL; 4703 } 4704 } 4705 free(sc->msix_ih, M_DEVBUF); 4706 4707 for (i = 0; i < sc->num_slices; i++) { 4708 rid = i + 1; 4709 if (sc->msix_irq_res[i] != NULL) 4710 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4711 sc->msix_irq_res[i]); 4712 sc->msix_irq_res[i] = NULL; 4713 } 4714 free(sc->msix_irq_res, M_DEVBUF); 4715 4716 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4717 sc->msix_table_res); 4718 4719 pci_release_msi(sc->dev); 4720 return; 4721} 4722 4723static void 4724mxge_rem_single_irq(mxge_softc_t *sc) 4725{ 4726 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4727 bus_release_resource(sc->dev, SYS_RES_IRQ, 4728 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4729 if (!sc->legacy_irq) 4730 pci_release_msi(sc->dev); 4731} 4732 4733static void 4734mxge_rem_irq(mxge_softc_t *sc) 4735{ 4736 if (sc->num_slices > 1) 4737 mxge_rem_msix_irqs(sc); 4738 else 4739 mxge_rem_single_irq(sc); 4740} 4741 4742static int 4743mxge_add_irq(mxge_softc_t *sc) 4744{ 4745 int err; 4746 4747 if (sc->num_slices > 1) 4748 err = mxge_add_msix_irqs(sc); 4749 else 4750 err = mxge_add_single_irq(sc); 4751 4752 if (0 && err == 0 && sc->num_slices > 1) { 4753 mxge_rem_msix_irqs(sc); 4754 err = mxge_add_msix_irqs(sc); 4755 } 4756 return err; 4757} 4758 4759 4760static int 4761mxge_attach(device_t dev) 4762{ 4763 mxge_cmd_t cmd; 4764 mxge_softc_t *sc = device_get_softc(dev); 4765 struct ifnet *ifp; 4766 int err, rid; 4767 4768 sc->dev = dev; 4769 mxge_fetch_tunables(sc); 4770 4771 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4772 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4773 taskqueue_thread_enqueue, &sc->tq); 4774 if (sc->tq == NULL) { 4775 err = ENOMEM; 4776 goto abort_with_nothing; 4777 } 4778 4779 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4780 1, /* alignment */ 4781 0, /* boundary */ 4782 BUS_SPACE_MAXADDR, /* low */ 4783 BUS_SPACE_MAXADDR, /* high */ 4784 NULL, NULL, /* filter */ 4785 65536 + 256, /* maxsize */ 4786 MXGE_MAX_SEND_DESC, /* num segs */ 4787 65536, /* maxsegsize */ 4788 0, /* flags */ 4789 NULL, NULL, /* lock */ 4790 &sc->parent_dmat); /* tag */ 4791 4792 if (err != 0) { 4793 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4794 err); 4795 goto abort_with_tq; 4796 } 4797 4798 ifp = sc->ifp = if_alloc(IFT_ETHER); 4799 if (ifp == NULL) { 4800 device_printf(dev, "can not if_alloc()\n"); 4801 err = ENOSPC; 4802 goto abort_with_parent_dmat; 4803 } 4804 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4805 4806 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4807 device_get_nameunit(dev)); 4808 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, 
MTX_DEF); 4809 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4810 "%s:drv", device_get_nameunit(dev)); 4811 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4812 MTX_NETWORK_LOCK, MTX_DEF); 4813 4814 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4815 4816 mxge_setup_cfg_space(sc); 4817 4818 /* Map the board into the kernel */ 4819 rid = PCIR_BARS; 4820 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4821 ~0, 1, RF_ACTIVE); 4822 if (sc->mem_res == NULL) { 4823 device_printf(dev, "could not map memory\n"); 4824 err = ENXIO; 4825 goto abort_with_lock; 4826 } 4827 sc->sram = rman_get_virtual(sc->mem_res); 4828 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4829 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4830 device_printf(dev, "impossible memory region size %ld\n", 4831 rman_get_size(sc->mem_res)); 4832 err = ENXIO; 4833 goto abort_with_mem_res; 4834 } 4835 4836 /* make NULL terminated copy of the EEPROM strings section of 4837 lanai SRAM */ 4838 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4839 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4840 rman_get_bushandle(sc->mem_res), 4841 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4842 sc->eeprom_strings, 4843 MXGE_EEPROM_STRINGS_SIZE - 2); 4844 err = mxge_parse_strings(sc); 4845 if (err != 0) 4846 goto abort_with_mem_res; 4847 4848 /* Enable write combining for efficient use of PCIe bus */ 4849 mxge_enable_wc(sc); 4850 4851 /* Allocate the out of band dma memory */ 4852 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4853 sizeof (mxge_cmd_t), 64); 4854 if (err != 0) 4855 goto abort_with_mem_res; 4856 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4857 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4858 if (err != 0) 4859 goto abort_with_cmd_dma; 4860 4861 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4862 if (err != 0) 4863 goto abort_with_zeropad_dma; 4864 4865 /* select & load the firmware */ 4866 err = mxge_select_firmware(sc); 4867 if (err 
!= 0) 4868 goto abort_with_dmabench; 4869 sc->intr_coal_delay = mxge_intr_coal_delay; 4870 4871 mxge_slice_probe(sc); 4872 err = mxge_alloc_slices(sc); 4873 if (err != 0) 4874 goto abort_with_dmabench; 4875 4876 err = mxge_reset(sc, 0); 4877 if (err != 0) 4878 goto abort_with_slices; 4879 4880 err = mxge_alloc_rings(sc); 4881 if (err != 0) { 4882 device_printf(sc->dev, "failed to allocate rings\n"); 4883 goto abort_with_slices; 4884 } 4885 4886 err = mxge_add_irq(sc); 4887 if (err != 0) { 4888 device_printf(sc->dev, "failed to add irq\n"); 4889 goto abort_with_rings; 4890 } 4891 4892 if_initbaudrate(ifp, IF_Gbps(10)); 4893 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4894 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4895 IFCAP_RXCSUM_IPV6; 4896#if defined(INET) || defined(INET6) 4897 ifp->if_capabilities |= IFCAP_LRO; 4898#endif 4899 4900#ifdef MXGE_NEW_VLAN_API 4901 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4902 4903 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4904 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4905 sc->fw_ver_tiny >= 32) 4906 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4907#endif 4908 sc->max_mtu = mxge_max_mtu(sc); 4909 if (sc->max_mtu >= 9000) 4910 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4911 else 4912 device_printf(dev, "MTU limited to %d. 
Install " 4913 "latest firmware for 9000 byte jumbo support\n", 4914 sc->max_mtu - ETHER_HDR_LEN); 4915 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4916 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4917 /* check to see if f/w supports TSO for IPv6 */ 4918 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4919 if (CSUM_TCP_IPV6) 4920 ifp->if_capabilities |= IFCAP_TSO6; 4921 sc->max_tso6_hlen = min(cmd.data0, 4922 sizeof (sc->ss[0].scratch)); 4923 } 4924 ifp->if_capenable = ifp->if_capabilities; 4925 if (sc->lro_cnt == 0) 4926 ifp->if_capenable &= ~IFCAP_LRO; 4927 ifp->if_init = mxge_init; 4928 ifp->if_softc = sc; 4929 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4930 ifp->if_ioctl = mxge_ioctl; 4931 ifp->if_start = mxge_start; 4932 /* Initialise the ifmedia structure */ 4933 ifmedia_init(&sc->media, 0, mxge_media_change, 4934 mxge_media_status); 4935 mxge_media_init(sc); 4936 mxge_media_probe(sc); 4937 sc->dying = 0; 4938 ether_ifattach(ifp, sc->mac_addr); 4939 /* ether_ifattach sets mtu to ETHERMTU */ 4940 if (mxge_initial_mtu != ETHERMTU) 4941 mxge_change_mtu(sc, mxge_initial_mtu); 4942 4943 mxge_add_sysctls(sc); 4944#ifdef IFNET_BUF_RING 4945 ifp->if_transmit = mxge_transmit; 4946 ifp->if_qflush = mxge_qflush; 4947#endif 4948 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4949 device_get_nameunit(sc->dev)); 4950 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4951 return 0; 4952 4953abort_with_rings: 4954 mxge_free_rings(sc); 4955abort_with_slices: 4956 mxge_free_slices(sc); 4957abort_with_dmabench: 4958 mxge_dma_free(&sc->dmabench_dma); 4959abort_with_zeropad_dma: 4960 mxge_dma_free(&sc->zeropad_dma); 4961abort_with_cmd_dma: 4962 mxge_dma_free(&sc->cmd_dma); 4963abort_with_mem_res: 4964 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4965abort_with_lock: 4966 pci_disable_busmaster(dev); 4967 mtx_destroy(&sc->cmd_mtx); 4968 mtx_destroy(&sc->driver_mtx); 4969 if_free(ifp); 
4970abort_with_parent_dmat: 4971 bus_dma_tag_destroy(sc->parent_dmat); 4972abort_with_tq: 4973 if (sc->tq != NULL) { 4974 taskqueue_drain(sc->tq, &sc->watchdog_task); 4975 taskqueue_free(sc->tq); 4976 sc->tq = NULL; 4977 } 4978abort_with_nothing: 4979 return err; 4980} 4981 4982static int 4983mxge_detach(device_t dev) 4984{ 4985 mxge_softc_t *sc = device_get_softc(dev); 4986 4987 if (mxge_vlans_active(sc)) { 4988 device_printf(sc->dev, 4989 "Detach vlans before removing module\n"); 4990 return EBUSY; 4991 } 4992 mtx_lock(&sc->driver_mtx); 4993 sc->dying = 1; 4994 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4995 mxge_close(sc, 0); 4996 mtx_unlock(&sc->driver_mtx); 4997 ether_ifdetach(sc->ifp); 4998 if (sc->tq != NULL) { 4999 taskqueue_drain(sc->tq, &sc->watchdog_task); 5000 taskqueue_free(sc->tq); 5001 sc->tq = NULL; 5002 } 5003 callout_drain(&sc->co_hdl); 5004 ifmedia_removeall(&sc->media); 5005 mxge_dummy_rdma(sc, 0); 5006 mxge_rem_sysctls(sc); 5007 mxge_rem_irq(sc); 5008 mxge_free_rings(sc); 5009 mxge_free_slices(sc); 5010 mxge_dma_free(&sc->dmabench_dma); 5011 mxge_dma_free(&sc->zeropad_dma); 5012 mxge_dma_free(&sc->cmd_dma); 5013 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5014 pci_disable_busmaster(dev); 5015 mtx_destroy(&sc->cmd_mtx); 5016 mtx_destroy(&sc->driver_mtx); 5017 if_free(sc->ifp); 5018 bus_dma_tag_destroy(sc->parent_dmat); 5019 return 0; 5020} 5021 5022static int 5023mxge_shutdown(device_t dev) 5024{ 5025 return 0; 5026} 5027 5028/* 5029 This file uses Myri10GE driver indentation. 5030 5031 Local Variables: 5032 c-file-style:"linux" 5033 tab-width:8 5034 End: 5035*/
|