32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50/* count xmits ourselves, rather than via drbr */ 51#define NO_SLOW_STATS 52#include <net/if.h> 53#include <net/if_arp.h> 54#include <net/ethernet.h> 55#include <net/if_dl.h> 56#include <net/if_media.h> 57 58#include <net/bpf.h> 59 60#include <net/if_types.h> 61#include <net/if_vlan_var.h> 62#include <net/zlib.h> 63 64#include <netinet/in_systm.h> 65#include <netinet/in.h> 66#include <netinet/ip.h> 67#include <netinet/tcp.h> 68 69#include <machine/bus.h> 70#include <machine/in_cksum.h> 71#include <machine/resource.h> 72#include <sys/bus.h> 73#include <sys/rman.h> 74#include <sys/smp.h> 75 76#include <dev/pci/pcireg.h> 77#include <dev/pci/pcivar.h> 78#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80#include <vm/vm.h> /* for pmap_mapdev() */ 81#include <vm/pmap.h> 82 83#if defined(__i386) || defined(__amd64) 84#include <machine/specialreg.h> 85#endif 86 87#include <dev/mxge/mxge_mcp.h> 88#include <dev/mxge/mcp_gen_header.h> 89/*#define MXGE_FAKE_IFP*/ 90#include <dev/mxge/if_mxge_var.h> 91#ifdef IFNET_BUF_RING 92#include <sys/buf_ring.h> 93#endif 94 95#include "opt_inet.h" 96 97/* tunable params */ 98static int mxge_nvidia_ecrc_enable = 1; 99static int mxge_force_firmware = 0; 100static int mxge_intr_coal_delay = 30; 101static int mxge_deassert_wait = 1; 102static int mxge_flow_control = 1; 103static int mxge_verbose = 0; 104static int mxge_lro_cnt = 8; 105static int mxge_ticks; 106static int mxge_max_slices = 1; 107static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108static int mxge_always_promisc = 0; 109static int 
mxge_initial_mtu = ETHERMTU_JUMBO;	/* MTU programmed at attach time */
static int mxge_throttle = 0;		/* optional TX throttle factor (0 = off) */
/*
 * Firmware image names.  The "p" (ethp/rss_ethp) variants work around
 * unaligned PCIe completions; the plain variants require 8-byte aligned
 * completions but give better DMA read throughput.
 */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

/* forward declarations for routines referenced before their definitions */
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

/*
 * Probe: match Myricom Z8E-family 10GbE NICs by PCI vendor/device id and
 * pick a human-readable description from the PCI revision.  Returns 0 on
 * a match, ENXIO otherwise.
 */
static int
mxge_probe(device_t dev)
{
	int rev;

	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			/* still attach to unknown revisions, but warn */
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

/*
 * Mark the NIC SRAM mapping write-combining (x86/amd64 only) so PIO
 * copies to the NIC are burst-combined.  On failure sc->wc is cleared
 * and the driver falls back to uncombined writes.
 */
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}

/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	if (error == 0) {
		/* single-segment load: arg points at the caller's bus_addr */
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

/*
 * Allocate, zero, and bus_dmamap_load() a coherent DMA area of `bytes`
 * bytes with the requested alignment, filling in dma->{dmat,addr,map,
 * bus_addr}.  Returns 0 or a busdma errno; on error nothing is leaked.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	/*
	 * Page-aligned buffers larger than a page must not be split by a
	 * boundary; otherwise constrain segments to 4KB.
	 */
	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
		device_printf(dev, "couldn't load map (err = %d)\n", err);
		goto abort_with_mem;
	}
	return 0;

abort_with_mem:
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
abort_with_dmat:
	(void)bus_dma_tag_destroy(dma->dmat);
	return err;
}

/* Tear down a DMA area created by mxge_dma_alloc(): unload, free, destroy. */
static void
mxge_dma_free(mxge_dma_t *dma)
{
	bus_dmamap_unload(dma->dmat, dma->map);
	bus_dmamem_free(dma->dmat, dma->addr, dma->map);
	(void)bus_dma_tag_destroy(dma->dmat);
}

/*
 * The eeprom strings on the lanaiX have the format
 * SN=x\0
 * MAC=x:x:x:x:x:x\0
 * PC=text\0
 */

/*
 * Parse the NUL-separated EEPROM strings into sc->mac_addr,
 * sc->product_code_string and sc->serial_number_string.
 * Returns 0 when a MAC address was found, ENXIO otherwise.
 */
static int
mxge_parse_strings(mxge_softc_t *sc)
{
/* advance ptr past the current NUL-terminated string (stops at limit) */
#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++)

	char *ptr, *limit;
	int i, found_mac;

	ptr = sc->eeprom_strings;
	limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE;
	found_mac = 0;
	while (ptr < limit && *ptr != '\0') {
		if (memcmp(ptr, "MAC=", 4) == 0) {
			/*
			 * Skip just the 'M'; the first "ptr += 3" below
			 * then lands on the first hex byte after "MAC=".
			 */
			ptr += 1;
			sc->mac_addr_string = ptr;
			for (i = 0; i < 6; i++) {
				ptr += 3;
				if ((ptr + 2) > limit)
					goto abort;
				sc->mac_addr[i] = strtoul(ptr, NULL, 16);
				found_mac = 1;
			}
		} else if (memcmp(ptr, "PC=", 3) == 0) {
			ptr += 3;
			/* NOTE: strncpy; relies on softc being zeroed for
			   NUL termination of the copied string */
			strncpy(sc->product_code_string, ptr,
				sizeof (sc->product_code_string) - 1);
		} else if (memcmp(ptr, "SN=", 3) == 0) {
			ptr += 3;
			strncpy(sc->serial_number_string, ptr,
				sizeof (sc->serial_number_string) - 1);
		}
		MXGE_NEXT_STRING(ptr);
	}

	if (found_mac)
		return 0;

 abort:
	device_printf(sc->dev, "failed to parse eeprom_strings\n");

	return ENXIO;
}

#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__
/*
 * Enable ECRC generation on an upstream Nvidia (nForce) PCIe bridge so
 * that PCIe completions are 8-byte aligned (see the comment block above
 * mxge_firmware_probe).  The bridge's extended config space is not
 * reachable through the normal config accessors, so it is located at a
 * chipset-specific physical address and poked through pmap_mapdev().
 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	uint32_t val;
	unsigned long base, off;
	char *va, *cfgptr;
	device_t pdev, mcp55;
	uint16_t vendor_id, device_id, word;
	uintptr_t bus, slot, func, ivend, idev;
	uint32_t *ptr32;

	if (!mxge_nvidia_ecrc_enable)
		return;

	/* the bridge is the grandparent (pcib) of our device */
	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	if (vendor_id != 0x10de)	/* only Nvidia bridges handled */
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function.  Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves.  This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* ECAM-style offset: 1MB per bus, 4KB per function */
	off =  base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);

	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;	/* set the ECRC generation enable bit */
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
/* Non-x86 stub: ECRC enabling is only implemented for i386/amd64. */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif

/*
 * Ask the firmware to run a DMA benchmark against the dmabench buffer.
 * Results (MB/s) are stored into sc->read_dma, sc->write_dma and
 * sc->read_write_dma; returns the firmware command status.
 */
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";

	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	/* read test: length * 0x10000 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	/* bandwidth = (transfers * len * 2 bytes/tick-unit) / ticks */
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	/* write test: length * 0x1 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	/* read+write test: length * 0x10001 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	/* the unaligned test is expected to fail on some hosts; stay quiet */
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
 *
 * When PCI-E Completion packets are not aligned, it is actually more
 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB.
 *
 * If the driver can neither enable ECRC nor verify that it has
 * already been enabled, then it must use a firmware image which works
 * around unaligned completion packets (ethp_z8e.dat), and it should
 * also ensure that it never gives the device a Read-DMA which is
 * larger than 2KB by setting the tx_boundary to 2KB.  If ECRC is
 * enabled, then the driver should use the aligned (eth_z8e.dat)
 * firmware image, and set tx_boundary to 4KB.
 */

/*
 * Try the aligned firmware: load it, enable ECRC if possible, then run
 * the firmware's unaligned-completion detector.  Returns 0 if aligned
 * completions were observed (keep the aligned firmware), else an errno.
 */
static int
mxge_firmware_probe(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg, status;
	uint16_t pectl;

	sc->tx_boundary = 4096;
	/*
	 * Verify the max read request size was set to 4KB
	 * before trying the test with 4KB.
	 */
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		/* Device Control register lives at cap + 0x8 */
		pectl = pci_read_config(dev, reg + 0x8, 2);
		if ((pectl & (5 << 12)) != (5 << 12)) {
			device_printf(dev, "Max Read Req. size != 4k (0x%x\n",
				      pectl);
			sc->tx_boundary = 2048;
		}
	}

	/*
	 * load the optimized firmware (which assumes aligned PCIe
	 * completions) in order to see if it works on this host.
	 */
	sc->fw_name = mxge_fw_aligned;
	status = mxge_load_firmware(sc, 1);
	if (status != 0) {
		return status;
	}

	/*
	 * Enable ECRC if possible
	 */
	mxge_enable_nvidia_ecrc(sc);

	/*
	 * Run a DMA test which watches for unaligned completions and
	 * aborts on the first one seen.
	 */

	status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST);
	if (status == 0)
		return 0; /* keep the aligned firmware */

	if (status != E2BIG)
		device_printf(dev, "DMA test failed: %d\n", status);
	if (status == ENOSYS)	/* old firmware lacks the unaligned test */
		device_printf(dev, "Falling back to ethp! "
			      "Please install up to date fw\n");
	return status;
}

/*
 * Decide which firmware image to load and what tx_boundary to use:
 * honor the force/throttle tunables, assume aligned completions on
 * narrow (<= x4) links, otherwise probe the host with the aligned
 * firmware and fall back to the unaligned one.
 */
static int
mxge_select_firmware(mxge_softc_t *sc)
{
	int aligned = 0;
	int force_firmware = mxge_force_firmware;

	/* throttling requires the aligned/unaligned choice to be forced */
	if (sc->throttle)
		force_firmware = sc->throttle;

	if (force_firmware != 0) {
		if (force_firmware == 1)
			aligned = 1;
		else
			aligned = 0;
		if (mxge_verbose)
			device_printf(sc->dev,
				      "Assuming %s completions (forced)\n",
				      aligned ? "aligned" : "unaligned");
		goto abort;
	}

	/* if the PCIe link width is 4 or less, we can use the aligned
	   firmware and skip any checks */
	if (sc->link_width != 0 && sc->link_width <= 4) {
		device_printf(sc->dev,
			      "PCIe x%d Link, expect reduced performance\n",
			      sc->link_width);
		aligned = 1;
		goto abort;
	}

	if (0 == mxge_firmware_probe(sc))
		return 0;

abort:
	if (aligned) {
		sc->fw_name = mxge_fw_aligned;
		sc->tx_boundary = 4096;
	} else {
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
	}
	return (mxge_load_firmware(sc, 0));
}

/* used to strip const qualifiers without compiler warnings */
union qualhack
{
	const char *ro_char;
	char *rw_char;
};

/*
 * Check that a firmware header describes an ethernet MCP whose
 * major.minor version matches what this driver was built against.
 * Also records the version string/numbers in the softc for sysctl.
 * Returns 0, EIO (bad type) or EINVAL (version mismatch).
 */
static int
mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr)
{

	if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) {
		device_printf(sc->dev, "Bad firmware type: 0x%x\n",
			      be32toh(hdr->mcp_type));
		return EIO;
	}

	/* save firmware version for sysctl */
	/* NOTE(review): strncpy may leave fw_version unterminated if the
	   version string fills the buffer — presumably sized generously */
	strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version));
	if (mxge_verbose)
		device_printf(sc->dev, "firmware id: %s\n", hdr->version);

	sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major,
	       &sc->fw_ver_minor, &sc->fw_ver_tiny);

	if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR
	      && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) {
		device_printf(sc->dev, "Found firmware version %s\n",
			      sc->fw_version);
		device_printf(sc->dev, "Driver needs %d.%d\n",
MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677} 678 679static void * 680z_alloc(void *nil, u_int items, u_int size) 681{ 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686} 687 688static void 689z_free(void *nil, void *ptr) 690{ 691 free(ptr, M_TEMP); 692} 693 694 695static int 696mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697{ 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if 
(status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772abort_with_zs: 773 inflateEnd(&zs); 774abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777} 778 779/* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784static void 785mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786{ 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? 
*/ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? "enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834} 835 836static int 837mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838{ 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 default: 887 device_printf(sc->dev, 888 "mxge: command %d " 889 "failed, 
result = %d\n", 890 cmd, be32toh(response->result)); 891 err = ENXIO; 892 break; 893 } 894 if (err != EAGAIN) 895 break; 896 } 897 if (err == EAGAIN) 898 device_printf(sc->dev, "mxge: command %d timed out" 899 "result = %d\n", 900 cmd, be32toh(response->result)); 901 mtx_unlock(&sc->cmd_mtx); 902 return err; 903} 904 905static int 906mxge_adopt_running_firmware(mxge_softc_t *sc) 907{ 908 struct mcp_gen_header *hdr; 909 const size_t bytes = sizeof (struct mcp_gen_header); 910 size_t hdr_offset; 911 int status; 912 913 /* find running firmware header */ 914 hdr_offset = htobe32(*(volatile uint32_t *) 915 (sc->sram + MCP_HEADER_PTR_OFFSET)); 916 917 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 918 device_printf(sc->dev, 919 "Running firmware has bad header offset (%d)\n", 920 (int)hdr_offset); 921 return EIO; 922 } 923 924 /* copy header of running firmware from SRAM to host memory to 925 * validate firmware */ 926 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 927 if (hdr == NULL) { 928 device_printf(sc->dev, "could not malloc firmware hdr\n"); 929 return ENOMEM; 930 } 931 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 932 rman_get_bushandle(sc->mem_res), 933 hdr_offset, (char *)hdr, bytes); 934 status = mxge_validate_firmware(sc, hdr); 935 free(hdr, M_DEVBUF); 936 937 /* 938 * check to see if adopted firmware has bug where adopting 939 * it will cause broadcasts to be filtered unless the NIC 940 * is kept in ALLMULTI mode 941 */ 942 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 943 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 944 sc->adopted_rx_filter_bug = 1; 945 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 946 "working around rx filter bug\n", 947 sc->fw_ver_major, sc->fw_ver_minor, 948 sc->fw_ver_tiny); 949 } 950 951 return status; 952} 953 954 955static int 956mxge_load_firmware(mxge_softc_t *sc, int adopt) 957{ 958 volatile uint32_t *confirm; 959 volatile char *submit; 960 char buf_bytes[72]; 961 uint32_t *buf, size, 
dma_low, dma_high; 962 int status, i; 963 964 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 965 966 size = sc->sram_size; 967 status = mxge_load_firmware_helper(sc, &size); 968 if (status) { 969 if (!adopt) 970 return status; 971 /* Try to use the currently running firmware, if 972 it is new enough */ 973 status = mxge_adopt_running_firmware(sc); 974 if (status) { 975 device_printf(sc->dev, 976 "failed to adopt running firmware\n"); 977 return status; 978 } 979 device_printf(sc->dev, 980 "Successfully adopted running firmware\n"); 981 if (sc->tx_boundary == 4096) { 982 device_printf(sc->dev, 983 "Using firmware currently running on NIC" 984 ". For optimal\n"); 985 device_printf(sc->dev, 986 "performance consider loading optimized " 987 "firmware\n"); 988 } 989 sc->fw_name = mxge_fw_unaligned; 990 sc->tx_boundary = 2048; 991 return 0; 992 } 993 /* clear confirmation addr */ 994 confirm = (volatile uint32_t *)sc->cmd; 995 *confirm = 0; 996 wmb(); 997 /* send a reload command to the bootstrap MCP, and wait for the 998 response in the confirmation address. The firmware should 999 write a -1 there to indicate it is alive and well 1000 */ 1001 1002 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1003 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1004 1005 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1006 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1007 buf[2] = htobe32(0xffffffff); /* confirm data */ 1008 1009 /* FIX: All newest firmware should un-protect the bottom of 1010 the sram before handoff. However, the very first interfaces 1011 do not. 
	   Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll for the new firmware's -1 acknowledgement (~200ms max) */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			      confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

/*
 * Program the station MAC address (from sc->mac_addr) into the
 * firmware.  Returns the mxge_send_cmd() status.
 */
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;

	/* firmware wants the 6 bytes packed big-endian into data0/data1 */
	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

/*
 * Enable or disable ethernet flow control in the firmware and record
 * the new setting in sc->pause.  Returns 0 or ENXIO.
 */
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

/*
 * Enable or disable promiscuous mode in the firmware (forced on when
 * the mxge_always_promisc tunable is set).  Failures are only logged.
 */
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

/*
 * Sync the interface's multicast list into the firmware filter:
 * temporarily go ALLMULTI, flush the firmware's groups, re-join each
 * AF_LINK address, then re-enable filtering.  Leaves ALLMULTI on when
 * IFF_ALLMULTI is requested, the adopted-firmware rx filter bug is
 * present, or any step fails.
 */
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/* pack the 6-byte address into cmd.data0/data1 */
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
				      "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
				      "%d\t", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed
MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

/*
 * Determine the largest MTU the firmware will accept: the full
 * MXGEFW_MAX_MTU when page-sized jumbo clusters suffice or when the
 * firmware supports virtually-contiguous buffers, otherwise limited
 * by MJUMPAGESIZE.
 */
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return  MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return  MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

/*
 * Reset the firmware and re-establish all shared driver/firmware
 * state: interrupt queues (when interrupts_setup), coalescing/IRQ
 * pointers, per-slice counters, MAC address, promisc/pause/multicast
 * settings, and the optional throttle factor.  Returns 0 or an errno.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);

	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}

	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	/* fetch SRAM offsets of the shared interrupt-control words */
	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);

	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}

	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);

	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		/* each slice claims every other 32-bit word */
		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	/* reapply the host-side configuration the reset wiped out */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

/*
 * Sysctl handler for the TX throttle factor: validates the new value
 * against MXGE_MIN/MAX_THROTTLE and programs it into the firmware
 * under the driver mutex.
 */
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

/* Sysctl handler for the interrupt coalescing delay (continues below). */
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
1367 int err; 1368 1369 sc = arg1; 1370 intr_coal_delay = sc->intr_coal_delay; 1371 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1372 if (err != 0) { 1373 return err; 1374 } 1375 if (intr_coal_delay == sc->intr_coal_delay) 1376 return 0; 1377 1378 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1379 return EINVAL; 1380 1381 mtx_lock(&sc->driver_mtx); 1382 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1383 sc->intr_coal_delay = intr_coal_delay; 1384 1385 mtx_unlock(&sc->driver_mtx); 1386 return err; 1387} 1388 1389static int 1390mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1391{ 1392 mxge_softc_t *sc; 1393 unsigned int enabled; 1394 int err; 1395 1396 sc = arg1; 1397 enabled = sc->pause; 1398 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1399 if (err != 0) { 1400 return err; 1401 } 1402 if (enabled == sc->pause) 1403 return 0; 1404 1405 mtx_lock(&sc->driver_mtx); 1406 err = mxge_change_pause(sc, enabled); 1407 mtx_unlock(&sc->driver_mtx); 1408 return err; 1409} 1410 1411static int 1412mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1413{ 1414 struct ifnet *ifp; 1415 int err = 0; 1416 1417 ifp = sc->ifp; 1418 if (lro_cnt == 0) 1419 ifp->if_capenable &= ~IFCAP_LRO; 1420 else 1421 ifp->if_capenable |= IFCAP_LRO; 1422 sc->lro_cnt = lro_cnt; 1423 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1424 mxge_close(sc, 0); 1425 err = mxge_open(sc); 1426 } 1427 return err; 1428} 1429 1430static int 1431mxge_change_lro(SYSCTL_HANDLER_ARGS) 1432{ 1433 mxge_softc_t *sc; 1434 unsigned int lro_cnt; 1435 int err; 1436 1437 sc = arg1; 1438 lro_cnt = sc->lro_cnt; 1439 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1440 if (err != 0) 1441 return err; 1442 1443 if (lro_cnt == sc->lro_cnt) 1444 return 0; 1445 1446 if (lro_cnt > 128) 1447 return EINVAL; 1448 1449 mtx_lock(&sc->driver_mtx); 1450 err = mxge_change_lro_locked(sc, lro_cnt); 1451 mtx_unlock(&sc->driver_mtx); 1452 return err; 1453} 1454 1455static int 
1456mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1457{ 1458 int err; 1459 1460 if (arg1 == NULL) 1461 return EFAULT; 1462 arg2 = be32toh(*(int *)arg1); 1463 arg1 = NULL; 1464 err = sysctl_handle_int(oidp, arg1, arg2, req); 1465 1466 return err; 1467} 1468 1469static void 1470mxge_rem_sysctls(mxge_softc_t *sc) 1471{ 1472 struct mxge_slice_state *ss; 1473 int slice; 1474 1475 if (sc->slice_sysctl_tree == NULL) 1476 return; 1477 1478 for (slice = 0; slice < sc->num_slices; slice++) { 1479 ss = &sc->ss[slice]; 1480 if (ss == NULL || ss->sysctl_tree == NULL) 1481 continue; 1482 sysctl_ctx_free(&ss->sysctl_ctx); 1483 ss->sysctl_tree = NULL; 1484 } 1485 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1486 sc->slice_sysctl_tree = NULL; 1487} 1488 1489static void 1490mxge_add_sysctls(mxge_softc_t *sc) 1491{ 1492 struct sysctl_ctx_list *ctx; 1493 struct sysctl_oid_list *children; 1494 mcp_irq_data_t *fw; 1495 struct mxge_slice_state *ss; 1496 int slice; 1497 char slice_num[8]; 1498 1499 ctx = device_get_sysctl_ctx(sc->dev); 1500 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1501 fw = sc->ss[0].fw_stats; 1502 1503 /* random information */ 1504 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1505 "firmware_version", 1506 CTLFLAG_RD, &sc->fw_version, 1507 0, "firmware version"); 1508 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1509 "serial_number", 1510 CTLFLAG_RD, &sc->serial_number_string, 1511 0, "serial number"); 1512 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1513 "product_code", 1514 CTLFLAG_RD, &sc->product_code_string, 1515 0, "product_code"); 1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1517 "pcie_link_width", 1518 CTLFLAG_RD, &sc->link_width, 1519 0, "tx_boundary"); 1520 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1521 "tx_boundary", 1522 CTLFLAG_RD, &sc->tx_boundary, 1523 0, "tx_boundary"); 1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1525 "write_combine", 1526 CTLFLAG_RD, &sc->wc, 1527 0, "write combining PIO?"); 1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1529 
"read_dma_MBs", 1530 CTLFLAG_RD, &sc->read_dma, 1531 0, "DMA Read speed in MB/s"); 1532 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1533 "write_dma_MBs", 1534 CTLFLAG_RD, &sc->write_dma, 1535 0, "DMA Write speed in MB/s"); 1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1537 "read_write_dma_MBs", 1538 CTLFLAG_RD, &sc->read_write_dma, 1539 0, "DMA concurrent Read/Write speed in MB/s"); 1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1541 "watchdog_resets", 1542 CTLFLAG_RD, &sc->watchdog_resets, 1543 0, "Number of times NIC was reset"); 1544 1545 1546 /* performance related tunables */ 1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "intr_coal_delay", 1549 CTLTYPE_INT|CTLFLAG_RW, sc, 1550 0, mxge_change_intr_coal, 1551 "I", "interrupt coalescing delay in usecs"); 1552 1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1554 "throttle", 1555 CTLTYPE_INT|CTLFLAG_RW, sc, 1556 0, mxge_change_throttle, 1557 "I", "transmit throttling"); 1558 1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1560 "flow_control_enabled", 1561 CTLTYPE_INT|CTLFLAG_RW, sc, 1562 0, mxge_change_flow_control, 1563 "I", "interrupt coalescing delay in usecs"); 1564 1565 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1566 "deassert_wait", 1567 CTLFLAG_RW, &mxge_deassert_wait, 1568 0, "Wait for IRQ line to go low in ihandler"); 1569 1570 /* stats block from firmware is in network byte order. 
1571 Need to swap it */ 1572 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1573 "link_up", 1574 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1575 0, mxge_handle_be32, 1576 "I", "link up"); 1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1578 "rdma_tags_available", 1579 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1580 0, mxge_handle_be32, 1581 "I", "rdma_tags_available"); 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_bad_crc32", 1584 CTLTYPE_INT|CTLFLAG_RD, 1585 &fw->dropped_bad_crc32, 1586 0, mxge_handle_be32, 1587 "I", "dropped_bad_crc32"); 1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1589 "dropped_bad_phy", 1590 CTLTYPE_INT|CTLFLAG_RD, 1591 &fw->dropped_bad_phy, 1592 0, mxge_handle_be32, 1593 "I", "dropped_bad_phy"); 1594 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1595 "dropped_link_error_or_filtered", 1596 CTLTYPE_INT|CTLFLAG_RD, 1597 &fw->dropped_link_error_or_filtered, 1598 0, mxge_handle_be32, 1599 "I", "dropped_link_error_or_filtered"); 1600 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1601 "dropped_link_overflow", 1602 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1603 0, mxge_handle_be32, 1604 "I", "dropped_link_overflow"); 1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1606 "dropped_multicast_filtered", 1607 CTLTYPE_INT|CTLFLAG_RD, 1608 &fw->dropped_multicast_filtered, 1609 0, mxge_handle_be32, 1610 "I", "dropped_multicast_filtered"); 1611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1612 "dropped_no_big_buffer", 1613 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1614 0, mxge_handle_be32, 1615 "I", "dropped_no_big_buffer"); 1616 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1617 "dropped_no_small_buffer", 1618 CTLTYPE_INT|CTLFLAG_RD, 1619 &fw->dropped_no_small_buffer, 1620 0, mxge_handle_be32, 1621 "I", "dropped_no_small_buffer"); 1622 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1623 "dropped_overrun", 1624 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1625 0, mxge_handle_be32, 1626 "I", "dropped_overrun"); 1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
1628 "dropped_pause", 1629 CTLTYPE_INT|CTLFLAG_RD, 1630 &fw->dropped_pause, 1631 0, mxge_handle_be32, 1632 "I", "dropped_pause"); 1633 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1634 "dropped_runt", 1635 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1636 0, mxge_handle_be32, 1637 "I", "dropped_runt"); 1638 1639 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1640 "dropped_unicast_filtered", 1641 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1642 0, mxge_handle_be32, 1643 "I", "dropped_unicast_filtered"); 1644 1645 /* verbose printing? */ 1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1647 "verbose", 1648 CTLFLAG_RW, &mxge_verbose, 1649 0, "verbose printing"); 1650 1651 /* lro */ 1652 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1653 "lro_cnt", 1654 CTLTYPE_INT|CTLFLAG_RW, sc, 1655 0, mxge_change_lro, 1656 "I", "number of lro merge queues"); 1657 1658 1659 /* add counters exported for debugging from all slices */ 1660 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1661 sc->slice_sysctl_tree = 1662 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1663 "slice", CTLFLAG_RD, 0, ""); 1664 1665 for (slice = 0; slice < sc->num_slices; slice++) { 1666 ss = &sc->ss[slice]; 1667 sysctl_ctx_init(&ss->sysctl_ctx); 1668 ctx = &ss->sysctl_ctx; 1669 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1670 sprintf(slice_num, "%d", slice); 1671 ss->sysctl_tree = 1672 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1673 CTLFLAG_RD, 0, ""); 1674 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1676 "rx_small_cnt", 1677 CTLFLAG_RD, &ss->rx_small.cnt, 1678 0, "rx_small_cnt"); 1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1680 "rx_big_cnt", 1681 CTLFLAG_RD, &ss->rx_big.cnt, 1682 0, "rx_small_cnt"); 1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1684 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1685 0, "number of lro merge queues flushed"); 1686 1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1688 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1689 0, "number 
of frames appended to lro merge" 1690 "queues"); 1691 1692#ifndef IFNET_BUF_RING 1693 /* only transmit from slice 0 for now */ 1694 if (slice > 0) 1695 continue; 1696#endif 1697 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1698 "tx_req", 1699 CTLFLAG_RD, &ss->tx.req, 1700 0, "tx_req"); 1701 1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1703 "tx_done", 1704 CTLFLAG_RD, &ss->tx.done, 1705 0, "tx_done"); 1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1707 "tx_pkt_done", 1708 CTLFLAG_RD, &ss->tx.pkt_done, 1709 0, "tx_done"); 1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1711 "tx_stall", 1712 CTLFLAG_RD, &ss->tx.stall, 1713 0, "tx_stall"); 1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1715 "tx_wake", 1716 CTLFLAG_RD, &ss->tx.wake, 1717 0, "tx_wake"); 1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1719 "tx_defrag", 1720 CTLFLAG_RD, &ss->tx.defrag, 1721 0, "tx_defrag"); 1722 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1723 "tx_queue_active", 1724 CTLFLAG_RD, &ss->tx.queue_active, 1725 0, "tx_queue_active"); 1726 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1727 "tx_activate", 1728 CTLFLAG_RD, &ss->tx.activate, 1729 0, "tx_activate"); 1730 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1731 "tx_deactivate", 1732 CTLFLAG_RD, &ss->tx.deactivate, 1733 0, "tx_deactivate"); 1734 } 1735} 1736 1737/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1738 backwards one at a time and handle ring wraps */ 1739 1740static inline void 1741mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1742 mcp_kreq_ether_send_t *src, int cnt) 1743{ 1744 int idx, starting_slot; 1745 starting_slot = tx->req; 1746 while (cnt > 1) { 1747 cnt--; 1748 idx = (starting_slot + cnt) & tx->mask; 1749 mxge_pio_copy(&tx->lanai[idx], 1750 &src[cnt], sizeof(*src)); 1751 wmb(); 1752 } 1753} 1754 1755/* 1756 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1757 * at most 32 bytes at a time, so as to avoid involving the software 1758 * pio handler in the nic. 
We re-write the first segment's flags 1759 * to mark them valid only after writing the entire chain 1760 */ 1761 1762static inline void 1763mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1764 int cnt) 1765{ 1766 int idx, i; 1767 uint32_t *src_ints; 1768 volatile uint32_t *dst_ints; 1769 mcp_kreq_ether_send_t *srcp; 1770 volatile mcp_kreq_ether_send_t *dstp, *dst; 1771 uint8_t last_flags; 1772 1773 idx = tx->req & tx->mask; 1774 1775 last_flags = src->flags; 1776 src->flags = 0; 1777 wmb(); 1778 dst = dstp = &tx->lanai[idx]; 1779 srcp = src; 1780 1781 if ((idx + cnt) < tx->mask) { 1782 for (i = 0; i < (cnt - 1); i += 2) { 1783 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1784 wmb(); /* force write every 32 bytes */ 1785 srcp += 2; 1786 dstp += 2; 1787 } 1788 } else { 1789 /* submit all but the first request, and ensure 1790 that it is submitted below */ 1791 mxge_submit_req_backwards(tx, src, cnt); 1792 i = 0; 1793 } 1794 if (i < cnt) { 1795 /* submit the first request */ 1796 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1797 wmb(); /* barrier before setting valid flag */ 1798 } 1799 1800 /* re-write the last 32-bits with the valid flags */ 1801 src->flags = last_flags; 1802 src_ints = (uint32_t *)src; 1803 src_ints+=3; 1804 dst_ints = (volatile uint32_t *)dst; 1805 dst_ints+=3; 1806 *dst_ints = *src_ints; 1807 tx->req += cnt; 1808 wmb(); 1809} 1810 1811#if IFCAP_TSO4 1812 1813static void 1814mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1815 int busdma_seg_cnt, int ip_off) 1816{ 1817 mxge_tx_ring_t *tx; 1818 mcp_kreq_ether_send_t *req; 1819 bus_dma_segment_t *seg; 1820 struct ip *ip; 1821 struct tcphdr *tcp; 1822 uint32_t low, high_swapped; 1823 int len, seglen, cum_len, cum_len_next; 1824 int next_is_first, chop, cnt, rdma_count, small; 1825 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1826 uint8_t flags, flags_next; 1827 static int once; 1828 1829 mss = m->m_pkthdr.tso_segsz; 1830 1831 /* negative cum_len signifies to the 1832 * send 
loop that we are still in the 1833 * header portion of the TSO packet. 1834 */ 1835 1836 /* ensure we have the ethernet, IP and TCP 1837 header together in the first mbuf, copy 1838 it to a scratch buffer if not */ 1839 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1840 m_copydata(m, 0, ip_off + sizeof (*ip), 1841 ss->scratch); 1842 ip = (struct ip *)(ss->scratch + ip_off); 1843 } else { 1844 ip = (struct ip *)(mtod(m, char *) + ip_off); 1845 } 1846 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1847 + sizeof (*tcp))) { 1848 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1849 + sizeof (*tcp), ss->scratch); 1850 ip = (struct ip *)(mtod(m, char *) + ip_off); 1851 } 1852 1853 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1854 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1855 1856 /* TSO implies checksum offload on this hardware */ 1857 cksum_offset = ip_off + (ip->ip_hl << 2); 1858 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1859 1860 1861 /* for TSO, pseudo_hdr_offset holds mss. 1862 * The firmware figures out where to put 1863 * the checksum by parsing the header. */ 1864 pseudo_hdr_offset = htobe16(mss); 1865 1866 tx = &ss->tx; 1867 req = tx->req_list; 1868 seg = tx->seg_list; 1869 cnt = 0; 1870 rdma_count = 0; 1871 /* "rdma_count" is the number of RDMAs belonging to the 1872 * current packet BEFORE the current send request. For 1873 * non-TSO packets, this is equal to "count". 1874 * For TSO packets, rdma_count needs to be reset 1875 * to 0 after a segment cut. 1876 * 1877 * The rdma_count field of the send request is 1878 * the number of RDMAs of the packet starting at 1879 * that request. For TSO send requests with one ore more cuts 1880 * in the middle, this is the number of RDMAs starting 1881 * after the last cut in the request. All previous 1882 * segments before the last cut implicitly have 1 RDMA. 
1883 * 1884 * Since the number of RDMAs is not known beforehand, 1885 * it must be filled-in retroactively - after each 1886 * segmentation cut or at the end of the entire packet. 1887 */ 1888 1889 while (busdma_seg_cnt) { 1890 /* Break the busdma segment up into pieces*/ 1891 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1892 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1893 len = seg->ds_len; 1894 1895 while (len) { 1896 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1897 seglen = len; 1898 cum_len_next = cum_len + seglen; 1899 (req-rdma_count)->rdma_count = rdma_count + 1; 1900 if (__predict_true(cum_len >= 0)) { 1901 /* payload */ 1902 chop = (cum_len_next > mss); 1903 cum_len_next = cum_len_next % mss; 1904 next_is_first = (cum_len_next == 0); 1905 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1906 flags_next |= next_is_first * 1907 MXGEFW_FLAGS_FIRST; 1908 rdma_count |= -(chop | next_is_first); 1909 rdma_count += chop & !next_is_first; 1910 } else if (cum_len_next >= 0) { 1911 /* header ends */ 1912 rdma_count = -1; 1913 cum_len_next = 0; 1914 seglen = -cum_len; 1915 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1916 flags_next = MXGEFW_FLAGS_TSO_PLD | 1917 MXGEFW_FLAGS_FIRST | 1918 (small * MXGEFW_FLAGS_SMALL); 1919 } 1920 1921 req->addr_high = high_swapped; 1922 req->addr_low = htobe32(low); 1923 req->pseudo_hdr_offset = pseudo_hdr_offset; 1924 req->pad = 0; 1925 req->rdma_count = 1; 1926 req->length = htobe16(seglen); 1927 req->cksum_offset = cksum_offset; 1928 req->flags = flags | ((cum_len & 1) * 1929 MXGEFW_FLAGS_ALIGN_ODD); 1930 low += seglen; 1931 len -= seglen; 1932 cum_len = cum_len_next; 1933 flags = flags_next; 1934 req++; 1935 cnt++; 1936 rdma_count++; 1937 if (__predict_false(cksum_offset > seglen)) 1938 cksum_offset -= seglen; 1939 else 1940 cksum_offset = 0; 1941 if (__predict_false(cnt > tx->max_desc)) 1942 goto drop; 1943 } 1944 busdma_seg_cnt--; 1945 seg++; 1946 } 1947 (req-rdma_count)->rdma_count = rdma_count; 1948 1949 do { 1950 req--; 1951 
req->flags |= MXGEFW_FLAGS_TSO_LAST; 1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1953 1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1955 mxge_submit_req(tx, tx->req_list, cnt); 1956#ifdef IFNET_BUF_RING 1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1958 /* tell the NIC to start polling this slice */ 1959 *tx->send_go = 1; 1960 tx->queue_active = 1; 1961 tx->activate++; 1962 wmb(); 1963 } 1964#endif 1965 return; 1966 1967drop: 1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1969 m_freem(m); 1970 ss->oerrors++; 1971 if (!once) { 1972 printf("tx->max_desc exceeded via TSO!\n"); 1973 printf("mss = %d, %ld, %d!\n", mss, 1974 (long)seg - (long)tx->seg_list, tx->max_desc); 1975 once = 1; 1976 } 1977 return; 1978 1979} 1980 1981#endif /* IFCAP_TSO4 */ 1982 1983#ifdef MXGE_NEW_VLAN_API 1984/* 1985 * We reproduce the software vlan tag insertion from 1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1987 * vlan tag insertion. We need to advertise this in order to have the 1988 * vlan interface respect our csum offload flags. 1989 */ 1990static struct mbuf * 1991mxge_vlan_tag_insert(struct mbuf *m) 1992{ 1993 struct ether_vlan_header *evl; 1994 1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1996 if (__predict_false(m == NULL)) 1997 return NULL; 1998 if (m->m_len < sizeof(*evl)) { 1999 m = m_pullup(m, sizeof(*evl)); 2000 if (__predict_false(m == NULL)) 2001 return NULL; 2002 } 2003 /* 2004 * Transform the Ethernet header into an Ethernet header 2005 * with 802.1Q encapsulation. 
2006 */ 2007 evl = mtod(m, struct ether_vlan_header *); 2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2012 m->m_flags &= ~M_VLANTAG; 2013 return m; 2014} 2015#endif /* MXGE_NEW_VLAN_API */ 2016 2017static void 2018mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2019{ 2020 mxge_softc_t *sc; 2021 mcp_kreq_ether_send_t *req; 2022 bus_dma_segment_t *seg; 2023 struct mbuf *m_tmp; 2024 struct ifnet *ifp; 2025 mxge_tx_ring_t *tx; 2026 struct ip *ip; 2027 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2028 uint16_t pseudo_hdr_offset; 2029 uint8_t flags, cksum_offset; 2030 2031 2032 sc = ss->sc; 2033 ifp = sc->ifp; 2034 tx = &ss->tx; 2035 2036 ip_off = sizeof (struct ether_header); 2037#ifdef MXGE_NEW_VLAN_API 2038 if (m->m_flags & M_VLANTAG) { 2039 m = mxge_vlan_tag_insert(m); 2040 if (__predict_false(m == NULL)) 2041 goto drop; 2042 ip_off += ETHER_VLAN_ENCAP_LEN; 2043 } 2044#endif 2045 /* (try to) map the frame for DMA */ 2046 idx = tx->req & tx->mask; 2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2048 m, tx->seg_list, &cnt, 2049 BUS_DMA_NOWAIT); 2050 if (__predict_false(err == EFBIG)) { 2051 /* Too many segments in the chain. 
Try 2052 to defrag */ 2053 m_tmp = m_defrag(m, M_NOWAIT); 2054 if (m_tmp == NULL) { 2055 goto drop; 2056 } 2057 ss->tx.defrag++; 2058 m = m_tmp; 2059 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2060 tx->info[idx].map, 2061 m, tx->seg_list, &cnt, 2062 BUS_DMA_NOWAIT); 2063 } 2064 if (__predict_false(err != 0)) { 2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2066 " packet len = %d\n", err, m->m_pkthdr.len); 2067 goto drop; 2068 } 2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2070 BUS_DMASYNC_PREWRITE); 2071 tx->info[idx].m = m; 2072 2073#if IFCAP_TSO4 2074 /* TSO is different enough, we handle it in another routine */ 2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2076 mxge_encap_tso(ss, m, cnt, ip_off); 2077 return; 2078 } 2079#endif 2080 2081 req = tx->req_list; 2082 cksum_offset = 0; 2083 pseudo_hdr_offset = 0; 2084 flags = MXGEFW_FLAGS_NO_TSO; 2085 2086 /* checksum offloading? */ 2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2088 /* ensure ip header is in first mbuf, copy 2089 it to a scratch buffer if not */ 2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2091 m_copydata(m, 0, ip_off + sizeof (*ip), 2092 ss->scratch); 2093 ip = (struct ip *)(ss->scratch + ip_off); 2094 } else { 2095 ip = (struct ip *)(mtod(m, char *) + ip_off); 2096 } 2097 cksum_offset = ip_off + (ip->ip_hl << 2); 2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2100 req->cksum_offset = cksum_offset; 2101 flags |= MXGEFW_FLAGS_CKSUM; 2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2103 } else { 2104 odd_flag = 0; 2105 } 2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2107 flags |= MXGEFW_FLAGS_SMALL; 2108 2109 /* convert segments into a request list */ 2110 cum_len = 0; 2111 seg = tx->seg_list; 2112 req->flags = MXGEFW_FLAGS_FIRST; 2113 for (i = 0; i < cnt; i++) { 2114 req->addr_low = 2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2116 req->addr_high = 2117 
htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2118 req->length = htobe16(seg->ds_len); 2119 req->cksum_offset = cksum_offset; 2120 if (cksum_offset > seg->ds_len) 2121 cksum_offset -= seg->ds_len; 2122 else 2123 cksum_offset = 0; 2124 req->pseudo_hdr_offset = pseudo_hdr_offset; 2125 req->pad = 0; /* complete solid 16-byte block */ 2126 req->rdma_count = 1; 2127 req->flags |= flags | ((cum_len & 1) * odd_flag); 2128 cum_len += seg->ds_len; 2129 seg++; 2130 req++; 2131 req->flags = 0; 2132 } 2133 req--; 2134 /* pad runts to 60 bytes */ 2135 if (cum_len < 60) { 2136 req++; 2137 req->addr_low = 2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2139 req->addr_high = 2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2141 req->length = htobe16(60 - cum_len); 2142 req->cksum_offset = 0; 2143 req->pseudo_hdr_offset = pseudo_hdr_offset; 2144 req->pad = 0; /* complete solid 16-byte block */ 2145 req->rdma_count = 1; 2146 req->flags |= flags | ((cum_len & 1) * odd_flag); 2147 cnt++; 2148 } 2149 2150 tx->req_list[0].rdma_count = cnt; 2151#if 0 2152 /* print what the firmware will see */ 2153 for (i = 0; i < cnt; i++) { 2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2155 "cso:%d, flags:0x%x, rdma:%d\n", 2156 i, (int)ntohl(tx->req_list[i].addr_high), 2157 (int)ntohl(tx->req_list[i].addr_low), 2158 (int)ntohs(tx->req_list[i].length), 2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2161 tx->req_list[i].rdma_count); 2162 } 2163 printf("--------------\n"); 2164#endif 2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2166 mxge_submit_req(tx, tx->req_list, cnt); 2167#ifdef IFNET_BUF_RING 2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2169 /* tell the NIC to start polling this slice */ 2170 *tx->send_go = 1; 2171 tx->queue_active = 1; 2172 tx->activate++; 2173 wmb(); 2174 } 2175#endif 2176 return; 2177 2178drop: 2179 m_freem(m); 2180 ss->oerrors++; 2181 return; 2182} 2183 
2184#ifdef IFNET_BUF_RING 2185static void 2186mxge_qflush(struct ifnet *ifp) 2187{ 2188 mxge_softc_t *sc = ifp->if_softc; 2189 mxge_tx_ring_t *tx; 2190 struct mbuf *m; 2191 int slice; 2192 2193 for (slice = 0; slice < sc->num_slices; slice++) { 2194 tx = &sc->ss[slice].tx; 2195 mtx_lock(&tx->mtx); 2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2197 m_freem(m); 2198 mtx_unlock(&tx->mtx); 2199 } 2200 if_qflush(ifp); 2201} 2202 2203static inline void 2204mxge_start_locked(struct mxge_slice_state *ss) 2205{ 2206 mxge_softc_t *sc; 2207 struct mbuf *m; 2208 struct ifnet *ifp; 2209 mxge_tx_ring_t *tx; 2210 2211 sc = ss->sc; 2212 ifp = sc->ifp; 2213 tx = &ss->tx; 2214 2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2216 m = drbr_dequeue(ifp, tx->br); 2217 if (m == NULL) { 2218 return; 2219 } 2220 /* let BPF see it */ 2221 BPF_MTAP(ifp, m); 2222 2223 /* give it to the nic */ 2224 mxge_encap(ss, m); 2225 } 2226 /* ran out of transmit slots */ 2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2228 && (!drbr_empty(ifp, tx->br))) { 2229 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2230 tx->stall++; 2231 } 2232} 2233 2234static int 2235mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2236{ 2237 mxge_softc_t *sc; 2238 struct ifnet *ifp; 2239 mxge_tx_ring_t *tx; 2240 int err; 2241 2242 sc = ss->sc; 2243 ifp = sc->ifp; 2244 tx = &ss->tx; 2245 2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2247 IFF_DRV_RUNNING) { 2248 err = drbr_enqueue(ifp, tx->br, m); 2249 return (err); 2250 } 2251
| 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50/* count xmits ourselves, rather than via drbr */ 51#define NO_SLOW_STATS 52#include <net/if.h> 53#include <net/if_arp.h> 54#include <net/ethernet.h> 55#include <net/if_dl.h> 56#include <net/if_media.h> 57 58#include <net/bpf.h> 59 60#include <net/if_types.h> 61#include <net/if_vlan_var.h> 62#include <net/zlib.h> 63 64#include <netinet/in_systm.h> 65#include <netinet/in.h> 66#include <netinet/ip.h> 67#include <netinet/tcp.h> 68 69#include <machine/bus.h> 70#include <machine/in_cksum.h> 71#include <machine/resource.h> 72#include <sys/bus.h> 73#include <sys/rman.h> 74#include <sys/smp.h> 75 76#include <dev/pci/pcireg.h> 77#include <dev/pci/pcivar.h> 78#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80#include <vm/vm.h> /* for pmap_mapdev() */ 81#include <vm/pmap.h> 82 83#if defined(__i386) || defined(__amd64) 84#include <machine/specialreg.h> 85#endif 86 87#include <dev/mxge/mxge_mcp.h> 88#include <dev/mxge/mcp_gen_header.h> 89/*#define MXGE_FAKE_IFP*/ 90#include <dev/mxge/if_mxge_var.h> 91#ifdef IFNET_BUF_RING 92#include <sys/buf_ring.h> 93#endif 94 95#include "opt_inet.h" 96 97/* tunable params */ 98static int mxge_nvidia_ecrc_enable = 1; 99static int mxge_force_firmware = 0; 100static int mxge_intr_coal_delay = 30; 101static int mxge_deassert_wait = 1; 102static int mxge_flow_control = 1; 103static int mxge_verbose = 0; 104static int mxge_lro_cnt = 8; 105static int mxge_ticks; 106static int mxge_max_slices = 1; 107static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108static int mxge_always_promisc = 0; 109static int 
mxge_initial_mtu = ETHERMTU_JUMBO;
static int mxge_throttle = 0;

/* Firmware image names.  The "p" (ethp) images tolerate unaligned PCIe
 * completions; the plain images require aligned completions; the "rss"
 * images additionally support multiple slices. */
static char *mxge_fw_unaligned = "mxge_ethp_z8e";
static char *mxge_fw_aligned = "mxge_eth_z8e";
static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e";
static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e";

/* newbus device interface entry points */
static int mxge_probe(device_t dev);
static int mxge_attach(device_t dev);
static int mxge_detach(device_t dev);
static int mxge_shutdown(device_t dev);
static void mxge_intr(void *arg);

static device_method_t mxge_methods[] =
{
	/* Device interface */
	DEVMETHOD(device_probe, mxge_probe),
	DEVMETHOD(device_attach, mxge_attach),
	DEVMETHOD(device_detach, mxge_detach),
	DEVMETHOD(device_shutdown, mxge_shutdown),
	{0, 0}
};

static driver_t mxge_driver =
{
	"mxge",
	mxge_methods,
	sizeof(mxge_softc_t),
};

static devclass_t mxge_devclass;

/* Declare ourselves to be a child of the PCI bus.*/
DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0);
MODULE_DEPEND(mxge, firmware, 1, 1, 1);
MODULE_DEPEND(mxge, zlib, 1, 1, 1);

/* forward declarations for routines defined later in this file */
static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
static int mxge_close(mxge_softc_t *sc, int down);
static int mxge_open(mxge_softc_t *sc);
static void mxge_tick(void *arg);

/*
 * Probe: match Myricom vendor ID and either Z8E PCI device ID, then
 * set a description based on the board revision.  Unknown revisions
 * still attach (with a warning); non-matching devices return ENXIO.
 */
static int
mxge_probe(device_t dev)
{
	int rev;


	if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) &&
	    ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) ||
	     (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) {
		rev = pci_get_revid(dev);
		switch (rev) {
		case MXGE_PCI_REV_Z8E:
			device_set_desc(dev, "Myri10G-PCIE-8A");
			break;
		case MXGE_PCI_REV_Z8ES:
			device_set_desc(dev, "Myri10G-PCIE-8B");
			break;
		default:
			device_set_desc(dev, "Myri10G-PCIE-8??");
			device_printf(dev, "Unrecognized rev %d NIC\n",
				      rev);
			break;
		}
		return 0;
	}
	return ENXIO;
}

/*
 * Mark the device's SRAM mapping write-combining (x86/amd64 only) so
 * PIO copies to the NIC are batched.  On failure sc->wc is cleared so
 * the rest of the driver falls back to uncombined writes.
 */
static void
mxge_enable_wc(mxge_softc_t *sc)
{
#if defined(__i386) || defined(__amd64)
	vm_offset_t len;
	int err;

	sc->wc = 1;
	len = rman_get_size(sc->mem_res);
	err = pmap_change_attr((vm_offset_t) sc->sram,
			       len, PAT_WRITE_COMBINING);
	if (err != 0) {
		device_printf(sc->dev, "pmap_change_attr failed, %d\n",
			      err);
		sc->wc = 0;
	}
#endif
}


/* callback to get our DMA address */
static void
mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
		     int error)
{
	/* single-segment map: stash the first segment's bus address */
	if (error == 0) {
		*(bus_addr_t *) arg = segs->ds_addr;
	}
}

/*
 * Allocate a single physically contiguous DMA buffer of `bytes` bytes
 * with the requested alignment: create a tag, allocate zeroed coherent
 * memory, and load it to obtain dma->bus_addr.  Large 4KB-aligned
 * requests disable the segment boundary so the region may span 4KB
 * boundaries; everything else is constrained to a 4KB boundary.
 */
static int
mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes,
	       bus_size_t alignment)
{
	int err;
	device_t dev = sc->dev;
	bus_size_t boundary, maxsegsize;

	if (bytes > 4096 && alignment == 4096) {
		boundary = 0;
		maxsegsize = bytes;
	} else {
		boundary = 4096;
		maxsegsize = 4096;
	}

	/* allocate DMAable memory tags */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 alignment,		/* alignment */
				 boundary,		/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 bytes,			/* maxsize */
				 1,			/* num segs */
				 maxsegsize,		/* maxsegsize */
				 BUS_DMA_COHERENT,	/* flags */
				 NULL, NULL,		/* lock */
				 &dma->dmat);		/* tag */
	if (err != 0) {
		device_printf(dev, "couldn't alloc tag (err = %d)\n", err);
		return err;
	}

	/* allocate DMAable memory & map */
	err = bus_dmamem_alloc(dma->dmat, &dma->addr,
			       (BUS_DMA_WAITOK | BUS_DMA_COHERENT
				| BUS_DMA_ZERO), &dma->map);
	if (err != 0) {
		device_printf(dev, "couldn't alloc mem (err = %d)\n", err);
		goto abort_with_dmat;
	}

	/* load the memory */
	err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes,
			      mxge_dmamap_callback,
			      (void *)&dma->bus_addr, 0);
	if (err != 0) {
258 device_printf(dev, "couldn't load map (err = %d)\n", err); 259 goto abort_with_mem; 260 } 261 return 0; 262 263abort_with_mem: 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265abort_with_dmat: 266 (void)bus_dma_tag_destroy(dma->dmat); 267 return err; 268} 269 270 271static void 272mxge_dma_free(mxge_dma_t *dma) 273{ 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277} 278 279/* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286static int 287mxge_parse_strings(mxge_softc_t *sc) 288{ 289#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 290 291 char *ptr, *limit; 292 int i, found_mac; 293 294 ptr = sc->eeprom_strings; 295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 296 found_mac = 0; 297 while (ptr < limit && *ptr != '\0') { 298 if (memcmp(ptr, "MAC=", 4) == 0) { 299 ptr += 1; 300 sc->mac_addr_string = ptr; 301 for (i = 0; i < 6; i++) { 302 ptr += 3; 303 if ((ptr + 2) > limit) 304 goto abort; 305 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 306 found_mac = 1; 307 } 308 } else if (memcmp(ptr, "PC=", 3) == 0) { 309 ptr += 3; 310 strncpy(sc->product_code_string, ptr, 311 sizeof (sc->product_code_string) - 1); 312 } else if (memcmp(ptr, "SN=", 3) == 0) { 313 ptr += 3; 314 strncpy(sc->serial_number_string, ptr, 315 sizeof (sc->serial_number_string) - 1); 316 } 317 MXGE_NEXT_STRING(ptr); 318 } 319 320 if (found_mac) 321 return 0; 322 323 abort: 324 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 325 326 return ENXIO; 327} 328 329#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 330static void 331mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 332{ 333 uint32_t val; 334 unsigned long base, off; 335 char *va, *cfgptr; 336 device_t pdev, mcp55; 337 uint16_t vendor_id, device_id, word; 338 uintptr_t bus, slot, func, ivend, idev; 339 uint32_t *ptr32; 340 341 
	/* honor the tunable that disables this workaround entirely */
	if (!mxge_nvidia_ecrc_enable)
		return;

	pdev = device_get_parent(device_get_parent(sc->dev));
	if (pdev == NULL) {
		device_printf(sc->dev, "could not find parent?\n");
		return;
	}
	vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2);
	device_id = pci_read_config(pdev, PCIR_DEVICE, 2);

	/* only Nvidia bridges are handled here */
	if (vendor_id != 0x10de)
		return;

	base = 0;

	if (device_id == 0x005d) {
		/* ck804, base address is magic */
		base = 0xe0000000UL;
	} else if (device_id >= 0x0374 && device_id <= 0x378) {
		/* mcp55, base address stored in chipset */
		mcp55 = pci_find_bsf(0, 0, 0);
		if (mcp55 &&
		    0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) &&
		    0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) {
			word = pci_read_config(mcp55, 0x90, 2);
			base = ((unsigned long)word & 0x7ffeU) << 25;
		}
	}
	if (!base)
		return;

	/* XXXX
	   Test below is commented because it is believed that doing
	   config read/write beyond 0xff will access the config space
	   for the next larger function. Uncomment this and remove
	   the hacky pmap_mapdev() way of accessing config space when
	   FreeBSD grows support for extended pcie config space access
	*/
#if 0
	/* See if we can, by some miracle, access the extended
	   config space */
	val = pci_read_config(pdev, 0x178, 4);
	if (val != 0xffffffff) {
		val |= 0x40;
		pci_write_config(pdev, 0x178, val, 4);
		return;
	}
#endif
	/* Rather than using normal pci config space writes, we must
	 * map the Nvidia config space ourselves. This is because on
	 * opteron/nvidia class machine the 0xe000000 mapping is
	 * handled by the nvidia chipset, that means the internal PCI
	 * device (the on-chip northbridge), or the amd-8131 bridge
	 * and things behind them are not visible by this method.
	 */

	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_BUS, &bus);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_SLOT, &slot);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_FUNCTION, &func);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_VENDOR, &ivend);
	BUS_READ_IVAR(device_get_parent(pdev), pdev,
		      PCI_IVAR_DEVICE, &idev);

	/* locate this device's slot within the bridge's config window */
	off = base
		+ 0x00100000UL * (unsigned long)bus
		+ 0x00001000UL * (unsigned long)(func
						 + 8 * slot);

	/* map it into the kernel */
	va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE);


	if (va == NULL) {
		device_printf(sc->dev, "pmap_kenter_temporary didn't\n");
		return;
	}
	/* get a pointer to the config space mapped into the kernel */
	cfgptr = va + (off & PAGE_MASK);

	/* make sure that we can really access it */
	vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR);
	device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE);
	if (! (vendor_id == ivend && device_id == idev)) {
		device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n",
			      vendor_id, device_id);
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}

	/* 0x178 holds the ECRC control bits; bit 0x40 enables generation */
	ptr32 = (uint32_t*)(cfgptr + 0x178);
	val = *ptr32;

	if (val == 0xffffffff) {
		device_printf(sc->dev, "extended mapping failed\n");
		pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
		return;
	}
	*ptr32 = val | 0x40;
	pmap_unmapdev((vm_offset_t)va, PAGE_SIZE);
	if (mxge_verbose)
		device_printf(sc->dev,
			      "Enabled ECRC on upstream Nvidia bridge "
			      "at %d:%d:%d\n",
			      (int)bus, (int)slot, (int)func);
	return;
}
#else
/* non-x86 stub: the Nvidia ECRC poke only makes sense on i386/amd64 */
static void
mxge_enable_nvidia_ecrc(mxge_softc_t *sc)
{
	device_printf(sc->dev,
		      "Nforce 4 chipset on non-x86/amd64!?!?!\n");
	return;
}
#endif


/*
 * Run the firmware's DMA benchmark against the dmabench buffer and
 * record read/write/read-write bandwidths in the softc.  Returns the
 * status of the first failing sub-test (0 on success).
 */
static int
mxge_dma_test(mxge_softc_t *sc, int test_type)
{
	mxge_cmd_t cmd;
	bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr;
	int status;
	uint32_t len;
	char *test = " ";


	/* Run a small DMA test.
	 * The magic multipliers to the length tell the firmware
	 * to do DMA read, write, or read+write tests.  The
	 * results are returned in cmd.data0.  The upper 16
	 * bits of the return is the number of transfers completed.
	 * The lower 16 bits is the time in 0.5us ticks that the
	 * transfers took to complete.
	 */

	len = sc->tx_boundary;

	/* read test: multiplier 0x10000 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10000;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read";
		goto abort;
	}
	sc->read_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);
	/* write test: multiplier 0x1 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x1;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "write";
		goto abort;
	}
	sc->write_dma = ((cmd.data0>>16) * len * 2) /
		(cmd.data0 & 0xffff);

	/* combined read+write test: multiplier 0x10001 */
	cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus);
	cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus);
	cmd.data2 = len * 0x10001;
	status = mxge_send_cmd(sc, test_type, &cmd);
	if (status != 0) {
		test = "read/write";
		goto abort;
	}
	sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) /
		(cmd.data0 & 0xffff);

abort:
	/* the unaligned probe is expected to fail; don't complain then */
	if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST)
		device_printf(sc->dev, "DMA %s benchmark failed: %d\n",
			      test, status);

	return status;
}

/*
 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput
 * when the PCI-E Completion packets are aligned on an 8-byte
 * boundary.  Some PCI-E chip sets always align Completion packets; on
 * the ones that do not, the alignment can be enforced by enabling
 * ECRC generation (if supported).
531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544static int 545mxge_firmware_probe(mxge_softc_t *sc) 546{ 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. 583 */ 584 585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 586 if (status == 0) 587 return 0; /* keep the aligned firmware */ 588 589 if (status != E2BIG) 590 device_printf(dev, "DMA test failed: %d\n", status); 591 if (status == ENOSYS) 592 device_printf(dev, "Falling back to ethp! 
" 593 "Please install up to date fw\n"); 594 return status; 595} 596 597static int 598mxge_select_firmware(mxge_softc_t *sc) 599{ 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640} 641 642union qualhack 643{ 644 const char *ro_char; 645 char *rw_char; 646}; 647 648static int 649mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650{ 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 
MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677} 678 679static void * 680z_alloc(void *nil, u_int items, u_int size) 681{ 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686} 687 688static void 689z_free(void *nil, void *ptr) 690{ 691 free(ptr, M_TEMP); 692} 693 694 695static int 696mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697{ 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if 
 (status != 0)
		goto abort_with_buffer;

	/* Copy the inflated firmware to NIC SRAM. */
	for (i = 0; i < fw_len; i += 256) {
		mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i,
			      inflate_buffer + i,
			      min(256U, (unsigned)(fw_len - i)));
		wmb();
		/* read back to flush posted writes before the next chunk */
		dummy = *sc->sram;
		wmb();
	}

	*limit = fw_len;
	status = 0;
abort_with_buffer:
	free(inflate_buffer, M_TEMP);
abort_with_zs:
	inflateEnd(&zs);
abort_with_fw:
	firmware_put(fw, FIRMWARE_UNLOAD);
	return status;
}

/*
 * Enable or disable periodic RDMAs from the host to make certain
 * chipsets resend dropped PCIe messages
 */

static void
mxge_dummy_rdma(mxge_softc_t *sc, int enable)
{
	char buf_bytes[72];
	volatile uint32_t *confirm;
	volatile char *submit;
	uint32_t *buf, dma_low, dma_high;
	int i;

	/* align the stack-based command buffer to 8 bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();

	/* send an rdma command to the PCIe engine, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);
	buf[0] = htobe32(dma_high);		/* confirm addr MSW */
	buf[1] = htobe32(dma_low);		/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);		/* confirm data */
	dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr);
	buf[3] = htobe32(dma_high); 		/* dummy addr MSW */
	buf[4] = htobe32(dma_low); 		/* dummy addr LSW */
	buf[5] = htobe32(enable);		/* enable? */


	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA);

	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll up to ~20ms for the firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000);
		i++;
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)",
			      (enable ? "enable" : "disable"), confirm,
			      *confirm);
	}
	return;
}

/*
 * Issue a command to the firmware's command mailbox and busy-wait (up
 * to 20ms) for the DMA'ed response.  The result is translated to an
 * errno; on success the firmware's reply is returned in data->data0.
 * Serialized by cmd_mtx.
 */
static int
mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data)
{
	mcp_cmd_t *buf;
	char buf_bytes[sizeof(*buf) + 8];
	volatile mcp_cmd_response_t *response = sc->cmd;
	volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD;
	uint32_t dma_low, dma_high;
	int err, sleep_total = 0;

	/* ensure buf is aligned to 8 bytes */
	buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	buf->data0 = htobe32(data->data0);
	buf->data1 = htobe32(data->data1);
	buf->data2 = htobe32(data->data2);
	buf->cmd = htobe32(cmd);
	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf->response_addr.low = htobe32(dma_low);
	buf->response_addr.high = htobe32(dma_high);
	mtx_lock(&sc->cmd_mtx);
	/* sentinel: firmware overwrites this when the reply lands */
	response->result = 0xffffffff;
	wmb();
	mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf));

	/* wait up to 20ms */
	err = EAGAIN;
	for (sleep_total = 0; sleep_total < 20; sleep_total++) {
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
		wmb();
		switch (be32toh(response->result)) {
		case 0:
			data->data0 = be32toh(response->data);
			err = 0;
			break;
		case 0xffffffff:
			/* no reply yet; wait another tick */
			DELAY(1000);
			break;
		case MXGEFW_CMD_UNKNOWN:
			err = ENOSYS;
			break;
		case MXGEFW_CMD_ERROR_UNALIGNED:
			err = E2BIG;
			break;
		case MXGEFW_CMD_ERROR_BUSY:
			err = EBUSY;
			break;
		default:
			device_printf(sc->dev,
				      "mxge: command %d "
				      "failed, result = %d\n",
				      cmd, be32toh(response->result));
			err = ENXIO;
			break;
		}
		if (err != EAGAIN)
			break;
	}
	if (err == EAGAIN)
		device_printf(sc->dev, "mxge: command %d timed out"
			      "result = %d\n",
			      cmd, be32toh(response->result));
	mtx_unlock(&sc->cmd_mtx);
	return err;
}

/*
 * Validate the firmware already running on the NIC (copied out of
 * SRAM) so it can be adopted instead of loading a fresh image.  Also
 * flags the 1.4.4-1.4.11 rx-filter bug, which requires ALLMULTI.
 */
static int
mxge_adopt_running_firmware(mxge_softc_t *sc)
{
	struct mcp_gen_header *hdr;
	const size_t bytes = sizeof (struct mcp_gen_header);
	size_t hdr_offset;
	int status;

	/* find running firmware header */
	hdr_offset = htobe32(*(volatile uint32_t *)
			     (sc->sram + MCP_HEADER_PTR_OFFSET));

	if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) {
		device_printf(sc->dev,
			      "Running firmware has bad header offset (%d)\n",
			      (int)hdr_offset);
		return EIO;
	}

	/* copy header of running firmware from SRAM to host memory to
	 * validate firmware */
	hdr = malloc(bytes, M_DEVBUF, M_NOWAIT);
	if (hdr == NULL) {
		device_printf(sc->dev, "could not malloc firmware hdr\n");
		return ENOMEM;
	}
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				hdr_offset, (char *)hdr, bytes);
	status = mxge_validate_firmware(sc, hdr);
	free(hdr, M_DEVBUF);

	/*
	 * check to see if adopted firmware has bug where adopting
	 * it will cause broadcasts to be filtered unless the NIC
	 * is kept in ALLMULTI mode
	 */
	if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 &&
	    sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) {
		sc->adopted_rx_filter_bug = 1;
		device_printf(sc->dev, "Adopting fw %d.%d.%d: "
			      "working around rx filter bug\n",
			      sc->fw_ver_major, sc->fw_ver_minor,
			      sc->fw_ver_tiny);
	}

	return status;
}


/*
 * Load (or, if that fails and adopt is set, adopt) firmware, then hand
 * off execution to it via the bootstrap MCP and wait for the -1 ack.
 */
static int
mxge_load_firmware(mxge_softc_t *sc, int adopt)
{
	volatile uint32_t *confirm;
	volatile char *submit;
	char buf_bytes[72];
	uint32_t *buf, size,
 dma_low, dma_high;
	int status, i;

	/* align the handoff command buffer to 8 bytes */
	buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL);

	size = sc->sram_size;
	status = mxge_load_firmware_helper(sc, &size);
	if (status) {
		if (!adopt)
			return status;
		/* Try to use the currently running firmware, if
		   it is new enough */
		status = mxge_adopt_running_firmware(sc);
		if (status) {
			device_printf(sc->dev,
				      "failed to adopt running firmware\n");
			return status;
		}
		device_printf(sc->dev,
			      "Successfully adopted running firmware\n");
		if (sc->tx_boundary == 4096) {
			device_printf(sc->dev,
				"Using firmware currently running on NIC"
				 ".  For optimal\n");
			device_printf(sc->dev,
				"performance consider loading optimized "
				"firmware\n");
		}
		/* adopted firmware is treated as the unaligned image */
		sc->fw_name = mxge_fw_unaligned;
		sc->tx_boundary = 2048;
		return 0;
	}
	/* clear confirmation addr */
	confirm = (volatile uint32_t *)sc->cmd;
	*confirm = 0;
	wmb();
	/* send a reload command to the bootstrap MCP, and wait for the
	   response in the confirmation address.  The firmware should
	   write a -1 there to indicate it is alive and well
	*/

	dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr);
	dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr);

	buf[0] = htobe32(dma_high);	/* confirm addr MSW */
	buf[1] = htobe32(dma_low);	/* confirm addr LSW */
	buf[2] = htobe32(0xffffffff);	/* confirm data */

	/* FIX: All newest firmware should un-protect the bottom of
	   the sram before handoff. However, the very first interfaces
	   do not. Therefore the handoff copy must skip the first 8 bytes
	*/
	/* where the code starts*/
	buf[3] = htobe32(MXGE_FW_OFFSET + 8);
	buf[4] = htobe32(size - 8); 	/* length of code */
	buf[5] = htobe32(8);		/* where to copy to */
	buf[6] = htobe32(0);		/* where to jump to */

	submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF);
	mxge_pio_copy(submit, buf, 64);
	wmb();
	DELAY(1000);
	wmb();
	/* poll up to ~200ms for the new firmware's -1 acknowledgement */
	i = 0;
	while (*confirm != 0xffffffff && i < 20) {
		DELAY(1000*10);
		i++;
		bus_dmamap_sync(sc->cmd_dma.dmat,
				sc->cmd_dma.map, BUS_DMASYNC_POSTREAD);
	}
	if (*confirm != 0xffffffff) {
		device_printf(sc->dev,"handoff failed (%p = 0x%x)",
			confirm, *confirm);

		return ENXIO;
	}
	return 0;
}

/*
 * Push sc->mac_addr to the firmware (packed big-endian into the two
 * command data words).
 */
static int
mxge_update_mac_address(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	uint8_t *addr = sc->mac_addr;
	int status;


	cmd.data0 = ((addr[0] << 24) | (addr[1] << 16)
		     | (addr[2] << 8) | addr[3]);

	cmd.data1 = ((addr[4] << 8) | (addr[5]));

	status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd);
	return status;
}

/*
 * Enable or disable link-level flow control in the firmware and cache
 * the resulting state in sc->pause.  Returns 0 or ENXIO.
 */
static int
mxge_change_pause(mxge_softc_t *sc, int pause)
{
	mxge_cmd_t cmd;
	int status;

	if (pause)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set flow control mode\n");
		return ENXIO;
	}
	sc->pause = pause;
	return 0;
}

/*
 * Enable or disable promiscuous mode in the firmware; the
 * mxge_always_promisc tunable forces it on unconditionally.
 */
static void
mxge_change_promisc(mxge_softc_t *sc, int promisc)
{
	mxge_cmd_t cmd;
	int status;

	if (mxge_always_promisc)
		promisc = 1;

	if (promisc)
		status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC,
				       &cmd);
	else
		status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC,
				       &cmd);

	if (status) {
		device_printf(sc->dev, "Failed to set promisc mode\n");
	}
}

/*
 * Program the firmware's multicast filter from the interface's
 * multicast address list.  The firmware is left in ALLMULTI whenever
 * filtering cannot be (re-)established: unsupported firmware, the
 * adopted-firmware rx filter bug, IFF_ALLMULTI, or any command error.
 */
static void
mxge_set_multicast_list(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp = sc->ifp;
	int err;

	/* This firmware is known to not support multicast */
	if (!sc->fw_multicast_support)
		return;

	/* Disable multicast filtering while we play with the lists*/
	err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI,"
			      " error status: %d\n", err);
		return;
	}

	/* stay in ALLMULTI to work around the adopted-fw filter bug */
	if (sc->adopted_rx_filter_bug)
		return;

	if (ifp->if_flags & IFF_ALLMULTI)
		/* request to disable multicast filtering, so quit here */
		return;

	/* Flush all the filters */

	err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd);
	if (err != 0) {
		device_printf(sc->dev,
			      "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS"
			      ", error status: %d\n", err);
		return;
	}

	/* Walk the multicast list, and add each address */

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/* split the 6-byte MAC across data0 (4) and data1 (2) */
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		      &cmd.data0, 4);
		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4,
		      &cmd.data1, 2);
		cmd.data0 = htonl(cmd.data0);
		cmd.data1 = htonl(cmd.data1);
		err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "Failed "
			       "MXGEFW_JOIN_MULTICAST_GROUP, error status:"
			       "%d\t", err);
			/* abort, leaving multicast filtering off */
			if_maddr_runlock(ifp);
			return;
		}
	}
	if_maddr_runlock(ifp);
	/* Enable multicast filtering */
	err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI"
			      ", error status: %d\n", err);
	}
}

/*
 * Determine the largest MTU the firmware supports: the full
 * MXGEFW_MAX_MTU if page-sized clusters suffice or the firmware can
 * chain big buffers, otherwise limited by MJUMPAGESIZE.
 */
static int
mxge_max_mtu(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int status;

	if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* try to set nbufs to see if it we can
	   use virtually contiguous jumbos */
	cmd.data0 = 0;
	status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			       &cmd);
	if (status == 0)
		return MXGEFW_MAX_MTU - MXGEFW_PAD;

	/* otherwise, we're limited to MJUMPAGESIZE */
	return MJUMPAGESIZE - MXGEFW_PAD;
}

/*
 * Reset the NIC and re-establish all firmware/driver shared state:
 * intrq sizes, slice/RSS configuration, interrupt queue DMA addresses,
 * coalescing pointers, per-slice counters, MAC/promisc/pause/multicast
 * settings and (optionally) the throttle factor.
 */
static int
mxge_reset(mxge_softc_t *sc, int interrupts_setup)
{
	struct mxge_slice_state *ss;
	mxge_rx_done_t *rx_done;
	volatile uint32_t *irq_claim;
	mxge_cmd_t cmd;
	int slice, status;

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		return ENXIO;
	}

	mxge_dummy_rdma(sc, 1);


	/* set the intrq size */
	cmd.data0 = sc->rx_ring_size;
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);

	/*
	 * Even though we already know how many slices are supported
	 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES
	 * has magic side effects, and must be called after a reset.
	 * It must be called prior to calling any RSS related cmds,
	 * including assigning an interrupt queue for anything but
	 * slice 0.  It must also be called *after*
	 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by
	 * the firmware to compute offsets.
	 */

	if (sc->num_slices > 1) {
		/* ask the maximum number of slices it supports */
		status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to get number of slices\n");
			return status;
		}
		/*
		 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior
		 * to setting up the interrupt queue DMA
		 */
		cmd.data0 = sc->num_slices;
		cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
#ifdef IFNET_BUF_RING
		cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
#endif
		status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES,
				       &cmd);
		if (status != 0) {
			device_printf(sc->dev,
				      "failed to set number of slices\n");
			return status;
		}
	}


	if (interrupts_setup) {
		/* Now exchange information about interrupts  */
		for (slice = 0; slice < sc->num_slices; slice++) {
			rx_done = &sc->ss[slice].rx_done;
			memset(rx_done->entry, 0, sc->rx_ring_size);
			cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr);
			cmd.data2 = slice;
			/* accumulate errors; all offsets checked below */
			status |= mxge_send_cmd(sc,
						MXGEFW_CMD_SET_INTRQ_DMA,
						&cmd);
		}
	}

	status |= mxge_send_cmd(sc,
				MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd);


	sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0);

	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd);
	irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0);


	status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET,
				&cmd);
	sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0);
	if (status != 0) {
		device_printf(sc->dev, "failed set interrupt parameters\n");
		return status;
	}


	*sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay);


	/* run a DMA benchmark */
	(void) mxge_dma_test(sc, MXGEFW_DMA_TEST);

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];

		/* each slice's claim register is 2 words apart */
		ss->irq_claim = irq_claim + (2 * slice);
		/* reset mcp/driver shared state back to 0 */
		ss->rx_done.idx = 0;
		ss->rx_done.cnt = 0;
		ss->tx.req = 0;
		ss->tx.done = 0;
		ss->tx.pkt_done = 0;
		ss->tx.queue_active = 0;
		ss->tx.activate = 0;
		ss->tx.deactivate = 0;
		ss->tx.wake = 0;
		ss->tx.defrag = 0;
		ss->tx.stall = 0;
		ss->rx_big.cnt = 0;
		ss->rx_small.cnt = 0;
		ss->lro_bad_csum = 0;
		ss->lro_queued = 0;
		ss->lro_flushed = 0;
		if (ss->fw_stats != NULL) {
			bzero(ss->fw_stats, sizeof *ss->fw_stats);
		}
	}
	sc->rdma_tags_available = 15;
	/* re-apply host-side configuration after the reset */
	status = mxge_update_mac_address(sc);
	mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC);
	mxge_change_pause(sc, sc->pause);
	mxge_set_multicast_list(sc);
	if (sc->throttle) {
		cmd.data0 = sc->throttle;
		if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR,
				  &cmd)) {
			device_printf(sc->dev,
				      "can't enable throttle\n");
		}
	}
	return status;
}

/*
 * Sysctl handler: validate the requested transmit throttle factor and
 * push it to the firmware under the driver lock.
 */
static int
mxge_change_throttle(SYSCTL_HANDLER_ARGS)
{
	mxge_cmd_t cmd;
	mxge_softc_t *sc;
	int err;
	unsigned int throttle;

	sc = arg1;
	throttle = sc->throttle;
	err = sysctl_handle_int(oidp, &throttle, arg2, req);
	if (err != 0) {
		return err;
	}

	if (throttle == sc->throttle)
		return 0;

	if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	cmd.data0 = throttle;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd);
	if (err == 0)
		sc->throttle = throttle;
	mtx_unlock(&sc->driver_mtx);
	return err;
}

/*
 * Sysctl handler: update the interrupt coalescing delay (microseconds,
 * 1..1000000) by writing the firmware's coalescing register directly.
 */
static int
mxge_change_intr_coal(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc;
	unsigned int intr_coal_delay;
1367 int err; 1368 1369 sc = arg1; 1370 intr_coal_delay = sc->intr_coal_delay; 1371 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1372 if (err != 0) { 1373 return err; 1374 } 1375 if (intr_coal_delay == sc->intr_coal_delay) 1376 return 0; 1377 1378 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1379 return EINVAL; 1380 1381 mtx_lock(&sc->driver_mtx); 1382 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1383 sc->intr_coal_delay = intr_coal_delay; 1384 1385 mtx_unlock(&sc->driver_mtx); 1386 return err; 1387} 1388 1389static int 1390mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1391{ 1392 mxge_softc_t *sc; 1393 unsigned int enabled; 1394 int err; 1395 1396 sc = arg1; 1397 enabled = sc->pause; 1398 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1399 if (err != 0) { 1400 return err; 1401 } 1402 if (enabled == sc->pause) 1403 return 0; 1404 1405 mtx_lock(&sc->driver_mtx); 1406 err = mxge_change_pause(sc, enabled); 1407 mtx_unlock(&sc->driver_mtx); 1408 return err; 1409} 1410 1411static int 1412mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1413{ 1414 struct ifnet *ifp; 1415 int err = 0; 1416 1417 ifp = sc->ifp; 1418 if (lro_cnt == 0) 1419 ifp->if_capenable &= ~IFCAP_LRO; 1420 else 1421 ifp->if_capenable |= IFCAP_LRO; 1422 sc->lro_cnt = lro_cnt; 1423 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1424 mxge_close(sc, 0); 1425 err = mxge_open(sc); 1426 } 1427 return err; 1428} 1429 1430static int 1431mxge_change_lro(SYSCTL_HANDLER_ARGS) 1432{ 1433 mxge_softc_t *sc; 1434 unsigned int lro_cnt; 1435 int err; 1436 1437 sc = arg1; 1438 lro_cnt = sc->lro_cnt; 1439 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1440 if (err != 0) 1441 return err; 1442 1443 if (lro_cnt == sc->lro_cnt) 1444 return 0; 1445 1446 if (lro_cnt > 128) 1447 return EINVAL; 1448 1449 mtx_lock(&sc->driver_mtx); 1450 err = mxge_change_lro_locked(sc, lro_cnt); 1451 mtx_unlock(&sc->driver_mtx); 1452 return err; 1453} 1454 1455static int 
1456mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1457{ 1458 int err; 1459 1460 if (arg1 == NULL) 1461 return EFAULT; 1462 arg2 = be32toh(*(int *)arg1); 1463 arg1 = NULL; 1464 err = sysctl_handle_int(oidp, arg1, arg2, req); 1465 1466 return err; 1467} 1468 1469static void 1470mxge_rem_sysctls(mxge_softc_t *sc) 1471{ 1472 struct mxge_slice_state *ss; 1473 int slice; 1474 1475 if (sc->slice_sysctl_tree == NULL) 1476 return; 1477 1478 for (slice = 0; slice < sc->num_slices; slice++) { 1479 ss = &sc->ss[slice]; 1480 if (ss == NULL || ss->sysctl_tree == NULL) 1481 continue; 1482 sysctl_ctx_free(&ss->sysctl_ctx); 1483 ss->sysctl_tree = NULL; 1484 } 1485 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1486 sc->slice_sysctl_tree = NULL; 1487} 1488 1489static void 1490mxge_add_sysctls(mxge_softc_t *sc) 1491{ 1492 struct sysctl_ctx_list *ctx; 1493 struct sysctl_oid_list *children; 1494 mcp_irq_data_t *fw; 1495 struct mxge_slice_state *ss; 1496 int slice; 1497 char slice_num[8]; 1498 1499 ctx = device_get_sysctl_ctx(sc->dev); 1500 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1501 fw = sc->ss[0].fw_stats; 1502 1503 /* random information */ 1504 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1505 "firmware_version", 1506 CTLFLAG_RD, &sc->fw_version, 1507 0, "firmware version"); 1508 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1509 "serial_number", 1510 CTLFLAG_RD, &sc->serial_number_string, 1511 0, "serial number"); 1512 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1513 "product_code", 1514 CTLFLAG_RD, &sc->product_code_string, 1515 0, "product_code"); 1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1517 "pcie_link_width", 1518 CTLFLAG_RD, &sc->link_width, 1519 0, "tx_boundary"); 1520 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1521 "tx_boundary", 1522 CTLFLAG_RD, &sc->tx_boundary, 1523 0, "tx_boundary"); 1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1525 "write_combine", 1526 CTLFLAG_RD, &sc->wc, 1527 0, "write combining PIO?"); 1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1529 
"read_dma_MBs", 1530 CTLFLAG_RD, &sc->read_dma, 1531 0, "DMA Read speed in MB/s"); 1532 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1533 "write_dma_MBs", 1534 CTLFLAG_RD, &sc->write_dma, 1535 0, "DMA Write speed in MB/s"); 1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1537 "read_write_dma_MBs", 1538 CTLFLAG_RD, &sc->read_write_dma, 1539 0, "DMA concurrent Read/Write speed in MB/s"); 1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1541 "watchdog_resets", 1542 CTLFLAG_RD, &sc->watchdog_resets, 1543 0, "Number of times NIC was reset"); 1544 1545 1546 /* performance related tunables */ 1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "intr_coal_delay", 1549 CTLTYPE_INT|CTLFLAG_RW, sc, 1550 0, mxge_change_intr_coal, 1551 "I", "interrupt coalescing delay in usecs"); 1552 1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1554 "throttle", 1555 CTLTYPE_INT|CTLFLAG_RW, sc, 1556 0, mxge_change_throttle, 1557 "I", "transmit throttling"); 1558 1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1560 "flow_control_enabled", 1561 CTLTYPE_INT|CTLFLAG_RW, sc, 1562 0, mxge_change_flow_control, 1563 "I", "interrupt coalescing delay in usecs"); 1564 1565 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1566 "deassert_wait", 1567 CTLFLAG_RW, &mxge_deassert_wait, 1568 0, "Wait for IRQ line to go low in ihandler"); 1569 1570 /* stats block from firmware is in network byte order. 
1571 Need to swap it */ 1572 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1573 "link_up", 1574 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1575 0, mxge_handle_be32, 1576 "I", "link up"); 1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1578 "rdma_tags_available", 1579 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1580 0, mxge_handle_be32, 1581 "I", "rdma_tags_available"); 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_bad_crc32", 1584 CTLTYPE_INT|CTLFLAG_RD, 1585 &fw->dropped_bad_crc32, 1586 0, mxge_handle_be32, 1587 "I", "dropped_bad_crc32"); 1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1589 "dropped_bad_phy", 1590 CTLTYPE_INT|CTLFLAG_RD, 1591 &fw->dropped_bad_phy, 1592 0, mxge_handle_be32, 1593 "I", "dropped_bad_phy"); 1594 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1595 "dropped_link_error_or_filtered", 1596 CTLTYPE_INT|CTLFLAG_RD, 1597 &fw->dropped_link_error_or_filtered, 1598 0, mxge_handle_be32, 1599 "I", "dropped_link_error_or_filtered"); 1600 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1601 "dropped_link_overflow", 1602 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1603 0, mxge_handle_be32, 1604 "I", "dropped_link_overflow"); 1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1606 "dropped_multicast_filtered", 1607 CTLTYPE_INT|CTLFLAG_RD, 1608 &fw->dropped_multicast_filtered, 1609 0, mxge_handle_be32, 1610 "I", "dropped_multicast_filtered"); 1611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1612 "dropped_no_big_buffer", 1613 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1614 0, mxge_handle_be32, 1615 "I", "dropped_no_big_buffer"); 1616 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1617 "dropped_no_small_buffer", 1618 CTLTYPE_INT|CTLFLAG_RD, 1619 &fw->dropped_no_small_buffer, 1620 0, mxge_handle_be32, 1621 "I", "dropped_no_small_buffer"); 1622 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1623 "dropped_overrun", 1624 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1625 0, mxge_handle_be32, 1626 "I", "dropped_overrun"); 1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 
1628 "dropped_pause", 1629 CTLTYPE_INT|CTLFLAG_RD, 1630 &fw->dropped_pause, 1631 0, mxge_handle_be32, 1632 "I", "dropped_pause"); 1633 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1634 "dropped_runt", 1635 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1636 0, mxge_handle_be32, 1637 "I", "dropped_runt"); 1638 1639 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1640 "dropped_unicast_filtered", 1641 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1642 0, mxge_handle_be32, 1643 "I", "dropped_unicast_filtered"); 1644 1645 /* verbose printing? */ 1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1647 "verbose", 1648 CTLFLAG_RW, &mxge_verbose, 1649 0, "verbose printing"); 1650 1651 /* lro */ 1652 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1653 "lro_cnt", 1654 CTLTYPE_INT|CTLFLAG_RW, sc, 1655 0, mxge_change_lro, 1656 "I", "number of lro merge queues"); 1657 1658 1659 /* add counters exported for debugging from all slices */ 1660 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1661 sc->slice_sysctl_tree = 1662 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1663 "slice", CTLFLAG_RD, 0, ""); 1664 1665 for (slice = 0; slice < sc->num_slices; slice++) { 1666 ss = &sc->ss[slice]; 1667 sysctl_ctx_init(&ss->sysctl_ctx); 1668 ctx = &ss->sysctl_ctx; 1669 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1670 sprintf(slice_num, "%d", slice); 1671 ss->sysctl_tree = 1672 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1673 CTLFLAG_RD, 0, ""); 1674 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1676 "rx_small_cnt", 1677 CTLFLAG_RD, &ss->rx_small.cnt, 1678 0, "rx_small_cnt"); 1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1680 "rx_big_cnt", 1681 CTLFLAG_RD, &ss->rx_big.cnt, 1682 0, "rx_small_cnt"); 1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1684 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1685 0, "number of lro merge queues flushed"); 1686 1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1688 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1689 0, "number 
of frames appended to lro merge" 1690 "queues"); 1691 1692#ifndef IFNET_BUF_RING 1693 /* only transmit from slice 0 for now */ 1694 if (slice > 0) 1695 continue; 1696#endif 1697 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1698 "tx_req", 1699 CTLFLAG_RD, &ss->tx.req, 1700 0, "tx_req"); 1701 1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1703 "tx_done", 1704 CTLFLAG_RD, &ss->tx.done, 1705 0, "tx_done"); 1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1707 "tx_pkt_done", 1708 CTLFLAG_RD, &ss->tx.pkt_done, 1709 0, "tx_done"); 1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1711 "tx_stall", 1712 CTLFLAG_RD, &ss->tx.stall, 1713 0, "tx_stall"); 1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1715 "tx_wake", 1716 CTLFLAG_RD, &ss->tx.wake, 1717 0, "tx_wake"); 1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1719 "tx_defrag", 1720 CTLFLAG_RD, &ss->tx.defrag, 1721 0, "tx_defrag"); 1722 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1723 "tx_queue_active", 1724 CTLFLAG_RD, &ss->tx.queue_active, 1725 0, "tx_queue_active"); 1726 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1727 "tx_activate", 1728 CTLFLAG_RD, &ss->tx.activate, 1729 0, "tx_activate"); 1730 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1731 "tx_deactivate", 1732 CTLFLAG_RD, &ss->tx.deactivate, 1733 0, "tx_deactivate"); 1734 } 1735} 1736 1737/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1738 backwards one at a time and handle ring wraps */ 1739 1740static inline void 1741mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1742 mcp_kreq_ether_send_t *src, int cnt) 1743{ 1744 int idx, starting_slot; 1745 starting_slot = tx->req; 1746 while (cnt > 1) { 1747 cnt--; 1748 idx = (starting_slot + cnt) & tx->mask; 1749 mxge_pio_copy(&tx->lanai[idx], 1750 &src[cnt], sizeof(*src)); 1751 wmb(); 1752 } 1753} 1754 1755/* 1756 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1757 * at most 32 bytes at a time, so as to avoid involving the software 1758 * pio handler in the nic. 
We re-write the first segment's flags 1759 * to mark them valid only after writing the entire chain 1760 */ 1761 1762static inline void 1763mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1764 int cnt) 1765{ 1766 int idx, i; 1767 uint32_t *src_ints; 1768 volatile uint32_t *dst_ints; 1769 mcp_kreq_ether_send_t *srcp; 1770 volatile mcp_kreq_ether_send_t *dstp, *dst; 1771 uint8_t last_flags; 1772 1773 idx = tx->req & tx->mask; 1774 1775 last_flags = src->flags; 1776 src->flags = 0; 1777 wmb(); 1778 dst = dstp = &tx->lanai[idx]; 1779 srcp = src; 1780 1781 if ((idx + cnt) < tx->mask) { 1782 for (i = 0; i < (cnt - 1); i += 2) { 1783 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1784 wmb(); /* force write every 32 bytes */ 1785 srcp += 2; 1786 dstp += 2; 1787 } 1788 } else { 1789 /* submit all but the first request, and ensure 1790 that it is submitted below */ 1791 mxge_submit_req_backwards(tx, src, cnt); 1792 i = 0; 1793 } 1794 if (i < cnt) { 1795 /* submit the first request */ 1796 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1797 wmb(); /* barrier before setting valid flag */ 1798 } 1799 1800 /* re-write the last 32-bits with the valid flags */ 1801 src->flags = last_flags; 1802 src_ints = (uint32_t *)src; 1803 src_ints+=3; 1804 dst_ints = (volatile uint32_t *)dst; 1805 dst_ints+=3; 1806 *dst_ints = *src_ints; 1807 tx->req += cnt; 1808 wmb(); 1809} 1810 1811#if IFCAP_TSO4 1812 1813static void 1814mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1815 int busdma_seg_cnt, int ip_off) 1816{ 1817 mxge_tx_ring_t *tx; 1818 mcp_kreq_ether_send_t *req; 1819 bus_dma_segment_t *seg; 1820 struct ip *ip; 1821 struct tcphdr *tcp; 1822 uint32_t low, high_swapped; 1823 int len, seglen, cum_len, cum_len_next; 1824 int next_is_first, chop, cnt, rdma_count, small; 1825 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1826 uint8_t flags, flags_next; 1827 static int once; 1828 1829 mss = m->m_pkthdr.tso_segsz; 1830 1831 /* negative cum_len signifies to the 1832 * send 
loop that we are still in the 1833 * header portion of the TSO packet. 1834 */ 1835 1836 /* ensure we have the ethernet, IP and TCP 1837 header together in the first mbuf, copy 1838 it to a scratch buffer if not */ 1839 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1840 m_copydata(m, 0, ip_off + sizeof (*ip), 1841 ss->scratch); 1842 ip = (struct ip *)(ss->scratch + ip_off); 1843 } else { 1844 ip = (struct ip *)(mtod(m, char *) + ip_off); 1845 } 1846 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1847 + sizeof (*tcp))) { 1848 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1849 + sizeof (*tcp), ss->scratch); 1850 ip = (struct ip *)(mtod(m, char *) + ip_off); 1851 } 1852 1853 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1854 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1855 1856 /* TSO implies checksum offload on this hardware */ 1857 cksum_offset = ip_off + (ip->ip_hl << 2); 1858 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1859 1860 1861 /* for TSO, pseudo_hdr_offset holds mss. 1862 * The firmware figures out where to put 1863 * the checksum by parsing the header. */ 1864 pseudo_hdr_offset = htobe16(mss); 1865 1866 tx = &ss->tx; 1867 req = tx->req_list; 1868 seg = tx->seg_list; 1869 cnt = 0; 1870 rdma_count = 0; 1871 /* "rdma_count" is the number of RDMAs belonging to the 1872 * current packet BEFORE the current send request. For 1873 * non-TSO packets, this is equal to "count". 1874 * For TSO packets, rdma_count needs to be reset 1875 * to 0 after a segment cut. 1876 * 1877 * The rdma_count field of the send request is 1878 * the number of RDMAs of the packet starting at 1879 * that request. For TSO send requests with one ore more cuts 1880 * in the middle, this is the number of RDMAs starting 1881 * after the last cut in the request. All previous 1882 * segments before the last cut implicitly have 1 RDMA. 
1883 * 1884 * Since the number of RDMAs is not known beforehand, 1885 * it must be filled-in retroactively - after each 1886 * segmentation cut or at the end of the entire packet. 1887 */ 1888 1889 while (busdma_seg_cnt) { 1890 /* Break the busdma segment up into pieces*/ 1891 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1892 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1893 len = seg->ds_len; 1894 1895 while (len) { 1896 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1897 seglen = len; 1898 cum_len_next = cum_len + seglen; 1899 (req-rdma_count)->rdma_count = rdma_count + 1; 1900 if (__predict_true(cum_len >= 0)) { 1901 /* payload */ 1902 chop = (cum_len_next > mss); 1903 cum_len_next = cum_len_next % mss; 1904 next_is_first = (cum_len_next == 0); 1905 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1906 flags_next |= next_is_first * 1907 MXGEFW_FLAGS_FIRST; 1908 rdma_count |= -(chop | next_is_first); 1909 rdma_count += chop & !next_is_first; 1910 } else if (cum_len_next >= 0) { 1911 /* header ends */ 1912 rdma_count = -1; 1913 cum_len_next = 0; 1914 seglen = -cum_len; 1915 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1916 flags_next = MXGEFW_FLAGS_TSO_PLD | 1917 MXGEFW_FLAGS_FIRST | 1918 (small * MXGEFW_FLAGS_SMALL); 1919 } 1920 1921 req->addr_high = high_swapped; 1922 req->addr_low = htobe32(low); 1923 req->pseudo_hdr_offset = pseudo_hdr_offset; 1924 req->pad = 0; 1925 req->rdma_count = 1; 1926 req->length = htobe16(seglen); 1927 req->cksum_offset = cksum_offset; 1928 req->flags = flags | ((cum_len & 1) * 1929 MXGEFW_FLAGS_ALIGN_ODD); 1930 low += seglen; 1931 len -= seglen; 1932 cum_len = cum_len_next; 1933 flags = flags_next; 1934 req++; 1935 cnt++; 1936 rdma_count++; 1937 if (__predict_false(cksum_offset > seglen)) 1938 cksum_offset -= seglen; 1939 else 1940 cksum_offset = 0; 1941 if (__predict_false(cnt > tx->max_desc)) 1942 goto drop; 1943 } 1944 busdma_seg_cnt--; 1945 seg++; 1946 } 1947 (req-rdma_count)->rdma_count = rdma_count; 1948 1949 do { 1950 req--; 1951 
req->flags |= MXGEFW_FLAGS_TSO_LAST; 1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1953 1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1955 mxge_submit_req(tx, tx->req_list, cnt); 1956#ifdef IFNET_BUF_RING 1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1958 /* tell the NIC to start polling this slice */ 1959 *tx->send_go = 1; 1960 tx->queue_active = 1; 1961 tx->activate++; 1962 wmb(); 1963 } 1964#endif 1965 return; 1966 1967drop: 1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1969 m_freem(m); 1970 ss->oerrors++; 1971 if (!once) { 1972 printf("tx->max_desc exceeded via TSO!\n"); 1973 printf("mss = %d, %ld, %d!\n", mss, 1974 (long)seg - (long)tx->seg_list, tx->max_desc); 1975 once = 1; 1976 } 1977 return; 1978 1979} 1980 1981#endif /* IFCAP_TSO4 */ 1982 1983#ifdef MXGE_NEW_VLAN_API 1984/* 1985 * We reproduce the software vlan tag insertion from 1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1987 * vlan tag insertion. We need to advertise this in order to have the 1988 * vlan interface respect our csum offload flags. 1989 */ 1990static struct mbuf * 1991mxge_vlan_tag_insert(struct mbuf *m) 1992{ 1993 struct ether_vlan_header *evl; 1994 1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1996 if (__predict_false(m == NULL)) 1997 return NULL; 1998 if (m->m_len < sizeof(*evl)) { 1999 m = m_pullup(m, sizeof(*evl)); 2000 if (__predict_false(m == NULL)) 2001 return NULL; 2002 } 2003 /* 2004 * Transform the Ethernet header into an Ethernet header 2005 * with 802.1Q encapsulation. 
2006 */ 2007 evl = mtod(m, struct ether_vlan_header *); 2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2012 m->m_flags &= ~M_VLANTAG; 2013 return m; 2014} 2015#endif /* MXGE_NEW_VLAN_API */ 2016 2017static void 2018mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2019{ 2020 mxge_softc_t *sc; 2021 mcp_kreq_ether_send_t *req; 2022 bus_dma_segment_t *seg; 2023 struct mbuf *m_tmp; 2024 struct ifnet *ifp; 2025 mxge_tx_ring_t *tx; 2026 struct ip *ip; 2027 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2028 uint16_t pseudo_hdr_offset; 2029 uint8_t flags, cksum_offset; 2030 2031 2032 sc = ss->sc; 2033 ifp = sc->ifp; 2034 tx = &ss->tx; 2035 2036 ip_off = sizeof (struct ether_header); 2037#ifdef MXGE_NEW_VLAN_API 2038 if (m->m_flags & M_VLANTAG) { 2039 m = mxge_vlan_tag_insert(m); 2040 if (__predict_false(m == NULL)) 2041 goto drop; 2042 ip_off += ETHER_VLAN_ENCAP_LEN; 2043 } 2044#endif 2045 /* (try to) map the frame for DMA */ 2046 idx = tx->req & tx->mask; 2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2048 m, tx->seg_list, &cnt, 2049 BUS_DMA_NOWAIT); 2050 if (__predict_false(err == EFBIG)) { 2051 /* Too many segments in the chain. 
Try 2052 to defrag */ 2053 m_tmp = m_defrag(m, M_NOWAIT); 2054 if (m_tmp == NULL) { 2055 goto drop; 2056 } 2057 ss->tx.defrag++; 2058 m = m_tmp; 2059 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2060 tx->info[idx].map, 2061 m, tx->seg_list, &cnt, 2062 BUS_DMA_NOWAIT); 2063 } 2064 if (__predict_false(err != 0)) { 2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2066 " packet len = %d\n", err, m->m_pkthdr.len); 2067 goto drop; 2068 } 2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2070 BUS_DMASYNC_PREWRITE); 2071 tx->info[idx].m = m; 2072 2073#if IFCAP_TSO4 2074 /* TSO is different enough, we handle it in another routine */ 2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2076 mxge_encap_tso(ss, m, cnt, ip_off); 2077 return; 2078 } 2079#endif 2080 2081 req = tx->req_list; 2082 cksum_offset = 0; 2083 pseudo_hdr_offset = 0; 2084 flags = MXGEFW_FLAGS_NO_TSO; 2085 2086 /* checksum offloading? */ 2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2088 /* ensure ip header is in first mbuf, copy 2089 it to a scratch buffer if not */ 2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2091 m_copydata(m, 0, ip_off + sizeof (*ip), 2092 ss->scratch); 2093 ip = (struct ip *)(ss->scratch + ip_off); 2094 } else { 2095 ip = (struct ip *)(mtod(m, char *) + ip_off); 2096 } 2097 cksum_offset = ip_off + (ip->ip_hl << 2); 2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2100 req->cksum_offset = cksum_offset; 2101 flags |= MXGEFW_FLAGS_CKSUM; 2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2103 } else { 2104 odd_flag = 0; 2105 } 2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2107 flags |= MXGEFW_FLAGS_SMALL; 2108 2109 /* convert segments into a request list */ 2110 cum_len = 0; 2111 seg = tx->seg_list; 2112 req->flags = MXGEFW_FLAGS_FIRST; 2113 for (i = 0; i < cnt; i++) { 2114 req->addr_low = 2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2116 req->addr_high = 2117 
htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2118 req->length = htobe16(seg->ds_len); 2119 req->cksum_offset = cksum_offset; 2120 if (cksum_offset > seg->ds_len) 2121 cksum_offset -= seg->ds_len; 2122 else 2123 cksum_offset = 0; 2124 req->pseudo_hdr_offset = pseudo_hdr_offset; 2125 req->pad = 0; /* complete solid 16-byte block */ 2126 req->rdma_count = 1; 2127 req->flags |= flags | ((cum_len & 1) * odd_flag); 2128 cum_len += seg->ds_len; 2129 seg++; 2130 req++; 2131 req->flags = 0; 2132 } 2133 req--; 2134 /* pad runts to 60 bytes */ 2135 if (cum_len < 60) { 2136 req++; 2137 req->addr_low = 2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2139 req->addr_high = 2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2141 req->length = htobe16(60 - cum_len); 2142 req->cksum_offset = 0; 2143 req->pseudo_hdr_offset = pseudo_hdr_offset; 2144 req->pad = 0; /* complete solid 16-byte block */ 2145 req->rdma_count = 1; 2146 req->flags |= flags | ((cum_len & 1) * odd_flag); 2147 cnt++; 2148 } 2149 2150 tx->req_list[0].rdma_count = cnt; 2151#if 0 2152 /* print what the firmware will see */ 2153 for (i = 0; i < cnt; i++) { 2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2155 "cso:%d, flags:0x%x, rdma:%d\n", 2156 i, (int)ntohl(tx->req_list[i].addr_high), 2157 (int)ntohl(tx->req_list[i].addr_low), 2158 (int)ntohs(tx->req_list[i].length), 2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2161 tx->req_list[i].rdma_count); 2162 } 2163 printf("--------------\n"); 2164#endif 2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2166 mxge_submit_req(tx, tx->req_list, cnt); 2167#ifdef IFNET_BUF_RING 2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2169 /* tell the NIC to start polling this slice */ 2170 *tx->send_go = 1; 2171 tx->queue_active = 1; 2172 tx->activate++; 2173 wmb(); 2174 } 2175#endif 2176 return; 2177 2178drop: 2179 m_freem(m); 2180 ss->oerrors++; 2181 return; 2182} 2183 
2184#ifdef IFNET_BUF_RING 2185static void 2186mxge_qflush(struct ifnet *ifp) 2187{ 2188 mxge_softc_t *sc = ifp->if_softc; 2189 mxge_tx_ring_t *tx; 2190 struct mbuf *m; 2191 int slice; 2192 2193 for (slice = 0; slice < sc->num_slices; slice++) { 2194 tx = &sc->ss[slice].tx; 2195 mtx_lock(&tx->mtx); 2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2197 m_freem(m); 2198 mtx_unlock(&tx->mtx); 2199 } 2200 if_qflush(ifp); 2201} 2202 2203static inline void 2204mxge_start_locked(struct mxge_slice_state *ss) 2205{ 2206 mxge_softc_t *sc; 2207 struct mbuf *m; 2208 struct ifnet *ifp; 2209 mxge_tx_ring_t *tx; 2210 2211 sc = ss->sc; 2212 ifp = sc->ifp; 2213 tx = &ss->tx; 2214 2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2216 m = drbr_dequeue(ifp, tx->br); 2217 if (m == NULL) { 2218 return; 2219 } 2220 /* let BPF see it */ 2221 BPF_MTAP(ifp, m); 2222 2223 /* give it to the nic */ 2224 mxge_encap(ss, m); 2225 } 2226 /* ran out of transmit slots */ 2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2228 && (!drbr_empty(ifp, tx->br))) { 2229 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2230 tx->stall++; 2231 } 2232} 2233 2234static int 2235mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2236{ 2237 mxge_softc_t *sc; 2238 struct ifnet *ifp; 2239 mxge_tx_ring_t *tx; 2240 int err; 2241 2242 sc = ss->sc; 2243 ifp = sc->ifp; 2244 tx = &ss->tx; 2245 2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2247 IFF_DRV_RUNNING) { 2248 err = drbr_enqueue(ifp, tx->br, m); 2249 return (err); 2250 } 2251
|
2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 /* give it to the nic */ 2257 mxge_encap(ss, m); 2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2259 return (err); 2260 } 2261 if (!drbr_empty(ifp, tx->br)) 2262 mxge_start_locked(ss); 2263 return (0); 2264} 2265 2266static int 2267mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2268{ 2269 mxge_softc_t *sc = ifp->if_softc; 2270 struct mxge_slice_state *ss; 2271 mxge_tx_ring_t *tx; 2272 int err = 0; 2273 int slice; 2274 2275 slice = m->m_pkthdr.flowid; 2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2277 2278 ss = &sc->ss[slice]; 2279 tx = &ss->tx; 2280 2281 if (mtx_trylock(&tx->mtx)) { 2282 err = mxge_transmit_locked(ss, m); 2283 mtx_unlock(&tx->mtx); 2284 } else { 2285 err = drbr_enqueue(ifp, tx->br, m); 2286 } 2287 2288 return (err); 2289} 2290 2291#else 2292 2293static inline void 2294mxge_start_locked(struct mxge_slice_state *ss) 2295{ 2296 mxge_softc_t *sc; 2297 struct mbuf *m; 2298 struct ifnet *ifp; 2299 mxge_tx_ring_t *tx; 2300 2301 sc = ss->sc; 2302 ifp = sc->ifp; 2303 tx = &ss->tx; 2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2306 if (m == NULL) { 2307 return; 2308 } 2309 /* let BPF see it */ 2310 BPF_MTAP(ifp, m); 2311 2312 /* give it to the nic */ 2313 mxge_encap(ss, m); 2314 } 2315 /* ran out of transmit slots */ 2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2318 tx->stall++; 2319 } 2320} 2321#endif 2322static void 2323mxge_start(struct ifnet *ifp) 2324{ 2325 mxge_softc_t *sc = ifp->if_softc; 2326 struct mxge_slice_state *ss; 2327 2328 /* only use the first slice for now */ 2329 ss = &sc->ss[0]; 2330 mtx_lock(&ss->tx.mtx); 2331 mxge_start_locked(ss); 2332 mtx_unlock(&ss->tx.mtx); 2333} 2334 2335/* 2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. 
Copy 2337 * at most 32 bytes at a time, so as to avoid involving the software 2338 * pio handler in the nic. We re-write the first segment's low 2339 * DMA address to mark it valid only after we write the entire chunk 2340 * in a burst 2341 */ 2342static inline void 2343mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2344 mcp_kreq_ether_recv_t *src) 2345{ 2346 uint32_t low; 2347 2348 low = src->addr_low; 2349 src->addr_low = 0xffffffff; 2350 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2351 wmb(); 2352 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2353 wmb(); 2354 src->addr_low = low; 2355 dst->addr_low = low; 2356 wmb(); 2357} 2358 2359static int 2360mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2361{ 2362 bus_dma_segment_t seg; 2363 struct mbuf *m; 2364 mxge_rx_ring_t *rx = &ss->rx_small; 2365 int cnt, err; 2366 2367 m = m_gethdr(M_DONTWAIT, MT_DATA); 2368 if (m == NULL) { 2369 rx->alloc_fail++; 2370 err = ENOBUFS; 2371 goto done; 2372 } 2373 m->m_len = MHLEN; 2374 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2375 &seg, &cnt, BUS_DMA_NOWAIT); 2376 if (err != 0) { 2377 m_free(m); 2378 goto done; 2379 } 2380 rx->info[idx].m = m; 2381 rx->shadow[idx].addr_low = 2382 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2383 rx->shadow[idx].addr_high = 2384 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2385 2386done: 2387 if ((idx & 7) == 7) 2388 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2389 return err; 2390} 2391 2392static int 2393mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2394{ 2395 bus_dma_segment_t seg[3]; 2396 struct mbuf *m; 2397 mxge_rx_ring_t *rx = &ss->rx_big; 2398 int cnt, err, i; 2399 2400 if (rx->cl_size == MCLBYTES) 2401 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2402 else 2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_len = rx->mlen; 2410 err = 
bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2411 seg, &cnt, BUS_DMA_NOWAIT); 2412 if (err != 0) { 2413 m_free(m); 2414 goto done; 2415 } 2416 rx->info[idx].m = m; 2417 rx->shadow[idx].addr_low = 2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2419 rx->shadow[idx].addr_high = 2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2421 2422#if MXGE_VIRT_JUMBOS 2423 for (i = 1; i < cnt; i++) { 2424 rx->shadow[idx + i].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2426 rx->shadow[idx + i].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2428 } 2429#endif 2430 2431done: 2432 for (i = 0; i < rx->nbufs; i++) { 2433 if ((idx & 7) == 7) { 2434 mxge_submit_8rx(&rx->lanai[idx - 7], 2435 &rx->shadow[idx - 7]); 2436 } 2437 idx++; 2438 } 2439 return err; 2440} 2441 2442/* 2443 * Myri10GE hardware checksums are not valid if the sender 2444 * padded the frame with non-zero padding. This is because 2445 * the firmware just does a simple 16-bit 1s complement 2446 * checksum across the entire frame, excluding the first 14 2447 * bytes. 
It is best to simply to check the checksum and 2448 * tell the stack about it only if the checksum is good 2449 */ 2450 2451static inline uint16_t 2452mxge_rx_csum(struct mbuf *m, int csum) 2453{ 2454 struct ether_header *eh; 2455 struct ip *ip; 2456 uint16_t c; 2457 2458 eh = mtod(m, struct ether_header *); 2459 2460 /* only deal with IPv4 TCP & UDP for now */ 2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2462 return 1; 2463 ip = (struct ip *)(eh + 1); 2464 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2465 ip->ip_p != IPPROTO_UDP)) 2466 return 1; 2467#ifdef INET 2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2469 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2470 - (ip->ip_hl << 2) + ip->ip_p)); 2471#else 2472 c = 1; 2473#endif 2474 c ^= 0xffff; 2475 return (c); 2476} 2477 2478static void 2479mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2480{ 2481 struct ether_vlan_header *evl; 2482 struct ether_header *eh; 2483 uint32_t partial; 2484 2485 evl = mtod(m, struct ether_vlan_header *); 2486 eh = mtod(m, struct ether_header *); 2487 2488 /* 2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2490 * after what the firmware thought was the end of the ethernet 2491 * header. 
2492 */ 2493 2494 /* put checksum into host byte order */ 2495 *csum = ntohs(*csum); 2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2497 (*csum) += ~partial; 2498 (*csum) += ((*csum) < ~partial); 2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2501 2502 /* restore checksum to network byte order; 2503 later consumers expect this */ 2504 *csum = htons(*csum); 2505 2506 /* save the tag */ 2507#ifdef MXGE_NEW_VLAN_API 2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2509#else 2510 { 2511 struct m_tag *mtag; 2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2513 M_NOWAIT); 2514 if (mtag == NULL) 2515 return; 2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2517 m_tag_prepend(m, mtag); 2518 } 2519 2520#endif 2521 m->m_flags |= M_VLANTAG; 2522 2523 /* 2524 * Remove the 802.1q header by copying the Ethernet 2525 * addresses over it and adjusting the beginning of 2526 * the data in the mbuf. The encapsulated Ethernet 2527 * type field is already in place. 
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}


/*
 * Process one received frame from the "big" (jumbo) receive ring:
 * replace the buffer, strip any VLAN tag, validate the checksum,
 * try LRO, and hand the frame to the stack.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	idx = rx->cnt & rx->mask;
	/* big frames may occupy several ring buffers */
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Process one received frame from the "small" receive ring
 * (frames that fit in MHLEN); same flow as mxge_rx_done_big.
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame
		   which we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Drain the slice's receive-completion ring, dispatching each entry
 * to the small or big handler, then flush any active LRO sessions.
 */
static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* zero the length to mark the entry consumed for the NIC */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum);
		else
			mxge_rx_done_big(ss, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
#ifdef INET
	while (!SLIST_EMPTY(&ss->lro_active)) {
		struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
		SLIST_REMOVE_HEAD(&ss->lro_active, next);
		mxge_lro_flush(ss, lro);
	}
#endif
}


/*
 * Reclaim transmit descriptors completed by the firmware up to
 * mcp_idx, freeing mbufs and unloading their DMA maps, and restart
 * transmission if the ring has drained enough.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ss->obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				ss->omcasts++;
			ss->opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* flag marks the descriptor ending a packet */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   its OK to send packets */
#ifdef IFNET_BUF_RING
	flags = &ss->if_drv_flags;
#else
	flags = &ifp->if_drv_flags;
#endif
	mtx_lock(&ss->tx.mtx);
	if ((*flags) & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		*(flags) &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
#endif
	mtx_unlock(&ss->tx.mtx);

}

/* XFP module compliance bits (read from the module) -> ifmedia type */
static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
/* SFP+ module compliance bits -> ifmedia type */
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"}
};

/* Record the detected media type and publish it via ifmedia */
static void
mxge_set_media(mxge_softc_t *sc, int type)
{
	sc->media_flags |= type;
	ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
	ifmedia_set(&sc->media, sc->media_flags);
}


/*
 * Determine the media type for a NIC.
Some XFPs will identify 2795 * themselves only when their link is up, so this is initiated via a 2796 * link up interrupt. However, this can potentially take up to 2797 * several milliseconds, so it is run via the watchdog routine, rather 2798 * than in the interrupt handler itself. This need only be done 2799 * once, not each time the link is up. 2800 */ 2801static void 2802mxge_media_probe(mxge_softc_t *sc) 2803{ 2804 mxge_cmd_t cmd; 2805 char *cage_type; 2806 char *ptr; 2807 struct mxge_media_type *mxge_media_types = NULL; 2808 int i, err, ms, mxge_media_type_entries; 2809 uint32_t byte; 2810 2811 sc->need_media_probe = 0; 2812 2813 /* if we've already set a media type, we're done */ 2814 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2815 return; 2816 2817 /* 2818 * parse the product code to deterimine the interface type 2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2820 * after the 3rd dash in the driver's cached copy of the 2821 * EEPROM's product code string. 2822 */ 2823 ptr = sc->product_code_string; 2824 if (ptr == NULL) { 2825 device_printf(sc->dev, "Missing product code\n"); 2826 } 2827 2828 for (i = 0; i < 3; i++, ptr++) { 2829 ptr = index(ptr, '-'); 2830 if (ptr == NULL) { 2831 device_printf(sc->dev, 2832 "only %d dashes in PC?!?\n", i); 2833 return; 2834 } 2835 } 2836 if (*ptr == 'C') { 2837 /* -C is CX4 */ 2838 mxge_set_media(sc, IFM_10G_CX4); 2839 return; 2840 } 2841 else if (*ptr == 'Q') { 2842 /* -Q is Quad Ribbon Fiber */ 2843 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2844 /* FreeBSD has no media type for Quad ribbon fiber */ 2845 return; 2846 } 2847 2848 if (*ptr == 'R') { 2849 /* -R is XFP */ 2850 mxge_media_types = mxge_xfp_media_types; 2851 mxge_media_type_entries = 2852 sizeof (mxge_xfp_media_types) / 2853 sizeof (mxge_xfp_media_types[0]); 2854 byte = MXGE_XFP_COMPLIANCE_BYTE; 2855 cage_type = "XFP"; 2856 } 2857 2858 if (*ptr == 'S' || *(ptr +1) == 'S') { 2859 /* -S or -2S is SFP+ */ 2860 mxge_media_types = 
mxge_sfp_media_types; 2861 mxge_media_type_entries = 2862 sizeof (mxge_sfp_media_types) / 2863 sizeof (mxge_sfp_media_types[0]); 2864 cage_type = "SFP+"; 2865 byte = 3; 2866 } 2867 2868 if (mxge_media_types == NULL) { 2869 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2870 return; 2871 } 2872 2873 /* 2874 * At this point we know the NIC has an XFP cage, so now we 2875 * try to determine what is in the cage by using the 2876 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2877 * register. We read just one byte, which may take over 2878 * a millisecond 2879 */ 2880 2881 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2882 cmd.data1 = byte; 2883 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2884 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2885 device_printf(sc->dev, "failed to read XFP\n"); 2886 } 2887 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2888 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2889 } 2890 if (err != MXGEFW_CMD_OK) { 2891 return; 2892 } 2893 2894 /* now we wait for the data to be cached */ 2895 cmd.data0 = byte; 2896 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2897 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2898 DELAY(1000); 2899 cmd.data0 = byte; 2900 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2901 } 2902 if (err != MXGEFW_CMD_OK) { 2903 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2904 cage_type, err, ms); 2905 return; 2906 } 2907 2908 if (cmd.data0 == mxge_media_types[0].bitmask) { 2909 if (mxge_verbose) 2910 device_printf(sc->dev, "%s:%s\n", cage_type, 2911 mxge_media_types[0].name); 2912 mxge_set_media(sc, mxge_media_types[0].flag); 2913 return; 2914 } 2915 for (i = 1; i < mxge_media_type_entries; i++) { 2916 if (cmd.data0 & mxge_media_types[i].bitmask) { 2917 if (mxge_verbose) 2918 device_printf(sc->dev, "%s:%s\n", 2919 cage_type, 2920 mxge_media_types[i].name); 2921 2922 mxge_set_media(sc, mxge_media_types[i].flag); 2923 return; 2924 } 2925 } 2926 
device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2927 cmd.data0); 2928 2929 return; 2930} 2931 2932static void 2933mxge_intr(void *arg) 2934{ 2935 struct mxge_slice_state *ss = arg; 2936 mxge_softc_t *sc = ss->sc; 2937 mcp_irq_data_t *stats = ss->fw_stats; 2938 mxge_tx_ring_t *tx = &ss->tx; 2939 mxge_rx_done_t *rx_done = &ss->rx_done; 2940 uint32_t send_done_count; 2941 uint8_t valid; 2942 2943 2944#ifndef IFNET_BUF_RING 2945 /* an interrupt on a non-zero slice is implicitly valid 2946 since MSI-X irqs are not shared */ 2947 if (ss != sc->ss) { 2948 mxge_clean_rx_done(ss); 2949 *ss->irq_claim = be32toh(3); 2950 return; 2951 } 2952#endif 2953 2954 /* make sure the DMA has finished */ 2955 if (!stats->valid) { 2956 return; 2957 } 2958 valid = stats->valid; 2959 2960 if (sc->legacy_irq) { 2961 /* lower legacy IRQ */ 2962 *sc->irq_deassert = 0; 2963 if (!mxge_deassert_wait) 2964 /* don't wait for conf. that irq is low */ 2965 stats->valid = 0; 2966 } else { 2967 stats->valid = 0; 2968 } 2969 2970 /* loop while waiting for legacy irq deassertion */ 2971 do { 2972 /* check for transmit completes and receives */ 2973 send_done_count = be32toh(stats->send_done_count); 2974 while ((send_done_count != tx->pkt_done) || 2975 (rx_done->entry[rx_done->idx].length != 0)) { 2976 if (send_done_count != tx->pkt_done) 2977 mxge_tx_done(ss, (int)send_done_count); 2978 mxge_clean_rx_done(ss); 2979 send_done_count = be32toh(stats->send_done_count); 2980 } 2981 if (sc->legacy_irq && mxge_deassert_wait) 2982 wmb(); 2983 } while (*((volatile uint8_t *) &stats->valid)); 2984 2985 /* fw link & error stats meaningful only on the first slice */ 2986 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2987 if (sc->link_state != stats->link_up) { 2988 sc->link_state = stats->link_up; 2989 if (sc->link_state) { 2990 if_link_state_change(sc->ifp, LINK_STATE_UP); 2991 if (mxge_verbose) 2992 device_printf(sc->dev, "link up\n"); 2993 } else { 2994 if_link_state_change(sc->ifp, 
						     LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	*(ss->irq_claim + 1) = be32toh(3);
}

/* if_init stub; real initialization happens via ioctl/mxge_open */
static void
mxge_init(void *arg)
{
}



/*
 * Free all mbufs (and their LRO entries) still attached to a slice's
 * rx and tx rings; DMA maps are unloaded before the mbufs are freed.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	struct lro_entry *lro_entry;
	int i;

	while (!SLIST_EMPTY(&ss->lro_free)) {
		lro_entry = SLIST_FIRST(&ss->lro_free);
		SLIST_REMOVE_HEAD(&ss->lro_free, next);
		free(lro_entry, M_DEVBUF);
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		if (ss->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_big.dmat,
				  ss->rx_big.info[i].map);
		m_freem(ss->rx_big.info[i].m);
		ss->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_small.mask; i++) {
		if (ss->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_small.dmat,
				  ss->rx_small.info[i].map);
		m_freem(ss->rx_small.info[i].m);
		ss->rx_small.info[i].m = NULL;
	}

	/* transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		ss->tx.info[i].flag = 0;
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat,
				  ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

/* Free ring mbufs on every slice */
static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Release all per-slice ring resources: completion-ring DMA memory,
 * shadow/info arrays, and busdma maps and tags.  Safe to call on a
 * partially-initialized slice (every pointer is NULL-checked).
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;


	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			/* destroy all maps before the tag */
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

/* Free ring resources on every slice */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate one slice's host-side ring state: shadow and info arrays,
 * busdma tags and per-slot maps for the rx rings, and (on slices that
 * transmit) the tx request block, segment list and tx dmamaps.
 * Returns 0 on success or an errno; caller frees via mxge_free_rings.
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	err = ENOMEM;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.shadow == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.shadow == NULL)
		return err;

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.info == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.info == NULL)
		return err;

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			return err;
		}
	}
	/* spare map used while refilling a slot */
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resouces */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);


	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.req_bytes == NULL)
		return err;
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.seg_list == NULL)
		return err;

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->tx.info == NULL)
		return err;

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}

/*
 * Query the firmware for ring sizes, size the ifnet send queue to
 * match, and allocate rings for every slice.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		goto abort;
	}

	tx_ring_entries = tx_ring_size /
sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
					     rx_ring_entries,
					     tx_ring_entries);
		if (err != 0)
			goto abort;
	}
	return 0;

abort:
	mxge_free_rings(sc);
	return err;

}


/*
 * Pick receive buffer parameters for the given MTU: the firmware's
 * big-buffer size, the mbuf cluster size to allocate from, and how
 * many firmware buffers each cluster provides.
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
#if MXGE_VIRT_JUMBOS
	/* now we need to use virtually contiguous buffers */
	*cl_size = MJUM9BYTES;
	*big_buf_size = 4096;
	*nbufs = mtu / 4096 + 1;
	/* needs to be a power of two, so round up */
	if (*nbufs == 3)
		*nbufs = 4;
#else
	*cl_size = MJUM9BYTES;
	*big_buf_size = MJUM9BYTES;
	*nbufs = 1;
#endif
}

/*
 * Bring one slice up: allocate its LRO entries, fetch the lanai ring
 * pointers from the firmware, and stock both receive rings.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	struct lro_entry *lro_entry;
	int err, i, slice;


	sc = ss->sc;
	slice = ss - sc->ss;

	SLIST_INIT(&ss->lro_free);
	SLIST_INIT(&ss->lro_active);

	for (i = 0; i < sc->lro_cnt; i++) {
		lro_entry = (struct lro_entry *)
			malloc(sizeof (*lro_entry), M_DEVBUF,
			       M_NOWAIT | M_ZERO);
		/* on allocation failure just run with fewer LRO entries */
		if (lro_entry == NULL) {
			sc->lro_cnt = i;
			break;
		}
		SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
	}
	/* get the lanai pointers to the send and receive rings */

	err = 0;
#ifndef IFNET_BUF_RING
	/* We currently only send from the first slice */
	if (slice == 0) {
#endif
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
		ss->tx.send_go = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
	}
#endif
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* poison the big shadow ring so unfilled slots fault visibly */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err)
{
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, ss->rx_big.mask + 1);
			return ENOMEM;
		}
	}
	return 0;
}

/*
 * Bring the interface up: reset the NIC, program RSS, buffer sizes
 * and stats DMA, open every slice, and start the firmware.  Returns
 * 0 or an errno; on failure all ring mbufs are released.
 */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, big_bytes, nbufs, slice, cl_size, i;
	bus_addr_t bus;
	volatile uint8_t *itable;
	struct mxge_slice_state *ss;

	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc, 1);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (sc->num_slices > 1) {
		/* setup the indirection table */
		cmd.data0 = sc->num_slices;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE,
				    &cmd);

		err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET,
				     &cmd);
		if (err != 0) {
			device_printf(sc->dev,
				      "failed to setup rss tables\n");
			return err;
		}

		/* just enable an identity mapping */
		itable = sc->sram + cmd.data0;
		for (i = 0; i < sc->num_slices; i++)
			itable[i] = (uint8_t)i;

		cmd.data0 = 1;
		cmd.data1 = mxge_rss_hash_type;
		err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd);
		if (err != 0) {
			device_printf(sc->dev, "failed to enable slices\n");
			return err;
		}
	}


	mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs);

	cmd.data0 = nbufs;
	err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS,
			    &cmd);
	/* error is only meaningful if we're trying to set
	   MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */
	if (err && nbufs > 1) {
		device_printf(sc->dev,
			      "Failed to set alway-use-n to %d\n",
			      nbufs);
		return EIO;
	}
	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two. Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	for (slice = 0;
#ifdef IFNET_BUF_RING
	     slice < sc->num_slices;
#else
	     slice < 1;
#endif
	     slice++) {
		ss = &sc->ss[slice];
		cmd.data0 =
			MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data1 =
			MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr);
		cmd.data2 = sizeof(struct mcp_irq_data);
		cmd.data2 |= (slice << 16);
		err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);
	}

	if (err != 0) {
		/* fall back to the obsolete stats DMA interface */
		bus = sc->ss->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size);
		if (err != 0) {
			device_printf(sc->dev, "couldn't open slice %d\n",
				      slice);
			goto abort;
		}
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags |= IFF_DRV_RUNNING;
		ss->if_drv_flags &= ~IFF_DRV_OACTIVE;
	}
#endif
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);

	return err;
}

/*
 * Bring the interface down.  When 'down' is zero the firmware is told
 * to stop and we wait (via the down interrupt counter) for it to ack;
 * when non-zero the firmware is assumed already stopped.
 */
static int
mxge_close(mxge_softc_t *sc, int down)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;
#ifdef IFNET_BUF_RING
	struct mxge_slice_state *ss;
	int slice;
#endif

#ifdef IFNET_BUF_RING
	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ss->if_drv_flags &= ~IFF_DRV_RUNNING;
	}
#endif
	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	if (!down) {
		old_down_cnt = sc->down_cnt;
		wmb();
		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
		if (err) {
			device_printf(sc->dev,
				      "Couldn't bring down link\n");
		}
		if (old_down_cnt == sc->down_cnt) {
			/* wait for down irq */
			DELAY(10 * sc->intr_coal_delay);
		}
		wmb();
		if (old_down_cnt == sc->down_cnt) {
			device_printf(sc->dev, "never got down irq\n");
		}
	}
	mxge_free_mbufs(sc);

	return 0;
}

/*
 * Program the PCI config registers the driver depends on: max read
 * request size on the PCIe capability, busmastering and memory-space
 * decode.  Also re-applied after a watchdog-triggered NIC reboot.
 */
static void
mxge_setup_cfg_space(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	int reg;
	uint16_t cmd, lnk, pectl;

	/* find the PCIe link width and set max read request to 4KB*/
	if (pci_find_extcap(dev, PCIY_EXPRESS, &reg) == 0) {
		lnk = pci_read_config(dev, reg + 0x12, 2);
		sc->link_width = (lnk >> 4) & 0x3f;

		if (sc->pectl == 0) {
			pectl = pci_read_config(dev, reg + 0x8, 2);
			pectl = (pectl & ~0x7000) | (5 << 12);
			pci_write_config(dev, reg + 0x8, pectl, 2);
			sc->pectl = pectl;
		} else {
			/* restore saved pectl after watchdog reset */
			pci_write_config(dev, reg +
0x8, sc->pectl, 2); 3713 } 3714 } 3715 3716 /* Enable DMA and Memory space access */ 3717 pci_enable_busmaster(dev); 3718 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3719 cmd |= PCIM_CMD_MEMEN; 3720 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3721} 3722 3723static uint32_t 3724mxge_read_reboot(mxge_softc_t *sc) 3725{ 3726 device_t dev = sc->dev; 3727 uint32_t vs; 3728 3729 /* find the vendor specific offset */ 3730 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3731 device_printf(sc->dev, 3732 "could not find vendor specific offset\n"); 3733 return (uint32_t)-1; 3734 } 3735 /* enable read32 mode */ 3736 pci_write_config(dev, vs + 0x10, 0x3, 1); 3737 /* tell NIC which register to read */ 3738 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3739 return (pci_read_config(dev, vs + 0x14, 4)); 3740} 3741 3742static void 3743mxge_watchdog_reset(mxge_softc_t *sc) 3744{ 3745 struct pci_devinfo *dinfo; 3746 struct mxge_slice_state *ss; 3747 int err, running, s, num_tx_slices = 1; 3748 uint32_t reboot; 3749 uint16_t cmd; 3750 3751 err = ENXIO; 3752 3753 device_printf(sc->dev, "Watchdog reset!\n"); 3754 3755 /* 3756 * check to see if the NIC rebooted. If it did, then all of 3757 * PCI config space has been reset, and things like the 3758 * busmaster bit will be zero. If this is the case, then we 3759 * must restore PCI config space before the NIC can be used 3760 * again 3761 */ 3762 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3763 if (cmd == 0xffff) { 3764 /* 3765 * maybe the watchdog caught the NIC rebooting; wait 3766 * up to 100ms for it to finish. 
If it does not come 3767 * back, then give up 3768 */ 3769 DELAY(1000*100); 3770 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3771 if (cmd == 0xffff) { 3772 device_printf(sc->dev, "NIC disappeared!\n"); 3773 } 3774 } 3775 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3776 /* print the reboot status */ 3777 reboot = mxge_read_reboot(sc); 3778 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3779 reboot); 3780 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3781 if (running) { 3782 3783 /* 3784 * quiesce NIC so that TX routines will not try to 3785 * xmit after restoration of BAR 3786 */ 3787 3788 /* Mark the link as down */ 3789 if (sc->link_state) { 3790 sc->link_state = 0; 3791 if_link_state_change(sc->ifp, 3792 LINK_STATE_DOWN); 3793 } 3794#ifdef IFNET_BUF_RING 3795 num_tx_slices = sc->num_slices; 3796#endif 3797 /* grab all TX locks to ensure no tx */ 3798 for (s = 0; s < num_tx_slices; s++) { 3799 ss = &sc->ss[s]; 3800 mtx_lock(&ss->tx.mtx); 3801 } 3802 mxge_close(sc, 1); 3803 } 3804 /* restore PCI configuration space */ 3805 dinfo = device_get_ivars(sc->dev); 3806 pci_cfg_restore(sc->dev, dinfo); 3807 3808 /* and redo any changes we made to our config space */ 3809 mxge_setup_cfg_space(sc); 3810 3811 /* reload f/w */ 3812 err = mxge_load_firmware(sc, 0); 3813 if (err) { 3814 device_printf(sc->dev, 3815 "Unable to re-load f/w\n"); 3816 } 3817 if (running) { 3818 if (!err) 3819 err = mxge_open(sc); 3820 /* release all TX locks */ 3821 for (s = 0; s < num_tx_slices; s++) { 3822 ss = &sc->ss[s]; 3823#ifdef IFNET_BUF_RING 3824 mxge_start_locked(ss); 3825#endif 3826 mtx_unlock(&ss->tx.mtx); 3827 } 3828 } 3829 sc->watchdog_resets++; 3830 } else { 3831 device_printf(sc->dev, 3832 "NIC did not reboot, not resetting\n"); 3833 err = 0; 3834 } 3835 if (err) { 3836 device_printf(sc->dev, "watchdog reset failed\n"); 3837 } else { 3838 if (sc->dying == 2) 3839 sc->dying = 0; 3840 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3841 } 3842} 3843 3844static 
void 3845mxge_watchdog_task(void *arg, int pending) 3846{ 3847 mxge_softc_t *sc = arg; 3848 3849 3850 mtx_lock(&sc->driver_mtx); 3851 mxge_watchdog_reset(sc); 3852 mtx_unlock(&sc->driver_mtx); 3853} 3854 3855static void 3856mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3857{ 3858 tx = &sc->ss[slice].tx; 3859 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3860 device_printf(sc->dev, 3861 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3862 tx->req, tx->done, tx->queue_active); 3863 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3864 tx->activate, tx->deactivate); 3865 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3866 tx->pkt_done, 3867 be32toh(sc->ss->fw_stats->send_done_count)); 3868} 3869 3870static int 3871mxge_watchdog(mxge_softc_t *sc) 3872{ 3873 mxge_tx_ring_t *tx; 3874 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3875 int i, err = 0; 3876 3877 /* see if we have outstanding transmits, which 3878 have been pending for more than mxge_ticks */ 3879 for (i = 0; 3880#ifdef IFNET_BUF_RING 3881 (i < sc->num_slices) && (err == 0); 3882#else 3883 (i < 1) && (err == 0); 3884#endif 3885 i++) { 3886 tx = &sc->ss[i].tx; 3887 if (tx->req != tx->done && 3888 tx->watchdog_req != tx->watchdog_done && 3889 tx->done == tx->watchdog_done) { 3890 /* check for pause blocking before resetting */ 3891 if (tx->watchdog_rx_pause == rx_pause) { 3892 mxge_warn_stuck(sc, tx, i); 3893 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3894 return (ENXIO); 3895 } 3896 else 3897 device_printf(sc->dev, "Flow control blocking " 3898 "xmits, check link partner\n"); 3899 } 3900 3901 tx->watchdog_req = tx->req; 3902 tx->watchdog_done = tx->done; 3903 tx->watchdog_rx_pause = rx_pause; 3904 } 3905 3906 if (sc->need_media_probe) 3907 mxge_media_probe(sc); 3908 return (err); 3909} 3910 3911static u_long 3912mxge_update_stats(mxge_softc_t *sc) 3913{ 3914 struct mxge_slice_state *ss; 3915 u_long pkts = 0; 3916 u_long ipackets = 0; 3917 
u_long opackets = 0; 3918#ifdef IFNET_BUF_RING 3919 u_long obytes = 0; 3920 u_long omcasts = 0; 3921 u_long odrops = 0; 3922#endif 3923 u_long oerrors = 0; 3924 int slice; 3925 3926 for (slice = 0; slice < sc->num_slices; slice++) { 3927 ss = &sc->ss[slice]; 3928 ipackets += ss->ipackets; 3929 opackets += ss->opackets; 3930#ifdef IFNET_BUF_RING 3931 obytes += ss->obytes; 3932 omcasts += ss->omcasts; 3933 odrops += ss->tx.br->br_drops; 3934#endif 3935 oerrors += ss->oerrors; 3936 } 3937 pkts = (ipackets - sc->ifp->if_ipackets); 3938 pkts += (opackets - sc->ifp->if_opackets); 3939 sc->ifp->if_ipackets = ipackets; 3940 sc->ifp->if_opackets = opackets; 3941#ifdef IFNET_BUF_RING 3942 sc->ifp->if_obytes = obytes; 3943 sc->ifp->if_omcasts = omcasts; 3944 sc->ifp->if_snd.ifq_drops = odrops; 3945#endif 3946 sc->ifp->if_oerrors = oerrors; 3947 return pkts; 3948} 3949 3950static void 3951mxge_tick(void *arg) 3952{ 3953 mxge_softc_t *sc = arg; 3954 u_long pkts = 0; 3955 int err = 0; 3956 int running, ticks; 3957 uint16_t cmd; 3958 3959 ticks = mxge_ticks; 3960 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3961 if (running) { 3962 /* aggregate stats from different slices */ 3963 pkts = mxge_update_stats(sc); 3964 if (!sc->watchdog_countdown) { 3965 err = mxge_watchdog(sc); 3966 sc->watchdog_countdown = 4; 3967 } 3968 sc->watchdog_countdown--; 3969 } 3970 if (pkts == 0) { 3971 /* ensure NIC did not suffer h/w fault while idle */ 3972 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3973 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3974 sc->dying = 2; 3975 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3976 err = ENXIO; 3977 } 3978 /* look less often if NIC is idle */ 3979 ticks *= 4; 3980 } 3981 3982 if (err == 0) 3983 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 3984 3985} 3986 3987static int 3988mxge_media_change(struct ifnet *ifp) 3989{ 3990 return EINVAL; 3991} 3992 3993static int 3994mxge_change_mtu(mxge_softc_t *sc, int mtu) 3995{ 3996 struct ifnet *ifp = sc->ifp; 
3997 int real_mtu, old_mtu; 3998 int err = 0; 3999 4000 4001 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4002 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4003 return EINVAL; 4004 mtx_lock(&sc->driver_mtx); 4005 old_mtu = ifp->if_mtu; 4006 ifp->if_mtu = mtu; 4007 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4008 mxge_close(sc, 0); 4009 err = mxge_open(sc); 4010 if (err != 0) { 4011 ifp->if_mtu = old_mtu; 4012 mxge_close(sc, 0); 4013 (void) mxge_open(sc); 4014 } 4015 } 4016 mtx_unlock(&sc->driver_mtx); 4017 return err; 4018} 4019 4020static void 4021mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4022{ 4023 mxge_softc_t *sc = ifp->if_softc; 4024 4025 4026 if (sc == NULL) 4027 return; 4028 ifmr->ifm_status = IFM_AVALID; 4029 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4030 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 4031 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 4032} 4033 4034static int 4035mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4036{ 4037 mxge_softc_t *sc = ifp->if_softc; 4038 struct ifreq *ifr = (struct ifreq *)data; 4039 int err, mask; 4040 4041 err = 0; 4042 switch (command) { 4043 case SIOCSIFADDR: 4044 case SIOCGIFADDR: 4045 err = ether_ioctl(ifp, command, data); 4046 break; 4047 4048 case SIOCSIFMTU: 4049 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4050 break; 4051 4052 case SIOCSIFFLAGS: 4053 mtx_lock(&sc->driver_mtx); 4054 if (sc->dying) { 4055 mtx_unlock(&sc->driver_mtx); 4056 return EINVAL; 4057 } 4058 if (ifp->if_flags & IFF_UP) { 4059 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4060 err = mxge_open(sc); 4061 } else { 4062 /* take care of promis can allmulti 4063 flag chages */ 4064 mxge_change_promisc(sc, 4065 ifp->if_flags & IFF_PROMISC); 4066 mxge_set_multicast_list(sc); 4067 } 4068 } else { 4069 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4070 mxge_close(sc, 0); 4071 } 4072 } 4073 mtx_unlock(&sc->driver_mtx); 4074 break; 4075 4076 case SIOCADDMULTI: 4077 case SIOCDELMULTI: 4078 
mtx_lock(&sc->driver_mtx); 4079 mxge_set_multicast_list(sc); 4080 mtx_unlock(&sc->driver_mtx); 4081 break; 4082 4083 case SIOCSIFCAP: 4084 mtx_lock(&sc->driver_mtx); 4085 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4086 if (mask & IFCAP_TXCSUM) { 4087 if (IFCAP_TXCSUM & ifp->if_capenable) { 4088 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4089 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4090 | CSUM_TSO); 4091 } else { 4092 ifp->if_capenable |= IFCAP_TXCSUM; 4093 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4094 } 4095 } else if (mask & IFCAP_RXCSUM) { 4096 if (IFCAP_RXCSUM & ifp->if_capenable) { 4097 ifp->if_capenable &= ~IFCAP_RXCSUM; 4098 sc->csum_flag = 0; 4099 } else { 4100 ifp->if_capenable |= IFCAP_RXCSUM; 4101 sc->csum_flag = 1; 4102 } 4103 } 4104 if (mask & IFCAP_TSO4) { 4105 if (IFCAP_TSO4 & ifp->if_capenable) { 4106 ifp->if_capenable &= ~IFCAP_TSO4; 4107 ifp->if_hwassist &= ~CSUM_TSO; 4108 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4109 ifp->if_capenable |= IFCAP_TSO4; 4110 ifp->if_hwassist |= CSUM_TSO; 4111 } else { 4112 printf("mxge requires tx checksum offload" 4113 " be enabled to use TSO\n"); 4114 err = EINVAL; 4115 } 4116 } 4117 if (mask & IFCAP_LRO) { 4118 if (IFCAP_LRO & ifp->if_capenable) 4119 err = mxge_change_lro_locked(sc, 0); 4120 else 4121 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4122 } 4123 if (mask & IFCAP_VLAN_HWTAGGING) 4124 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4125 mtx_unlock(&sc->driver_mtx); 4126 VLAN_CAPABILITIES(ifp); 4127 4128 break; 4129 4130 case SIOCGIFMEDIA: 4131 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4132 &sc->media, command); 4133 break; 4134 4135 default: 4136 err = ENOTTY; 4137 } 4138 return err; 4139} 4140 4141static void 4142mxge_fetch_tunables(mxge_softc_t *sc) 4143{ 4144 4145 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4146 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4147 &mxge_flow_control); 4148 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4149 &mxge_intr_coal_delay); 
4150 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4151 &mxge_nvidia_ecrc_enable); 4152 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4153 &mxge_force_firmware); 4154 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4155 &mxge_deassert_wait); 4156 TUNABLE_INT_FETCH("hw.mxge.verbose", 4157 &mxge_verbose); 4158 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4159 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4160 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4161 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4162 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4163 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4164 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4165 if (sc->lro_cnt != 0) 4166 mxge_lro_cnt = sc->lro_cnt; 4167 4168 if (bootverbose) 4169 mxge_verbose = 1; 4170 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4171 mxge_intr_coal_delay = 30; 4172 if (mxge_ticks == 0) 4173 mxge_ticks = hz / 2; 4174 sc->pause = mxge_flow_control; 4175 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4176 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4177 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4178 } 4179 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4180 mxge_initial_mtu < ETHER_MIN_LEN) 4181 mxge_initial_mtu = ETHERMTU_JUMBO; 4182 4183 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4184 mxge_throttle = MXGE_MAX_THROTTLE; 4185 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4186 mxge_throttle = MXGE_MIN_THROTTLE; 4187 sc->throttle = mxge_throttle; 4188} 4189 4190 4191static void 4192mxge_free_slices(mxge_softc_t *sc) 4193{ 4194 struct mxge_slice_state *ss; 4195 int i; 4196 4197 4198 if (sc->ss == NULL) 4199 return; 4200 4201 for (i = 0; i < sc->num_slices; i++) { 4202 ss = &sc->ss[i]; 4203 if (ss->fw_stats != NULL) { 4204 mxge_dma_free(&ss->fw_stats_dma); 4205 ss->fw_stats = NULL; 4206#ifdef IFNET_BUF_RING 4207 if (ss->tx.br != NULL) { 4208 
drbr_free(ss->tx.br, M_DEVBUF); 4209 ss->tx.br = NULL; 4210 } 4211#endif 4212 mtx_destroy(&ss->tx.mtx); 4213 } 4214 if (ss->rx_done.entry != NULL) { 4215 mxge_dma_free(&ss->rx_done.dma); 4216 ss->rx_done.entry = NULL; 4217 } 4218 } 4219 free(sc->ss, M_DEVBUF); 4220 sc->ss = NULL; 4221} 4222 4223static int 4224mxge_alloc_slices(mxge_softc_t *sc) 4225{ 4226 mxge_cmd_t cmd; 4227 struct mxge_slice_state *ss; 4228 size_t bytes; 4229 int err, i, max_intr_slots; 4230 4231 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4232 if (err != 0) { 4233 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4234 return err; 4235 } 4236 sc->rx_ring_size = cmd.data0; 4237 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4238 4239 bytes = sizeof (*sc->ss) * sc->num_slices; 4240 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4241 if (sc->ss == NULL) 4242 return (ENOMEM); 4243 for (i = 0; i < sc->num_slices; i++) { 4244 ss = &sc->ss[i]; 4245 4246 ss->sc = sc; 4247 4248 /* allocate per-slice rx interrupt queues */ 4249 4250 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4251 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4252 if (err != 0) 4253 goto abort; 4254 ss->rx_done.entry = ss->rx_done.dma.addr; 4255 bzero(ss->rx_done.entry, bytes); 4256 4257 /* 4258 * allocate the per-slice firmware stats; stats 4259 * (including tx) are used used only on the first 4260 * slice for now 4261 */ 4262#ifndef IFNET_BUF_RING 4263 if (i > 0) 4264 continue; 4265#endif 4266 4267 bytes = sizeof (*ss->fw_stats); 4268 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4269 sizeof (*ss->fw_stats), 64); 4270 if (err != 0) 4271 goto abort; 4272 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4273 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4274 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4275 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4276#ifdef IFNET_BUF_RING 4277 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4278 
&ss->tx.mtx); 4279#endif 4280 } 4281 4282 return (0); 4283 4284abort: 4285 mxge_free_slices(sc); 4286 return (ENOMEM); 4287} 4288 4289static void 4290mxge_slice_probe(mxge_softc_t *sc) 4291{ 4292 mxge_cmd_t cmd; 4293 char *old_fw; 4294 int msix_cnt, status, max_intr_slots; 4295 4296 sc->num_slices = 1; 4297 /* 4298 * don't enable multiple slices if they are not enabled, 4299 * or if this is not an SMP system 4300 */ 4301 4302 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4303 return; 4304 4305 /* see how many MSI-X interrupts are available */ 4306 msix_cnt = pci_msix_count(sc->dev); 4307 if (msix_cnt < 2) 4308 return; 4309 4310 /* now load the slice aware firmware see what it supports */ 4311 old_fw = sc->fw_name; 4312 if (old_fw == mxge_fw_aligned) 4313 sc->fw_name = mxge_fw_rss_aligned; 4314 else 4315 sc->fw_name = mxge_fw_rss_unaligned; 4316 status = mxge_load_firmware(sc, 0); 4317 if (status != 0) { 4318 device_printf(sc->dev, "Falling back to a single slice\n"); 4319 return; 4320 } 4321 4322 /* try to send a reset command to the card to see if it 4323 is alive */ 4324 memset(&cmd, 0, sizeof (cmd)); 4325 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4326 if (status != 0) { 4327 device_printf(sc->dev, "failed reset\n"); 4328 goto abort_with_fw; 4329 } 4330 4331 /* get rx ring size */ 4332 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4333 if (status != 0) { 4334 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4335 goto abort_with_fw; 4336 } 4337 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4338 4339 /* tell it the size of the interrupt queues */ 4340 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4341 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4342 if (status != 0) { 4343 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4344 goto abort_with_fw; 4345 } 4346 4347 /* ask the maximum number of slices it supports */ 4348 status = mxge_send_cmd(sc, 
MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4349 if (status != 0) { 4350 device_printf(sc->dev, 4351 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4352 goto abort_with_fw; 4353 } 4354 sc->num_slices = cmd.data0; 4355 if (sc->num_slices > msix_cnt) 4356 sc->num_slices = msix_cnt; 4357 4358 if (mxge_max_slices == -1) { 4359 /* cap to number of CPUs in system */ 4360 if (sc->num_slices > mp_ncpus) 4361 sc->num_slices = mp_ncpus; 4362 } else { 4363 if (sc->num_slices > mxge_max_slices) 4364 sc->num_slices = mxge_max_slices; 4365 } 4366 /* make sure it is a power of two */ 4367 while (sc->num_slices & (sc->num_slices - 1)) 4368 sc->num_slices--; 4369 4370 if (mxge_verbose) 4371 device_printf(sc->dev, "using %d slices\n", 4372 sc->num_slices); 4373 4374 return; 4375 4376abort_with_fw: 4377 sc->fw_name = old_fw; 4378 (void) mxge_load_firmware(sc, 0); 4379} 4380 4381static int 4382mxge_add_msix_irqs(mxge_softc_t *sc) 4383{ 4384 size_t bytes; 4385 int count, err, i, rid; 4386 4387 rid = PCIR_BAR(2); 4388 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4389 &rid, RF_ACTIVE); 4390 4391 if (sc->msix_table_res == NULL) { 4392 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4393 return ENXIO; 4394 } 4395 4396 count = sc->num_slices; 4397 err = pci_alloc_msix(sc->dev, &count); 4398 if (err != 0) { 4399 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4400 "err = %d \n", sc->num_slices, err); 4401 goto abort_with_msix_table; 4402 } 4403 if (count < sc->num_slices) { 4404 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4405 count, sc->num_slices); 4406 device_printf(sc->dev, 4407 "Try setting hw.mxge.max_slices to %d\n", 4408 count); 4409 err = ENOSPC; 4410 goto abort_with_msix; 4411 } 4412 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4413 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4414 if (sc->msix_irq_res == NULL) { 4415 err = ENOMEM; 4416 goto abort_with_msix; 4417 } 4418 4419 for (i = 0; i < sc->num_slices; 
i++) { 4420 rid = i + 1; 4421 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4422 SYS_RES_IRQ, 4423 &rid, RF_ACTIVE); 4424 if (sc->msix_irq_res[i] == NULL) { 4425 device_printf(sc->dev, "couldn't allocate IRQ res" 4426 " for message %d\n", i); 4427 err = ENXIO; 4428 goto abort_with_res; 4429 } 4430 } 4431 4432 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4433 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4434 4435 for (i = 0; i < sc->num_slices; i++) { 4436 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4437 INTR_TYPE_NET | INTR_MPSAFE, 4438#if __FreeBSD_version > 700030 4439 NULL, 4440#endif 4441 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4442 if (err != 0) { 4443 device_printf(sc->dev, "couldn't setup intr for " 4444 "message %d\n", i); 4445 goto abort_with_intr; 4446 } 4447 } 4448 4449 if (mxge_verbose) { 4450 device_printf(sc->dev, "using %d msix IRQs:", 4451 sc->num_slices); 4452 for (i = 0; i < sc->num_slices; i++) 4453 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4454 printf("\n"); 4455 } 4456 return (0); 4457 4458abort_with_intr: 4459 for (i = 0; i < sc->num_slices; i++) { 4460 if (sc->msix_ih[i] != NULL) { 4461 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4462 sc->msix_ih[i]); 4463 sc->msix_ih[i] = NULL; 4464 } 4465 } 4466 free(sc->msix_ih, M_DEVBUF); 4467 4468 4469abort_with_res: 4470 for (i = 0; i < sc->num_slices; i++) { 4471 rid = i + 1; 4472 if (sc->msix_irq_res[i] != NULL) 4473 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4474 sc->msix_irq_res[i]); 4475 sc->msix_irq_res[i] = NULL; 4476 } 4477 free(sc->msix_irq_res, M_DEVBUF); 4478 4479 4480abort_with_msix: 4481 pci_release_msi(sc->dev); 4482 4483abort_with_msix_table: 4484 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4485 sc->msix_table_res); 4486 4487 return err; 4488} 4489 4490static int 4491mxge_add_single_irq(mxge_softc_t *sc) 4492{ 4493 int count, err, rid; 4494 4495 count = pci_msi_count(sc->dev); 4496 if (count == 1 && pci_alloc_msi(sc->dev, 
&count) == 0) { 4497 rid = 1; 4498 } else { 4499 rid = 0; 4500 sc->legacy_irq = 1; 4501 } 4502 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4503 1, RF_SHAREABLE | RF_ACTIVE); 4504 if (sc->irq_res == NULL) { 4505 device_printf(sc->dev, "could not alloc interrupt\n"); 4506 return ENXIO; 4507 } 4508 if (mxge_verbose) 4509 device_printf(sc->dev, "using %s irq %ld\n", 4510 sc->legacy_irq ? "INTx" : "MSI", 4511 rman_get_start(sc->irq_res)); 4512 err = bus_setup_intr(sc->dev, sc->irq_res, 4513 INTR_TYPE_NET | INTR_MPSAFE, 4514#if __FreeBSD_version > 700030 4515 NULL, 4516#endif 4517 mxge_intr, &sc->ss[0], &sc->ih); 4518 if (err != 0) { 4519 bus_release_resource(sc->dev, SYS_RES_IRQ, 4520 sc->legacy_irq ? 0 : 1, sc->irq_res); 4521 if (!sc->legacy_irq) 4522 pci_release_msi(sc->dev); 4523 } 4524 return err; 4525} 4526 4527static void 4528mxge_rem_msix_irqs(mxge_softc_t *sc) 4529{ 4530 int i, rid; 4531 4532 for (i = 0; i < sc->num_slices; i++) { 4533 if (sc->msix_ih[i] != NULL) { 4534 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4535 sc->msix_ih[i]); 4536 sc->msix_ih[i] = NULL; 4537 } 4538 } 4539 free(sc->msix_ih, M_DEVBUF); 4540 4541 for (i = 0; i < sc->num_slices; i++) { 4542 rid = i + 1; 4543 if (sc->msix_irq_res[i] != NULL) 4544 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4545 sc->msix_irq_res[i]); 4546 sc->msix_irq_res[i] = NULL; 4547 } 4548 free(sc->msix_irq_res, M_DEVBUF); 4549 4550 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4551 sc->msix_table_res); 4552 4553 pci_release_msi(sc->dev); 4554 return; 4555} 4556 4557static void 4558mxge_rem_single_irq(mxge_softc_t *sc) 4559{ 4560 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4561 bus_release_resource(sc->dev, SYS_RES_IRQ, 4562 sc->legacy_irq ? 
0 : 1, sc->irq_res); 4563 if (!sc->legacy_irq) 4564 pci_release_msi(sc->dev); 4565} 4566 4567static void 4568mxge_rem_irq(mxge_softc_t *sc) 4569{ 4570 if (sc->num_slices > 1) 4571 mxge_rem_msix_irqs(sc); 4572 else 4573 mxge_rem_single_irq(sc); 4574} 4575 4576static int 4577mxge_add_irq(mxge_softc_t *sc) 4578{ 4579 int err; 4580 4581 if (sc->num_slices > 1) 4582 err = mxge_add_msix_irqs(sc); 4583 else 4584 err = mxge_add_single_irq(sc); 4585 4586 if (0 && err == 0 && sc->num_slices > 1) { 4587 mxge_rem_msix_irqs(sc); 4588 err = mxge_add_msix_irqs(sc); 4589 } 4590 return err; 4591} 4592 4593 4594static int 4595mxge_attach(device_t dev) 4596{ 4597 mxge_softc_t *sc = device_get_softc(dev); 4598 struct ifnet *ifp; 4599 int err, rid; 4600 4601 sc->dev = dev; 4602 mxge_fetch_tunables(sc); 4603 4604 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4605 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK, 4606 taskqueue_thread_enqueue, 4607 &sc->tq); 4608 if (sc->tq == NULL) { 4609 err = ENOMEM; 4610 goto abort_with_nothing; 4611 } 4612 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4613 device_get_nameunit(sc->dev)); 4614 4615 err = bus_dma_tag_create(NULL, /* parent */ 4616 1, /* alignment */ 4617 0, /* boundary */ 4618 BUS_SPACE_MAXADDR, /* low */ 4619 BUS_SPACE_MAXADDR, /* high */ 4620 NULL, NULL, /* filter */ 4621 65536 + 256, /* maxsize */ 4622 MXGE_MAX_SEND_DESC, /* num segs */ 4623 65536, /* maxsegsize */ 4624 0, /* flags */ 4625 NULL, NULL, /* lock */ 4626 &sc->parent_dmat); /* tag */ 4627 4628 if (err != 0) { 4629 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4630 err); 4631 goto abort_with_tq; 4632 } 4633 4634 ifp = sc->ifp = if_alloc(IFT_ETHER); 4635 if (ifp == NULL) { 4636 device_printf(dev, "can not if_alloc()\n"); 4637 err = ENOSPC; 4638 goto abort_with_parent_dmat; 4639 } 4640 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4641 4642 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4643 
device_get_nameunit(dev)); 4644 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4645 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4646 "%s:drv", device_get_nameunit(dev)); 4647 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4648 MTX_NETWORK_LOCK, MTX_DEF); 4649 4650 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4651 4652 mxge_setup_cfg_space(sc); 4653 4654 /* Map the board into the kernel */ 4655 rid = PCIR_BARS; 4656 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4657 ~0, 1, RF_ACTIVE); 4658 if (sc->mem_res == NULL) { 4659 device_printf(dev, "could not map memory\n"); 4660 err = ENXIO; 4661 goto abort_with_lock; 4662 } 4663 sc->sram = rman_get_virtual(sc->mem_res); 4664 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4665 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4666 device_printf(dev, "impossible memory region size %ld\n", 4667 rman_get_size(sc->mem_res)); 4668 err = ENXIO; 4669 goto abort_with_mem_res; 4670 } 4671 4672 /* make NULL terminated copy of the EEPROM strings section of 4673 lanai SRAM */ 4674 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4675 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4676 rman_get_bushandle(sc->mem_res), 4677 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4678 sc->eeprom_strings, 4679 MXGE_EEPROM_STRINGS_SIZE - 2); 4680 err = mxge_parse_strings(sc); 4681 if (err != 0) 4682 goto abort_with_mem_res; 4683 4684 /* Enable write combining for efficient use of PCIe bus */ 4685 mxge_enable_wc(sc); 4686 4687 /* Allocate the out of band dma memory */ 4688 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4689 sizeof (mxge_cmd_t), 64); 4690 if (err != 0) 4691 goto abort_with_mem_res; 4692 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4693 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4694 if (err != 0) 4695 goto abort_with_cmd_dma; 4696 4697 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4698 if (err != 0) 4699 goto abort_with_zeropad_dma; 4700 4701 /* 
select & load the firmware */ 4702 err = mxge_select_firmware(sc); 4703 if (err != 0) 4704 goto abort_with_dmabench; 4705 sc->intr_coal_delay = mxge_intr_coal_delay; 4706 4707 mxge_slice_probe(sc); 4708 err = mxge_alloc_slices(sc); 4709 if (err != 0) 4710 goto abort_with_dmabench; 4711 4712 err = mxge_reset(sc, 0); 4713 if (err != 0) 4714 goto abort_with_slices; 4715 4716 err = mxge_alloc_rings(sc); 4717 if (err != 0) { 4718 device_printf(sc->dev, "failed to allocate rings\n"); 4719 goto abort_with_dmabench; 4720 } 4721 4722 err = mxge_add_irq(sc); 4723 if (err != 0) { 4724 device_printf(sc->dev, "failed to add irq\n"); 4725 goto abort_with_rings; 4726 } 4727 4728 ifp->if_baudrate = IF_Gbps(10UL); 4729 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4730 IFCAP_VLAN_MTU; 4731#ifdef INET 4732 ifp->if_capabilities |= IFCAP_LRO; 4733#endif 4734 4735#ifdef MXGE_NEW_VLAN_API 4736 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4737#endif 4738 4739 sc->max_mtu = mxge_max_mtu(sc); 4740 if (sc->max_mtu >= 9000) 4741 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4742 else 4743 device_printf(dev, "MTU limited to %d. 
Install " 4744 "latest firmware for 9000 byte jumbo support\n", 4745 sc->max_mtu - ETHER_HDR_LEN); 4746 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4747 ifp->if_capenable = ifp->if_capabilities; 4748 if (sc->lro_cnt == 0) 4749 ifp->if_capenable &= ~IFCAP_LRO; 4750 sc->csum_flag = 1; 4751 ifp->if_init = mxge_init; 4752 ifp->if_softc = sc; 4753 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4754 ifp->if_ioctl = mxge_ioctl; 4755 ifp->if_start = mxge_start; 4756 /* Initialise the ifmedia structure */ 4757 ifmedia_init(&sc->media, 0, mxge_media_change, 4758 mxge_media_status); 4759 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4760 mxge_media_probe(sc); 4761 sc->dying = 0; 4762 ether_ifattach(ifp, sc->mac_addr); 4763 /* ether_ifattach sets mtu to ETHERMTU */ 4764 if (mxge_initial_mtu != ETHERMTU) 4765 mxge_change_mtu(sc, mxge_initial_mtu); 4766 4767 mxge_add_sysctls(sc); 4768#ifdef IFNET_BUF_RING 4769 ifp->if_transmit = mxge_transmit; 4770 ifp->if_qflush = mxge_qflush; 4771#endif 4772 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4773 return 0; 4774 4775abort_with_rings: 4776 mxge_free_rings(sc); 4777abort_with_slices: 4778 mxge_free_slices(sc); 4779abort_with_dmabench: 4780 mxge_dma_free(&sc->dmabench_dma); 4781abort_with_zeropad_dma: 4782 mxge_dma_free(&sc->zeropad_dma); 4783abort_with_cmd_dma: 4784 mxge_dma_free(&sc->cmd_dma); 4785abort_with_mem_res: 4786 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4787abort_with_lock: 4788 pci_disable_busmaster(dev); 4789 mtx_destroy(&sc->cmd_mtx); 4790 mtx_destroy(&sc->driver_mtx); 4791 if_free(ifp); 4792abort_with_parent_dmat: 4793 bus_dma_tag_destroy(sc->parent_dmat); 4794abort_with_tq: 4795 if (sc->tq != NULL) { 4796 taskqueue_drain(sc->tq, &sc->watchdog_task); 4797 taskqueue_free(sc->tq); 4798 sc->tq = NULL; 4799 } 4800abort_with_nothing: 4801 return err; 4802} 4803 4804static int 4805mxge_detach(device_t dev) 4806{ 4807 mxge_softc_t *sc = device_get_softc(dev); 4808 4809 if 
(mxge_vlans_active(sc)) { 4810 device_printf(sc->dev, 4811 "Detach vlans before removing module\n"); 4812 return EBUSY; 4813 } 4814 mtx_lock(&sc->driver_mtx); 4815 sc->dying = 1; 4816 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4817 mxge_close(sc, 0); 4818 mtx_unlock(&sc->driver_mtx); 4819 ether_ifdetach(sc->ifp); 4820 if (sc->tq != NULL) { 4821 taskqueue_drain(sc->tq, &sc->watchdog_task); 4822 taskqueue_free(sc->tq); 4823 sc->tq = NULL; 4824 } 4825 callout_drain(&sc->co_hdl); 4826 ifmedia_removeall(&sc->media); 4827 mxge_dummy_rdma(sc, 0); 4828 mxge_rem_sysctls(sc); 4829 mxge_rem_irq(sc); 4830 mxge_free_rings(sc); 4831 mxge_free_slices(sc); 4832 mxge_dma_free(&sc->dmabench_dma); 4833 mxge_dma_free(&sc->zeropad_dma); 4834 mxge_dma_free(&sc->cmd_dma); 4835 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4836 pci_disable_busmaster(dev); 4837 mtx_destroy(&sc->cmd_mtx); 4838 mtx_destroy(&sc->driver_mtx); 4839 if_free(sc->ifp); 4840 bus_dma_tag_destroy(sc->parent_dmat); 4841 return 0; 4842} 4843 4844static int 4845mxge_shutdown(device_t dev) 4846{ 4847 return 0; 4848} 4849 4850/* 4851 This file uses Myri10GE driver indentation. 4852 4853 Local Variables: 4854 c-file-style:"linux" 4855 tab-width:8 4856 End: 4857*/
| 2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 /* give it to the nic */ 2257 mxge_encap(ss, m); 2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2259 return (err); 2260 } 2261 if (!drbr_empty(ifp, tx->br)) 2262 mxge_start_locked(ss); 2263 return (0); 2264} 2265 2266static int 2267mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2268{ 2269 mxge_softc_t *sc = ifp->if_softc; 2270 struct mxge_slice_state *ss; 2271 mxge_tx_ring_t *tx; 2272 int err = 0; 2273 int slice; 2274 2275 slice = m->m_pkthdr.flowid; 2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2277 2278 ss = &sc->ss[slice]; 2279 tx = &ss->tx; 2280 2281 if (mtx_trylock(&tx->mtx)) { 2282 err = mxge_transmit_locked(ss, m); 2283 mtx_unlock(&tx->mtx); 2284 } else { 2285 err = drbr_enqueue(ifp, tx->br, m); 2286 } 2287 2288 return (err); 2289} 2290 2291#else 2292 2293static inline void 2294mxge_start_locked(struct mxge_slice_state *ss) 2295{ 2296 mxge_softc_t *sc; 2297 struct mbuf *m; 2298 struct ifnet *ifp; 2299 mxge_tx_ring_t *tx; 2300 2301 sc = ss->sc; 2302 ifp = sc->ifp; 2303 tx = &ss->tx; 2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2306 if (m == NULL) { 2307 return; 2308 } 2309 /* let BPF see it */ 2310 BPF_MTAP(ifp, m); 2311 2312 /* give it to the nic */ 2313 mxge_encap(ss, m); 2314 } 2315 /* ran out of transmit slots */ 2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2318 tx->stall++; 2319 } 2320} 2321#endif 2322static void 2323mxge_start(struct ifnet *ifp) 2324{ 2325 mxge_softc_t *sc = ifp->if_softc; 2326 struct mxge_slice_state *ss; 2327 2328 /* only use the first slice for now */ 2329 ss = &sc->ss[0]; 2330 mtx_lock(&ss->tx.mtx); 2331 mxge_start_locked(ss); 2332 mtx_unlock(&ss->tx.mtx); 2333} 2334 2335/* 2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. 
 Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	/* poison the first low address so the NIC ignores the entry
	   until the whole 8-entry burst has landed */
	low = src->addr_low;
	src->addr_low = 0xffffffff;
	mxge_pio_copy(dst, src, 4 * sizeof (*src));
	wmb();
	mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src));
	wmb();
	/* now make the first entry valid; write barriers enforce the
	   burst-before-validate ordering */
	src->addr_low = low;
	dst->addr_low = low;
	wmb();
}

/*
 * Allocate and DMA-map a replacement small-ring mbuf for slot idx.
 * On failure the old buffer is left in place (caller recycles it);
 * either way, every 8th slot triggers a burst submit to the NIC.
 */
static int
mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* submit in groups of 8 to amortize PIO cost; runs even on
	   the error path so earlier slots in the group are posted */
	if ((idx & 7) == 7)
		mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]);
	return err;
}

/*
 * Allocate and DMA-map a replacement big-ring cluster for slot idx.
 * With MXGE_VIRT_JUMBOS a jumbo may span up to 3 segments, each of
 * which gets its own shadow ring entry.
 */
static int
mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg[3];
	struct mbuf *m;
	mxge_rx_ring_t *rx = &ss->rx_big;
	int cnt, err, i;

	if (rx->cl_size == MCLBYTES)
		m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
	else
		m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = rx->mlen;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));

#if MXGE_VIRT_JUMBOS
	for (i = 1; i < cnt; i++) {
		rx->shadow[idx + i].addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr));
		rx->shadow[idx + i].addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr));
	}
#endif

done:
	/* walk the nbufs slots this packet consumed, submitting any
	   completed group of 8 along the way */
	for (i = 0; i < rx->nbufs; i++) {
		if ((idx & 7) == 7) {
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		}
		idx++;
	}
	return err;
}

/*
 * Myri10GE hardware checksums are not valid if the sender
 * padded the frame with non-zero padding.  This is because
 * the firmware just does a simple 16-bit 1s complement
 * checksum across the entire frame, excluding the first 14
 * bytes.
 It is best to simply check the checksum and
 * tell the stack about it only if the checksum is good
 */

/*
 * Validate the firmware's raw frame checksum for IPv4 TCP/UDP.
 * Returns 0 when the checksum verifies; non-zero means "unknown" and
 * the caller must leave checksum offload flags unset.
 */
static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;
	uint16_t c;

	eh = mtod(m, struct ether_header *);

	/* only deal with IPv4 TCP & UDP for now */
	if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
		return 1;
	ip = (struct ip *)(eh + 1);
	if (__predict_false(ip->ip_p != IPPROTO_TCP &&
			    ip->ip_p != IPPROTO_UDP))
		return 1;
#ifdef INET
	/* fold the pseudo-header into the firmware's whole-frame sum;
	   a correct packet yields 0xffff, so c becomes 0 below */
	c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		      htonl(ntohs(csum) + ntohs(ip->ip_len) +
			    - (ip->ip_hl << 2) + ip->ip_p));
#else
	c = 1;
#endif
	c ^= 0xffff;
	return (c);
}

/*
 * Strip an 802.1q tag from the frame, saving the tag in the mbuf
 * packet header and adjusting the firmware's partial checksum to
 * exclude the 4 encapsulation bytes being removed.
 */
static void
mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)
{
	struct ether_vlan_header *evl;
	struct ether_header *eh;
	uint32_t partial;

	evl = mtod(m, struct ether_vlan_header *);
	eh = mtod(m, struct ether_header *);

	/*
	 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes
	 * after what the firmware thought was the end of the ethernet
	 * header.
	 */

	/* put checksum into host byte order */
	*csum = ntohs(*csum);
	/* subtract the 32-bit VLAN encapsulation word via 1s-complement
	   arithmetic (add its complement, then fold the carries) */
	partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN));
	(*csum) += ~partial;
	(*csum) += ((*csum) < ~partial);	/* end-around carry */
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);
	(*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF);

	/* restore checksum to network byte order;
	   later consumers expect this */
	*csum = htons(*csum);

	/* save the tag */
#ifdef MXGE_NEW_VLAN_API
	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
#else
	{
		struct m_tag *mtag;
		mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int),
				   M_NOWAIT);
		/* on allocation failure the frame is delivered untagged */
		if (mtag == NULL)
			return;
		VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag);
		m_tag_prepend(m, mtag);
	}

#endif
	m->m_flags |= M_VLANTAG;

	/*
	 * Remove the 802.1q header by copying the Ethernet
	 * addresses over it and adjusting the beginning of
	 * the data in the mbuf.  The encapsulated Ethernet
	 * type field is already in place.
	 */
	bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN,
	      ETHER_HDR_LEN - ETHER_TYPE_LEN);
	m_adj(m, ETHER_VLAN_ENCAP_LEN);
}


/*
 * Deliver one completed receive from the big (jumbo) ring: replace
 * the buffer, strip VLAN if present, validate checksum, optionally
 * hand the frame to LRO, otherwise pass it up the stack.
 */
static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct mbuf *m;
	struct ether_header *eh;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_big;
	idx = rx->cnt & rx->mask;
	/* a jumbo frame may have consumed several ring buffers */
	rx->cnt += rx->nbufs;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_big(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		/* LRO consumed the frame; nothing more to do */
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame which
		   we could not do LRO on.
 Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Deliver one completed receive from the small ring.  Same flow as
 * mxge_rx_done_big(), but each packet consumes exactly one buffer.
 */
static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
{
	mxge_softc_t *sc;
	struct ifnet *ifp;
	struct ether_header *eh;
	struct mbuf *m;
	mxge_rx_ring_t *rx;
	bus_dmamap_t old_map;
	int idx;
	uint16_t tcpudp_csum;

	sc = ss->sc;
	ifp = sc->ifp;
	rx = &ss->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(ss, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ss->ipackets++;
	eh = mtod(m, struct ether_header *);
	if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
		mxge_vlan_tag_remove(m, &csum);
	}
	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
		if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
			return;
		/* otherwise, it was a UDP frame, or a TCP frame
		   which we could not do LRO on.  Tell the stack that the
		   checksum is good */
		m->m_pkthdr.csum_data = 0xffff;
		m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
	}
	/* flowid only valid if RSS hashing is enabled */
	if (sc->num_slices > 1) {
		m->m_pkthdr.flowid = (ss - sc->ss);
		m->m_flags |= M_FLOWID;
	}
	/* pass the frame up the stack */
	(*ifp->if_input)(ifp, m);
}

/*
 * Drain this slice's receive-completion ring, dispatching each entry
 * to the small or big handler by length, then flush any active LRO
 * sessions so partially-merged segments are not held indefinitely.
 */
static inline void
mxge_clean_rx_done(struct mxge_slice_state *ss)
{
	mxge_rx_done_t *rx_done = &ss->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* zero the length to hand the entry back to the NIC */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = rx_done->entry[rx_done->idx].checksum;
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(ss, length, checksum);
		else
			mxge_rx_done_big(ss, length, checksum);
		rx_done->cnt++;
		rx_done->idx = rx_done->cnt & rx_done->mask;

		/* limit potential for livelock */
		if (__predict_false(++limit > rx_done->mask / 2))
			break;
	}
#ifdef INET
	while (!SLIST_EMPTY(&ss->lro_active)) {
		struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
		SLIST_REMOVE_HEAD(&ss->lro_active, next);
		mxge_lro_flush(ss, lro);
	}
#endif
}


/*
 * Reclaim transmit descriptors up to the firmware's completion index
 * mcp_idx: unload DMA maps, free mbufs, update byte/packet counters,
 * and restart transmission if the ring was stalled.
 */
static inline void
mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx)
{
	struct ifnet *ifp;
	mxge_tx_ring_t *tx;
	struct mbuf *m;
	bus_dmamap_t map;
	int idx;
	int *flags;

	tx = &ss->tx;
	ifp = ss->sc->ifp;
	while (tx->pkt_done != mcp_idx) {
		idx = tx->done & tx->mask;
		tx->done++;
		m = tx->info[idx].m;
		/* mbuf and DMA map only attached to the first
		   segment per-mbuf */
		if (m != NULL) {
			ss->obytes += m->m_pkthdr.len;
			if (m->m_flags & M_MCAST)
				ss->omcasts++;
			ss->opackets++;
			tx->info[idx].m = NULL;
			map = tx->info[idx].map;
			bus_dmamap_unload(tx->dmat, map);
			m_freem(m);
		}
		/* flag marks the last descriptor of a packet; each one
		   completed advances the packet-done counter */
		if (tx->info[idx].flag) {
			tx->info[idx].flag = 0;
			tx->pkt_done++;
		}
	}

	/* If we have space, clear IFF_OACTIVE to tell the stack that
	   its OK to send packets */
#ifdef IFNET_BUF_RING
	flags = &ss->if_drv_flags;
#else
	flags = &ifp->if_drv_flags;
#endif
	mtx_lock(&ss->tx.mtx);
	/* wake the stack only once a quarter of the ring is free, to
	   avoid thrashing on the stall/wake boundary */
	if ((*flags) & IFF_DRV_OACTIVE &&
	    tx->req - tx->done < (tx->mask + 1)/4) {
		*(flags) &= ~IFF_DRV_OACTIVE;
		ss->tx.wake++;
		mxge_start_locked(ss);
	}
#ifdef IFNET_BUF_RING
	if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) {
		/* let the NIC stop polling this queue, since there
		 * are no more transmits pending */
		if (tx->req == tx->done) {
			*tx->send_stop = 1;
			tx->queue_active = 0;
			tx->deactivate++;
			wmb();
		}
	}
#endif
	mtx_unlock(&ss->tx.mtx);

}

/* XFP module compliance bitmasks -> ifmedia types (byte read over I2C) */
static struct mxge_media_type mxge_xfp_media_types[] =
{
	{IFM_10G_CX4,	0x7f,		"10GBASE-CX4 (module)"},
	{IFM_10G_SR,	(1 << 7),	"10GBASE-SR"},
	{IFM_10G_LR,	(1 << 6),	"10GBASE-LR"},
	{0,		(1 << 5),	"10GBASE-ER"},
	{IFM_10G_LRM,	(1 << 4),	"10GBASE-LRM"},
	{0,		(1 << 3),	"10GBASE-SW"},
	{0,		(1 << 2),	"10GBASE-LW"},
	{0,		(1 << 1),	"10GBASE-EW"},
	{0,		(1 << 0),	"Reserved"}
};
/* SFP+ module compliance bitmasks -> ifmedia types */
static struct mxge_media_type mxge_sfp_media_types[] =
{
	{IFM_10G_TWINAX,      0,	"10GBASE-Twinax"},
	{0,		(1 << 7),	"Reserved"},
	{IFM_10G_LRM,	(1 << 6),	"10GBASE-LRM"},
	{IFM_10G_LR,	(1 << 5),	"10GBASE-LR"},
	{IFM_10G_SR,	(1 << 4),	"10GBASE-SR"}
};

/* Record the detected media type and publish it via ifmedia */
static void
mxge_set_media(mxge_softc_t *sc, int type)
{
	sc->media_flags |= type;
	ifmedia_add(&sc->media, sc->media_flags, 0, NULL);
	ifmedia_set(&sc->media, sc->media_flags);
}


/*
 * Determine the media type for a NIC.
Some XFPs will identify 2795 * themselves only when their link is up, so this is initiated via a 2796 * link up interrupt. However, this can potentially take up to 2797 * several milliseconds, so it is run via the watchdog routine, rather 2798 * than in the interrupt handler itself. This need only be done 2799 * once, not each time the link is up. 2800 */ 2801static void 2802mxge_media_probe(mxge_softc_t *sc) 2803{ 2804 mxge_cmd_t cmd; 2805 char *cage_type; 2806 char *ptr; 2807 struct mxge_media_type *mxge_media_types = NULL; 2808 int i, err, ms, mxge_media_type_entries; 2809 uint32_t byte; 2810 2811 sc->need_media_probe = 0; 2812 2813 /* if we've already set a media type, we're done */ 2814 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2815 return; 2816 2817 /* 2818 * parse the product code to deterimine the interface type 2819 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2820 * after the 3rd dash in the driver's cached copy of the 2821 * EEPROM's product code string. 2822 */ 2823 ptr = sc->product_code_string; 2824 if (ptr == NULL) { 2825 device_printf(sc->dev, "Missing product code\n"); 2826 } 2827 2828 for (i = 0; i < 3; i++, ptr++) { 2829 ptr = index(ptr, '-'); 2830 if (ptr == NULL) { 2831 device_printf(sc->dev, 2832 "only %d dashes in PC?!?\n", i); 2833 return; 2834 } 2835 } 2836 if (*ptr == 'C') { 2837 /* -C is CX4 */ 2838 mxge_set_media(sc, IFM_10G_CX4); 2839 return; 2840 } 2841 else if (*ptr == 'Q') { 2842 /* -Q is Quad Ribbon Fiber */ 2843 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2844 /* FreeBSD has no media type for Quad ribbon fiber */ 2845 return; 2846 } 2847 2848 if (*ptr == 'R') { 2849 /* -R is XFP */ 2850 mxge_media_types = mxge_xfp_media_types; 2851 mxge_media_type_entries = 2852 sizeof (mxge_xfp_media_types) / 2853 sizeof (mxge_xfp_media_types[0]); 2854 byte = MXGE_XFP_COMPLIANCE_BYTE; 2855 cage_type = "XFP"; 2856 } 2857 2858 if (*ptr == 'S' || *(ptr +1) == 'S') { 2859 /* -S or -2S is SFP+ */ 2860 mxge_media_types = 
mxge_sfp_media_types; 2861 mxge_media_type_entries = 2862 sizeof (mxge_sfp_media_types) / 2863 sizeof (mxge_sfp_media_types[0]); 2864 cage_type = "SFP+"; 2865 byte = 3; 2866 } 2867 2868 if (mxge_media_types == NULL) { 2869 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2870 return; 2871 } 2872 2873 /* 2874 * At this point we know the NIC has an XFP cage, so now we 2875 * try to determine what is in the cage by using the 2876 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2877 * register. We read just one byte, which may take over 2878 * a millisecond 2879 */ 2880 2881 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2882 cmd.data1 = byte; 2883 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2884 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2885 device_printf(sc->dev, "failed to read XFP\n"); 2886 } 2887 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2888 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2889 } 2890 if (err != MXGEFW_CMD_OK) { 2891 return; 2892 } 2893 2894 /* now we wait for the data to be cached */ 2895 cmd.data0 = byte; 2896 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2897 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2898 DELAY(1000); 2899 cmd.data0 = byte; 2900 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2901 } 2902 if (err != MXGEFW_CMD_OK) { 2903 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2904 cage_type, err, ms); 2905 return; 2906 } 2907 2908 if (cmd.data0 == mxge_media_types[0].bitmask) { 2909 if (mxge_verbose) 2910 device_printf(sc->dev, "%s:%s\n", cage_type, 2911 mxge_media_types[0].name); 2912 mxge_set_media(sc, mxge_media_types[0].flag); 2913 return; 2914 } 2915 for (i = 1; i < mxge_media_type_entries; i++) { 2916 if (cmd.data0 & mxge_media_types[i].bitmask) { 2917 if (mxge_verbose) 2918 device_printf(sc->dev, "%s:%s\n", 2919 cage_type, 2920 mxge_media_types[i].name); 2921 2922 mxge_set_media(sc, mxge_media_types[i].flag); 2923 return; 2924 } 2925 } 2926 
	device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type,
		      cmd.data0);

	return;
}

/*
 * Interrupt handler (one per slice with MSI-X, or a single legacy
 * IRQ).  Processes tx completions and rx completions, mirrors the
 * firmware's link/error stats on slice 0, and returns the interrupt
 * claim tokens to the NIC.
 */
static void
mxge_intr(void *arg)
{
	struct mxge_slice_state *ss = arg;
	mxge_softc_t *sc = ss->sc;
	mcp_irq_data_t *stats = ss->fw_stats;
	mxge_tx_ring_t *tx = &ss->tx;
	mxge_rx_done_t *rx_done = &ss->rx_done;
	uint32_t send_done_count;
	uint8_t valid;


#ifndef IFNET_BUF_RING
	/* an interrupt on a non-zero slice is implicitly valid
	   since MSI-X irqs are not shared */
	if (ss != sc->ss) {
		mxge_clean_rx_done(ss);
		*ss->irq_claim = be32toh(3);
		return;
	}
#endif

	/* make sure the DMA has finished */
	if (!stats->valid) {
		return;
	}
	valid = stats->valid;

	if (sc->legacy_irq) {
		/* lower legacy IRQ  */
		*sc->irq_deassert = 0;
		if (!mxge_deassert_wait)
			/* don't wait for conf. that irq is low */
			stats->valid = 0;
	} else {
		stats->valid = 0;
	}

	/* loop while waiting for legacy irq deassertion */
	do {
		/* check for transmit completes and receives */
		send_done_count = be32toh(stats->send_done_count);
		while ((send_done_count != tx->pkt_done) ||
		       (rx_done->entry[rx_done->idx].length != 0)) {
			if (send_done_count != tx->pkt_done)
				mxge_tx_done(ss, (int)send_done_count);
			mxge_clean_rx_done(ss);
			send_done_count = be32toh(stats->send_done_count);
		}
		if (sc->legacy_irq && mxge_deassert_wait)
			wmb();
	} while (*((volatile uint8_t *) &stats->valid));

	/* fw link & error stats meaningful only on the first slice */
	if (__predict_false((ss == sc->ss) && stats->stats_updated)) {
		if (sc->link_state != stats->link_up) {
			sc->link_state = stats->link_up;
			if (sc->link_state) {
				if_link_state_change(sc->ifp, LINK_STATE_UP);
				if (mxge_verbose)
					device_printf(sc->dev, "link up\n");
			} else {
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
				if (mxge_verbose)
					device_printf(sc->dev, "link down\n");
			}
			/* a fresh link-up may reveal the XFP/SFP type */
			sc->need_media_probe = 1;
		}
		if (sc->rdma_tags_available !=
		    be32toh(stats->rdma_tags_available)) {
			sc->rdma_tags_available =
				be32toh(stats->rdma_tags_available);
			device_printf(sc->dev, "RDMA timed out! %d tags "
				      "left\n", sc->rdma_tags_available);
		}

		if (stats->link_down) {
			sc->down_cnt += stats->link_down;
			sc->link_state = 0;
			if_link_state_change(sc->ifp, LINK_STATE_DOWN);
		}
	}

	/* check to see if we have rx token to pass back */
	if (valid & 0x1)
	    *ss->irq_claim = be32toh(3);
	/* the second claim register is always written back */
	*(ss->irq_claim + 1) = be32toh(3);
}

/* if_init method: intentionally empty; open/close are driven from
   the ioctl path under the driver lock */
static void
mxge_init(void *arg)
{
}



/*
 * Free every mbuf still attached to this slice's rx and tx rings,
 * along with the slice's free-list of LRO entries.
 */
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
	struct lro_entry *lro_entry;
	int i;

	while (!SLIST_EMPTY(&ss->lro_free)) {
		lro_entry = SLIST_FIRST(&ss->lro_free);
		SLIST_REMOVE_HEAD(&ss->lro_free, next);
		free(lro_entry, M_DEVBUF);
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		if (ss->rx_big.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_big.dmat,
				  ss->rx_big.info[i].map);
		m_freem(ss->rx_big.info[i].m);
		ss->rx_big.info[i].m = NULL;
	}

	for (i = 0; i <= ss->rx_small.mask; i++) {
		if (ss->rx_small.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->rx_small.dmat,
				  ss->rx_small.info[i].map);
		m_freem(ss->rx_small.info[i].m);
		ss->rx_small.info[i].m = NULL;
	}

	/* transmit ring used only on the first slice */
	if (ss->tx.info == NULL)
		return;

	for (i = 0; i <= ss->tx.mask; i++) {
		ss->tx.info[i].flag = 0;
		if (ss->tx.info[i].m == NULL)
			continue;
		bus_dmamap_unload(ss->tx.dmat,
				  ss->tx.info[i].map);
		m_freem(ss->tx.info[i].m);
		ss->tx.info[i].m = NULL;
	}
}

static void
mxge_free_mbufs(mxge_softc_t *sc)
{
	int slice;

	/* release ring-held mbufs on every slice */
	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_mbufs(&sc->ss[slice]);
}

/*
 * Release all ring memory and busdma resources belonging to one
 * slice.  Safe to call on a partially-initialized slice: every
 * pointer is NULL-checked and cleared after being freed.
 */
static void
mxge_free_slice_rings(struct mxge_slice_state *ss)
{
	int i;


	if (ss->rx_done.entry != NULL)
		mxge_dma_free(&ss->rx_done.dma);
	ss->rx_done.entry = NULL;

	if (ss->tx.req_bytes != NULL)
		free(ss->tx.req_bytes, M_DEVBUF);
	ss->tx.req_bytes = NULL;

	if (ss->tx.seg_list != NULL)
		free(ss->tx.seg_list, M_DEVBUF);
	ss->tx.seg_list = NULL;

	if (ss->rx_small.shadow != NULL)
		free(ss->rx_small.shadow, M_DEVBUF);
	ss->rx_small.shadow = NULL;

	if (ss->rx_big.shadow != NULL)
		free(ss->rx_big.shadow, M_DEVBUF);
	ss->rx_big.shadow = NULL;

	/* destroy per-slot dmamaps before the tag they came from */
	if (ss->tx.info != NULL) {
		if (ss->tx.dmat != NULL) {
			for (i = 0; i <= ss->tx.mask; i++) {
				bus_dmamap_destroy(ss->tx.dmat,
						   ss->tx.info[i].map);
			}
			bus_dma_tag_destroy(ss->tx.dmat);
		}
		free(ss->tx.info, M_DEVBUF);
	}
	ss->tx.info = NULL;

	if (ss->rx_small.info != NULL) {
		if (ss->rx_small.dmat != NULL) {
			for (i = 0; i <= ss->rx_small.mask; i++) {
				bus_dmamap_destroy(ss->rx_small.dmat,
						   ss->rx_small.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_small.dmat,
					   ss->rx_small.extra_map);
			bus_dma_tag_destroy(ss->rx_small.dmat);
		}
		free(ss->rx_small.info, M_DEVBUF);
	}
	ss->rx_small.info = NULL;

	if (ss->rx_big.info != NULL) {
		if (ss->rx_big.dmat != NULL) {
			for (i = 0; i <= ss->rx_big.mask; i++) {
				bus_dmamap_destroy(ss->rx_big.dmat,
						   ss->rx_big.info[i].map);
			}
			bus_dmamap_destroy(ss->rx_big.dmat,
					   ss->rx_big.extra_map);
			bus_dma_tag_destroy(ss->rx_big.dmat);
		}
		free(ss->rx_big.info, M_DEVBUF);
	}
	ss->rx_big.info = NULL;
}

/* Free ring resources for every slice */
static void
mxge_free_rings(mxge_softc_t *sc)
{
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++)
		mxge_free_slice_rings(&sc->ss[slice]);
}

/*
 * Allocate one slice's host-side ring state: shadow rings, info
 * arrays, busdma tags, and per-slot dmamaps for rx (and, on the
 * sending slice, tx).  Returns 0 or an errno; the caller unwinds
 * partial allocations via mxge_free_rings().
 */
static int
mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries,
		       int tx_ring_entries)
{
	mxge_softc_t *sc = ss->sc;
	size_t bytes;
	int err, i;

	err = ENOMEM;

	/* allocate per-slice receive resources */

	ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1;
	ss->rx_done.mask = (2 * rx_ring_entries) - 1;

	/* allocate the rx shadow rings */
	/* NOTE(review): M_WAITOK allocations do not fail, so these
	   NULL checks are belt-and-suspenders */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow);
	ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.shadow == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow);
	ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.shadow == NULL)
		return err;

	/* allocate the rx host info rings */
	bytes = rx_ring_entries * sizeof (*ss->rx_small.info);
	ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_small.info == NULL)
		return err;

	bytes = rx_ring_entries * sizeof (*ss->rx_big.info);
	ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->rx_big.info == NULL)
		return err;

	/* allocate the rx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		return err;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
#if MXGE_VIRT_JUMBOS
				 4096,			/* boundary */
#else
				 0,			/* boundary */
#endif
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 3*4096,		/* maxsize */
#if MXGE_VIRT_JUMBOS
				 3,			/* num segs */
				 4096,			/* maxsegsize*/
#else
				 1,			/* num segs */
				 MJUM9BYTES,		/* maxsegsize*/
#endif
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		return err;
	}
	for (i = 0; i <= ss->rx_small.mask; i++) {
		err = bus_dmamap_create(ss->rx_small.dmat, 0,
					&ss->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_small dmamap\n",
				      err);
			return err;
		}
	}
	/* the extra map is swapped with a slot's map on each refill */
	err = bus_dmamap_create(ss->rx_small.dmat, 0,
				&ss->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		return err;
	}

	for (i = 0; i <= ss->rx_big.mask; i++) {
		err = bus_dmamap_create(ss->rx_big.dmat, 0,
					&ss->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  rx_big dmamap\n",
				      err);
			return err;
		}
	}
	err = bus_dmamap_create(ss->rx_big.dmat, 0,
				&ss->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		return err;
	}

	/* now allocate TX resouces */

#ifndef IFNET_BUF_RING
	/* only use a single TX ring for now */
	if (ss != ss->sc->ss)
		return 0;
#endif

	ss->tx.mask = tx_ring_entries - 1;
	ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4);


	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4);
	ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.req_bytes == NULL)
		return err;
	/* ensure req_list entries are aligned to 8 bytes */
	ss->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(ss->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc;
	ss->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (ss->tx.seg_list == NULL)
		return err;

	/* allocate the tx host info ring */
	bytes = tx_ring_entries * sizeof (*ss->tx.info);
	ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (ss->tx.info == NULL)
		return err;

	/* allocate the tx busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx_boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 ss->tx.max_desc - 2,	/* num segs */
				 sc->tx_boundary,	/* maxsegsz */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &ss->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		return err;
	}

	/* now use these tags to setup dmamaps for each slot
	   in the ring */
	for (i = 0; i <= ss->tx.mask; i++) {
		err = bus_dmamap_create(ss->tx.dmat, 0,
					&ss->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d  tx dmamap\n",
				      err);
			return err;
		}
	}
	return 0;

}

/*
 * Query the firmware for ring sizes, size the ifnet send queue to
 * match, and allocate host ring state for every slice.
 */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int err, slice;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine tx ring sizes\n");
		goto abort;
	}

	tx_ring_entries = tx_ring_size /
 sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t);
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_READY(&sc->ifp->if_snd);

	for (slice = 0; slice < sc->num_slices; slice++) {
		err = mxge_alloc_slice_rings(&sc->ss[slice],
					     rx_ring_entries,
					     tx_ring_entries);
		if (err != 0)
			goto abort;
	}
	return 0;

abort:
	/* unwind any slices that were successfully allocated */
	mxge_free_rings(sc);
	return err;

}


/*
 * Pick the big-ring buffer strategy for a given MTU: the firmware
 * buffer size, the mbuf cluster size to allocate, and how many ring
 * entries each packet consumes.
 */
static void
mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs)
{
	/* worst-case wire frame: MTU + ethernet + VLAN + firmware pad */
	int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;

	if (bufsize < MCLBYTES) {
		/* easy, everything fits in a single buffer */
		*big_buf_size = MCLBYTES;
		*cl_size = MCLBYTES;
		*nbufs = 1;
		return;
	}

	if (bufsize < MJUMPAGESIZE) {
		/* still easy, everything still fits in a single buffer */
		*big_buf_size = MJUMPAGESIZE;
		*cl_size = MJUMPAGESIZE;
		*nbufs = 1;
		return;
	}
#if MXGE_VIRT_JUMBOS
	/* now we need to use virtually contiguous buffers */
	*cl_size = MJUM9BYTES;
	*big_buf_size = 4096;
	*nbufs = mtu / 4096 + 1;
	/* needs to be a power of two, so round up */
	if (*nbufs == 3)
		*nbufs = 4;
#else
	*cl_size = MJUM9BYTES;
	*big_buf_size = MJUM9BYTES;
	*nbufs = 1;
#endif
}

/*
 * Bring one slice online: populate its LRO free list, fetch the
 * lanai ring pointers from the firmware, and stock both receive
 * rings with freshly mapped buffers.
 */
static int
mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
{
	mxge_softc_t *sc;
	mxge_cmd_t cmd;
	bus_dmamap_t map;
	struct lro_entry *lro_entry;
	int err, i, slice;


	sc = ss->sc;
	slice = ss - sc->ss;

	SLIST_INIT(&ss->lro_free);
	SLIST_INIT(&ss->lro_active);

	for (i = 0; i < sc->lro_cnt; i++) {
		lro_entry = (struct lro_entry *)
			malloc(sizeof (*lro_entry), M_DEVBUF,
			       M_NOWAIT | M_ZERO);
		if (lro_entry == NULL) {
			/* scale back LRO rather than failing open */
			sc->lro_cnt = i;
			break;
		}
		SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
	}
	/* get the lanai pointers to the send and receive rings */

	err = 0;
#ifndef IFNET_BUF_RING
	/* We currently only send from the first slice */
	if (slice == 0) {
#endif
		cmd.data0 = slice;
		err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
		ss->tx.lanai =
			(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
		ss->tx.send_go = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
		ss->tx.send_stop = (volatile uint32_t *)
			(sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
#ifndef IFNET_BUF_RING
	}
#endif
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	ss->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	cmd.data0 = slice;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	ss->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		return EIO;
	}

	/* stock receive rings */
	for (i = 0; i <= ss->rx_small.mask; i++) {
		map = ss->rx_small.info[i].map;
		err = mxge_get_buf_small(ss, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, ss->rx_small.mask + 1);
			return ENOMEM;
		}
	}
	/* poison every big shadow entry so unstocked slots stay invalid */
	for (i = 0; i <= ss->rx_big.mask; i++) {
		ss->rx_big.shadow[i].addr_low = 0xffffffff;
		ss->rx_big.shadow[i].addr_high = 0xffffffff;
	}
	ss->rx_big.nbufs = nbufs;
	ss->rx_big.cl_size = cl_size;
	ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN +
		ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD;
	for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) {
		map = ss->rx_big.info[i].map;
		err = mxge_get_buf_big(ss, map, i);
		if (err)
{ 3499 device_printf(sc->dev, "alloced %d/%d bigs\n", 3500 i, ss->rx_big.mask + 1); 3501 return ENOMEM; 3502 } 3503 } 3504 return 0; 3505} 3506 3507static int 3508mxge_open(mxge_softc_t *sc) 3509{ 3510 mxge_cmd_t cmd; 3511 int err, big_bytes, nbufs, slice, cl_size, i; 3512 bus_addr_t bus; 3513 volatile uint8_t *itable; 3514 struct mxge_slice_state *ss; 3515 3516 /* Copy the MAC address in case it was overridden */ 3517 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3518 3519 err = mxge_reset(sc, 1); 3520 if (err != 0) { 3521 device_printf(sc->dev, "failed to reset\n"); 3522 return EIO; 3523 } 3524 3525 if (sc->num_slices > 1) { 3526 /* setup the indirection table */ 3527 cmd.data0 = sc->num_slices; 3528 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3529 &cmd); 3530 3531 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3532 &cmd); 3533 if (err != 0) { 3534 device_printf(sc->dev, 3535 "failed to setup rss tables\n"); 3536 return err; 3537 } 3538 3539 /* just enable an identity mapping */ 3540 itable = sc->sram + cmd.data0; 3541 for (i = 0; i < sc->num_slices; i++) 3542 itable[i] = (uint8_t)i; 3543 3544 cmd.data0 = 1; 3545 cmd.data1 = mxge_rss_hash_type; 3546 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3547 if (err != 0) { 3548 device_printf(sc->dev, "failed to enable slices\n"); 3549 return err; 3550 } 3551 } 3552 3553 3554 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3555 3556 cmd.data0 = nbufs; 3557 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3558 &cmd); 3559 /* error is only meaningful if we're trying to set 3560 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3561 if (err && nbufs > 1) { 3562 device_printf(sc->dev, 3563 "Failed to set alway-use-n to %d\n", 3564 nbufs); 3565 return EIO; 3566 } 3567 /* Give the firmware the mtu and the big and small buffer 3568 sizes. The firmware wants the big buf size to be a power 3569 of two. 
Luckily, FreeBSD's clusters are powers of two */ 3570 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3571 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3572 cmd.data0 = MHLEN - MXGEFW_PAD; 3573 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3574 &cmd); 3575 cmd.data0 = big_bytes; 3576 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3577 3578 if (err != 0) { 3579 device_printf(sc->dev, "failed to setup params\n"); 3580 goto abort; 3581 } 3582 3583 /* Now give him the pointer to the stats block */ 3584 for (slice = 0; 3585#ifdef IFNET_BUF_RING 3586 slice < sc->num_slices; 3587#else 3588 slice < 1; 3589#endif 3590 slice++) { 3591 ss = &sc->ss[slice]; 3592 cmd.data0 = 3593 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3594 cmd.data1 = 3595 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3596 cmd.data2 = sizeof(struct mcp_irq_data); 3597 cmd.data2 |= (slice << 16); 3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3599 } 3600 3601 if (err != 0) { 3602 bus = sc->ss->fw_stats_dma.bus_addr; 3603 bus += offsetof(struct mcp_irq_data, send_done_count); 3604 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3605 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3606 err = mxge_send_cmd(sc, 3607 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3608 &cmd); 3609 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3610 sc->fw_multicast_support = 0; 3611 } else { 3612 sc->fw_multicast_support = 1; 3613 } 3614 3615 if (err != 0) { 3616 device_printf(sc->dev, "failed to setup params\n"); 3617 goto abort; 3618 } 3619 3620 for (slice = 0; slice < sc->num_slices; slice++) { 3621 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3622 if (err != 0) { 3623 device_printf(sc->dev, "couldn't open slice %d\n", 3624 slice); 3625 goto abort; 3626 } 3627 } 3628 3629 /* Finally, start the firmware running */ 3630 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3631 if (err) { 3632 device_printf(sc->dev, "Couldn't bring up link\n"); 3633 goto 
abort; 3634 } 3635#ifdef IFNET_BUF_RING 3636 for (slice = 0; slice < sc->num_slices; slice++) { 3637 ss = &sc->ss[slice]; 3638 ss->if_drv_flags |= IFF_DRV_RUNNING; 3639 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3640 } 3641#endif 3642 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3643 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3644 3645 return 0; 3646 3647 3648abort: 3649 mxge_free_mbufs(sc); 3650 3651 return err; 3652} 3653 3654static int 3655mxge_close(mxge_softc_t *sc, int down) 3656{ 3657 mxge_cmd_t cmd; 3658 int err, old_down_cnt; 3659#ifdef IFNET_BUF_RING 3660 struct mxge_slice_state *ss; 3661 int slice; 3662#endif 3663 3664#ifdef IFNET_BUF_RING 3665 for (slice = 0; slice < sc->num_slices; slice++) { 3666 ss = &sc->ss[slice]; 3667 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3668 } 3669#endif 3670 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3671 if (!down) { 3672 old_down_cnt = sc->down_cnt; 3673 wmb(); 3674 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3675 if (err) { 3676 device_printf(sc->dev, 3677 "Couldn't bring down link\n"); 3678 } 3679 if (old_down_cnt == sc->down_cnt) { 3680 /* wait for down irq */ 3681 DELAY(10 * sc->intr_coal_delay); 3682 } 3683 wmb(); 3684 if (old_down_cnt == sc->down_cnt) { 3685 device_printf(sc->dev, "never got down irq\n"); 3686 } 3687 } 3688 mxge_free_mbufs(sc); 3689 3690 return 0; 3691} 3692 3693static void 3694mxge_setup_cfg_space(mxge_softc_t *sc) 3695{ 3696 device_t dev = sc->dev; 3697 int reg; 3698 uint16_t cmd, lnk, pectl; 3699 3700 /* find the PCIe link width and set max read request to 4KB*/ 3701 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3702 lnk = pci_read_config(dev, reg + 0x12, 2); 3703 sc->link_width = (lnk >> 4) & 0x3f; 3704 3705 if (sc->pectl == 0) { 3706 pectl = pci_read_config(dev, reg + 0x8, 2); 3707 pectl = (pectl & ~0x7000) | (5 << 12); 3708 pci_write_config(dev, reg + 0x8, pectl, 2); 3709 sc->pectl = pectl; 3710 } else { 3711 /* restore saved pectl after watchdog reset */ 3712 pci_write_config(dev, reg + 
					 0x8, sc->pectl, 2);
		}
	}

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);
}

/*
 * Read the NIC's reboot-status register via the vendor-specific PCI
 * capability's indirect read32 window.  Returns the register value,
 * or (uint32_t)-1 if the capability cannot be found.
 */
static uint32_t
mxge_read_reboot(mxge_softc_t *sc)
{
	device_t dev = sc->dev;
	uint32_t vs;	/* NOTE(review): pci_find_extcap() takes int *;
			   passing &vs relies on matching widths — confirm */

	/* find the vendor specific offset */
	if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) {
		device_printf(sc->dev,
			      "could not find vendor specific offset\n");
		return (uint32_t)-1;
	}
	/* enable read32 mode */
	pci_write_config(dev, vs + 0x10, 0x3, 1);
	/* tell NIC which register to read */
	pci_write_config(dev, vs + 0x18, 0xfffffff0, 4);
	return (pci_read_config(dev, vs + 0x14, 4));
}

/*
 * Recover from a firmware hang detected by the watchdog.  If the NIC
 * rebooted (busmaster bit cleared in PCI config space), quiesce TX by
 * taking every TX lock, close the interface, restore config space,
 * reload firmware and reopen.  Reschedules the tick callout on
 * success.  Called from the watchdog task with driver_mtx held.
 */
static void
mxge_watchdog_reset(mxge_softc_t *sc)
{
	struct pci_devinfo *dinfo;
	struct mxge_slice_state *ss;
	int err, running, s, num_tx_slices = 1;
	uint32_t reboot;
	uint16_t cmd;

	err = ENXIO;

	device_printf(sc->dev, "Watchdog reset!\n");

	/*
	 * check to see if the NIC rebooted.  If it did, then all of
	 * PCI config space has been reset, and things like the
	 * busmaster bit will be zero.  If this is the case, then we
	 * must restore PCI config space before the NIC can be used
	 * again
	 */
	cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
	if (cmd == 0xffff) {
		/*
		 * maybe the watchdog caught the NIC rebooting; wait
		 * up to 100ms for it to finish.  If it does not come
		 * back, then give up
		 */
		DELAY(1000*100);
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if (cmd == 0xffff) {
			device_printf(sc->dev, "NIC disappeared!\n");
		}
	}
	if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
		/* print the reboot status */
		reboot = mxge_read_reboot(sc);
		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
			      reboot);
		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
		if (running) {

			/*
			 * quiesce NIC so that TX routines will not try to
			 * xmit after restoration of BAR
			 */

			/* Mark the link as down */
			if (sc->link_state) {
				sc->link_state = 0;
				if_link_state_change(sc->ifp,
						     LINK_STATE_DOWN);
			}
#ifdef IFNET_BUF_RING
			num_tx_slices = sc->num_slices;
#endif
			/* grab all TX locks to ensure no tx  */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
				mtx_lock(&ss->tx.mtx);
			}
			/* down=1: NIC is dead, skip ETHERNET_DOWN handshake */
			mxge_close(sc, 1);
		}
		/* restore PCI configuration space */
		dinfo = device_get_ivars(sc->dev);
		pci_cfg_restore(sc->dev, dinfo);

		/* and redo any changes we made to our config space */
		mxge_setup_cfg_space(sc);

		/* reload f/w */
		err = mxge_load_firmware(sc, 0);
		if (err) {
			device_printf(sc->dev,
				      "Unable to re-load f/w\n");
		}
		if (running) {
			if (!err)
				err = mxge_open(sc);
			/* release all TX locks */
			for (s = 0; s < num_tx_slices; s++) {
				ss = &sc->ss[s];
#ifdef IFNET_BUF_RING
				mxge_start_locked(ss);
#endif
				mtx_unlock(&ss->tx.mtx);
			}
		}
		sc->watchdog_resets++;
	} else {
		device_printf(sc->dev,
			      "NIC did not reboot, not resetting\n");
		err = 0;
	}
	if (err) {
		device_printf(sc->dev, "watchdog reset failed\n");
	} else {
		/* resurrect only if mxge_tick() marked us dying */
		if (sc->dying == 2)
			sc->dying = 0;
		callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc);
	}
}

/* Taskqueue wrapper: run the watchdog reset under the driver mutex. */
static void
mxge_watchdog_task(void *arg, int pending)
{
	mxge_softc_t *sc = arg;


	mtx_lock(&sc->driver_mtx);
	mxge_watchdog_reset(sc);
	mtx_unlock(&sc->driver_mtx);
}

/*
 * Dump TX ring state for a slice that appears stuck.  Note the 'tx'
 * parameter is immediately overwritten from sc->ss[slice] — the
 * passed-in pointer is effectively ignored.
 */
static void
mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice)
{
	tx = &sc->ss[slice].tx;
	device_printf(sc->dev, "slice %d struck? ring state:\n", slice);
	device_printf(sc->dev,
		      "tx.req=%d tx.done=%d, tx.queue_active=%d\n",
		      tx->req, tx->done, tx->queue_active);
	device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n",
		      tx->activate, tx->deactivate);
	device_printf(sc->dev, "pkt_done=%d fw=%d\n",
		      tx->pkt_done,
		      be32toh(sc->ss->fw_stats->send_done_count));
}

/*
 * Periodic TX-progress check.  A slice is considered stuck when it
 * has pending transmits and made no completion progress since the
 * last tick; if the firmware also reports no new pause frames, the
 * watchdog reset task is queued and ENXIO is returned.
 */
static int
mxge_watchdog(mxge_softc_t *sc)
{
	mxge_tx_ring_t *tx;
	uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause);
	int i, err = 0;

	/* see if we have outstanding transmits, which
	   have been pending for more than mxge_ticks */
	for (i = 0;
#ifdef IFNET_BUF_RING
	     (i < sc->num_slices) && (err == 0);
#else
	     (i < 1) && (err == 0);
#endif
	     i++) {
		tx = &sc->ss[i].tx;
		if (tx->req != tx->done &&
		    tx->watchdog_req != tx->watchdog_done &&
		    tx->done == tx->watchdog_done) {
			/* check for pause blocking before resetting */
			if (tx->watchdog_rx_pause == rx_pause) {
				mxge_warn_stuck(sc, tx, i);
				taskqueue_enqueue(sc->tq, &sc->watchdog_task);
				return (ENXIO);
			}
			else
				device_printf(sc->dev, "Flow control blocking "
					      "xmits, check link partner\n");
		}

		/* snapshot state for next tick's comparison */
		tx->watchdog_req = tx->req;
		tx->watchdog_done = tx->done;
		tx->watchdog_rx_pause = rx_pause;
	}

	if (sc->need_media_probe)
		mxge_media_probe(sc);
	return (err);
}

/*
 * Fold per-slice counters into the ifnet statistics.  Returns the
 * number of packets (rx+tx) seen since the previous call, which
 * mxge_tick() uses to detect an idle NIC.
 */
static u_long
mxge_update_stats(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	u_long pkts = 0;
	u_long ipackets = 0;
	u_long opackets = 0;
#ifdef IFNET_BUF_RING
	u_long obytes = 0;
	u_long omcasts = 0;
	u_long odrops = 0;
#endif
	u_long oerrors = 0;
	int slice;

	for (slice = 0; slice < sc->num_slices; slice++) {
		ss = &sc->ss[slice];
		ipackets += ss->ipackets;
		opackets += ss->opackets;
#ifdef IFNET_BUF_RING
		obytes += ss->obytes;
		omcasts += ss->omcasts;
		odrops += ss->tx.br->br_drops;
#endif
		oerrors += ss->oerrors;
	}
	/* delta against the previously-published ifnet counters */
	pkts = (ipackets - sc->ifp->if_ipackets);
	pkts += (opackets - sc->ifp->if_opackets);
	sc->ifp->if_ipackets = ipackets;
	sc->ifp->if_opackets = opackets;
#ifdef IFNET_BUF_RING
	sc->ifp->if_obytes = obytes;
	sc->ifp->if_omcasts = omcasts;
	sc->ifp->if_snd.ifq_drops = odrops;
#endif
	sc->ifp->if_oerrors = oerrors;
	return pkts;
}

/*
 * Periodic callout: update stats, run the TX watchdog every 4th tick,
 * and when idle verify the NIC has not lost its config space (h/w
 * fault).  Reschedules itself (4x slower when idle) unless an error
 * handed recovery off to the watchdog task.
 */
static void
mxge_tick(void *arg)
{
	mxge_softc_t *sc = arg;
	u_long pkts = 0;
	int err = 0;
	int running, ticks;
	uint16_t cmd;

	ticks = mxge_ticks;
	running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
	if (running) {
		/* aggregate stats from different slices */
		pkts = mxge_update_stats(sc);
		if (!sc->watchdog_countdown) {
			err = mxge_watchdog(sc);
			sc->watchdog_countdown = 4;
		}
		sc->watchdog_countdown--;
	}
	if (pkts == 0) {
		/* ensure NIC did not suffer h/w fault while idle */
		cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2);
		if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) {
			sc->dying = 2;
			taskqueue_enqueue(sc->tq, &sc->watchdog_task);
			err = ENXIO;
		}
		/* look less often if NIC is idle */
		ticks *= 4;
	}

	if (err == 0)
		callout_reset(&sc->co_hdl, ticks, mxge_tick, sc);

}

/* Media is fixed 10G; changing it is not supported. */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

/*
 * Change the interface MTU.  If the interface is running it is
 * closed and reopened with the new size; on reopen failure the old
 * MTU is restored and the interface is reopened again.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	/* on-wire frame size: MTU plus Ethernet and VLAN headers */
	real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	if ((real_mtu > sc->max_mtu) || real_mtu < 60)
		return EINVAL;
	mtx_lock(&sc->driver_mtx);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc, 0);
		err = mxge_open(sc);
		if (err != 0) {
			/* roll back to the old MTU and try to recover */
			ifp->if_mtu = old_mtu;
			mxge_close(sc, 0);
			(void) mxge_open(sc);
		}
	}
	mtx_unlock(&sc->driver_mtx);
	return err;
}

/*
 * Report media status: always autoselect Ethernet; ACTIVE and
 * full-duplex are reported while the link is up.
 */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0;
}

/*
 * Interface ioctl handler.  All state changes are serialized on
 * driver_mtx.  Returns ENOTTY for unrecognized commands so the stack
 * can fall through to generic handling.
 */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		mtx_lock(&sc->driver_mtx);
		if (sc->dying) {
			mtx_unlock(&sc->driver_mtx);
			return EINVAL;
		}
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) {
				err = mxge_open(sc);
			} else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mxge_close(sc, 0);
			}
		}
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mtx_lock(&sc->driver_mtx);
		mxge_set_multicast_list(sc);
		mtx_unlock(&sc->driver_mtx);
		break;

	case SIOCSIFCAP:
		mtx_lock(&sc->driver_mtx);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		/* NOTE(review): the 'else if' means TXCSUM and RXCSUM
		   cannot be toggled in the same request — confirm this
		   is intentional */
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* disabling TX csum also disables TSO */
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				/* TSO requires TX checksum offload */
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		if (mask & IFCAP_LRO) {
			if (IFCAP_LRO & ifp->if_capenable)
				err = mxge_change_lro_locked(sc, 0);
			else
				err = mxge_change_lro_locked(sc, mxge_lro_cnt);
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		mtx_unlock(&sc->driver_mtx);
		VLAN_CAPABILITIES(ifp);

		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

/*
 * Fetch hw.mxge.* loader tunables into the driver globals and clamp
 * them to sane ranges.  Called once per device from mxge_attach().
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices);
	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.force_firmware",
			  &mxge_force_firmware);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);
	TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
	TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
	TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
	/* both spellings of the hash-type tunable are accepted */
	TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
	TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
	TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
	if (sc->lro_cnt != 0)
		mxge_lro_cnt = sc->lro_cnt;

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	if (mxge_ticks == 0)
		mxge_ticks = hz / 2;
	sc->pause = mxge_flow_control;
	if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4
	    || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) {
		mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
	}
	if (mxge_initial_mtu > ETHERMTU_JUMBO ||
	    mxge_initial_mtu < ETHER_MIN_LEN)
		mxge_initial_mtu = ETHERMTU_JUMBO;

	if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE)
		mxge_throttle = MXGE_MAX_THROTTLE;
	if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE)
		mxge_throttle = MXGE_MIN_THROTTLE;
	sc->throttle = mxge_throttle;
}


/*
 * Free all per-slice state allocated by mxge_alloc_slices(): stats
 * DMA, TX buf_ring/mutex, and rx completion queues.  Safe to call
 * when sc->ss is NULL or only partially populated (used by the
 * alloc-failure path).
 */
static void
mxge_free_slices(mxge_softc_t *sc)
{
	struct mxge_slice_state *ss;
	int i;


	if (sc->ss == NULL)
		return;

	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];
		/* fw_stats non-NULL implies the tx mutex was initialized */
		if (ss->fw_stats != NULL) {
			mxge_dma_free(&ss->fw_stats_dma);
			ss->fw_stats = NULL;
#ifdef IFNET_BUF_RING
			if (ss->tx.br != NULL) {
				drbr_free(ss->tx.br, M_DEVBUF);
				ss->tx.br = NULL;
			}
#endif
			mtx_destroy(&ss->tx.mtx);
		}
		if (ss->rx_done.entry != NULL) {
			mxge_dma_free(&ss->rx_done.dma);
			ss->rx_done.entry = NULL;
		}
	}
	free(sc->ss, M_DEVBUF);
	sc->ss = NULL;
}

/*
 * Allocate per-slice state: rx completion queues for every slice and
 * (unless IFNET_BUF_RING gives each slice a TX ring) firmware stats,
 * TX mutex and buf_ring for slice 0 only.  On failure everything
 * already allocated is released via mxge_free_slices().
 */
static int
mxge_alloc_slices(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	struct mxge_slice_state *ss;
	size_t bytes;
	int err, i, max_intr_slots;

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		return err;
	}
	sc->rx_ring_size = cmd.data0;
	/* 2 completion slots per rx descriptor */
	max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t));

	bytes = sizeof (*sc->ss) * sc->num_slices;
	sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->ss == NULL)
		return (ENOMEM);
	for (i = 0; i < sc->num_slices; i++) {
		ss = &sc->ss[i];

		ss->sc = sc;

		/* allocate per-slice rx interrupt queues */

		bytes = max_intr_slots * sizeof (*ss->rx_done.entry);
		err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096);
		if (err != 0)
			goto abort;
		ss->rx_done.entry = ss->rx_done.dma.addr;
		bzero(ss->rx_done.entry, bytes);

		/*
		 * allocate the per-slice firmware stats; stats
		 * (including tx) are used only on the first
		 * slice for now
		 */
#ifndef IFNET_BUF_RING
		if (i > 0)
			continue;
#endif

		bytes = sizeof (*ss->fw_stats);
		err = mxge_dma_alloc(sc, &ss->fw_stats_dma,
				     sizeof (*ss->fw_stats), 64);
		if (err != 0)
			goto abort;
		ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr;
		snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name),
			 "%s:tx(%d)", device_get_nameunit(sc->dev), i);
		mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF);
#ifdef IFNET_BUF_RING
		ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK,
					   &ss->tx.mtx);
#endif
	}

	return (0);

abort:
	mxge_free_slices(sc);
	return (ENOMEM);
}

/*
 * Decide how many slices (RSS queues) to use.  Defaults to 1; goes
 * multi-slice only when the tunable allows it, the system is SMP,
 * enough MSI-X vectors exist, and the RSS firmware loads and answers
 * the slice-count query.  On any failure the original firmware is
 * restored and num_slices stays 1.
 */
static void
mxge_slice_probe(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	char *old_fw;
	int msix_cnt, status, max_intr_slots;

	sc->num_slices = 1;
	/*
	 * don't enable multiple slices if they are not enabled,
	 * or if this is not an SMP system
	 */

	if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2)
		return;

	/* see how many MSI-X interrupts are available */
	msix_cnt = pci_msix_count(sc->dev);
	if (msix_cnt < 2)
		return;

	/* now load the slice aware firmware see what it supports */
	old_fw = sc->fw_name;
	if (old_fw == mxge_fw_aligned)
		sc->fw_name = mxge_fw_rss_aligned;
	else
		sc->fw_name = mxge_fw_rss_unaligned;
	status = mxge_load_firmware(sc, 0);
	if (status != 0) {
		device_printf(sc->dev, "Falling back to a single slice\n");
		return;
	}

	/* try to send a reset command to the card to see if it
	   is alive */
	memset(&cmd, 0, sizeof (cmd));
	status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed reset\n");
		goto abort_with_fw;
	}

	/* get rx ring size */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "Cannot determine rx ring size\n");
		goto abort_with_fw;
	}
	max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t));

	/* tell it the size of the interrupt queues */
	cmd.data0 = max_intr_slots * sizeof (struct mcp_slot);
	status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd);
	if (status != 0) {
		device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n");
		goto abort_with_fw;
	}

	/* ask the maximum number of slices it supports */
	status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd);
	if (status != 0) {
		device_printf(sc->dev,
			      "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n");
		goto abort_with_fw;
	}
	sc->num_slices = cmd.data0;
	if (sc->num_slices > msix_cnt)
		sc->num_slices = msix_cnt;

	if (mxge_max_slices == -1) {
		/* cap to number of CPUs in system */
		if (sc->num_slices > mp_ncpus)
			sc->num_slices = mp_ncpus;
	} else {
		if (sc->num_slices > mxge_max_slices)
			sc->num_slices = mxge_max_slices;
	}
	/* make sure it is a power of two */
	while (sc->num_slices & (sc->num_slices - 1))
		sc->num_slices--;

	if (mxge_verbose)
		device_printf(sc->dev, "using %d slices\n",
			      sc->num_slices);

	return;

abort_with_fw:
	sc->fw_name = old_fw;
	(void) mxge_load_firmware(sc, 0);
}

/*
 * Allocate one MSI-X vector per slice and hook each to mxge_intr with
 * its slice state.  Unwinds completely (handlers, IRQ resources,
 * MSI-X allocation, table BAR) on failure.
 */
static int
mxge_add_msix_irqs(mxge_softc_t *sc)
{
	size_t bytes;
	int count, err, i, rid;

	/* BAR 2 holds the MSI-X table */
	rid = PCIR_BAR(2);
	sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY,
						    &rid, RF_ACTIVE);

	if (sc->msix_table_res == NULL) {
		device_printf(sc->dev, "couldn't alloc MSIX table res\n");
		return ENXIO;
	}

	count = sc->num_slices;
	err = pci_alloc_msix(sc->dev, &count);
	if (err != 0) {
		device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d"
			      "err = %d \n", sc->num_slices, err);
		goto abort_with_msix_table;
	}
	if (count < sc->num_slices) {
		device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n",
			      count, sc->num_slices);
		device_printf(sc->dev,
			      "Try setting hw.mxge.max_slices to %d\n",
			      count);
		err = ENOSPC;
		goto abort_with_msix;
	}
	bytes = sizeof (*sc->msix_irq_res) * sc->num_slices;
	sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (sc->msix_irq_res == NULL) {
		err = ENOMEM;
		goto abort_with_msix;
	}

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;	/* MSI-X rids are 1-based */
		sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev,
							     SYS_RES_IRQ,
							     &rid, RF_ACTIVE);
		if (sc->msix_irq_res[i] == NULL) {
			device_printf(sc->dev, "couldn't allocate IRQ res"
				      " for message %d\n", i);
			err = ENXIO;
			goto abort_with_res;
		}
	}

	bytes = sizeof (*sc->msix_ih) * sc->num_slices;
	/* NOTE(review): M_NOWAIT malloc result is not checked here —
	   a NULL return would be dereferenced below; confirm/fix */
	sc->msix_ih =  malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO);

	for (i = 0; i < sc->num_slices; i++) {
		err = bus_setup_intr(sc->dev, sc->msix_irq_res[i],
				     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
				     NULL,
#endif
				     mxge_intr, &sc->ss[i], &sc->msix_ih[i]);
		if (err != 0) {
			device_printf(sc->dev, "couldn't setup intr for "
				      "message %d\n", i);
			goto abort_with_intr;
		}
	}

	if (mxge_verbose) {
		device_printf(sc->dev, "using %d msix IRQs:",
			      sc->num_slices);
		for (i = 0; i < sc->num_slices; i++)
			printf(" %ld", rman_get_start(sc->msix_irq_res[i]));
		printf("\n");
	}
	return (0);

abort_with_intr:
	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);


abort_with_res:
	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);


abort_with_msix:
	pci_release_msi(sc->dev);

abort_with_msix_table:
	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	return err;
}

/*
 * Single-vector interrupt setup: prefer one MSI message, fall back to
 * the legacy INTx line (rid 0).  Hooks mxge_intr to slice 0.
 */
static int
mxge_add_single_irq(mxge_softc_t *sc)
{
	int count, err, rid;

	count = pci_msi_count(sc->dev);
	if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) {
		rid = 1;	/* MSI */
	} else {
		rid = 0;	/* legacy INTx */
		sc->legacy_irq = 1;
	}
	sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0,
					 1, RF_SHAREABLE | RF_ACTIVE);
	if (sc->irq_res == NULL) {
		device_printf(sc->dev, "could not alloc interrupt\n");
		return ENXIO;
	}
	if (mxge_verbose)
		device_printf(sc->dev, "using %s irq %ld\n",
			      sc->legacy_irq ? "INTx" : "MSI",
			      rman_get_start(sc->irq_res));
	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
#if __FreeBSD_version > 700030
			     NULL,
#endif
			     mxge_intr, &sc->ss[0], &sc->ih);
	if (err != 0) {
		bus_release_resource(sc->dev, SYS_RES_IRQ,
				     sc->legacy_irq ? 0 : 1, sc->irq_res);
		if (!sc->legacy_irq)
			pci_release_msi(sc->dev);
	}
	return err;
}

/* Tear down everything mxge_add_msix_irqs() established. */
static void
mxge_rem_msix_irqs(mxge_softc_t *sc)
{
	int i, rid;

	for (i = 0; i < sc->num_slices; i++) {
		if (sc->msix_ih[i] != NULL) {
			bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
					  sc->msix_ih[i]);
			sc->msix_ih[i] = NULL;
		}
	}
	free(sc->msix_ih, M_DEVBUF);

	for (i = 0; i < sc->num_slices; i++) {
		rid = i + 1;
		if (sc->msix_irq_res[i] != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ, rid,
					     sc->msix_irq_res[i]);
		sc->msix_irq_res[i] = NULL;
	}
	free(sc->msix_irq_res, M_DEVBUF);

	bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2),
			     sc->msix_table_res);

	pci_release_msi(sc->dev);
	return;
}

/* Tear down the single MSI/INTx interrupt. */
static void
mxge_rem_single_irq(mxge_softc_t *sc)
{
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	bus_release_resource(sc->dev, SYS_RES_IRQ,
			     sc->legacy_irq ? 0 : 1, sc->irq_res);
	if (!sc->legacy_irq)
		pci_release_msi(sc->dev);
}

/* Dispatch interrupt teardown by slice count. */
static void
mxge_rem_irq(mxge_softc_t *sc)
{
	if (sc->num_slices > 1)
		mxge_rem_msix_irqs(sc);
	else
		mxge_rem_single_irq(sc);
}

/* Dispatch interrupt setup by slice count. */
static int
mxge_add_irq(mxge_softc_t *sc)
{
	int err;

	if (sc->num_slices > 1)
		err = mxge_add_msix_irqs(sc);
	else
		err = mxge_add_single_irq(sc);

	/* dead code (0 &&): retained retry path for MSI-X setup */
	if (0 && err == 0 && sc->num_slices > 1) {
		mxge_rem_msix_irqs(sc);
		err = mxge_add_msix_irqs(sc);
	}
	return err;
}


/*
 * Device attach: create the watchdog taskqueue, parent DMA tag,
 * ifnet, locks, map the board, load firmware, size the slices, set up
 * rings and interrupts, and attach to the network stack.  Unwinds via
 * the abort_with_* label chain on failure.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	int err, rid;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc);
	sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK,
				       taskqueue_thread_enqueue,
				       &sc->tq);
	if (sc->tq == NULL) {
		err = ENOMEM;
		goto abort_with_nothing;
	}
	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
				device_get_nameunit(sc->dev));

	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 0,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC, 	/* num segs */
				 65536,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_tq;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd",
device_get_nameunit(dev)); 4644 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4645 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4646 "%s:drv", device_get_nameunit(dev)); 4647 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4648 MTX_NETWORK_LOCK, MTX_DEF); 4649 4650 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4651 4652 mxge_setup_cfg_space(sc); 4653 4654 /* Map the board into the kernel */ 4655 rid = PCIR_BARS; 4656 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4657 ~0, 1, RF_ACTIVE); 4658 if (sc->mem_res == NULL) { 4659 device_printf(dev, "could not map memory\n"); 4660 err = ENXIO; 4661 goto abort_with_lock; 4662 } 4663 sc->sram = rman_get_virtual(sc->mem_res); 4664 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4665 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4666 device_printf(dev, "impossible memory region size %ld\n", 4667 rman_get_size(sc->mem_res)); 4668 err = ENXIO; 4669 goto abort_with_mem_res; 4670 } 4671 4672 /* make NULL terminated copy of the EEPROM strings section of 4673 lanai SRAM */ 4674 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4675 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4676 rman_get_bushandle(sc->mem_res), 4677 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4678 sc->eeprom_strings, 4679 MXGE_EEPROM_STRINGS_SIZE - 2); 4680 err = mxge_parse_strings(sc); 4681 if (err != 0) 4682 goto abort_with_mem_res; 4683 4684 /* Enable write combining for efficient use of PCIe bus */ 4685 mxge_enable_wc(sc); 4686 4687 /* Allocate the out of band dma memory */ 4688 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4689 sizeof (mxge_cmd_t), 64); 4690 if (err != 0) 4691 goto abort_with_mem_res; 4692 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4693 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4694 if (err != 0) 4695 goto abort_with_cmd_dma; 4696 4697 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4698 if (err != 0) 4699 goto abort_with_zeropad_dma; 4700 4701 /* 
select & load the firmware */ 4702 err = mxge_select_firmware(sc); 4703 if (err != 0) 4704 goto abort_with_dmabench; 4705 sc->intr_coal_delay = mxge_intr_coal_delay; 4706 4707 mxge_slice_probe(sc); 4708 err = mxge_alloc_slices(sc); 4709 if (err != 0) 4710 goto abort_with_dmabench; 4711 4712 err = mxge_reset(sc, 0); 4713 if (err != 0) 4714 goto abort_with_slices; 4715 4716 err = mxge_alloc_rings(sc); 4717 if (err != 0) { 4718 device_printf(sc->dev, "failed to allocate rings\n"); 4719 goto abort_with_dmabench; 4720 } 4721 4722 err = mxge_add_irq(sc); 4723 if (err != 0) { 4724 device_printf(sc->dev, "failed to add irq\n"); 4725 goto abort_with_rings; 4726 } 4727 4728 ifp->if_baudrate = IF_Gbps(10UL); 4729 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4730 IFCAP_VLAN_MTU; 4731#ifdef INET 4732 ifp->if_capabilities |= IFCAP_LRO; 4733#endif 4734 4735#ifdef MXGE_NEW_VLAN_API 4736 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4737#endif 4738 4739 sc->max_mtu = mxge_max_mtu(sc); 4740 if (sc->max_mtu >= 9000) 4741 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4742 else 4743 device_printf(dev, "MTU limited to %d. 
Install " 4744 "latest firmware for 9000 byte jumbo support\n", 4745 sc->max_mtu - ETHER_HDR_LEN); 4746 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4747 ifp->if_capenable = ifp->if_capabilities; 4748 if (sc->lro_cnt == 0) 4749 ifp->if_capenable &= ~IFCAP_LRO; 4750 sc->csum_flag = 1; 4751 ifp->if_init = mxge_init; 4752 ifp->if_softc = sc; 4753 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4754 ifp->if_ioctl = mxge_ioctl; 4755 ifp->if_start = mxge_start; 4756 /* Initialise the ifmedia structure */ 4757 ifmedia_init(&sc->media, 0, mxge_media_change, 4758 mxge_media_status); 4759 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4760 mxge_media_probe(sc); 4761 sc->dying = 0; 4762 ether_ifattach(ifp, sc->mac_addr); 4763 /* ether_ifattach sets mtu to ETHERMTU */ 4764 if (mxge_initial_mtu != ETHERMTU) 4765 mxge_change_mtu(sc, mxge_initial_mtu); 4766 4767 mxge_add_sysctls(sc); 4768#ifdef IFNET_BUF_RING 4769 ifp->if_transmit = mxge_transmit; 4770 ifp->if_qflush = mxge_qflush; 4771#endif 4772 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4773 return 0; 4774 4775abort_with_rings: 4776 mxge_free_rings(sc); 4777abort_with_slices: 4778 mxge_free_slices(sc); 4779abort_with_dmabench: 4780 mxge_dma_free(&sc->dmabench_dma); 4781abort_with_zeropad_dma: 4782 mxge_dma_free(&sc->zeropad_dma); 4783abort_with_cmd_dma: 4784 mxge_dma_free(&sc->cmd_dma); 4785abort_with_mem_res: 4786 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4787abort_with_lock: 4788 pci_disable_busmaster(dev); 4789 mtx_destroy(&sc->cmd_mtx); 4790 mtx_destroy(&sc->driver_mtx); 4791 if_free(ifp); 4792abort_with_parent_dmat: 4793 bus_dma_tag_destroy(sc->parent_dmat); 4794abort_with_tq: 4795 if (sc->tq != NULL) { 4796 taskqueue_drain(sc->tq, &sc->watchdog_task); 4797 taskqueue_free(sc->tq); 4798 sc->tq = NULL; 4799 } 4800abort_with_nothing: 4801 return err; 4802} 4803 4804static int 4805mxge_detach(device_t dev) 4806{ 4807 mxge_softc_t *sc = device_get_softc(dev); 4808 4809 if 
(mxge_vlans_active(sc)) { 4810 device_printf(sc->dev, 4811 "Detach vlans before removing module\n"); 4812 return EBUSY; 4813 } 4814 mtx_lock(&sc->driver_mtx); 4815 sc->dying = 1; 4816 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4817 mxge_close(sc, 0); 4818 mtx_unlock(&sc->driver_mtx); 4819 ether_ifdetach(sc->ifp); 4820 if (sc->tq != NULL) { 4821 taskqueue_drain(sc->tq, &sc->watchdog_task); 4822 taskqueue_free(sc->tq); 4823 sc->tq = NULL; 4824 } 4825 callout_drain(&sc->co_hdl); 4826 ifmedia_removeall(&sc->media); 4827 mxge_dummy_rdma(sc, 0); 4828 mxge_rem_sysctls(sc); 4829 mxge_rem_irq(sc); 4830 mxge_free_rings(sc); 4831 mxge_free_slices(sc); 4832 mxge_dma_free(&sc->dmabench_dma); 4833 mxge_dma_free(&sc->zeropad_dma); 4834 mxge_dma_free(&sc->cmd_dma); 4835 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4836 pci_disable_busmaster(dev); 4837 mtx_destroy(&sc->cmd_mtx); 4838 mtx_destroy(&sc->driver_mtx); 4839 if_free(sc->ifp); 4840 bus_dma_tag_destroy(sc->parent_dmat); 4841 return 0; 4842} 4843 4844static int 4845mxge_shutdown(device_t dev) 4846{ 4847 return 0; 4848} 4849 4850/* 4851 This file uses Myri10GE driver indentation. 4852 4853 Local Variables: 4854 c-file-style:"linux" 4855 tab-width:8 4856 End: 4857*/