if_mxge.c revision 197395
1/****************************************************************************** 2 3Copyright (c) 2006-2009, Myricom Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 197395 2009-09-21 20:16:10Z gallatin $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48 49/* count xmits ourselves, rather than via drbr */ 50#define NO_SLOW_STATS 51#include <net/if.h> 52#include <net/if_arp.h> 53#include <net/ethernet.h> 54#include <net/if_dl.h> 55#include <net/if_media.h> 56 57#include <net/bpf.h> 58 59#include <net/if_types.h> 60#include <net/if_vlan_var.h> 61#include <net/zlib.h> 62 63#include <netinet/in_systm.h> 64#include <netinet/in.h> 65#include <netinet/ip.h> 66#include <netinet/tcp.h> 67 68#include <machine/bus.h> 69#include <machine/in_cksum.h> 70#include <machine/resource.h> 71#include <sys/bus.h> 72#include <sys/rman.h> 73#include <sys/smp.h> 74 75#include <dev/pci/pcireg.h> 76#include <dev/pci/pcivar.h> 77#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 78 79#include <vm/vm.h> /* for pmap_mapdev() */ 80#include <vm/pmap.h> 81 82#if defined(__i386) || defined(__amd64) 83#include <machine/specialreg.h> 84#endif 85 86#include <dev/mxge/mxge_mcp.h> 87#include <dev/mxge/mcp_gen_header.h> 88/*#define MXGE_FAKE_IFP*/ 89#include <dev/mxge/if_mxge_var.h> 90#ifdef IFNET_BUF_RING 91#include <sys/buf_ring.h> 92#endif 93 94#include "opt_inet.h" 95 96/* tunable params */ 97static int mxge_nvidia_ecrc_enable = 1; 98static int mxge_force_firmware = 0; 99static int mxge_intr_coal_delay = 30; 100static int mxge_deassert_wait = 1; 101static int mxge_flow_control = 1; 102static int mxge_verbose = 0; 103static int mxge_lro_cnt = 8; 104static int mxge_ticks; 105static int mxge_max_slices = 1; 106static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 107static int mxge_always_promisc = 0; 108static int mxge_initial_mtu = ETHERMTU_JUMBO; 109static int mxge_throttle = 0; 110static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 111static char *mxge_fw_aligned = "mxge_eth_z8e"; 112static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 113static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 114 115static int mxge_probe(device_t dev); 116static int mxge_attach(device_t dev); 117static int mxge_detach(device_t dev); 118static int mxge_shutdown(device_t dev); 119static void mxge_intr(void *arg); 120 121static device_method_t mxge_methods[] = 122{ 123 /* Device interface */ 124 DEVMETHOD(device_probe, mxge_probe), 125 DEVMETHOD(device_attach, mxge_attach), 126 DEVMETHOD(device_detach, mxge_detach), 127 DEVMETHOD(device_shutdown, mxge_shutdown), 128 {0, 0} 129}; 130 131static driver_t mxge_driver = 132{ 133 "mxge", 134 mxge_methods, 135 sizeof(mxge_softc_t), 136}; 137 138static devclass_t mxge_devclass; 139 140/* Declare ourselves to be a child of the PCI bus.*/ 141DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 142MODULE_DEPEND(mxge, firmware, 1, 1, 1); 143MODULE_DEPEND(mxge, zlib, 1, 1, 1); 144 145static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 146static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 147static int mxge_close(mxge_softc_t *sc, int down); 148static int mxge_open(mxge_softc_t *sc); 149static void mxge_tick(void *arg); 150 151static int 152mxge_probe(device_t dev) 153{ 154 int rev; 155 156 157 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 158 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 159 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 160 rev = pci_get_revid(dev); 161 switch (rev) { 162 case MXGE_PCI_REV_Z8E: 163 device_set_desc(dev, "Myri10G-PCIE-8A"); 164 break; 165 case MXGE_PCI_REV_Z8ES: 166 device_set_desc(dev, "Myri10G-PCIE-8B"); 167 break; 168 default: 169 device_set_desc(dev, "Myri10G-PCIE-8??"); 170 device_printf(dev, "Unrecognized rev %d NIC\n", 171 rev); 172 break; 173 } 174 return 0; 175 } 176 return ENXIO; 177} 178 179static void 180mxge_enable_wc(mxge_softc_t *sc) 181{ 182#if defined(__i386) || defined(__amd64) 183 vm_offset_t len; 184 int err; 185 186 sc->wc = 1; 187 len = rman_get_size(sc->mem_res); 188 err = pmap_change_attr((vm_offset_t) sc->sram, 189 len, PAT_WRITE_COMBINING); 190 if (err != 0) { 191 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 192 err); 193 sc->wc = 0; 194 } 195#endif 196} 197 198 199/* callback to get our DMA address */ 200static void 201mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 202 int error) 203{ 204 if (error == 0) { 205 *(bus_addr_t *) arg = segs->ds_addr; 206 } 207} 208 209static int 210mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 211 bus_size_t alignment) 212{ 213 int err; 214 device_t dev = sc->dev; 215 bus_size_t boundary, maxsegsize; 216 217 if (bytes > 4096 && alignment == 4096) { 218 boundary = 0; 219 maxsegsize = bytes; 220 } else { 221 boundary = 4096; 222 maxsegsize = 4096; 223 } 224 225 /* allocate DMAable memory tags */ 226 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 227 alignment, /* alignment */ 228 boundary, /* boundary */ 229 BUS_SPACE_MAXADDR, /* low */ 230 BUS_SPACE_MAXADDR, /* high */ 231 NULL, NULL, /* filter */ 232 bytes, /* maxsize */ 233 1, /* num segs */ 234 maxsegsize, /* maxsegsize */ 235 BUS_DMA_COHERENT, /* flags */ 236 NULL, NULL, /* lock */ 237 &dma->dmat); /* tag */ 238 if (err != 0) { 239 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 240 return err; 241 } 242 243 /* allocate DMAable memory & map */ 244 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 245 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 246 | BUS_DMA_ZERO), &dma->map); 247 if (err != 0) { 248 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 249 goto abort_with_dmat; 250 } 251 252 /* load the memory */ 253 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 254 mxge_dmamap_callback, 255 (void *)&dma->bus_addr, 0); 256 if (err != 0) { 257 device_printf(dev, "couldn't load map (err = %d)\n", err); 258 goto abort_with_mem; 259 } 260 return 0; 261 262abort_with_mem: 263 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 264abort_with_dmat: 265 (void)bus_dma_tag_destroy(dma->dmat); 266 return err; 267} 268 269 270static void 271mxge_dma_free(mxge_dma_t *dma) 272{ 273 bus_dmamap_unload(dma->dmat, dma->map); 274 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 275 (void)bus_dma_tag_destroy(dma->dmat); 276} 277 278/* 279 * The eeprom strings on the lanaiX have the format 280 * SN=x\0 281 * MAC=x:x:x:x:x:x\0 282 * PC=text\0 283 */ 284 285static int 286mxge_parse_strings(mxge_softc_t *sc) 287{ 288#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 289 290 char *ptr, *limit; 291 int i, found_mac; 292 293 ptr = sc->eeprom_strings; 294 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 295 found_mac = 0; 296 while (ptr < limit && *ptr != '\0') { 297 if (memcmp(ptr, "MAC=", 4) == 0) { 298 ptr += 1; 299 sc->mac_addr_string = ptr; 300 for (i = 0; i < 6; i++) { 301 ptr += 3; 302 if ((ptr + 2) > limit) 303 goto abort; 304 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 305 found_mac = 1; 306 } 307 } else if (memcmp(ptr, "PC=", 3) == 0) { 308 ptr += 3; 309 strncpy(sc->product_code_string, ptr, 310 sizeof (sc->product_code_string) - 1); 311 } else if (memcmp(ptr, "SN=", 3) == 0) { 312 ptr += 3; 313 strncpy(sc->serial_number_string, ptr, 314 sizeof (sc->serial_number_string) - 1); 315 } 316 MXGE_NEXT_STRING(ptr); 317 } 318 319 if (found_mac) 320 return 0; 321 322 abort: 323 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 324 325 return ENXIO; 326} 327 328#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 329static void 330mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 331{ 332 uint32_t val; 333 unsigned long base, off; 334 char *va, *cfgptr; 335 device_t pdev, mcp55; 336 uint16_t vendor_id, device_id, word; 337 uintptr_t bus, slot, func, ivend, idev; 338 uint32_t *ptr32; 339 340 341 if (!mxge_nvidia_ecrc_enable) 342 return; 343 344 pdev = device_get_parent(device_get_parent(sc->dev)); 345 if (pdev == NULL) { 346 device_printf(sc->dev, "could not find parent?\n"); 347 return; 348 } 349 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 350 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 351 352 if (vendor_id != 0x10de) 353 return; 354 355 base = 0; 356 357 if (device_id == 0x005d) { 358 /* ck804, base address is magic */ 359 base = 0xe0000000UL; 360 } else if (device_id >= 0x0374 && device_id <= 0x378) { 361 /* mcp55, base address stored in chipset */ 362 mcp55 = pci_find_bsf(0, 0, 0); 363 if (mcp55 && 364 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 365 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 366 word = pci_read_config(mcp55, 0x90, 2); 367 base = ((unsigned long)word & 0x7ffeU) << 25; 368 } 369 } 370 if (!base) 371 return; 372 373 /* XXXX 374 Test below is commented because it is believed that doing 375 config read/write beyond 0xff will access the config space 376 for the next larger function. Uncomment this and remove 377 the hacky pmap_mapdev() way of accessing config space when 378 FreeBSD grows support for extended pcie config space access 379 */ 380#if 0 381 /* See if we can, by some miracle, access the extended 382 config space */ 383 val = pci_read_config(pdev, 0x178, 4); 384 if (val != 0xffffffff) { 385 val |= 0x40; 386 pci_write_config(pdev, 0x178, val, 4); 387 return; 388 } 389#endif 390 /* Rather than using normal pci config space writes, we must 391 * map the Nvidia config space ourselves. This is because on 392 * opteron/nvidia class machine the 0xe000000 mapping is 393 * handled by the nvidia chipset, that means the internal PCI 394 * device (the on-chip northbridge), or the amd-8131 bridge 395 * and things behind them are not visible by this method. 396 */ 397 398 BUS_READ_IVAR(device_get_parent(pdev), pdev, 399 PCI_IVAR_BUS, &bus); 400 BUS_READ_IVAR(device_get_parent(pdev), pdev, 401 PCI_IVAR_SLOT, &slot); 402 BUS_READ_IVAR(device_get_parent(pdev), pdev, 403 PCI_IVAR_FUNCTION, &func); 404 BUS_READ_IVAR(device_get_parent(pdev), pdev, 405 PCI_IVAR_VENDOR, &ivend); 406 BUS_READ_IVAR(device_get_parent(pdev), pdev, 407 PCI_IVAR_DEVICE, &idev); 408 409 off = base 410 + 0x00100000UL * (unsigned long)bus 411 + 0x00001000UL * (unsigned long)(func 412 + 8 * slot); 413 414 /* map it into the kernel */ 415 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 416 417 418 if (va == NULL) { 419 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 420 return; 421 } 422 /* get a pointer to the config space mapped into the kernel */ 423 cfgptr = va + (off & PAGE_MASK); 424 425 /* make sure that we can really access it */ 426 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 427 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 428 if (! (vendor_id == ivend && device_id == idev)) { 429 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 430 vendor_id, device_id); 431 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 432 return; 433 } 434 435 ptr32 = (uint32_t*)(cfgptr + 0x178); 436 val = *ptr32; 437 438 if (val == 0xffffffff) { 439 device_printf(sc->dev, "extended mapping failed\n"); 440 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 441 return; 442 } 443 *ptr32 = val | 0x40; 444 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 445 if (mxge_verbose) 446 device_printf(sc->dev, 447 "Enabled ECRC on upstream Nvidia bridge " 448 "at %d:%d:%d\n", 449 (int)bus, (int)slot, (int)func); 450 return; 451} 452#else 453static void 454mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 455{ 456 device_printf(sc->dev, 457 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 458 return; 459} 460#endif 461 462 463static int 464mxge_dma_test(mxge_softc_t *sc, int test_type) 465{ 466 mxge_cmd_t cmd; 467 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 468 int status; 469 uint32_t len; 470 char *test = " "; 471 472 473 /* Run a small DMA test. 474 * The magic multipliers to the length tell the firmware 475 * to do DMA read, write, or read+write tests. The 476 * results are returned in cmd.data0. The upper 16 477 * bits of the return is the number of transfers completed. 478 * The lower 16 bits is the time in 0.5us ticks that the 479 * transfers took to complete. 480 */ 481 482 len = sc->tx_boundary; 483 484 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 485 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 486 cmd.data2 = len * 0x10000; 487 status = mxge_send_cmd(sc, test_type, &cmd); 488 if (status != 0) { 489 test = "read"; 490 goto abort; 491 } 492 sc->read_dma = ((cmd.data0>>16) * len * 2) / 493 (cmd.data0 & 0xffff); 494 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 495 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 496 cmd.data2 = len * 0x1; 497 status = mxge_send_cmd(sc, test_type, &cmd); 498 if (status != 0) { 499 test = "write"; 500 goto abort; 501 } 502 sc->write_dma = ((cmd.data0>>16) * len * 2) / 503 (cmd.data0 & 0xffff); 504 505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 507 cmd.data2 = len * 0x10001; 508 status = mxge_send_cmd(sc, test_type, &cmd); 509 if (status != 0) { 510 test = "read/write"; 511 goto abort; 512 } 513 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 514 (cmd.data0 & 0xffff); 515 516abort: 517 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 518 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 519 test, status); 520 521 return status; 522} 523 524/* 525 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 526 * when the PCI-E Completion packets are aligned on an 8-byte 527 * boundary. Some PCI-E chip sets always align Completion packets; on 528 * the ones that do not, the alignment can be enforced by enabling 529 * ECRC generation (if supported). 530 * 531 * When PCI-E Completion packets are not aligned, it is actually more 532 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 533 * 534 * If the driver can neither enable ECRC nor verify that it has 535 * already been enabled, then it must use a firmware image which works 536 * around unaligned completion packets (ethp_z8e.dat), and it should 537 * also ensure that it never gives the device a Read-DMA which is 538 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 539 * enabled, then the driver should use the aligned (eth_z8e.dat) 540 * firmware image, and set tx_boundary to 4KB. 541 */ 542 543static int 544mxge_firmware_probe(mxge_softc_t *sc) 545{ 546 device_t dev = sc->dev; 547 int reg, status; 548 uint16_t pectl; 549 550 sc->tx_boundary = 4096; 551 /* 552 * Verify the max read request size was set to 4KB 553 * before trying the test with 4KB. 554 */ 555 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 556 pectl = pci_read_config(dev, reg + 0x8, 2); 557 if ((pectl & (5 << 12)) != (5 << 12)) { 558 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 559 pectl); 560 sc->tx_boundary = 2048; 561 } 562 } 563 564 /* 565 * load the optimized firmware (which assumes aligned PCIe 566 * completions) in order to see if it works on this host. 567 */ 568 sc->fw_name = mxge_fw_aligned; 569 status = mxge_load_firmware(sc, 1); 570 if (status != 0) { 571 return status; 572 } 573 574 /* 575 * Enable ECRC if possible 576 */ 577 mxge_enable_nvidia_ecrc(sc); 578 579 /* 580 * Run a DMA test which watches for unaligned completions and 581 * aborts on the first one seen. 582 */ 583 584 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 585 if (status == 0) 586 return 0; /* keep the aligned firmware */ 587 588 if (status != E2BIG) 589 device_printf(dev, "DMA test failed: %d\n", status); 590 if (status == ENOSYS) 591 device_printf(dev, "Falling back to ethp! " 592 "Please install up to date fw\n"); 593 return status; 594} 595 596static int 597mxge_select_firmware(mxge_softc_t *sc) 598{ 599 int aligned = 0; 600 int force_firmware = mxge_force_firmware; 601 602 if (sc->throttle) 603 force_firmware = sc->throttle; 604 605 if (force_firmware != 0) { 606 if (force_firmware == 1) 607 aligned = 1; 608 else 609 aligned = 0; 610 if (mxge_verbose) 611 device_printf(sc->dev, 612 "Assuming %s completions (forced)\n", 613 aligned ? "aligned" : "unaligned"); 614 goto abort; 615 } 616 617 /* if the PCIe link width is 4 or less, we can use the aligned 618 firmware and skip any checks */ 619 if (sc->link_width != 0 && sc->link_width <= 4) { 620 device_printf(sc->dev, 621 "PCIe x%d Link, expect reduced performance\n", 622 sc->link_width); 623 aligned = 1; 624 goto abort; 625 } 626 627 if (0 == mxge_firmware_probe(sc)) 628 return 0; 629 630abort: 631 if (aligned) { 632 sc->fw_name = mxge_fw_aligned; 633 sc->tx_boundary = 4096; 634 } else { 635 sc->fw_name = mxge_fw_unaligned; 636 sc->tx_boundary = 2048; 637 } 638 return (mxge_load_firmware(sc, 0)); 639} 640 641union qualhack 642{ 643 const char *ro_char; 644 char *rw_char; 645}; 646 647static int 648mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 649{ 650 651 652 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 653 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 654 be32toh(hdr->mcp_type)); 655 return EIO; 656 } 657 658 /* save firmware version for sysctl */ 659 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 660 if (mxge_verbose) 661 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 662 663 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 664 &sc->fw_ver_minor, &sc->fw_ver_tiny); 665 666 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 667 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 668 device_printf(sc->dev, "Found firmware version %s\n", 669 sc->fw_version); 670 device_printf(sc->dev, "Driver needs %d.%d\n", 671 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 672 return EINVAL; 673 } 674 return 0; 675 676} 677 678static void * 679z_alloc(void *nil, u_int items, u_int size) 680{ 681 void *ptr; 682 683 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 684 return ptr; 685} 686 687static void 688z_free(void *nil, void *ptr) 689{ 690 free(ptr, M_TEMP); 691} 692 693 694static int 695mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 696{ 697 z_stream zs; 698 char *inflate_buffer; 699 const struct firmware *fw; 700 const mcp_gen_header_t *hdr; 701 unsigned hdr_offset; 702 int status; 703 unsigned int i; 704 char dummy; 705 size_t fw_len; 706 707 fw = firmware_get(sc->fw_name); 708 if (fw == NULL) { 709 device_printf(sc->dev, "Could not find firmware image %s\n", 710 sc->fw_name); 711 return ENOENT; 712 } 713 714 715 716 /* setup zlib and decompress f/w */ 717 bzero(&zs, sizeof (zs)); 718 zs.zalloc = z_alloc; 719 zs.zfree = z_free; 720 status = inflateInit(&zs); 721 if (status != Z_OK) { 722 status = EIO; 723 goto abort_with_fw; 724 } 725 726 /* the uncompressed size is stored as the firmware version, 727 which would otherwise go unused */ 728 fw_len = (size_t) fw->version; 729 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 730 if (inflate_buffer == NULL) 731 goto abort_with_zs; 732 zs.avail_in = fw->datasize; 733 zs.next_in = __DECONST(char *, fw->data); 734 zs.avail_out = fw_len; 735 zs.next_out = inflate_buffer; 736 status = inflate(&zs, Z_FINISH); 737 if (status != Z_STREAM_END) { 738 device_printf(sc->dev, "zlib %d\n", status); 739 status = EIO; 740 goto abort_with_buffer; 741 } 742 743 /* check id */ 744 hdr_offset = htobe32(*(const uint32_t *) 745 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 746 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 747 device_printf(sc->dev, "Bad firmware file"); 748 status = EIO; 749 goto abort_with_buffer; 750 } 751 hdr = (const void*)(inflate_buffer + hdr_offset); 752 753 status = mxge_validate_firmware(sc, hdr); 754 if (status != 0) 755 goto abort_with_buffer; 756 757 /* Copy the inflated firmware to NIC SRAM. */ 758 for (i = 0; i < fw_len; i += 256) { 759 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 760 inflate_buffer + i, 761 min(256U, (unsigned)(fw_len - i))); 762 wmb(); 763 dummy = *sc->sram; 764 wmb(); 765 } 766 767 *limit = fw_len; 768 status = 0; 769abort_with_buffer: 770 free(inflate_buffer, M_TEMP); 771abort_with_zs: 772 inflateEnd(&zs); 773abort_with_fw: 774 firmware_put(fw, FIRMWARE_UNLOAD); 775 return status; 776} 777 778/* 779 * Enable or disable periodic RDMAs from the host to make certain 780 * chipsets resend dropped PCIe messages 781 */ 782 783static void 784mxge_dummy_rdma(mxge_softc_t *sc, int enable) 785{ 786 char buf_bytes[72]; 787 volatile uint32_t *confirm; 788 volatile char *submit; 789 uint32_t *buf, dma_low, dma_high; 790 int i; 791 792 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 793 794 /* clear confirmation addr */ 795 confirm = (volatile uint32_t *)sc->cmd; 796 *confirm = 0; 797 wmb(); 798 799 /* send an rdma command to the PCIe engine, and wait for the 800 response in the confirmation address. The firmware should 801 write a -1 there to indicate it is alive and well 802 */ 803 804 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 805 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 806 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 807 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 808 buf[2] = htobe32(0xffffffff); /* confirm data */ 809 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 810 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 811 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 812 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 813 buf[5] = htobe32(enable); /* enable? */ 814 815 816 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 817 818 mxge_pio_copy(submit, buf, 64); 819 wmb(); 820 DELAY(1000); 821 wmb(); 822 i = 0; 823 while (*confirm != 0xffffffff && i < 20) { 824 DELAY(1000); 825 i++; 826 } 827 if (*confirm != 0xffffffff) { 828 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 829 (enable ? "enable" : "disable"), confirm, 830 *confirm); 831 } 832 return; 833} 834 835static int 836mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 837{ 838 mcp_cmd_t *buf; 839 char buf_bytes[sizeof(*buf) + 8]; 840 volatile mcp_cmd_response_t *response = sc->cmd; 841 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 842 uint32_t dma_low, dma_high; 843 int err, sleep_total = 0; 844 845 /* ensure buf is aligned to 8 bytes */ 846 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 847 848 buf->data0 = htobe32(data->data0); 849 buf->data1 = htobe32(data->data1); 850 buf->data2 = htobe32(data->data2); 851 buf->cmd = htobe32(cmd); 852 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 853 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 854 855 buf->response_addr.low = htobe32(dma_low); 856 buf->response_addr.high = htobe32(dma_high); 857 mtx_lock(&sc->cmd_mtx); 858 response->result = 0xffffffff; 859 wmb(); 860 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 861 862 /* wait up to 20ms */ 863 err = EAGAIN; 864 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 865 bus_dmamap_sync(sc->cmd_dma.dmat, 866 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 867 wmb(); 868 switch (be32toh(response->result)) { 869 case 0: 870 data->data0 = be32toh(response->data); 871 err = 0; 872 break; 873 case 0xffffffff: 874 DELAY(1000); 875 break; 876 case MXGEFW_CMD_UNKNOWN: 877 err = ENOSYS; 878 break; 879 case MXGEFW_CMD_ERROR_UNALIGNED: 880 err = E2BIG; 881 break; 882 case MXGEFW_CMD_ERROR_BUSY: 883 err = EBUSY; 884 break; 885 default: 886 device_printf(sc->dev, 887 "mxge: command %d " 888 "failed, result = %d\n", 889 cmd, be32toh(response->result)); 890 err = ENXIO; 891 break; 892 } 893 if (err != EAGAIN) 894 break; 895 } 896 if (err == EAGAIN) 897 device_printf(sc->dev, "mxge: command %d timed out" 898 "result = %d\n", 899 cmd, be32toh(response->result)); 900 mtx_unlock(&sc->cmd_mtx); 901 return err; 902} 903 904static int 905mxge_adopt_running_firmware(mxge_softc_t *sc) 906{ 907 struct mcp_gen_header *hdr; 908 const size_t bytes = sizeof (struct mcp_gen_header); 909 size_t hdr_offset; 910 int status; 911 912 /* find running firmware header */ 913 hdr_offset = htobe32(*(volatile uint32_t *) 914 (sc->sram + MCP_HEADER_PTR_OFFSET)); 915 916 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 917 device_printf(sc->dev, 918 "Running firmware has bad header offset (%d)\n", 919 (int)hdr_offset); 920 return EIO; 921 } 922 923 /* copy header of running firmware from SRAM to host memory to 924 * validate firmware */ 925 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 926 if (hdr == NULL) { 927 device_printf(sc->dev, "could not malloc firmware hdr\n"); 928 return ENOMEM; 929 } 930 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 931 rman_get_bushandle(sc->mem_res), 932 hdr_offset, (char *)hdr, bytes); 933 status = mxge_validate_firmware(sc, hdr); 934 free(hdr, M_DEVBUF); 935 936 /* 937 * check to see if adopted firmware has bug where adopting 938 * it will cause broadcasts to be filtered unless the NIC 939 * is kept in ALLMULTI mode 940 */ 941 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 942 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 943 sc->adopted_rx_filter_bug = 1; 944 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 945 "working around rx filter bug\n", 946 sc->fw_ver_major, sc->fw_ver_minor, 947 sc->fw_ver_tiny); 948 } 949 950 return status; 951} 952 953 954static int 955mxge_load_firmware(mxge_softc_t *sc, int adopt) 956{ 957 volatile uint32_t *confirm; 958 volatile char *submit; 959 char buf_bytes[72]; 960 uint32_t *buf, size, dma_low, dma_high; 961 int status, i; 962 963 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 964 965 size = sc->sram_size; 966 status = mxge_load_firmware_helper(sc, &size); 967 if (status) { 968 if (!adopt) 969 return status; 970 /* Try to use the currently running firmware, if 971 it is new enough */ 972 status = mxge_adopt_running_firmware(sc); 973 if (status) { 974 device_printf(sc->dev, 975 "failed to adopt running firmware\n"); 976 return status; 977 } 978 device_printf(sc->dev, 979 "Successfully adopted running firmware\n"); 980 if (sc->tx_boundary == 4096) { 981 device_printf(sc->dev, 982 "Using firmware currently running on NIC" 983 ". For optimal\n"); 984 device_printf(sc->dev, 985 "performance consider loading optimized " 986 "firmware\n"); 987 } 988 sc->fw_name = mxge_fw_unaligned; 989 sc->tx_boundary = 2048; 990 return 0; 991 } 992 /* clear confirmation addr */ 993 confirm = (volatile uint32_t *)sc->cmd; 994 *confirm = 0; 995 wmb(); 996 /* send a reload command to the bootstrap MCP, and wait for the 997 response in the confirmation address. The firmware should 998 write a -1 there to indicate it is alive and well 999 */ 1000 1001 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1002 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1003 1004 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1005 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1006 buf[2] = htobe32(0xffffffff); /* confirm data */ 1007 1008 /* FIX: All newest firmware should un-protect the bottom of 1009 the sram before handoff. However, the very first interfaces 1010 do not. Therefore the handoff copy must skip the first 8 bytes 1011 */ 1012 /* where the code starts*/ 1013 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1014 buf[4] = htobe32(size - 8); /* length of code */ 1015 buf[5] = htobe32(8); /* where to copy to */ 1016 buf[6] = htobe32(0); /* where to jump to */ 1017 1018 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1019 mxge_pio_copy(submit, buf, 64); 1020 wmb(); 1021 DELAY(1000); 1022 wmb(); 1023 i = 0; 1024 while (*confirm != 0xffffffff && i < 20) { 1025 DELAY(1000*10); 1026 i++; 1027 bus_dmamap_sync(sc->cmd_dma.dmat, 1028 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1029 } 1030 if (*confirm != 0xffffffff) { 1031 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1032 confirm, *confirm); 1033 1034 return ENXIO; 1035 } 1036 return 0; 1037} 1038 1039static int 1040mxge_update_mac_address(mxge_softc_t *sc) 1041{ 1042 mxge_cmd_t cmd; 1043 uint8_t *addr = sc->mac_addr; 1044 int status; 1045 1046 1047 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1048 | (addr[2] << 8) | addr[3]); 1049 1050 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1051 1052 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1053 return status; 1054} 1055 1056static int 1057mxge_change_pause(mxge_softc_t *sc, int pause) 1058{ 1059 mxge_cmd_t cmd; 1060 int status; 1061 1062 if (pause) 1063 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1064 &cmd); 1065 else 1066 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1067 &cmd); 1068 1069 if (status) { 1070 device_printf(sc->dev, "Failed to set flow control mode\n"); 1071 return ENXIO; 1072 } 1073 sc->pause = pause; 1074 return 0; 1075} 1076 1077static void 1078mxge_change_promisc(mxge_softc_t *sc, int promisc) 1079{ 1080 mxge_cmd_t cmd; 1081 int status; 1082 1083 if (mxge_always_promisc) 1084 promisc = 1; 1085 1086 if (promisc) 1087 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1088 &cmd); 1089 else 1090 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1091 &cmd); 1092 1093 if (status) { 1094 device_printf(sc->dev, "Failed to set promisc mode\n"); 1095 } 1096} 1097 1098static void 1099mxge_set_multicast_list(mxge_softc_t *sc) 1100{ 1101 mxge_cmd_t cmd; 1102 struct ifmultiaddr *ifma; 1103 struct ifnet *ifp = sc->ifp; 1104 int err; 1105 1106 /* This firmware is known to not support multicast */ 1107 if (!sc->fw_multicast_support) 1108 return; 1109 1110 /* Disable multicast filtering while we play with the lists*/ 1111 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1112 if (err != 0) { 1113 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1114 " error status: %d\n", err); 1115 return; 1116 } 1117 1118 if (sc->adopted_rx_filter_bug) 1119 return; 1120 1121 if (ifp->if_flags & IFF_ALLMULTI) 1122 /* request to disable multicast filtering, so quit here */ 1123 return; 1124 1125 /* Flush all the filters */ 1126 1127 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1128 if (err != 0) { 1129 device_printf(sc->dev, 1130 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1131 ", error status: %d\n", err); 1132 return; 1133 } 1134 1135 /* Walk the multicast list, and add each address */ 1136 1137 if_maddr_rlock(ifp); 1138 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1139 if (ifma->ifma_addr->sa_family != AF_LINK) 1140 continue; 1141 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1142 &cmd.data0, 4); 1143 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1144 &cmd.data1, 2); 1145 cmd.data0 = htonl(cmd.data0); 1146 cmd.data1 = htonl(cmd.data1); 1147 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1148 if (err != 0) { 1149 device_printf(sc->dev, "Failed " 1150 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1151 "%d\t", err); 1152 /* abort, leaving multicast filtering off */ 1153 if_maddr_runlock(ifp); 1154 return; 1155 } 1156 } 1157 if_maddr_runlock(ifp); 1158 /* Enable multicast filtering */ 1159 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1160 if (err != 0) { 1161 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1162 ", error status: %d\n", err); 1163 } 1164} 1165 1166static int 1167mxge_max_mtu(mxge_softc_t *sc) 1168{ 1169 mxge_cmd_t cmd; 1170 int status; 1171 1172 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1173 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1174 1175 /* try to set nbufs to see if it we can 1176 use virtually contiguous jumbos */ 1177 cmd.data0 = 0; 1178 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1179 &cmd); 1180 if (status == 0) 1181 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1182 1183 /* otherwise, we're limited to MJUMPAGESIZE */ 1184 return MJUMPAGESIZE - MXGEFW_PAD; 1185} 1186 1187static int 1188mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1189{ 1190 struct mxge_slice_state *ss; 1191 mxge_rx_done_t *rx_done; 1192 volatile uint32_t *irq_claim; 1193 mxge_cmd_t cmd; 1194 int slice, status; 1195 1196 /* try to send a reset command to the card to see if it 1197 is alive */ 1198 memset(&cmd, 0, sizeof (cmd)); 1199 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1200 if (status != 0) { 1201 device_printf(sc->dev, "failed reset\n"); 1202 return ENXIO; 1203 } 1204 1205 mxge_dummy_rdma(sc, 1); 1206 1207 1208 /* set the intrq size */ 1209 cmd.data0 = sc->rx_ring_size; 1210 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1211 1212 /* 1213 * Even though we already know how many slices are supported 1214 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1215 * has magic side effects, and must be called after a reset. 1216 * It must be called prior to calling any RSS related cmds, 1217 * including assigning an interrupt queue for anything but 1218 * slice 0. It must also be called *after* 1219 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1220 * the firmware to compute offsets. 1221 */ 1222 1223 if (sc->num_slices > 1) { 1224 /* ask the maximum number of slices it supports */ 1225 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1226 &cmd); 1227 if (status != 0) { 1228 device_printf(sc->dev, 1229 "failed to get number of slices\n"); 1230 return status; 1231 } 1232 /* 1233 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1234 * to setting up the interrupt queue DMA 1235 */ 1236 cmd.data0 = sc->num_slices; 1237 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1238#ifdef IFNET_BUF_RING 1239 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1240#endif 1241 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1242 &cmd); 1243 if (status != 0) { 1244 device_printf(sc->dev, 1245 "failed to set number of slices\n"); 1246 return status; 1247 } 1248 } 1249 1250 1251 if (interrupts_setup) { 1252 /* Now exchange information about interrupts */ 1253 for (slice = 0; slice < sc->num_slices; slice++) { 1254 rx_done = &sc->ss[slice].rx_done; 1255 memset(rx_done->entry, 0, sc->rx_ring_size); 1256 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1257 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1258 cmd.data2 = slice; 1259 status |= mxge_send_cmd(sc, 1260 MXGEFW_CMD_SET_INTRQ_DMA, 1261 &cmd); 1262 } 1263 } 1264 1265 status |= mxge_send_cmd(sc, 1266 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1267 1268 1269 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1270 1271 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1272 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1273 1274 1275 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1276 &cmd); 1277 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1278 if (status != 0) { 1279 device_printf(sc->dev, "failed set interrupt parameters\n"); 1280 return status; 1281 } 1282 1283 1284 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1285 1286 1287 /* run a DMA benchmark */ 1288 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1289 1290 for (slice = 0; slice < sc->num_slices; slice++) { 1291 ss = &sc->ss[slice]; 1292 1293 ss->irq_claim = irq_claim + (2 * slice); 1294 /* reset mcp/driver shared state back to 0 */ 1295 ss->rx_done.idx = 0; 1296 ss->rx_done.cnt = 0; 1297 ss->tx.req = 0; 1298 ss->tx.done = 0; 1299 ss->tx.pkt_done = 0; 1300 ss->tx.queue_active = 0; 1301 ss->tx.activate = 0; 1302 ss->tx.deactivate = 0; 1303 ss->tx.wake = 0; 1304 ss->tx.defrag = 0; 1305 ss->tx.stall = 0; 1306 ss->rx_big.cnt = 0; 1307 ss->rx_small.cnt = 0; 1308 ss->lro_bad_csum = 0; 1309 ss->lro_queued = 0; 1310 ss->lro_flushed = 0; 1311 if (ss->fw_stats != NULL) { 1312 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1313 } 1314 } 1315 sc->rdma_tags_available = 15; 1316 status = mxge_update_mac_address(sc); 1317 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1318 mxge_change_pause(sc, sc->pause); 1319 mxge_set_multicast_list(sc); 1320 if (sc->throttle) { 1321 cmd.data0 = sc->throttle; 1322 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1323 &cmd)) { 1324 device_printf(sc->dev, 1325 "can't enable throttle\n"); 1326 } 1327 } 1328 return status; 1329} 1330 1331static int 1332mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1333{ 1334 mxge_cmd_t cmd; 1335 mxge_softc_t *sc; 1336 int err; 1337 unsigned int throttle; 1338 1339 sc = arg1; 1340 throttle = sc->throttle; 1341 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1342 if (err != 0) { 1343 return err; 1344 } 1345 1346 if (throttle == sc->throttle) 1347 return 0; 1348 1349 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1350 return EINVAL; 1351 1352 mtx_lock(&sc->driver_mtx); 1353 cmd.data0 = throttle; 1354 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1355 if (err == 0) 1356 sc->throttle = throttle; 1357 mtx_unlock(&sc->driver_mtx); 1358 return err; 1359} 1360 1361static int 1362mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1363{ 1364 mxge_softc_t *sc; 1365 unsigned int intr_coal_delay; 1366 int err; 1367 1368 sc = arg1; 1369 intr_coal_delay = sc->intr_coal_delay; 1370 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1371 if (err != 0) { 1372 return err; 1373 } 1374 if (intr_coal_delay == sc->intr_coal_delay) 1375 return 0; 1376 1377 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1378 return EINVAL; 1379 1380 mtx_lock(&sc->driver_mtx); 1381 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1382 sc->intr_coal_delay = intr_coal_delay; 1383 1384 mtx_unlock(&sc->driver_mtx); 1385 return err; 1386} 1387 1388static int 1389mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1390{ 1391 mxge_softc_t *sc; 1392 unsigned int enabled; 1393 int err; 1394 1395 sc = arg1; 1396 enabled = sc->pause; 1397 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1398 if (err != 0) { 1399 return err; 1400 } 1401 if (enabled == sc->pause) 1402 return 0; 1403 1404 mtx_lock(&sc->driver_mtx); 1405 err = mxge_change_pause(sc, enabled); 1406 mtx_unlock(&sc->driver_mtx); 1407 return err; 1408} 1409 1410static int 1411mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1412{ 1413 struct ifnet *ifp; 1414 int err = 0; 1415 1416 ifp = sc->ifp; 1417 if (lro_cnt == 0) 1418 ifp->if_capenable &= ~IFCAP_LRO; 1419 else 1420 ifp->if_capenable |= IFCAP_LRO; 1421 sc->lro_cnt = lro_cnt; 1422 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1423 mxge_close(sc, 0); 1424 err = mxge_open(sc); 1425 } 1426 return err; 1427} 1428 1429static int 1430mxge_change_lro(SYSCTL_HANDLER_ARGS) 1431{ 1432 mxge_softc_t *sc; 1433 unsigned int lro_cnt; 1434 int err; 1435 1436 sc = arg1; 1437 lro_cnt = sc->lro_cnt; 1438 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1439 if (err != 0) 1440 return err; 1441 1442 if (lro_cnt == sc->lro_cnt) 1443 return 0; 1444 1445 if (lro_cnt > 128) 1446 return EINVAL; 1447 1448 mtx_lock(&sc->driver_mtx); 1449 err = mxge_change_lro_locked(sc, lro_cnt); 1450 mtx_unlock(&sc->driver_mtx); 1451 return err; 1452} 1453 1454static int 1455mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1456{ 1457 int err; 1458 1459 if (arg1 == NULL) 1460 return EFAULT; 1461 arg2 = be32toh(*(int *)arg1); 1462 arg1 = NULL; 1463 err = sysctl_handle_int(oidp, arg1, arg2, req); 1464 1465 return err; 1466} 1467 1468static void 1469mxge_rem_sysctls(mxge_softc_t *sc) 1470{ 1471 struct mxge_slice_state *ss; 1472 int slice; 1473 1474 if (sc->slice_sysctl_tree == NULL) 1475 return; 1476 1477 for (slice = 0; slice < sc->num_slices; slice++) { 1478 ss = &sc->ss[slice]; 1479 if (ss == NULL || ss->sysctl_tree == NULL) 1480 continue; 1481 sysctl_ctx_free(&ss->sysctl_ctx); 1482 ss->sysctl_tree = NULL; 1483 } 1484 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1485 sc->slice_sysctl_tree = NULL; 1486} 1487 1488static void 1489mxge_add_sysctls(mxge_softc_t *sc) 1490{ 1491 struct sysctl_ctx_list *ctx; 1492 struct sysctl_oid_list *children; 1493 mcp_irq_data_t *fw; 1494 struct mxge_slice_state *ss; 1495 int slice; 1496 char slice_num[8]; 1497 1498 ctx = device_get_sysctl_ctx(sc->dev); 1499 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1500 fw = sc->ss[0].fw_stats; 1501 1502 /* random information */ 1503 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1504 "firmware_version", 1505 CTLFLAG_RD, &sc->fw_version, 1506 0, "firmware version"); 1507 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1508 "serial_number", 1509 CTLFLAG_RD, &sc->serial_number_string, 1510 0, "serial number"); 1511 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1512 "product_code", 1513 CTLFLAG_RD, &sc->product_code_string, 1514 0, "product_code"); 1515 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1516 "pcie_link_width", 1517 CTLFLAG_RD, &sc->link_width, 1518 0, "tx_boundary"); 1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1520 "tx_boundary", 1521 CTLFLAG_RD, &sc->tx_boundary, 1522 0, "tx_boundary"); 1523 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1524 "write_combine", 1525 CTLFLAG_RD, &sc->wc, 1526 0, "write combining PIO?"); 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1528 "read_dma_MBs", 1529 CTLFLAG_RD, &sc->read_dma, 1530 0, "DMA Read speed in MB/s"); 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1532 "write_dma_MBs", 1533 CTLFLAG_RD, &sc->write_dma, 1534 0, "DMA Write speed in MB/s"); 1535 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1536 "read_write_dma_MBs", 1537 CTLFLAG_RD, &sc->read_write_dma, 1538 0, "DMA concurrent Read/Write speed in MB/s"); 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1540 "watchdog_resets", 1541 CTLFLAG_RD, &sc->watchdog_resets, 1542 0, "Number of times NIC was reset"); 1543 1544 1545 /* performance related tunables */ 1546 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1547 "intr_coal_delay", 1548 CTLTYPE_INT|CTLFLAG_RW, sc, 1549 0, mxge_change_intr_coal, 1550 "I", "interrupt coalescing delay in usecs"); 1551 1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1553 "throttle", 1554 CTLTYPE_INT|CTLFLAG_RW, sc, 1555 0, mxge_change_throttle, 1556 "I", "transmit throttling"); 1557 1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1559 "flow_control_enabled", 1560 CTLTYPE_INT|CTLFLAG_RW, sc, 1561 0, mxge_change_flow_control, 1562 "I", "interrupt coalescing delay in usecs"); 1563 1564 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1565 "deassert_wait", 1566 CTLFLAG_RW, &mxge_deassert_wait, 1567 0, "Wait for IRQ line to go low in ihandler"); 1568 1569 /* stats block from firmware is in network byte order. 1570 Need to swap it */ 1571 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1572 "link_up", 1573 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1574 0, mxge_handle_be32, 1575 "I", "link up"); 1576 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1577 "rdma_tags_available", 1578 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1579 0, mxge_handle_be32, 1580 "I", "rdma_tags_available"); 1581 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1582 "dropped_bad_crc32", 1583 CTLTYPE_INT|CTLFLAG_RD, 1584 &fw->dropped_bad_crc32, 1585 0, mxge_handle_be32, 1586 "I", "dropped_bad_crc32"); 1587 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1588 "dropped_bad_phy", 1589 CTLTYPE_INT|CTLFLAG_RD, 1590 &fw->dropped_bad_phy, 1591 0, mxge_handle_be32, 1592 "I", "dropped_bad_phy"); 1593 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1594 "dropped_link_error_or_filtered", 1595 CTLTYPE_INT|CTLFLAG_RD, 1596 &fw->dropped_link_error_or_filtered, 1597 0, mxge_handle_be32, 1598 "I", "dropped_link_error_or_filtered"); 1599 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1600 "dropped_link_overflow", 1601 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1602 0, mxge_handle_be32, 1603 "I", "dropped_link_overflow"); 1604 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1605 "dropped_multicast_filtered", 1606 CTLTYPE_INT|CTLFLAG_RD, 1607 &fw->dropped_multicast_filtered, 1608 0, mxge_handle_be32, 1609 "I", "dropped_multicast_filtered"); 1610 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1611 "dropped_no_big_buffer", 1612 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1613 0, mxge_handle_be32, 1614 "I", "dropped_no_big_buffer"); 1615 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1616 "dropped_no_small_buffer", 1617 CTLTYPE_INT|CTLFLAG_RD, 1618 &fw->dropped_no_small_buffer, 1619 0, mxge_handle_be32, 1620 "I", "dropped_no_small_buffer"); 1621 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1622 "dropped_overrun", 1623 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1624 0, mxge_handle_be32, 1625 "I", "dropped_overrun"); 1626 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1627 "dropped_pause", 1628 CTLTYPE_INT|CTLFLAG_RD, 1629 &fw->dropped_pause, 1630 0, mxge_handle_be32, 1631 "I", "dropped_pause"); 1632 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1633 "dropped_runt", 1634 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1635 0, mxge_handle_be32, 1636 "I", "dropped_runt"); 1637 1638 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1639 "dropped_unicast_filtered", 1640 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1641 0, mxge_handle_be32, 1642 "I", "dropped_unicast_filtered"); 1643 1644 /* verbose printing? */ 1645 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1646 "verbose", 1647 CTLFLAG_RW, &mxge_verbose, 1648 0, "verbose printing"); 1649 1650 /* lro */ 1651 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1652 "lro_cnt", 1653 CTLTYPE_INT|CTLFLAG_RW, sc, 1654 0, mxge_change_lro, 1655 "I", "number of lro merge queues"); 1656 1657 1658 /* add counters exported for debugging from all slices */ 1659 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1660 sc->slice_sysctl_tree = 1661 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1662 "slice", CTLFLAG_RD, 0, ""); 1663 1664 for (slice = 0; slice < sc->num_slices; slice++) { 1665 ss = &sc->ss[slice]; 1666 sysctl_ctx_init(&ss->sysctl_ctx); 1667 ctx = &ss->sysctl_ctx; 1668 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1669 sprintf(slice_num, "%d", slice); 1670 ss->sysctl_tree = 1671 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1672 CTLFLAG_RD, 0, ""); 1673 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1674 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1675 "rx_small_cnt", 1676 CTLFLAG_RD, &ss->rx_small.cnt, 1677 0, "rx_small_cnt"); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "rx_big_cnt", 1680 CTLFLAG_RD, &ss->rx_big.cnt, 1681 0, "rx_small_cnt"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1684 0, "number of lro merge queues flushed"); 1685 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1688 0, "number of frames appended to lro merge" 1689 "queues"); 1690 1691#ifndef IFNET_BUF_RING 1692 /* only transmit from slice 0 for now */ 1693 if (slice > 0) 1694 continue; 1695#endif 1696 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1697 "tx_req", 1698 CTLFLAG_RD, &ss->tx.req, 1699 0, "tx_req"); 1700 1701 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1702 "tx_done", 1703 CTLFLAG_RD, &ss->tx.done, 1704 0, "tx_done"); 1705 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1706 "tx_pkt_done", 1707 CTLFLAG_RD, &ss->tx.pkt_done, 1708 0, "tx_done"); 1709 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1710 "tx_stall", 1711 CTLFLAG_RD, &ss->tx.stall, 1712 0, "tx_stall"); 1713 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1714 "tx_wake", 1715 CTLFLAG_RD, &ss->tx.wake, 1716 0, "tx_wake"); 1717 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1718 "tx_defrag", 1719 CTLFLAG_RD, &ss->tx.defrag, 1720 0, "tx_defrag"); 1721 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1722 "tx_queue_active", 1723 CTLFLAG_RD, &ss->tx.queue_active, 1724 0, "tx_queue_active"); 1725 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1726 "tx_activate", 1727 CTLFLAG_RD, &ss->tx.activate, 1728 0, "tx_activate"); 1729 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1730 "tx_deactivate", 1731 CTLFLAG_RD, &ss->tx.deactivate, 1732 0, "tx_deactivate"); 1733 } 1734} 1735 1736/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1737 backwards one at a time and handle ring wraps */ 1738 1739static inline void 1740mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1741 mcp_kreq_ether_send_t *src, int cnt) 1742{ 1743 int idx, starting_slot; 1744 starting_slot = tx->req; 1745 while (cnt > 1) { 1746 cnt--; 1747 idx = (starting_slot + cnt) & tx->mask; 1748 mxge_pio_copy(&tx->lanai[idx], 1749 &src[cnt], sizeof(*src)); 1750 wmb(); 1751 } 1752} 1753 1754/* 1755 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1756 * at most 32 bytes at a time, so as to avoid involving the software 1757 * pio handler in the nic. We re-write the first segment's flags 1758 * to mark them valid only after writing the entire chain 1759 */ 1760 1761static inline void 1762mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1763 int cnt) 1764{ 1765 int idx, i; 1766 uint32_t *src_ints; 1767 volatile uint32_t *dst_ints; 1768 mcp_kreq_ether_send_t *srcp; 1769 volatile mcp_kreq_ether_send_t *dstp, *dst; 1770 uint8_t last_flags; 1771 1772 idx = tx->req & tx->mask; 1773 1774 last_flags = src->flags; 1775 src->flags = 0; 1776 wmb(); 1777 dst = dstp = &tx->lanai[idx]; 1778 srcp = src; 1779 1780 if ((idx + cnt) < tx->mask) { 1781 for (i = 0; i < (cnt - 1); i += 2) { 1782 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1783 wmb(); /* force write every 32 bytes */ 1784 srcp += 2; 1785 dstp += 2; 1786 } 1787 } else { 1788 /* submit all but the first request, and ensure 1789 that it is submitted below */ 1790 mxge_submit_req_backwards(tx, src, cnt); 1791 i = 0; 1792 } 1793 if (i < cnt) { 1794 /* submit the first request */ 1795 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1796 wmb(); /* barrier before setting valid flag */ 1797 } 1798 1799 /* re-write the last 32-bits with the valid flags */ 1800 src->flags = last_flags; 1801 src_ints = (uint32_t *)src; 1802 src_ints+=3; 1803 dst_ints = (volatile uint32_t *)dst; 1804 dst_ints+=3; 1805 *dst_ints = *src_ints; 1806 tx->req += cnt; 1807 wmb(); 1808} 1809 1810#if IFCAP_TSO4 1811 1812static void 1813mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1814 int busdma_seg_cnt, int ip_off) 1815{ 1816 mxge_tx_ring_t *tx; 1817 mcp_kreq_ether_send_t *req; 1818 bus_dma_segment_t *seg; 1819 struct ip *ip; 1820 struct tcphdr *tcp; 1821 uint32_t low, high_swapped; 1822 int len, seglen, cum_len, cum_len_next; 1823 int next_is_first, chop, cnt, rdma_count, small; 1824 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1825 uint8_t flags, flags_next; 1826 static int once; 1827 1828 mss = m->m_pkthdr.tso_segsz; 1829 1830 /* negative cum_len signifies to the 1831 * send loop that we are still in the 1832 * header portion of the TSO packet. 1833 */ 1834 1835 /* ensure we have the ethernet, IP and TCP 1836 header together in the first mbuf, copy 1837 it to a scratch buffer if not */ 1838 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1839 m_copydata(m, 0, ip_off + sizeof (*ip), 1840 ss->scratch); 1841 ip = (struct ip *)(ss->scratch + ip_off); 1842 } else { 1843 ip = (struct ip *)(mtod(m, char *) + ip_off); 1844 } 1845 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1846 + sizeof (*tcp))) { 1847 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1848 + sizeof (*tcp), ss->scratch); 1849 ip = (struct ip *)(mtod(m, char *) + ip_off); 1850 } 1851 1852 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1853 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1854 1855 /* TSO implies checksum offload on this hardware */ 1856 cksum_offset = ip_off + (ip->ip_hl << 2); 1857 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1858 1859 1860 /* for TSO, pseudo_hdr_offset holds mss. 1861 * The firmware figures out where to put 1862 * the checksum by parsing the header. */ 1863 pseudo_hdr_offset = htobe16(mss); 1864 1865 tx = &ss->tx; 1866 req = tx->req_list; 1867 seg = tx->seg_list; 1868 cnt = 0; 1869 rdma_count = 0; 1870 /* "rdma_count" is the number of RDMAs belonging to the 1871 * current packet BEFORE the current send request. For 1872 * non-TSO packets, this is equal to "count". 1873 * For TSO packets, rdma_count needs to be reset 1874 * to 0 after a segment cut. 1875 * 1876 * The rdma_count field of the send request is 1877 * the number of RDMAs of the packet starting at 1878 * that request. For TSO send requests with one ore more cuts 1879 * in the middle, this is the number of RDMAs starting 1880 * after the last cut in the request. All previous 1881 * segments before the last cut implicitly have 1 RDMA. 1882 * 1883 * Since the number of RDMAs is not known beforehand, 1884 * it must be filled-in retroactively - after each 1885 * segmentation cut or at the end of the entire packet. 1886 */ 1887 1888 while (busdma_seg_cnt) { 1889 /* Break the busdma segment up into pieces*/ 1890 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1891 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1892 len = seg->ds_len; 1893 1894 while (len) { 1895 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1896 seglen = len; 1897 cum_len_next = cum_len + seglen; 1898 (req-rdma_count)->rdma_count = rdma_count + 1; 1899 if (__predict_true(cum_len >= 0)) { 1900 /* payload */ 1901 chop = (cum_len_next > mss); 1902 cum_len_next = cum_len_next % mss; 1903 next_is_first = (cum_len_next == 0); 1904 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1905 flags_next |= next_is_first * 1906 MXGEFW_FLAGS_FIRST; 1907 rdma_count |= -(chop | next_is_first); 1908 rdma_count += chop & !next_is_first; 1909 } else if (cum_len_next >= 0) { 1910 /* header ends */ 1911 rdma_count = -1; 1912 cum_len_next = 0; 1913 seglen = -cum_len; 1914 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1915 flags_next = MXGEFW_FLAGS_TSO_PLD | 1916 MXGEFW_FLAGS_FIRST | 1917 (small * MXGEFW_FLAGS_SMALL); 1918 } 1919 1920 req->addr_high = high_swapped; 1921 req->addr_low = htobe32(low); 1922 req->pseudo_hdr_offset = pseudo_hdr_offset; 1923 req->pad = 0; 1924 req->rdma_count = 1; 1925 req->length = htobe16(seglen); 1926 req->cksum_offset = cksum_offset; 1927 req->flags = flags | ((cum_len & 1) * 1928 MXGEFW_FLAGS_ALIGN_ODD); 1929 low += seglen; 1930 len -= seglen; 1931 cum_len = cum_len_next; 1932 flags = flags_next; 1933 req++; 1934 cnt++; 1935 rdma_count++; 1936 if (__predict_false(cksum_offset > seglen)) 1937 cksum_offset -= seglen; 1938 else 1939 cksum_offset = 0; 1940 if (__predict_false(cnt > tx->max_desc)) 1941 goto drop; 1942 } 1943 busdma_seg_cnt--; 1944 seg++; 1945 } 1946 (req-rdma_count)->rdma_count = rdma_count; 1947 1948 do { 1949 req--; 1950 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1951 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1952 1953 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1954 mxge_submit_req(tx, tx->req_list, cnt); 1955#ifdef IFNET_BUF_RING 1956 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1957 /* tell the NIC to start polling this slice */ 1958 *tx->send_go = 1; 1959 tx->queue_active = 1; 1960 tx->activate++; 1961 wmb(); 1962 } 1963#endif 1964 return; 1965 1966drop: 1967 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1968 m_freem(m); 1969 ss->oerrors++; 1970 if (!once) { 1971 printf("tx->max_desc exceeded via TSO!\n"); 1972 printf("mss = %d, %ld, %d!\n", mss, 1973 (long)seg - (long)tx->seg_list, tx->max_desc); 1974 once = 1; 1975 } 1976 return; 1977 1978} 1979 1980#endif /* IFCAP_TSO4 */ 1981 1982#ifdef MXGE_NEW_VLAN_API 1983/* 1984 * We reproduce the software vlan tag insertion from 1985 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1986 * vlan tag insertion. We need to advertise this in order to have the 1987 * vlan interface respect our csum offload flags. 1988 */ 1989static struct mbuf * 1990mxge_vlan_tag_insert(struct mbuf *m) 1991{ 1992 struct ether_vlan_header *evl; 1993 1994 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1995 if (__predict_false(m == NULL)) 1996 return NULL; 1997 if (m->m_len < sizeof(*evl)) { 1998 m = m_pullup(m, sizeof(*evl)); 1999 if (__predict_false(m == NULL)) 2000 return NULL; 2001 } 2002 /* 2003 * Transform the Ethernet header into an Ethernet header 2004 * with 802.1Q encapsulation. 2005 */ 2006 evl = mtod(m, struct ether_vlan_header *); 2007 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2008 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2009 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2010 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2011 m->m_flags &= ~M_VLANTAG; 2012 return m; 2013} 2014#endif /* MXGE_NEW_VLAN_API */ 2015 2016static void 2017mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2018{ 2019 mxge_softc_t *sc; 2020 mcp_kreq_ether_send_t *req; 2021 bus_dma_segment_t *seg; 2022 struct mbuf *m_tmp; 2023 struct ifnet *ifp; 2024 mxge_tx_ring_t *tx; 2025 struct ip *ip; 2026 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2027 uint16_t pseudo_hdr_offset; 2028 uint8_t flags, cksum_offset; 2029 2030 2031 sc = ss->sc; 2032 ifp = sc->ifp; 2033 tx = &ss->tx; 2034 2035 ip_off = sizeof (struct ether_header); 2036#ifdef MXGE_NEW_VLAN_API 2037 if (m->m_flags & M_VLANTAG) { 2038 m = mxge_vlan_tag_insert(m); 2039 if (__predict_false(m == NULL)) 2040 goto drop; 2041 ip_off += ETHER_VLAN_ENCAP_LEN; 2042 } 2043#endif 2044 /* (try to) map the frame for DMA */ 2045 idx = tx->req & tx->mask; 2046 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2047 m, tx->seg_list, &cnt, 2048 BUS_DMA_NOWAIT); 2049 if (__predict_false(err == EFBIG)) { 2050 /* Too many segments in the chain. Try 2051 to defrag */ 2052 m_tmp = m_defrag(m, M_NOWAIT); 2053 if (m_tmp == NULL) { 2054 goto drop; 2055 } 2056 ss->tx.defrag++; 2057 m = m_tmp; 2058 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2059 tx->info[idx].map, 2060 m, tx->seg_list, &cnt, 2061 BUS_DMA_NOWAIT); 2062 } 2063 if (__predict_false(err != 0)) { 2064 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2065 " packet len = %d\n", err, m->m_pkthdr.len); 2066 goto drop; 2067 } 2068 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2069 BUS_DMASYNC_PREWRITE); 2070 tx->info[idx].m = m; 2071 2072#if IFCAP_TSO4 2073 /* TSO is different enough, we handle it in another routine */ 2074 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2075 mxge_encap_tso(ss, m, cnt, ip_off); 2076 return; 2077 } 2078#endif 2079 2080 req = tx->req_list; 2081 cksum_offset = 0; 2082 pseudo_hdr_offset = 0; 2083 flags = MXGEFW_FLAGS_NO_TSO; 2084 2085 /* checksum offloading? */ 2086 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2087 /* ensure ip header is in first mbuf, copy 2088 it to a scratch buffer if not */ 2089 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2090 m_copydata(m, 0, ip_off + sizeof (*ip), 2091 ss->scratch); 2092 ip = (struct ip *)(ss->scratch + ip_off); 2093 } else { 2094 ip = (struct ip *)(mtod(m, char *) + ip_off); 2095 } 2096 cksum_offset = ip_off + (ip->ip_hl << 2); 2097 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2098 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2099 req->cksum_offset = cksum_offset; 2100 flags |= MXGEFW_FLAGS_CKSUM; 2101 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2102 } else { 2103 odd_flag = 0; 2104 } 2105 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2106 flags |= MXGEFW_FLAGS_SMALL; 2107 2108 /* convert segments into a request list */ 2109 cum_len = 0; 2110 seg = tx->seg_list; 2111 req->flags = MXGEFW_FLAGS_FIRST; 2112 for (i = 0; i < cnt; i++) { 2113 req->addr_low = 2114 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2115 req->addr_high = 2116 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2117 req->length = htobe16(seg->ds_len); 2118 req->cksum_offset = cksum_offset; 2119 if (cksum_offset > seg->ds_len) 2120 cksum_offset -= seg->ds_len; 2121 else 2122 cksum_offset = 0; 2123 req->pseudo_hdr_offset = pseudo_hdr_offset; 2124 req->pad = 0; /* complete solid 16-byte block */ 2125 req->rdma_count = 1; 2126 req->flags |= flags | ((cum_len & 1) * odd_flag); 2127 cum_len += seg->ds_len; 2128 seg++; 2129 req++; 2130 req->flags = 0; 2131 } 2132 req--; 2133 /* pad runts to 60 bytes */ 2134 if (cum_len < 60) { 2135 req++; 2136 req->addr_low = 2137 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2138 req->addr_high = 2139 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2140 req->length = htobe16(60 - cum_len); 2141 req->cksum_offset = 0; 2142 req->pseudo_hdr_offset = pseudo_hdr_offset; 2143 req->pad = 0; /* complete solid 16-byte block */ 2144 req->rdma_count = 1; 2145 req->flags |= flags | ((cum_len & 1) * odd_flag); 2146 cnt++; 2147 } 2148 2149 tx->req_list[0].rdma_count = cnt; 2150#if 0 2151 /* print what the firmware will see */ 2152 for (i = 0; i < cnt; i++) { 2153 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2154 "cso:%d, flags:0x%x, rdma:%d\n", 2155 i, (int)ntohl(tx->req_list[i].addr_high), 2156 (int)ntohl(tx->req_list[i].addr_low), 2157 (int)ntohs(tx->req_list[i].length), 2158 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2159 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2160 tx->req_list[i].rdma_count); 2161 } 2162 printf("--------------\n"); 2163#endif 2164 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2165 mxge_submit_req(tx, tx->req_list, cnt); 2166#ifdef IFNET_BUF_RING 2167 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2168 /* tell the NIC to start polling this slice */ 2169 *tx->send_go = 1; 2170 tx->queue_active = 1; 2171 tx->activate++; 2172 wmb(); 2173 } 2174#endif 2175 return; 2176 2177drop: 2178 m_freem(m); 2179 ss->oerrors++; 2180 return; 2181} 2182 2183#ifdef IFNET_BUF_RING 2184static void 2185mxge_qflush(struct ifnet *ifp) 2186{ 2187 mxge_softc_t *sc = ifp->if_softc; 2188 mxge_tx_ring_t *tx; 2189 struct mbuf *m; 2190 int slice; 2191 2192 for (slice = 0; slice < sc->num_slices; slice++) { 2193 tx = &sc->ss[slice].tx; 2194 mtx_lock(&tx->mtx); 2195 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2196 m_freem(m); 2197 mtx_unlock(&tx->mtx); 2198 } 2199 if_qflush(ifp); 2200} 2201 2202static inline void 2203mxge_start_locked(struct mxge_slice_state *ss) 2204{ 2205 mxge_softc_t *sc; 2206 struct mbuf *m; 2207 struct ifnet *ifp; 2208 mxge_tx_ring_t *tx; 2209 2210 sc = ss->sc; 2211 ifp = sc->ifp; 2212 tx = &ss->tx; 2213 2214 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2215 m = drbr_dequeue(ifp, tx->br); 2216 if (m == NULL) { 2217 return; 2218 } 2219 /* let BPF see it */ 2220 BPF_MTAP(ifp, m); 2221 2222 /* give it to the nic */ 2223 mxge_encap(ss, m); 2224 } 2225 /* ran out of transmit slots */ 2226 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2227 && (!drbr_empty(ifp, tx->br))) { 2228 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2229 tx->stall++; 2230 } 2231} 2232 2233static int 2234mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2235{ 2236 mxge_softc_t *sc; 2237 struct ifnet *ifp; 2238 mxge_tx_ring_t *tx; 2239 int err; 2240 2241 sc = ss->sc; 2242 ifp = sc->ifp; 2243 tx = &ss->tx; 2244 2245 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2246 IFF_DRV_RUNNING) { 2247 err = drbr_enqueue(ifp, tx->br, m); 2248 return (err); 2249 } 2250 2251 if (drbr_empty(ifp, tx->br) && 2252 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2253 /* let BPF see it */ 2254 BPF_MTAP(ifp, m); 2255 /* give it to the nic */ 2256 mxge_encap(ss, m); 2257 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2258 return (err); 2259 } 2260 if (!drbr_empty(ifp, tx->br)) 2261 mxge_start_locked(ss); 2262 return (0); 2263} 2264 2265static int 2266mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2267{ 2268 mxge_softc_t *sc = ifp->if_softc; 2269 struct mxge_slice_state *ss; 2270 mxge_tx_ring_t *tx; 2271 int err = 0; 2272 int slice; 2273 2274 slice = m->m_pkthdr.flowid; 2275 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2276 2277 ss = &sc->ss[slice]; 2278 tx = &ss->tx; 2279 2280 if (mtx_trylock(&tx->mtx)) { 2281 err = mxge_transmit_locked(ss, m); 2282 mtx_unlock(&tx->mtx); 2283 } else { 2284 err = drbr_enqueue(ifp, tx->br, m); 2285 } 2286 2287 return (err); 2288} 2289 2290#else 2291 2292static inline void 2293mxge_start_locked(struct mxge_slice_state *ss) 2294{ 2295 mxge_softc_t *sc; 2296 struct mbuf *m; 2297 struct ifnet *ifp; 2298 mxge_tx_ring_t *tx; 2299 2300 sc = ss->sc; 2301 ifp = sc->ifp; 2302 tx = &ss->tx; 2303 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2304 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2305 if (m == NULL) { 2306 return; 2307 } 2308 /* let BPF see it */ 2309 BPF_MTAP(ifp, m); 2310 2311 /* give it to the nic */ 2312 mxge_encap(ss, m); 2313 } 2314 /* ran out of transmit slots */ 2315 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2316 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2317 tx->stall++; 2318 } 2319} 2320#endif 2321static void 2322mxge_start(struct ifnet *ifp) 2323{ 2324 mxge_softc_t *sc = ifp->if_softc; 2325 struct mxge_slice_state *ss; 2326 2327 /* only use the first slice for now */ 2328 ss = &sc->ss[0]; 2329 mtx_lock(&ss->tx.mtx); 2330 mxge_start_locked(ss); 2331 mtx_unlock(&ss->tx.mtx); 2332} 2333 2334/* 2335 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2336 * at most 32 bytes at a time, so as to avoid involving the software 2337 * pio handler in the nic. We re-write the first segment's low 2338 * DMA address to mark it valid only after we write the entire chunk 2339 * in a burst 2340 */ 2341static inline void 2342mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2343 mcp_kreq_ether_recv_t *src) 2344{ 2345 uint32_t low; 2346 2347 low = src->addr_low; 2348 src->addr_low = 0xffffffff; 2349 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2350 wmb(); 2351 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2352 wmb(); 2353 src->addr_low = low; 2354 dst->addr_low = low; 2355 wmb(); 2356} 2357 2358static int 2359mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2360{ 2361 bus_dma_segment_t seg; 2362 struct mbuf *m; 2363 mxge_rx_ring_t *rx = &ss->rx_small; 2364 int cnt, err; 2365 2366 m = m_gethdr(M_DONTWAIT, MT_DATA); 2367 if (m == NULL) { 2368 rx->alloc_fail++; 2369 err = ENOBUFS; 2370 goto done; 2371 } 2372 m->m_len = MHLEN; 2373 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2374 &seg, &cnt, BUS_DMA_NOWAIT); 2375 if (err != 0) { 2376 m_free(m); 2377 goto done; 2378 } 2379 rx->info[idx].m = m; 2380 rx->shadow[idx].addr_low = 2381 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2382 rx->shadow[idx].addr_high = 2383 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2384 2385done: 2386 if ((idx & 7) == 7) 2387 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2388 return err; 2389} 2390 2391static int 2392mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2393{ 2394 bus_dma_segment_t seg[3]; 2395 struct mbuf *m; 2396 mxge_rx_ring_t *rx = &ss->rx_big; 2397 int cnt, err, i; 2398 2399 if (rx->cl_size == MCLBYTES) 2400 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2401 else 2402 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2403 if (m == NULL) { 2404 rx->alloc_fail++; 2405 err = ENOBUFS; 2406 goto done; 2407 } 2408 m->m_len = rx->mlen; 2409 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2410 seg, &cnt, BUS_DMA_NOWAIT); 2411 if (err != 0) { 2412 m_free(m); 2413 goto done; 2414 } 2415 rx->info[idx].m = m; 2416 rx->shadow[idx].addr_low = 2417 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2418 rx->shadow[idx].addr_high = 2419 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2420 2421#if MXGE_VIRT_JUMBOS 2422 for (i = 1; i < cnt; i++) { 2423 rx->shadow[idx + i].addr_low = 2424 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2425 rx->shadow[idx + i].addr_high = 2426 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2427 } 2428#endif 2429 2430done: 2431 for (i = 0; i < rx->nbufs; i++) { 2432 if ((idx & 7) == 7) { 2433 mxge_submit_8rx(&rx->lanai[idx - 7], 2434 &rx->shadow[idx - 7]); 2435 } 2436 idx++; 2437 } 2438 return err; 2439} 2440 2441/* 2442 * Myri10GE hardware checksums are not valid if the sender 2443 * padded the frame with non-zero padding. This is because 2444 * the firmware just does a simple 16-bit 1s complement 2445 * checksum across the entire frame, excluding the first 14 2446 * bytes. It is best to simply to check the checksum and 2447 * tell the stack about it only if the checksum is good 2448 */ 2449 2450static inline uint16_t 2451mxge_rx_csum(struct mbuf *m, int csum) 2452{ 2453 struct ether_header *eh; 2454 struct ip *ip; 2455 uint16_t c; 2456 2457 eh = mtod(m, struct ether_header *); 2458 2459 /* only deal with IPv4 TCP & UDP for now */ 2460 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2461 return 1; 2462 ip = (struct ip *)(eh + 1); 2463 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2464 ip->ip_p != IPPROTO_UDP)) 2465 return 1; 2466#ifdef INET 2467 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2468 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2469 - (ip->ip_hl << 2) + ip->ip_p)); 2470#else 2471 c = 1; 2472#endif 2473 c ^= 0xffff; 2474 return (c); 2475} 2476 2477static void 2478mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2479{ 2480 struct ether_vlan_header *evl; 2481 struct ether_header *eh; 2482 uint32_t partial; 2483 2484 evl = mtod(m, struct ether_vlan_header *); 2485 eh = mtod(m, struct ether_header *); 2486 2487 /* 2488 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2489 * after what the firmware thought was the end of the ethernet 2490 * header. 2491 */ 2492 2493 /* put checksum into host byte order */ 2494 *csum = ntohs(*csum); 2495 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2496 (*csum) += ~partial; 2497 (*csum) += ((*csum) < ~partial); 2498 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2500 2501 /* restore checksum to network byte order; 2502 later consumers expect this */ 2503 *csum = htons(*csum); 2504 2505 /* save the tag */ 2506#ifdef MXGE_NEW_VLAN_API 2507 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2508#else 2509 { 2510 struct m_tag *mtag; 2511 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2512 M_NOWAIT); 2513 if (mtag == NULL) 2514 return; 2515 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2516 m_tag_prepend(m, mtag); 2517 } 2518 2519#endif 2520 m->m_flags |= M_VLANTAG; 2521 2522 /* 2523 * Remove the 802.1q header by copying the Ethernet 2524 * addresses over it and adjusting the beginning of 2525 * the data in the mbuf. The encapsulated Ethernet 2526 * type field is already in place. 2527 */ 2528 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2529 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2530 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2531} 2532 2533 2534static inline void 2535mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2536{ 2537 mxge_softc_t *sc; 2538 struct ifnet *ifp; 2539 struct mbuf *m; 2540 struct ether_header *eh; 2541 mxge_rx_ring_t *rx; 2542 bus_dmamap_t old_map; 2543 int idx; 2544 uint16_t tcpudp_csum; 2545 2546 sc = ss->sc; 2547 ifp = sc->ifp; 2548 rx = &ss->rx_big; 2549 idx = rx->cnt & rx->mask; 2550 rx->cnt += rx->nbufs; 2551 /* save a pointer to the received mbuf */ 2552 m = rx->info[idx].m; 2553 /* try to replace the received mbuf */ 2554 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2555 /* drop the frame -- the old mbuf is re-cycled */ 2556 ifp->if_ierrors++; 2557 return; 2558 } 2559 2560 /* unmap the received buffer */ 2561 old_map = rx->info[idx].map; 2562 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2563 bus_dmamap_unload(rx->dmat, old_map); 2564 2565 /* swap the bus_dmamap_t's */ 2566 rx->info[idx].map = rx->extra_map; 2567 rx->extra_map = old_map; 2568 2569 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2570 * aligned */ 2571 m->m_data += MXGEFW_PAD; 2572 2573 m->m_pkthdr.rcvif = ifp; 2574 m->m_len = m->m_pkthdr.len = len; 2575 ss->ipackets++; 2576 eh = mtod(m, struct ether_header *); 2577 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2578 mxge_vlan_tag_remove(m, &csum); 2579 } 2580 /* if the checksum is valid, mark it in the mbuf header */ 2581 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2582 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2583 return; 2584 /* otherwise, it was a UDP frame, or a TCP frame which 2585 we could not do LRO on. Tell the stack that the 2586 checksum is good */ 2587 m->m_pkthdr.csum_data = 0xffff; 2588 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2589 } 2590 /* flowid only valid if RSS hashing is enabled */ 2591 if (sc->num_slices > 1) { 2592 m->m_pkthdr.flowid = (ss - sc->ss); 2593 m->m_flags |= M_FLOWID; 2594 } 2595 /* pass the frame up the stack */ 2596 (*ifp->if_input)(ifp, m); 2597} 2598 2599static inline void 2600mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2601{ 2602 mxge_softc_t *sc; 2603 struct ifnet *ifp; 2604 struct ether_header *eh; 2605 struct mbuf *m; 2606 mxge_rx_ring_t *rx; 2607 bus_dmamap_t old_map; 2608 int idx; 2609 uint16_t tcpudp_csum; 2610 2611 sc = ss->sc; 2612 ifp = sc->ifp; 2613 rx = &ss->rx_small; 2614 idx = rx->cnt & rx->mask; 2615 rx->cnt++; 2616 /* save a pointer to the received mbuf */ 2617 m = rx->info[idx].m; 2618 /* try to replace the received mbuf */ 2619 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2620 /* drop the frame -- the old mbuf is re-cycled */ 2621 ifp->if_ierrors++; 2622 return; 2623 } 2624 2625 /* unmap the received buffer */ 2626 old_map = rx->info[idx].map; 2627 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2628 bus_dmamap_unload(rx->dmat, old_map); 2629 2630 /* swap the bus_dmamap_t's */ 2631 rx->info[idx].map = rx->extra_map; 2632 rx->extra_map = old_map; 2633 2634 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2635 * aligned */ 2636 m->m_data += MXGEFW_PAD; 2637 2638 m->m_pkthdr.rcvif = ifp; 2639 m->m_len = m->m_pkthdr.len = len; 2640 ss->ipackets++; 2641 eh = mtod(m, struct ether_header *); 2642 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2643 mxge_vlan_tag_remove(m, &csum); 2644 } 2645 /* if the checksum is valid, mark it in the mbuf header */ 2646 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2647 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2648 return; 2649 /* otherwise, it was a UDP frame, or a TCP frame which 2650 we could not do LRO on. Tell the stack that the 2651 checksum is good */ 2652 m->m_pkthdr.csum_data = 0xffff; 2653 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2654 } 2655 /* flowid only valid if RSS hashing is enabled */ 2656 if (sc->num_slices > 1) { 2657 m->m_pkthdr.flowid = (ss - sc->ss); 2658 m->m_flags |= M_FLOWID; 2659 } 2660 /* pass the frame up the stack */ 2661 (*ifp->if_input)(ifp, m); 2662} 2663 2664static inline void 2665mxge_clean_rx_done(struct mxge_slice_state *ss) 2666{ 2667 mxge_rx_done_t *rx_done = &ss->rx_done; 2668 int limit = 0; 2669 uint16_t length; 2670 uint16_t checksum; 2671 2672 2673 while (rx_done->entry[rx_done->idx].length != 0) { 2674 length = ntohs(rx_done->entry[rx_done->idx].length); 2675 rx_done->entry[rx_done->idx].length = 0; 2676 checksum = rx_done->entry[rx_done->idx].checksum; 2677 if (length <= (MHLEN - MXGEFW_PAD)) 2678 mxge_rx_done_small(ss, length, checksum); 2679 else 2680 mxge_rx_done_big(ss, length, checksum); 2681 rx_done->cnt++; 2682 rx_done->idx = rx_done->cnt & rx_done->mask; 2683 2684 /* limit potential for livelock */ 2685 if (__predict_false(++limit > rx_done->mask / 2)) 2686 break; 2687 } 2688#ifdef INET 2689 while (!SLIST_EMPTY(&ss->lro_active)) { 2690 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2691 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2692 mxge_lro_flush(ss, lro); 2693 } 2694#endif 2695} 2696 2697 2698static inline void 2699mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2700{ 2701 struct ifnet *ifp; 2702 mxge_tx_ring_t *tx; 2703 struct mbuf *m; 2704 bus_dmamap_t map; 2705 int idx; 2706 int *flags; 2707 2708 tx = &ss->tx; 2709 ifp = ss->sc->ifp; 2710 while (tx->pkt_done != mcp_idx) { 2711 idx = tx->done & tx->mask; 2712 tx->done++; 2713 m = tx->info[idx].m; 2714 /* mbuf and DMA map only attached to the first 2715 segment per-mbuf */ 2716 if (m != NULL) { 2717 ss->obytes += m->m_pkthdr.len; 2718 if (m->m_flags & M_MCAST) 2719 ss->omcasts++; 2720 ss->opackets++; 2721 tx->info[idx].m = NULL; 2722 map = tx->info[idx].map; 2723 bus_dmamap_unload(tx->dmat, map); 2724 m_freem(m); 2725 } 2726 if (tx->info[idx].flag) { 2727 tx->info[idx].flag = 0; 2728 tx->pkt_done++; 2729 } 2730 } 2731 2732 /* If we have space, clear IFF_OACTIVE to tell the stack that 2733 its OK to send packets */ 2734#ifdef IFNET_BUF_RING 2735 flags = &ss->if_drv_flags; 2736#else 2737 flags = &ifp->if_drv_flags; 2738#endif 2739 mtx_lock(&ss->tx.mtx); 2740 if ((*flags) & IFF_DRV_OACTIVE && 2741 tx->req - tx->done < (tx->mask + 1)/4) { 2742 *(flags) &= ~IFF_DRV_OACTIVE; 2743 ss->tx.wake++; 2744 mxge_start_locked(ss); 2745 } 2746#ifdef IFNET_BUF_RING 2747 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2748 /* let the NIC stop polling this queue, since there 2749 * are no more transmits pending */ 2750 if (tx->req == tx->done) { 2751 *tx->send_stop = 1; 2752 tx->queue_active = 0; 2753 tx->deactivate++; 2754 wmb(); 2755 } 2756 } 2757#endif 2758 mtx_unlock(&ss->tx.mtx); 2759 2760} 2761 2762static struct mxge_media_type mxge_xfp_media_types[] = 2763{ 2764 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2765 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2766 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2767 {0, (1 << 5), "10GBASE-ER"}, 2768 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2769 {0, (1 << 3), "10GBASE-SW"}, 2770 {0, (1 << 2), "10GBASE-LW"}, 2771 {0, (1 << 1), "10GBASE-EW"}, 2772 {0, (1 << 0), "Reserved"} 2773}; 2774static struct mxge_media_type mxge_sfp_media_types[] = 2775{ 2776 {0, (1 << 7), "Reserved"}, 2777 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2778 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2779 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2780}; 2781 2782static void 2783mxge_set_media(mxge_softc_t *sc, int type) 2784{ 2785 sc->media_flags |= type; 2786 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2787 ifmedia_set(&sc->media, sc->media_flags); 2788} 2789 2790 2791/* 2792 * Determine the media type for a NIC. Some XFPs will identify 2793 * themselves only when their link is up, so this is initiated via a 2794 * link up interrupt. However, this can potentially take up to 2795 * several milliseconds, so it is run via the watchdog routine, rather 2796 * than in the interrupt handler itself. This need only be done 2797 * once, not each time the link is up. 2798 */ 2799static void 2800mxge_media_probe(mxge_softc_t *sc) 2801{ 2802 mxge_cmd_t cmd; 2803 char *cage_type; 2804 char *ptr; 2805 struct mxge_media_type *mxge_media_types = NULL; 2806 int i, err, ms, mxge_media_type_entries; 2807 uint32_t byte; 2808 2809 sc->need_media_probe = 0; 2810 2811 /* if we've already set a media type, we're done */ 2812 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2813 return; 2814 2815 /* 2816 * parse the product code to deterimine the interface type 2817 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2818 * after the 3rd dash in the driver's cached copy of the 2819 * EEPROM's product code string. 2820 */ 2821 ptr = sc->product_code_string; 2822 if (ptr == NULL) { 2823 device_printf(sc->dev, "Missing product code\n"); 2824 } 2825 2826 for (i = 0; i < 3; i++, ptr++) { 2827 ptr = index(ptr, '-'); 2828 if (ptr == NULL) { 2829 device_printf(sc->dev, 2830 "only %d dashes in PC?!?\n", i); 2831 return; 2832 } 2833 } 2834 if (*ptr == 'C') { 2835 /* -C is CX4 */ 2836 mxge_set_media(sc, IFM_10G_CX4); 2837 return; 2838 } 2839 else if (*ptr == 'Q') { 2840 /* -Q is Quad Ribbon Fiber */ 2841 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2842 /* FreeBSD has no media type for Quad ribbon fiber */ 2843 return; 2844 } 2845 2846 if (*ptr == 'R') { 2847 /* -R is XFP */ 2848 mxge_media_types = mxge_xfp_media_types; 2849 mxge_media_type_entries = 2850 sizeof (mxge_xfp_media_types) / 2851 sizeof (mxge_xfp_media_types[0]); 2852 byte = MXGE_XFP_COMPLIANCE_BYTE; 2853 cage_type = "XFP"; 2854 } 2855 2856 if (*ptr == 'S' || *(ptr +1) == 'S') { 2857 /* -S or -2S is SFP+ */ 2858 mxge_media_types = mxge_sfp_media_types; 2859 mxge_media_type_entries = 2860 sizeof (mxge_sfp_media_types) / 2861 sizeof (mxge_sfp_media_types[0]); 2862 cage_type = "SFP+"; 2863 byte = 3; 2864 } 2865 2866 if (mxge_media_types == NULL) { 2867 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2868 return; 2869 } 2870 2871 /* 2872 * At this point we know the NIC has an XFP cage, so now we 2873 * try to determine what is in the cage by using the 2874 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2875 * register. We read just one byte, which may take over 2876 * a millisecond 2877 */ 2878 2879 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2880 cmd.data1 = byte; 2881 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2882 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2883 device_printf(sc->dev, "failed to read XFP\n"); 2884 } 2885 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2886 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2887 } 2888 if (err != MXGEFW_CMD_OK) { 2889 return; 2890 } 2891 2892 /* now we wait for the data to be cached */ 2893 cmd.data0 = byte; 2894 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2895 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2896 DELAY(1000); 2897 cmd.data0 = byte; 2898 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2899 } 2900 if (err != MXGEFW_CMD_OK) { 2901 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2902 cage_type, err, ms); 2903 return; 2904 } 2905 2906 if (cmd.data0 == mxge_media_types[0].bitmask) { 2907 if (mxge_verbose) 2908 device_printf(sc->dev, "%s:%s\n", cage_type, 2909 mxge_media_types[0].name); 2910 mxge_set_media(sc, IFM_10G_CX4); 2911 return; 2912 } 2913 for (i = 1; i < mxge_media_type_entries; i++) { 2914 if (cmd.data0 & mxge_media_types[i].bitmask) { 2915 if (mxge_verbose) 2916 device_printf(sc->dev, "%s:%s\n", 2917 cage_type, 2918 mxge_media_types[i].name); 2919 2920 mxge_set_media(sc, mxge_media_types[i].flag); 2921 return; 2922 } 2923 } 2924 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2925 cmd.data0); 2926 2927 return; 2928} 2929 2930static void 2931mxge_intr(void *arg) 2932{ 2933 struct mxge_slice_state *ss = arg; 2934 mxge_softc_t *sc = ss->sc; 2935 mcp_irq_data_t *stats = ss->fw_stats; 2936 mxge_tx_ring_t *tx = &ss->tx; 2937 mxge_rx_done_t *rx_done = &ss->rx_done; 2938 uint32_t send_done_count; 2939 uint8_t valid; 2940 2941 2942#ifndef IFNET_BUF_RING 2943 /* an interrupt on a non-zero slice is implicitly valid 2944 since MSI-X irqs are not shared */ 2945 if (ss != sc->ss) { 2946 mxge_clean_rx_done(ss); 2947 *ss->irq_claim = be32toh(3); 2948 return; 2949 } 2950#endif 2951 2952 /* make sure the DMA has finished */ 2953 if (!stats->valid) { 2954 return; 2955 } 2956 valid = stats->valid; 2957 2958 if (sc->legacy_irq) { 2959 /* lower legacy IRQ */ 2960 *sc->irq_deassert = 0; 2961 if (!mxge_deassert_wait) 2962 /* don't wait for conf. that irq is low */ 2963 stats->valid = 0; 2964 } else { 2965 stats->valid = 0; 2966 } 2967 2968 /* loop while waiting for legacy irq deassertion */ 2969 do { 2970 /* check for transmit completes and receives */ 2971 send_done_count = be32toh(stats->send_done_count); 2972 while ((send_done_count != tx->pkt_done) || 2973 (rx_done->entry[rx_done->idx].length != 0)) { 2974 if (send_done_count != tx->pkt_done) 2975 mxge_tx_done(ss, (int)send_done_count); 2976 mxge_clean_rx_done(ss); 2977 send_done_count = be32toh(stats->send_done_count); 2978 } 2979 if (sc->legacy_irq && mxge_deassert_wait) 2980 wmb(); 2981 } while (*((volatile uint8_t *) &stats->valid)); 2982 2983 /* fw link & error stats meaningful only on the first slice */ 2984 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2985 if (sc->link_state != stats->link_up) { 2986 sc->link_state = stats->link_up; 2987 if (sc->link_state) { 2988 if_link_state_change(sc->ifp, LINK_STATE_UP); 2989 if (mxge_verbose) 2990 device_printf(sc->dev, "link up\n"); 2991 } else { 2992 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2993 if (mxge_verbose) 2994 device_printf(sc->dev, "link down\n"); 2995 } 2996 sc->need_media_probe = 1; 2997 } 2998 if (sc->rdma_tags_available != 2999 be32toh(stats->rdma_tags_available)) { 3000 sc->rdma_tags_available = 3001 be32toh(stats->rdma_tags_available); 3002 device_printf(sc->dev, "RDMA timed out! %d tags " 3003 "left\n", sc->rdma_tags_available); 3004 } 3005 3006 if (stats->link_down) { 3007 sc->down_cnt += stats->link_down; 3008 sc->link_state = 0; 3009 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3010 } 3011 } 3012 3013 /* check to see if we have rx token to pass back */ 3014 if (valid & 0x1) 3015 *ss->irq_claim = be32toh(3); 3016 *(ss->irq_claim + 1) = be32toh(3); 3017} 3018 3019static void 3020mxge_init(void *arg) 3021{ 3022} 3023 3024 3025 3026static void 3027mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3028{ 3029 struct lro_entry *lro_entry; 3030 int i; 3031 3032 while (!SLIST_EMPTY(&ss->lro_free)) { 3033 lro_entry = SLIST_FIRST(&ss->lro_free); 3034 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3035 free(lro_entry, M_DEVBUF); 3036 } 3037 3038 for (i = 0; i <= ss->rx_big.mask; i++) { 3039 if (ss->rx_big.info[i].m == NULL) 3040 continue; 3041 bus_dmamap_unload(ss->rx_big.dmat, 3042 ss->rx_big.info[i].map); 3043 m_freem(ss->rx_big.info[i].m); 3044 ss->rx_big.info[i].m = NULL; 3045 } 3046 3047 for (i = 0; i <= ss->rx_small.mask; i++) { 3048 if (ss->rx_small.info[i].m == NULL) 3049 continue; 3050 bus_dmamap_unload(ss->rx_small.dmat, 3051 ss->rx_small.info[i].map); 3052 m_freem(ss->rx_small.info[i].m); 3053 ss->rx_small.info[i].m = NULL; 3054 } 3055 3056 /* transmit ring used only on the first slice */ 3057 if (ss->tx.info == NULL) 3058 return; 3059 3060 for (i = 0; i <= ss->tx.mask; i++) { 3061 ss->tx.info[i].flag = 0; 3062 if (ss->tx.info[i].m == NULL) 3063 continue; 3064 bus_dmamap_unload(ss->tx.dmat, 3065 ss->tx.info[i].map); 3066 m_freem(ss->tx.info[i].m); 3067 ss->tx.info[i].m = NULL; 3068 } 3069} 3070 3071static void 3072mxge_free_mbufs(mxge_softc_t *sc) 3073{ 3074 int slice; 3075 3076 for (slice = 0; slice < sc->num_slices; slice++) 3077 mxge_free_slice_mbufs(&sc->ss[slice]); 3078} 3079 3080static void 3081mxge_free_slice_rings(struct mxge_slice_state *ss) 3082{ 3083 int i; 3084 3085 3086 if (ss->rx_done.entry != NULL) 3087 mxge_dma_free(&ss->rx_done.dma); 3088 ss->rx_done.entry = NULL; 3089 3090 if (ss->tx.req_bytes != NULL) 3091 free(ss->tx.req_bytes, M_DEVBUF); 3092 ss->tx.req_bytes = NULL; 3093 3094 if (ss->tx.seg_list != NULL) 3095 free(ss->tx.seg_list, M_DEVBUF); 3096 ss->tx.seg_list = NULL; 3097 3098 if (ss->rx_small.shadow != NULL) 3099 free(ss->rx_small.shadow, M_DEVBUF); 3100 ss->rx_small.shadow = NULL; 3101 3102 if (ss->rx_big.shadow != NULL) 3103 free(ss->rx_big.shadow, M_DEVBUF); 3104 ss->rx_big.shadow = NULL; 3105 3106 if (ss->tx.info != NULL) { 3107 if (ss->tx.dmat != NULL) { 3108 for (i = 0; i <= ss->tx.mask; i++) { 3109 bus_dmamap_destroy(ss->tx.dmat, 3110 ss->tx.info[i].map); 3111 } 3112 bus_dma_tag_destroy(ss->tx.dmat); 3113 } 3114 free(ss->tx.info, M_DEVBUF); 3115 } 3116 ss->tx.info = NULL; 3117 3118 if (ss->rx_small.info != NULL) { 3119 if (ss->rx_small.dmat != NULL) { 3120 for (i = 0; i <= ss->rx_small.mask; i++) { 3121 bus_dmamap_destroy(ss->rx_small.dmat, 3122 ss->rx_small.info[i].map); 3123 } 3124 bus_dmamap_destroy(ss->rx_small.dmat, 3125 ss->rx_small.extra_map); 3126 bus_dma_tag_destroy(ss->rx_small.dmat); 3127 } 3128 free(ss->rx_small.info, M_DEVBUF); 3129 } 3130 ss->rx_small.info = NULL; 3131 3132 if (ss->rx_big.info != NULL) { 3133 if (ss->rx_big.dmat != NULL) { 3134 for (i = 0; i <= ss->rx_big.mask; i++) { 3135 bus_dmamap_destroy(ss->rx_big.dmat, 3136 ss->rx_big.info[i].map); 3137 } 3138 bus_dmamap_destroy(ss->rx_big.dmat, 3139 ss->rx_big.extra_map); 3140 bus_dma_tag_destroy(ss->rx_big.dmat); 3141 } 3142 free(ss->rx_big.info, M_DEVBUF); 3143 } 3144 ss->rx_big.info = NULL; 3145} 3146 3147static void 3148mxge_free_rings(mxge_softc_t *sc) 3149{ 3150 int slice; 3151 3152 for (slice = 0; slice < sc->num_slices; slice++) 3153 mxge_free_slice_rings(&sc->ss[slice]); 3154} 3155 3156static int 3157mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3158 int tx_ring_entries) 3159{ 3160 mxge_softc_t *sc = ss->sc; 3161 size_t bytes; 3162 int err, i; 3163 3164 err = ENOMEM; 3165 3166 /* allocate per-slice receive resources */ 3167 3168 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3169 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3170 3171 /* allocate the rx shadow rings */ 3172 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3173 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3174 if (ss->rx_small.shadow == NULL) 3175 return err;; 3176 3177 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3178 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3179 if (ss->rx_big.shadow == NULL) 3180 return err;; 3181 3182 /* allocate the rx host info rings */ 3183 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3184 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3185 if (ss->rx_small.info == NULL) 3186 return err;; 3187 3188 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3189 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3190 if (ss->rx_big.info == NULL) 3191 return err;; 3192 3193 /* allocate the rx busdma resources */ 3194 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3195 1, /* alignment */ 3196 4096, /* boundary */ 3197 BUS_SPACE_MAXADDR, /* low */ 3198 BUS_SPACE_MAXADDR, /* high */ 3199 NULL, NULL, /* filter */ 3200 MHLEN, /* maxsize */ 3201 1, /* num segs */ 3202 MHLEN, /* maxsegsize */ 3203 BUS_DMA_ALLOCNOW, /* flags */ 3204 NULL, NULL, /* lock */ 3205 &ss->rx_small.dmat); /* tag */ 3206 if (err != 0) { 3207 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3208 err); 3209 return err;; 3210 } 3211 3212 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3213 1, /* alignment */ 3214#if MXGE_VIRT_JUMBOS 3215 4096, /* boundary */ 3216#else 3217 0, /* boundary */ 3218#endif 3219 BUS_SPACE_MAXADDR, /* low */ 3220 BUS_SPACE_MAXADDR, /* high */ 3221 NULL, NULL, /* filter */ 3222 3*4096, /* maxsize */ 3223#if MXGE_VIRT_JUMBOS 3224 3, /* num segs */ 3225 4096, /* maxsegsize*/ 3226#else 3227 1, /* num segs */ 3228 MJUM9BYTES, /* maxsegsize*/ 3229#endif 3230 BUS_DMA_ALLOCNOW, /* flags */ 3231 NULL, NULL, /* lock */ 3232 &ss->rx_big.dmat); /* tag */ 3233 if (err != 0) { 3234 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3235 err); 3236 return err;; 3237 } 3238 for (i = 0; i <= ss->rx_small.mask; i++) { 3239 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3240 &ss->rx_small.info[i].map); 3241 if (err != 0) { 3242 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3243 err); 3244 return err;; 3245 } 3246 } 3247 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3248 &ss->rx_small.extra_map); 3249 if (err != 0) { 3250 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3251 err); 3252 return err;; 3253 } 3254 3255 for (i = 0; i <= ss->rx_big.mask; i++) { 3256 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3257 &ss->rx_big.info[i].map); 3258 if (err != 0) { 3259 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3260 err); 3261 return err;; 3262 } 3263 } 3264 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3265 &ss->rx_big.extra_map); 3266 if (err != 0) { 3267 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3268 err); 3269 return err;; 3270 } 3271 3272 /* now allocate TX resouces */ 3273 3274#ifndef IFNET_BUF_RING 3275 /* only use a single TX ring for now */ 3276 if (ss != ss->sc->ss) 3277 return 0; 3278#endif 3279 3280 ss->tx.mask = tx_ring_entries - 1; 3281 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3282 3283 3284 /* allocate the tx request copy block */ 3285 bytes = 8 + 3286 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3287 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3288 if (ss->tx.req_bytes == NULL) 3289 return err;; 3290 /* ensure req_list entries are aligned to 8 bytes */ 3291 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3292 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3293 3294 /* allocate the tx busdma segment list */ 3295 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3296 ss->tx.seg_list = (bus_dma_segment_t *) 3297 malloc(bytes, M_DEVBUF, M_WAITOK); 3298 if (ss->tx.seg_list == NULL) 3299 return err;; 3300 3301 /* allocate the tx host info ring */ 3302 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3303 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3304 if (ss->tx.info == NULL) 3305 return err;; 3306 3307 /* allocate the tx busdma resources */ 3308 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3309 1, /* alignment */ 3310 sc->tx_boundary, /* boundary */ 3311 BUS_SPACE_MAXADDR, /* low */ 3312 BUS_SPACE_MAXADDR, /* high */ 3313 NULL, NULL, /* filter */ 3314 65536 + 256, /* maxsize */ 3315 ss->tx.max_desc - 2, /* num segs */ 3316 sc->tx_boundary, /* maxsegsz */ 3317 BUS_DMA_ALLOCNOW, /* flags */ 3318 NULL, NULL, /* lock */ 3319 &ss->tx.dmat); /* tag */ 3320 3321 if (err != 0) { 3322 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3323 err); 3324 return err;; 3325 } 3326 3327 /* now use these tags to setup dmamaps for each slot 3328 in the ring */ 3329 for (i = 0; i <= ss->tx.mask; i++) { 3330 err = bus_dmamap_create(ss->tx.dmat, 0, 3331 &ss->tx.info[i].map); 3332 if (err != 0) { 3333 device_printf(sc->dev, "Err %d tx dmamap\n", 3334 err); 3335 return err;; 3336 } 3337 } 3338 return 0; 3339 3340} 3341 3342static int 3343mxge_alloc_rings(mxge_softc_t *sc) 3344{ 3345 mxge_cmd_t cmd; 3346 int tx_ring_size; 3347 int tx_ring_entries, rx_ring_entries; 3348 int err, slice; 3349 3350 /* get ring sizes */ 3351 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3352 tx_ring_size = cmd.data0; 3353 if (err != 0) { 3354 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3355 goto abort; 3356 } 3357 3358 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3359 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3360 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3361 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3362 IFQ_SET_READY(&sc->ifp->if_snd); 3363 3364 for (slice = 0; slice < sc->num_slices; slice++) { 3365 err = mxge_alloc_slice_rings(&sc->ss[slice], 3366 rx_ring_entries, 3367 tx_ring_entries); 3368 if (err != 0) 3369 goto abort; 3370 } 3371 return 0; 3372 3373abort: 3374 mxge_free_rings(sc); 3375 return err; 3376 3377} 3378 3379 3380static void 3381mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3382{ 3383 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3384 3385 if (bufsize < MCLBYTES) { 3386 /* easy, everything fits in a single buffer */ 3387 *big_buf_size = MCLBYTES; 3388 *cl_size = MCLBYTES; 3389 *nbufs = 1; 3390 return; 3391 } 3392 3393 if (bufsize < MJUMPAGESIZE) { 3394 /* still easy, everything still fits in a single buffer */ 3395 *big_buf_size = MJUMPAGESIZE; 3396 *cl_size = MJUMPAGESIZE; 3397 *nbufs = 1; 3398 return; 3399 } 3400#if MXGE_VIRT_JUMBOS 3401 /* now we need to use virtually contiguous buffers */ 3402 *cl_size = MJUM9BYTES; 3403 *big_buf_size = 4096; 3404 *nbufs = mtu / 4096 + 1; 3405 /* needs to be a power of two, so round up */ 3406 if (*nbufs == 3) 3407 *nbufs = 4; 3408#else 3409 *cl_size = MJUM9BYTES; 3410 *big_buf_size = MJUM9BYTES; 3411 *nbufs = 1; 3412#endif 3413} 3414 3415static int 3416mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3417{ 3418 mxge_softc_t *sc; 3419 mxge_cmd_t cmd; 3420 bus_dmamap_t map; 3421 struct lro_entry *lro_entry; 3422 int err, i, slice; 3423 3424 3425 sc = ss->sc; 3426 slice = ss - sc->ss; 3427 3428 SLIST_INIT(&ss->lro_free); 3429 SLIST_INIT(&ss->lro_active); 3430 3431 for (i = 0; i < sc->lro_cnt; i++) { 3432 lro_entry = (struct lro_entry *) 3433 malloc(sizeof (*lro_entry), M_DEVBUF, 3434 M_NOWAIT | M_ZERO); 3435 if (lro_entry == NULL) { 3436 sc->lro_cnt = i; 3437 break; 3438 } 3439 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3440 } 3441 /* get the lanai pointers to the send and receive rings */ 3442 3443 err = 0; 3444#ifndef IFNET_BUF_RING 3445 /* We currently only send from the first slice */ 3446 if (slice == 0) { 3447#endif 3448 cmd.data0 = slice; 3449 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3450 ss->tx.lanai = 3451 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3452 ss->tx.send_go = (volatile uint32_t *) 3453 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3454 ss->tx.send_stop = (volatile uint32_t *) 3455 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3456#ifndef IFNET_BUF_RING 3457 } 3458#endif 3459 cmd.data0 = slice; 3460 err |= mxge_send_cmd(sc, 3461 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3462 ss->rx_small.lanai = 3463 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3464 cmd.data0 = slice; 3465 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3466 ss->rx_big.lanai = 3467 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3468 3469 if (err != 0) { 3470 device_printf(sc->dev, 3471 "failed to get ring sizes or locations\n"); 3472 return EIO; 3473 } 3474 3475 /* stock receive rings */ 3476 for (i = 0; i <= ss->rx_small.mask; i++) { 3477 map = ss->rx_small.info[i].map; 3478 err = mxge_get_buf_small(ss, map, i); 3479 if (err) { 3480 device_printf(sc->dev, "alloced %d/%d smalls\n", 3481 i, ss->rx_small.mask + 1); 3482 return ENOMEM; 3483 } 3484 } 3485 for (i = 0; i <= ss->rx_big.mask; i++) { 3486 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3487 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3488 } 3489 ss->rx_big.nbufs = nbufs; 3490 ss->rx_big.cl_size = cl_size; 3491 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3492 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3493 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3494 map = ss->rx_big.info[i].map; 3495 err = mxge_get_buf_big(ss, map, i); 3496 if (err) { 3497 device_printf(sc->dev, "alloced %d/%d bigs\n", 3498 i, ss->rx_big.mask + 1); 3499 return ENOMEM; 3500 } 3501 } 3502 return 0; 3503} 3504 3505static int 3506mxge_open(mxge_softc_t *sc) 3507{ 3508 mxge_cmd_t cmd; 3509 int err, big_bytes, nbufs, slice, cl_size, i; 3510 bus_addr_t bus; 3511 volatile uint8_t *itable; 3512 struct mxge_slice_state *ss; 3513 3514 /* Copy the MAC address in case it was overridden */ 3515 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3516 3517 err = mxge_reset(sc, 1); 3518 if (err != 0) { 3519 device_printf(sc->dev, "failed to reset\n"); 3520 return EIO; 3521 } 3522 3523 if (sc->num_slices > 1) { 3524 /* setup the indirection table */ 3525 cmd.data0 = sc->num_slices; 3526 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3527 &cmd); 3528 3529 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3530 &cmd); 3531 if (err != 0) { 3532 device_printf(sc->dev, 3533 "failed to setup rss tables\n"); 3534 return err; 3535 } 3536 3537 /* just enable an identity mapping */ 3538 itable = sc->sram + cmd.data0; 3539 for (i = 0; i < sc->num_slices; i++) 3540 itable[i] = (uint8_t)i; 3541 3542 cmd.data0 = 1; 3543 cmd.data1 = mxge_rss_hash_type; 3544 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3545 if (err != 0) { 3546 device_printf(sc->dev, "failed to enable slices\n"); 3547 return err; 3548 } 3549 } 3550 3551 3552 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3553 3554 cmd.data0 = nbufs; 3555 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3556 &cmd); 3557 /* error is only meaningful if we're trying to set 3558 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3559 if (err && nbufs > 1) { 3560 device_printf(sc->dev, 3561 "Failed to set alway-use-n to %d\n", 3562 nbufs); 3563 return EIO; 3564 } 3565 /* Give the firmware the mtu and the big and small buffer 3566 sizes. The firmware wants the big buf size to be a power 3567 of two. Luckily, FreeBSD's clusters are powers of two */ 3568 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3569 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3570 cmd.data0 = MHLEN - MXGEFW_PAD; 3571 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3572 &cmd); 3573 cmd.data0 = big_bytes; 3574 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3575 3576 if (err != 0) { 3577 device_printf(sc->dev, "failed to setup params\n"); 3578 goto abort; 3579 } 3580 3581 /* Now give him the pointer to the stats block */ 3582 for (slice = 0; 3583#ifdef IFNET_BUF_RING 3584 slice < sc->num_slices; 3585#else 3586 slice < 1; 3587#endif 3588 slice++) { 3589 ss = &sc->ss[slice]; 3590 cmd.data0 = 3591 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3592 cmd.data1 = 3593 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3594 cmd.data2 = sizeof(struct mcp_irq_data); 3595 cmd.data2 |= (slice << 16); 3596 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3597 } 3598 3599 if (err != 0) { 3600 bus = sc->ss->fw_stats_dma.bus_addr; 3601 bus += offsetof(struct mcp_irq_data, send_done_count); 3602 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3603 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3604 err = mxge_send_cmd(sc, 3605 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3606 &cmd); 3607 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3608 sc->fw_multicast_support = 0; 3609 } else { 3610 sc->fw_multicast_support = 1; 3611 } 3612 3613 if (err != 0) { 3614 device_printf(sc->dev, "failed to setup params\n"); 3615 goto abort; 3616 } 3617 3618 for (slice = 0; slice < sc->num_slices; slice++) { 3619 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3620 if (err != 0) { 3621 device_printf(sc->dev, "couldn't open slice %d\n", 3622 slice); 3623 goto abort; 3624 } 3625 } 3626 3627 /* Finally, start the firmware running */ 3628 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3629 if (err) { 3630 device_printf(sc->dev, "Couldn't bring up link\n"); 3631 goto abort; 3632 } 3633#ifdef IFNET_BUF_RING 3634 for (slice = 0; slice < sc->num_slices; slice++) { 3635 ss = &sc->ss[slice]; 3636 ss->if_drv_flags |= IFF_DRV_RUNNING; 3637 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3638 } 3639#endif 3640 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3641 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3642 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3643 3644 return 0; 3645 3646 3647abort: 3648 mxge_free_mbufs(sc); 3649 3650 return err; 3651} 3652 3653static int 3654mxge_close(mxge_softc_t *sc, int down) 3655{ 3656 mxge_cmd_t cmd; 3657 int err, old_down_cnt; 3658#ifdef IFNET_BUF_RING 3659 struct mxge_slice_state *ss; 3660 int slice; 3661#endif 3662 3663 callout_stop(&sc->co_hdl); 3664#ifdef IFNET_BUF_RING 3665 for (slice = 0; slice < sc->num_slices; slice++) { 3666 ss = &sc->ss[slice]; 3667 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3668 } 3669#endif 3670 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3671 if (!down) { 3672 old_down_cnt = sc->down_cnt; 3673 wmb(); 3674 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3675 if (err) { 3676 device_printf(sc->dev, 3677 "Couldn't bring down link\n"); 3678 } 3679 if (old_down_cnt == sc->down_cnt) { 3680 /* wait for down irq */ 3681 DELAY(10 * sc->intr_coal_delay); 3682 } 3683 wmb(); 3684 if (old_down_cnt == sc->down_cnt) { 3685 device_printf(sc->dev, "never got down irq\n"); 3686 } 3687 } 3688 mxge_free_mbufs(sc); 3689 3690 return 0; 3691} 3692 3693static void 3694mxge_setup_cfg_space(mxge_softc_t *sc) 3695{ 3696 device_t dev = sc->dev; 3697 int reg; 3698 uint16_t cmd, lnk, pectl; 3699 3700 /* find the PCIe link width and set max read request to 4KB*/ 3701 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3702 lnk = pci_read_config(dev, reg + 0x12, 2); 3703 sc->link_width = (lnk >> 4) & 0x3f; 3704 3705 pectl = pci_read_config(dev, reg + 0x8, 2); 3706 pectl = (pectl & ~0x7000) | (5 << 12); 3707 pci_write_config(dev, reg + 0x8, pectl, 2); 3708 } 3709 3710 /* Enable DMA and Memory space access */ 3711 pci_enable_busmaster(dev); 3712 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3713 cmd |= PCIM_CMD_MEMEN; 3714 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3715} 3716 3717static uint32_t 3718mxge_read_reboot(mxge_softc_t *sc) 3719{ 3720 device_t dev = sc->dev; 3721 uint32_t vs; 3722 3723 /* find the vendor specific offset */ 3724 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3725 device_printf(sc->dev, 3726 "could not find vendor specific offset\n"); 3727 return (uint32_t)-1; 3728 } 3729 /* enable read32 mode */ 3730 pci_write_config(dev, vs + 0x10, 0x3, 1); 3731 /* tell NIC which register to read */ 3732 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3733 return (pci_read_config(dev, vs + 0x14, 4)); 3734} 3735 3736static int 3737mxge_watchdog_reset(mxge_softc_t *sc, int slice) 3738{ 3739 struct pci_devinfo *dinfo; 3740 struct mxge_slice_state *ss; 3741 mxge_tx_ring_t *tx; 3742 int err, running, s, num_tx_slices = 1; 3743 uint32_t reboot; 3744 uint16_t cmd; 3745 3746 err = ENXIO; 3747 3748 device_printf(sc->dev, "Watchdog reset!\n"); 3749 3750 /* 3751 * check to see if the NIC rebooted. If it did, then all of 3752 * PCI config space has been reset, and things like the 3753 * busmaster bit will be zero. If this is the case, then we 3754 * must restore PCI config space before the NIC can be used 3755 * again 3756 */ 3757 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3758 if (cmd == 0xffff) { 3759 /* 3760 * maybe the watchdog caught the NIC rebooting; wait 3761 * up to 100ms for it to finish. If it does not come 3762 * back, then give up 3763 */ 3764 DELAY(1000*100); 3765 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3766 if (cmd == 0xffff) { 3767 device_printf(sc->dev, "NIC disappeared!\n"); 3768 return (err); 3769 } 3770 } 3771 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3772 /* print the reboot status */ 3773 reboot = mxge_read_reboot(sc); 3774 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3775 reboot); 3776 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3777 if (running) { 3778 3779 /* 3780 * quiesce NIC so that TX routines will not try to 3781 * xmit after restoration of BAR 3782 */ 3783 3784 /* Mark the link as down */ 3785 if (sc->link_state) { 3786 sc->link_state = 0; 3787 if_link_state_change(sc->ifp, 3788 LINK_STATE_DOWN); 3789 } 3790#ifdef IFNET_BUF_RING 3791 num_tx_slices = sc->num_slices; 3792#endif 3793 /* grab all TX locks to ensure no tx */ 3794 for (s = 0; s < num_tx_slices; s++) { 3795 ss = &sc->ss[s]; 3796 mtx_lock(&ss->tx.mtx); 3797 } 3798 mxge_close(sc, 1); 3799 } 3800 /* restore PCI configuration space */ 3801 dinfo = device_get_ivars(sc->dev); 3802 pci_cfg_restore(sc->dev, dinfo); 3803 3804 /* and redo any changes we made to our config space */ 3805 mxge_setup_cfg_space(sc); 3806 3807 /* reload f/w */ 3808 err = mxge_load_firmware(sc, 0); 3809 if (err) { 3810 device_printf(sc->dev, 3811 "Unable to re-load f/w\n"); 3812 } 3813 if (running) { 3814 if (!err) 3815 err = mxge_open(sc); 3816 /* release all TX locks */ 3817 for (s = 0; s < num_tx_slices; s++) { 3818 ss = &sc->ss[s]; 3819 mtx_unlock(&ss->tx.mtx); 3820 } 3821 } 3822 sc->watchdog_resets++; 3823 } else { 3824 tx = &sc->ss[slice].tx; 3825 device_printf(sc->dev, 3826 "NIC did not reboot, slice %d ring state:\n", 3827 slice); 3828 device_printf(sc->dev, 3829 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3830 tx->req, tx->done, tx->queue_active); 3831 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3832 tx->activate, tx->deactivate); 3833 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3834 tx->pkt_done, 3835 be32toh(sc->ss->fw_stats->send_done_count)); 3836 device_printf(sc->dev, "not resetting\n"); 3837 } 3838 if (err) 3839 device_printf(sc->dev, "watchdog reset failed\n"); 3840 3841 return (err); 3842} 3843 3844static int 3845mxge_watchdog(mxge_softc_t *sc) 3846{ 3847 mxge_tx_ring_t *tx; 3848 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3849 int i, err = 0; 3850 3851 /* see if we have outstanding transmits, which 3852 have been pending for more than mxge_ticks */ 3853 for (i = 0; 3854#ifdef IFNET_BUF_RING 3855 (i < sc->num_slices) && (err == 0); 3856#else 3857 (i < 1) && (err == 0); 3858#endif 3859 i++) { 3860 tx = &sc->ss[i].tx; 3861 if (tx->req != tx->done && 3862 tx->watchdog_req != tx->watchdog_done && 3863 tx->done == tx->watchdog_done) { 3864 /* check for pause blocking before resetting */ 3865 if (tx->watchdog_rx_pause == rx_pause) 3866 err = mxge_watchdog_reset(sc, i); 3867 else 3868 device_printf(sc->dev, "Flow control blocking " 3869 "xmits, check link partner\n"); 3870 } 3871 3872 tx->watchdog_req = tx->req; 3873 tx->watchdog_done = tx->done; 3874 tx->watchdog_rx_pause = rx_pause; 3875 } 3876 3877 if (sc->need_media_probe) 3878 mxge_media_probe(sc); 3879 return (err); 3880} 3881 3882static void 3883mxge_update_stats(mxge_softc_t *sc) 3884{ 3885 struct mxge_slice_state *ss; 3886 u_long ipackets = 0; 3887 u_long opackets = 0; 3888#ifdef IFNET_BUF_RING 3889 u_long obytes = 0; 3890 u_long omcasts = 0; 3891 u_long odrops = 0; 3892#endif 3893 u_long oerrors = 0; 3894 int slice; 3895 3896 for (slice = 0; slice < sc->num_slices; slice++) { 3897 ss = &sc->ss[slice]; 3898 ipackets += ss->ipackets; 3899 opackets += ss->opackets; 3900#ifdef IFNET_BUF_RING 3901 obytes += ss->obytes; 3902 omcasts += ss->omcasts; 3903 odrops += ss->tx.br->br_drops; 3904#endif 3905 oerrors += ss->oerrors; 3906 } 3907 sc->ifp->if_ipackets = ipackets; 3908 sc->ifp->if_opackets = opackets; 3909#ifdef IFNET_BUF_RING 3910 sc->ifp->if_obytes = obytes; 3911 sc->ifp->if_omcasts = omcasts; 3912 sc->ifp->if_snd.ifq_drops = odrops; 3913#endif 3914 sc->ifp->if_oerrors = oerrors; 3915} 3916 3917static void 3918mxge_tick(void *arg) 3919{ 3920 mxge_softc_t *sc = arg; 3921 int err = 0; 3922 3923 /* aggregate stats from different slices */ 3924 mxge_update_stats(sc); 3925 if (!sc->watchdog_countdown) { 3926 err = mxge_watchdog(sc); 3927 sc->watchdog_countdown = 4; 3928 } 3929 sc->watchdog_countdown--; 3930 if (err == 0) 3931 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3932 3933} 3934 3935static int 3936mxge_media_change(struct ifnet *ifp) 3937{ 3938 return EINVAL; 3939} 3940 3941static int 3942mxge_change_mtu(mxge_softc_t *sc, int mtu) 3943{ 3944 struct ifnet *ifp = sc->ifp; 3945 int real_mtu, old_mtu; 3946 int err = 0; 3947 3948 3949 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3950 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3951 return EINVAL; 3952 mtx_lock(&sc->driver_mtx); 3953 old_mtu = ifp->if_mtu; 3954 ifp->if_mtu = mtu; 3955 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3956 mxge_close(sc, 0); 3957 err = mxge_open(sc); 3958 if (err != 0) { 3959 ifp->if_mtu = old_mtu; 3960 mxge_close(sc, 0); 3961 (void) mxge_open(sc); 3962 } 3963 } 3964 mtx_unlock(&sc->driver_mtx); 3965 return err; 3966} 3967 3968static void 3969mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 3970{ 3971 mxge_softc_t *sc = ifp->if_softc; 3972 3973 3974 if (sc == NULL) 3975 return; 3976 ifmr->ifm_status = IFM_AVALID; 3977 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 3978 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 3979 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 3980} 3981 3982static int 3983mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 3984{ 3985 mxge_softc_t *sc = ifp->if_softc; 3986 struct ifreq *ifr = (struct ifreq *)data; 3987 int err, mask; 3988 3989 err = 0; 3990 switch (command) { 3991 case SIOCSIFADDR: 3992 case SIOCGIFADDR: 3993 err = ether_ioctl(ifp, command, data); 3994 break; 3995 3996 case SIOCSIFMTU: 3997 err = mxge_change_mtu(sc, ifr->ifr_mtu); 3998 break; 3999 4000 case SIOCSIFFLAGS: 4001 mtx_lock(&sc->driver_mtx); 4002 if (sc->dying) { 4003 mtx_unlock(&sc->driver_mtx); 4004 return EINVAL; 4005 } 4006 if (ifp->if_flags & IFF_UP) { 4007 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4008 err = mxge_open(sc); 4009 } else { 4010 /* take care of promis can allmulti 4011 flag chages */ 4012 mxge_change_promisc(sc, 4013 ifp->if_flags & IFF_PROMISC); 4014 mxge_set_multicast_list(sc); 4015 } 4016 } else { 4017 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4018 mxge_close(sc, 0); 4019 } 4020 } 4021 mtx_unlock(&sc->driver_mtx); 4022 break; 4023 4024 case SIOCADDMULTI: 4025 case SIOCDELMULTI: 4026 mtx_lock(&sc->driver_mtx); 4027 mxge_set_multicast_list(sc); 4028 mtx_unlock(&sc->driver_mtx); 4029 break; 4030 4031 case SIOCSIFCAP: 4032 mtx_lock(&sc->driver_mtx); 4033 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4034 if (mask & IFCAP_TXCSUM) { 4035 if (IFCAP_TXCSUM & ifp->if_capenable) { 4036 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4037 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4038 | CSUM_TSO); 4039 } else { 4040 ifp->if_capenable |= IFCAP_TXCSUM; 4041 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4042 } 4043 } else if (mask & IFCAP_RXCSUM) { 4044 if (IFCAP_RXCSUM & ifp->if_capenable) { 4045 ifp->if_capenable &= ~IFCAP_RXCSUM; 4046 sc->csum_flag = 0; 4047 } else { 4048 ifp->if_capenable |= IFCAP_RXCSUM; 4049 sc->csum_flag = 1; 4050 } 4051 } 4052 if (mask & IFCAP_TSO4) { 4053 if (IFCAP_TSO4 & ifp->if_capenable) { 4054 ifp->if_capenable &= ~IFCAP_TSO4; 4055 ifp->if_hwassist &= ~CSUM_TSO; 4056 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4057 ifp->if_capenable |= IFCAP_TSO4; 4058 ifp->if_hwassist |= CSUM_TSO; 4059 } else { 4060 printf("mxge requires tx checksum offload" 4061 " be enabled to use TSO\n"); 4062 err = EINVAL; 4063 } 4064 } 4065 if (mask & IFCAP_LRO) { 4066 if (IFCAP_LRO & ifp->if_capenable) 4067 err = mxge_change_lro_locked(sc, 0); 4068 else 4069 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4070 } 4071 if (mask & IFCAP_VLAN_HWTAGGING) 4072 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4073 mtx_unlock(&sc->driver_mtx); 4074 VLAN_CAPABILITIES(ifp); 4075 4076 break; 4077 4078 case SIOCGIFMEDIA: 4079 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4080 &sc->media, command); 4081 break; 4082 4083 default: 4084 err = ENOTTY; 4085 } 4086 return err; 4087} 4088 4089static void 4090mxge_fetch_tunables(mxge_softc_t *sc) 4091{ 4092 4093 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4094 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4095 &mxge_flow_control); 4096 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4097 &mxge_intr_coal_delay); 4098 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4099 &mxge_nvidia_ecrc_enable); 4100 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4101 &mxge_force_firmware); 4102 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4103 &mxge_deassert_wait); 4104 TUNABLE_INT_FETCH("hw.mxge.verbose", 4105 &mxge_verbose); 4106 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4107 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4108 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4109 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4110 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4111 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4112 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4113 if (sc->lro_cnt != 0) 4114 mxge_lro_cnt = sc->lro_cnt; 4115 4116 if (bootverbose) 4117 mxge_verbose = 1; 4118 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4119 mxge_intr_coal_delay = 30; 4120 if (mxge_ticks == 0) 4121 mxge_ticks = hz / 2; 4122 sc->pause = mxge_flow_control; 4123 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4124 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4125 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4126 } 4127 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4128 mxge_initial_mtu < ETHER_MIN_LEN) 4129 mxge_initial_mtu = ETHERMTU_JUMBO; 4130 4131 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4132 mxge_throttle = MXGE_MAX_THROTTLE; 4133 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4134 mxge_throttle = MXGE_MIN_THROTTLE; 4135 sc->throttle = mxge_throttle; 4136} 4137 4138 4139static void 4140mxge_free_slices(mxge_softc_t *sc) 4141{ 4142 struct mxge_slice_state *ss; 4143 int i; 4144 4145 4146 if (sc->ss == NULL) 4147 return; 4148 4149 for (i = 0; i < sc->num_slices; i++) { 4150 ss = &sc->ss[i]; 4151 if (ss->fw_stats != NULL) { 4152 mxge_dma_free(&ss->fw_stats_dma); 4153 ss->fw_stats = NULL; 4154#ifdef IFNET_BUF_RING 4155 if (ss->tx.br != NULL) { 4156 drbr_free(ss->tx.br, M_DEVBUF); 4157 ss->tx.br = NULL; 4158 } 4159#endif 4160 mtx_destroy(&ss->tx.mtx); 4161 } 4162 if (ss->rx_done.entry != NULL) { 4163 mxge_dma_free(&ss->rx_done.dma); 4164 ss->rx_done.entry = NULL; 4165 } 4166 } 4167 free(sc->ss, M_DEVBUF); 4168 sc->ss = NULL; 4169} 4170 4171static int 4172mxge_alloc_slices(mxge_softc_t *sc) 4173{ 4174 mxge_cmd_t cmd; 4175 struct mxge_slice_state *ss; 4176 size_t bytes; 4177 int err, i, max_intr_slots; 4178 4179 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4180 if (err != 0) { 4181 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4182 return err; 4183 } 4184 sc->rx_ring_size = cmd.data0; 4185 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4186 4187 bytes = sizeof (*sc->ss) * sc->num_slices; 4188 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4189 if (sc->ss == NULL) 4190 return (ENOMEM); 4191 for (i = 0; i < sc->num_slices; i++) { 4192 ss = &sc->ss[i]; 4193 4194 ss->sc = sc; 4195 4196 /* allocate per-slice rx interrupt queues */ 4197 4198 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4199 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4200 if (err != 0) 4201 goto abort; 4202 ss->rx_done.entry = ss->rx_done.dma.addr; 4203 bzero(ss->rx_done.entry, bytes); 4204 4205 /* 4206 * allocate the per-slice firmware stats; stats 4207 * (including tx) are used used only on the first 4208 * slice for now 4209 */ 4210#ifndef IFNET_BUF_RING 4211 if (i > 0) 4212 continue; 4213#endif 4214 4215 bytes = sizeof (*ss->fw_stats); 4216 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4217 sizeof (*ss->fw_stats), 64); 4218 if (err != 0) 4219 goto abort; 4220 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4221 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4222 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4223 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4224#ifdef IFNET_BUF_RING 4225 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4226 &ss->tx.mtx); 4227#endif 4228 } 4229 4230 return (0); 4231 4232abort: 4233 mxge_free_slices(sc); 4234 return (ENOMEM); 4235} 4236 4237static void 4238mxge_slice_probe(mxge_softc_t *sc) 4239{ 4240 mxge_cmd_t cmd; 4241 char *old_fw; 4242 int msix_cnt, status, max_intr_slots; 4243 4244 sc->num_slices = 1; 4245 /* 4246 * don't enable multiple slices if they are not enabled, 4247 * or if this is not an SMP system 4248 */ 4249 4250 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4251 return; 4252 4253 /* see how many MSI-X interrupts are available */ 4254 msix_cnt = pci_msix_count(sc->dev); 4255 if (msix_cnt < 2) 4256 return; 4257 4258 /* now load the slice aware firmware see what it supports */ 4259 old_fw = sc->fw_name; 4260 if (old_fw == mxge_fw_aligned) 4261 sc->fw_name = mxge_fw_rss_aligned; 4262 else 4263 sc->fw_name = mxge_fw_rss_unaligned; 4264 status = mxge_load_firmware(sc, 0); 4265 if (status != 0) { 4266 device_printf(sc->dev, "Falling back to a single slice\n"); 4267 return; 4268 } 4269 4270 /* try to send a reset command to the card to see if it 4271 is alive */ 4272 memset(&cmd, 0, sizeof (cmd)); 4273 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4274 if (status != 0) { 4275 device_printf(sc->dev, "failed reset\n"); 4276 goto abort_with_fw; 4277 } 4278 4279 /* get rx ring size */ 4280 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4281 if (status != 0) { 4282 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4283 goto abort_with_fw; 4284 } 4285 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4286 4287 /* tell it the size of the interrupt queues */ 4288 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4289 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4290 if (status != 0) { 4291 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4292 goto abort_with_fw; 4293 } 4294 4295 /* ask the maximum number of slices it supports */ 4296 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4297 if (status != 0) { 4298 device_printf(sc->dev, 4299 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4300 goto abort_with_fw; 4301 } 4302 sc->num_slices = cmd.data0; 4303 if (sc->num_slices > msix_cnt) 4304 sc->num_slices = msix_cnt; 4305 4306 if (mxge_max_slices == -1) { 4307 /* cap to number of CPUs in system */ 4308 if (sc->num_slices > mp_ncpus) 4309 sc->num_slices = mp_ncpus; 4310 } else { 4311 if (sc->num_slices > mxge_max_slices) 4312 sc->num_slices = mxge_max_slices; 4313 } 4314 /* make sure it is a power of two */ 4315 while (sc->num_slices & (sc->num_slices - 1)) 4316 sc->num_slices--; 4317 4318 if (mxge_verbose) 4319 device_printf(sc->dev, "using %d slices\n", 4320 sc->num_slices); 4321 4322 return; 4323 4324abort_with_fw: 4325 sc->fw_name = old_fw; 4326 (void) mxge_load_firmware(sc, 0); 4327} 4328 4329static int 4330mxge_add_msix_irqs(mxge_softc_t *sc) 4331{ 4332 size_t bytes; 4333 int count, err, i, rid; 4334 4335 rid = PCIR_BAR(2); 4336 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4337 &rid, RF_ACTIVE); 4338 4339 if (sc->msix_table_res == NULL) { 4340 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4341 return ENXIO; 4342 } 4343 4344 count = sc->num_slices; 4345 err = pci_alloc_msix(sc->dev, &count); 4346 if (err != 0) { 4347 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4348 "err = %d \n", sc->num_slices, err); 4349 goto abort_with_msix_table; 4350 } 4351 if (count < sc->num_slices) { 4352 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4353 count, sc->num_slices); 4354 device_printf(sc->dev, 4355 "Try setting hw.mxge.max_slices to %d\n", 4356 count); 4357 err = ENOSPC; 4358 goto abort_with_msix; 4359 } 4360 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4361 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4362 if (sc->msix_irq_res == NULL) { 4363 err = ENOMEM; 4364 goto abort_with_msix; 4365 } 4366 4367 for (i = 0; i < sc->num_slices; i++) { 4368 rid = i + 1; 4369 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4370 SYS_RES_IRQ, 4371 &rid, RF_ACTIVE); 4372 if (sc->msix_irq_res[i] == NULL) { 4373 device_printf(sc->dev, "couldn't allocate IRQ res" 4374 " for message %d\n", i); 4375 err = ENXIO; 4376 goto abort_with_res; 4377 } 4378 } 4379 4380 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4381 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4382 4383 for (i = 0; i < sc->num_slices; i++) { 4384 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4385 INTR_TYPE_NET | INTR_MPSAFE, 4386#if __FreeBSD_version > 700030 4387 NULL, 4388#endif 4389 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4390 if (err != 0) { 4391 device_printf(sc->dev, "couldn't setup intr for " 4392 "message %d\n", i); 4393 goto abort_with_intr; 4394 } 4395 } 4396 4397 if (mxge_verbose) { 4398 device_printf(sc->dev, "using %d msix IRQs:", 4399 sc->num_slices); 4400 for (i = 0; i < sc->num_slices; i++) 4401 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4402 printf("\n"); 4403 } 4404 return (0); 4405 4406abort_with_intr: 4407 for (i = 0; i < sc->num_slices; i++) { 4408 if (sc->msix_ih[i] != NULL) { 4409 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4410 sc->msix_ih[i]); 4411 sc->msix_ih[i] = NULL; 4412 } 4413 } 4414 free(sc->msix_ih, M_DEVBUF); 4415 4416 4417abort_with_res: 4418 for (i = 0; i < sc->num_slices; i++) { 4419 rid = i + 1; 4420 if (sc->msix_irq_res[i] != NULL) 4421 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4422 sc->msix_irq_res[i]); 4423 sc->msix_irq_res[i] = NULL; 4424 } 4425 free(sc->msix_irq_res, M_DEVBUF); 4426 4427 4428abort_with_msix: 4429 pci_release_msi(sc->dev); 4430 4431abort_with_msix_table: 4432 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4433 sc->msix_table_res); 4434 4435 return err; 4436} 4437 4438static int 4439mxge_add_single_irq(mxge_softc_t *sc) 4440{ 4441 int count, err, rid; 4442 4443 count = pci_msi_count(sc->dev); 4444 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4445 rid = 1; 4446 } else { 4447 rid = 0; 4448 sc->legacy_irq = 1; 4449 } 4450 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4451 1, RF_SHAREABLE | RF_ACTIVE); 4452 if (sc->irq_res == NULL) { 4453 device_printf(sc->dev, "could not alloc interrupt\n"); 4454 return ENXIO; 4455 } 4456 if (mxge_verbose) 4457 device_printf(sc->dev, "using %s irq %ld\n", 4458 sc->legacy_irq ? "INTx" : "MSI", 4459 rman_get_start(sc->irq_res)); 4460 err = bus_setup_intr(sc->dev, sc->irq_res, 4461 INTR_TYPE_NET | INTR_MPSAFE, 4462#if __FreeBSD_version > 700030 4463 NULL, 4464#endif 4465 mxge_intr, &sc->ss[0], &sc->ih); 4466 if (err != 0) { 4467 bus_release_resource(sc->dev, SYS_RES_IRQ, 4468 sc->legacy_irq ? 0 : 1, sc->irq_res); 4469 if (!sc->legacy_irq) 4470 pci_release_msi(sc->dev); 4471 } 4472 return err; 4473} 4474 4475static void 4476mxge_rem_msix_irqs(mxge_softc_t *sc) 4477{ 4478 int i, rid; 4479 4480 for (i = 0; i < sc->num_slices; i++) { 4481 if (sc->msix_ih[i] != NULL) { 4482 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4483 sc->msix_ih[i]); 4484 sc->msix_ih[i] = NULL; 4485 } 4486 } 4487 free(sc->msix_ih, M_DEVBUF); 4488 4489 for (i = 0; i < sc->num_slices; i++) { 4490 rid = i + 1; 4491 if (sc->msix_irq_res[i] != NULL) 4492 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4493 sc->msix_irq_res[i]); 4494 sc->msix_irq_res[i] = NULL; 4495 } 4496 free(sc->msix_irq_res, M_DEVBUF); 4497 4498 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4499 sc->msix_table_res); 4500 4501 pci_release_msi(sc->dev); 4502 return; 4503} 4504 4505static void 4506mxge_rem_single_irq(mxge_softc_t *sc) 4507{ 4508 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4509 bus_release_resource(sc->dev, SYS_RES_IRQ, 4510 sc->legacy_irq ? 0 : 1, sc->irq_res); 4511 if (!sc->legacy_irq) 4512 pci_release_msi(sc->dev); 4513} 4514 4515static void 4516mxge_rem_irq(mxge_softc_t *sc) 4517{ 4518 if (sc->num_slices > 1) 4519 mxge_rem_msix_irqs(sc); 4520 else 4521 mxge_rem_single_irq(sc); 4522} 4523 4524static int 4525mxge_add_irq(mxge_softc_t *sc) 4526{ 4527 int err; 4528 4529 if (sc->num_slices > 1) 4530 err = mxge_add_msix_irqs(sc); 4531 else 4532 err = mxge_add_single_irq(sc); 4533 4534 if (0 && err == 0 && sc->num_slices > 1) { 4535 mxge_rem_msix_irqs(sc); 4536 err = mxge_add_msix_irqs(sc); 4537 } 4538 return err; 4539} 4540 4541 4542static int 4543mxge_attach(device_t dev) 4544{ 4545 mxge_softc_t *sc = device_get_softc(dev); 4546 struct ifnet *ifp; 4547 int err, rid; 4548 4549 sc->dev = dev; 4550 mxge_fetch_tunables(sc); 4551 4552 err = bus_dma_tag_create(NULL, /* parent */ 4553 1, /* alignment */ 4554 0, /* boundary */ 4555 BUS_SPACE_MAXADDR, /* low */ 4556 BUS_SPACE_MAXADDR, /* high */ 4557 NULL, NULL, /* filter */ 4558 65536 + 256, /* maxsize */ 4559 MXGE_MAX_SEND_DESC, /* num segs */ 4560 65536, /* maxsegsize */ 4561 0, /* flags */ 4562 NULL, NULL, /* lock */ 4563 &sc->parent_dmat); /* tag */ 4564 4565 if (err != 0) { 4566 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4567 err); 4568 goto abort_with_nothing; 4569 } 4570 4571 ifp = sc->ifp = if_alloc(IFT_ETHER); 4572 if (ifp == NULL) { 4573 device_printf(dev, "can not if_alloc()\n"); 4574 err = ENOSPC; 4575 goto abort_with_parent_dmat; 4576 } 4577 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4578 4579 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4580 device_get_nameunit(dev)); 4581 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4582 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4583 "%s:drv", device_get_nameunit(dev)); 4584 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4585 MTX_NETWORK_LOCK, MTX_DEF); 4586 4587 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4588 4589 mxge_setup_cfg_space(sc); 4590 4591 /* Map the board into the kernel */ 4592 rid = PCIR_BARS; 4593 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4594 ~0, 1, RF_ACTIVE); 4595 if (sc->mem_res == NULL) { 4596 device_printf(dev, "could not map memory\n"); 4597 err = ENXIO; 4598 goto abort_with_lock; 4599 } 4600 sc->sram = rman_get_virtual(sc->mem_res); 4601 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4602 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4603 device_printf(dev, "impossible memory region size %ld\n", 4604 rman_get_size(sc->mem_res)); 4605 err = ENXIO; 4606 goto abort_with_mem_res; 4607 } 4608 4609 /* make NULL terminated copy of the EEPROM strings section of 4610 lanai SRAM */ 4611 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4612 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4613 rman_get_bushandle(sc->mem_res), 4614 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4615 sc->eeprom_strings, 4616 MXGE_EEPROM_STRINGS_SIZE - 2); 4617 err = mxge_parse_strings(sc); 4618 if (err != 0) 4619 goto abort_with_mem_res; 4620 4621 /* Enable write combining for efficient use of PCIe bus */ 4622 mxge_enable_wc(sc); 4623 4624 /* Allocate the out of band dma memory */ 4625 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4626 sizeof (mxge_cmd_t), 64); 4627 if (err != 0) 4628 goto abort_with_mem_res; 4629 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4630 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4631 if (err != 0) 4632 goto abort_with_cmd_dma; 4633 4634 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4635 if (err != 0) 4636 goto abort_with_zeropad_dma; 4637 4638 /* select & load the firmware */ 4639 err = mxge_select_firmware(sc); 4640 if (err != 0) 4641 goto abort_with_dmabench; 4642 sc->intr_coal_delay = mxge_intr_coal_delay; 4643 4644 mxge_slice_probe(sc); 4645 err = mxge_alloc_slices(sc); 4646 if (err != 0) 4647 goto abort_with_dmabench; 4648 4649 err = mxge_reset(sc, 0); 4650 if (err != 0) 4651 goto abort_with_slices; 4652 4653 err = mxge_alloc_rings(sc); 4654 if (err != 0) { 4655 device_printf(sc->dev, "failed to allocate rings\n"); 4656 goto abort_with_dmabench; 4657 } 4658 4659 err = mxge_add_irq(sc); 4660 if (err != 0) { 4661 device_printf(sc->dev, "failed to add irq\n"); 4662 goto abort_with_rings; 4663 } 4664 4665 ifp->if_baudrate = IF_Gbps(10UL); 4666 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4667 IFCAP_VLAN_MTU; 4668#ifdef INET 4669 ifp->if_capabilities |= IFCAP_LRO; 4670#endif 4671 4672#ifdef MXGE_NEW_VLAN_API 4673 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4674#endif 4675 4676 sc->max_mtu = mxge_max_mtu(sc); 4677 if (sc->max_mtu >= 9000) 4678 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4679 else 4680 device_printf(dev, "MTU limited to %d. Install " 4681 "latest firmware for 9000 byte jumbo support\n", 4682 sc->max_mtu - ETHER_HDR_LEN); 4683 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4684 ifp->if_capenable = ifp->if_capabilities; 4685 if (sc->lro_cnt == 0) 4686 ifp->if_capenable &= ~IFCAP_LRO; 4687 sc->csum_flag = 1; 4688 ifp->if_init = mxge_init; 4689 ifp->if_softc = sc; 4690 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4691 ifp->if_ioctl = mxge_ioctl; 4692 ifp->if_start = mxge_start; 4693 /* Initialise the ifmedia structure */ 4694 ifmedia_init(&sc->media, 0, mxge_media_change, 4695 mxge_media_status); 4696 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4697 mxge_media_probe(sc); 4698 sc->dying = 0; 4699 ether_ifattach(ifp, sc->mac_addr); 4700 /* ether_ifattach sets mtu to ETHERMTU */ 4701 if (mxge_initial_mtu != ETHERMTU) 4702 mxge_change_mtu(sc, mxge_initial_mtu); 4703 4704 mxge_add_sysctls(sc); 4705#ifdef IFNET_BUF_RING 4706 ifp->if_transmit = mxge_transmit; 4707 ifp->if_qflush = mxge_qflush; 4708#endif 4709 return 0; 4710 4711abort_with_rings: 4712 mxge_free_rings(sc); 4713abort_with_slices: 4714 mxge_free_slices(sc); 4715abort_with_dmabench: 4716 mxge_dma_free(&sc->dmabench_dma); 4717abort_with_zeropad_dma: 4718 mxge_dma_free(&sc->zeropad_dma); 4719abort_with_cmd_dma: 4720 mxge_dma_free(&sc->cmd_dma); 4721abort_with_mem_res: 4722 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4723abort_with_lock: 4724 pci_disable_busmaster(dev); 4725 mtx_destroy(&sc->cmd_mtx); 4726 mtx_destroy(&sc->driver_mtx); 4727 if_free(ifp); 4728abort_with_parent_dmat: 4729 bus_dma_tag_destroy(sc->parent_dmat); 4730 4731abort_with_nothing: 4732 return err; 4733} 4734 4735static int 4736mxge_detach(device_t dev) 4737{ 4738 mxge_softc_t *sc = device_get_softc(dev); 4739 4740 if (mxge_vlans_active(sc)) { 4741 device_printf(sc->dev, 4742 "Detach vlans before removing module\n"); 4743 return EBUSY; 4744 } 4745 mtx_lock(&sc->driver_mtx); 4746 sc->dying = 1; 4747 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4748 mxge_close(sc, 0); 4749 mtx_unlock(&sc->driver_mtx); 4750 ether_ifdetach(sc->ifp); 4751 callout_drain(&sc->co_hdl); 4752 ifmedia_removeall(&sc->media); 4753 mxge_dummy_rdma(sc, 0); 4754 mxge_rem_sysctls(sc); 4755 mxge_rem_irq(sc); 4756 mxge_free_rings(sc); 4757 mxge_free_slices(sc); 4758 mxge_dma_free(&sc->dmabench_dma); 4759 mxge_dma_free(&sc->zeropad_dma); 4760 mxge_dma_free(&sc->cmd_dma); 4761 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4762 pci_disable_busmaster(dev); 4763 mtx_destroy(&sc->cmd_mtx); 4764 mtx_destroy(&sc->driver_mtx); 4765 if_free(sc->ifp); 4766 bus_dma_tag_destroy(sc->parent_dmat); 4767 return 0; 4768} 4769 4770static int 4771mxge_shutdown(device_t dev) 4772{ 4773 return 0; 4774} 4775 4776/* 4777 This file uses Myri10GE driver indentation. 4778 4779 Local Variables: 4780 c-file-style:"linux" 4781 tab-width:8 4782 End: 4783*/ 4784