if_mxge.c revision 198250
1/****************************************************************************** 2 3Copyright (c) 2006-2009, Myricom Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 198250 2009-10-19 20:51:27Z gallatin $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50/* count xmits ourselves, rather than via drbr */ 51#define NO_SLOW_STATS 52#include <net/if.h> 53#include <net/if_arp.h> 54#include <net/ethernet.h> 55#include <net/if_dl.h> 56#include <net/if_media.h> 57 58#include <net/bpf.h> 59 60#include <net/if_types.h> 61#include <net/if_vlan_var.h> 62#include <net/zlib.h> 63 64#include <netinet/in_systm.h> 65#include <netinet/in.h> 66#include <netinet/ip.h> 67#include <netinet/tcp.h> 68 69#include <machine/bus.h> 70#include <machine/in_cksum.h> 71#include <machine/resource.h> 72#include <sys/bus.h> 73#include <sys/rman.h> 74#include <sys/smp.h> 75 76#include <dev/pci/pcireg.h> 77#include <dev/pci/pcivar.h> 78#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80#include <vm/vm.h> /* for pmap_mapdev() */ 81#include <vm/pmap.h> 82 83#if defined(__i386) || defined(__amd64) 84#include <machine/specialreg.h> 85#endif 86 87#include <dev/mxge/mxge_mcp.h> 88#include <dev/mxge/mcp_gen_header.h> 89/*#define MXGE_FAKE_IFP*/ 90#include <dev/mxge/if_mxge_var.h> 91#ifdef IFNET_BUF_RING 92#include <sys/buf_ring.h> 93#endif 94 95#include "opt_inet.h" 96 97/* tunable params */ 98static int mxge_nvidia_ecrc_enable = 1; 99static int mxge_force_firmware = 0; 100static int mxge_intr_coal_delay = 30; 101static int mxge_deassert_wait = 1; 102static int mxge_flow_control = 1; 103static int mxge_verbose = 0; 104static int mxge_lro_cnt = 8; 105static int mxge_ticks; 106static int mxge_max_slices = 1; 107static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 108static int mxge_always_promisc = 0; 109static int mxge_initial_mtu = ETHERMTU_JUMBO; 110static int mxge_throttle = 0; 111static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 112static char *mxge_fw_aligned = "mxge_eth_z8e"; 113static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 114static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 115 116static int mxge_probe(device_t dev); 117static int mxge_attach(device_t dev); 118static int mxge_detach(device_t dev); 119static int mxge_shutdown(device_t dev); 120static void mxge_intr(void *arg); 121 122static device_method_t mxge_methods[] = 123{ 124 /* Device interface */ 125 DEVMETHOD(device_probe, mxge_probe), 126 DEVMETHOD(device_attach, mxge_attach), 127 DEVMETHOD(device_detach, mxge_detach), 128 DEVMETHOD(device_shutdown, mxge_shutdown), 129 {0, 0} 130}; 131 132static driver_t mxge_driver = 133{ 134 "mxge", 135 mxge_methods, 136 sizeof(mxge_softc_t), 137}; 138 139static devclass_t mxge_devclass; 140 141/* Declare ourselves to be a child of the PCI bus.*/ 142DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 143MODULE_DEPEND(mxge, firmware, 1, 1, 1); 144MODULE_DEPEND(mxge, zlib, 1, 1, 1); 145 146static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 147static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 148static int mxge_close(mxge_softc_t *sc, int down); 149static int mxge_open(mxge_softc_t *sc); 150static void mxge_tick(void *arg); 151 152static int 153mxge_probe(device_t dev) 154{ 155 int rev; 156 157 158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 161 rev = pci_get_revid(dev); 162 switch (rev) { 163 case MXGE_PCI_REV_Z8E: 164 device_set_desc(dev, "Myri10G-PCIE-8A"); 165 break; 166 case MXGE_PCI_REV_Z8ES: 167 device_set_desc(dev, "Myri10G-PCIE-8B"); 168 break; 169 default: 170 device_set_desc(dev, "Myri10G-PCIE-8??"); 171 device_printf(dev, "Unrecognized rev %d NIC\n", 172 rev); 173 break; 174 } 175 return 0; 176 } 177 return ENXIO; 178} 179 180static void 181mxge_enable_wc(mxge_softc_t *sc) 182{ 183#if defined(__i386) || defined(__amd64) 184 vm_offset_t len; 185 int err; 186 187 sc->wc = 1; 188 len = rman_get_size(sc->mem_res); 189 err = pmap_change_attr((vm_offset_t) sc->sram, 190 len, PAT_WRITE_COMBINING); 191 if (err != 0) { 192 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 193 err); 194 sc->wc = 0; 195 } 196#endif 197} 198 199 200/* callback to get our DMA address */ 201static void 202mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 203 int error) 204{ 205 if (error == 0) { 206 *(bus_addr_t *) arg = segs->ds_addr; 207 } 208} 209 210static int 211mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 212 bus_size_t alignment) 213{ 214 int err; 215 device_t dev = sc->dev; 216 bus_size_t boundary, maxsegsize; 217 218 if (bytes > 4096 && alignment == 4096) { 219 boundary = 0; 220 maxsegsize = bytes; 221 } else { 222 boundary = 4096; 223 maxsegsize = 4096; 224 } 225 226 /* allocate DMAable memory tags */ 227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 228 alignment, /* alignment */ 229 boundary, /* boundary */ 230 BUS_SPACE_MAXADDR, /* low */ 231 BUS_SPACE_MAXADDR, /* high */ 232 NULL, NULL, /* filter */ 233 bytes, /* maxsize */ 234 1, /* num segs */ 235 maxsegsize, /* maxsegsize */ 236 BUS_DMA_COHERENT, /* flags */ 237 NULL, NULL, /* lock */ 238 &dma->dmat); /* tag */ 239 if (err != 0) { 240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 241 return err; 242 } 243 244 /* allocate DMAable memory & map */ 245 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 247 | BUS_DMA_ZERO), &dma->map); 248 if (err != 0) { 249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 250 goto abort_with_dmat; 251 } 252 253 /* load the memory */ 254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 255 mxge_dmamap_callback, 256 (void *)&dma->bus_addr, 0); 257 if (err != 0) { 258 device_printf(dev, "couldn't load map (err = %d)\n", err); 259 goto abort_with_mem; 260 } 261 return 0; 262 263abort_with_mem: 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265abort_with_dmat: 266 (void)bus_dma_tag_destroy(dma->dmat); 267 return err; 268} 269 270 271static void 272mxge_dma_free(mxge_dma_t *dma) 273{ 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277} 278 279/* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286static int 287mxge_parse_strings(mxge_softc_t *sc) 288{ 289#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 290 291 char *ptr, *limit; 292 int i, found_mac; 293 294 ptr = sc->eeprom_strings; 295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 296 found_mac = 0; 297 while (ptr < limit && *ptr != '\0') { 298 if (memcmp(ptr, "MAC=", 4) == 0) { 299 ptr += 1; 300 sc->mac_addr_string = ptr; 301 for (i = 0; i < 6; i++) { 302 ptr += 3; 303 if ((ptr + 2) > limit) 304 goto abort; 305 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 306 found_mac = 1; 307 } 308 } else if (memcmp(ptr, "PC=", 3) == 0) { 309 ptr += 3; 310 strncpy(sc->product_code_string, ptr, 311 sizeof (sc->product_code_string) - 1); 312 } else if (memcmp(ptr, "SN=", 3) == 0) { 313 ptr += 3; 314 strncpy(sc->serial_number_string, ptr, 315 sizeof (sc->serial_number_string) - 1); 316 } 317 MXGE_NEXT_STRING(ptr); 318 } 319 320 if (found_mac) 321 return 0; 322 323 abort: 324 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 325 326 return ENXIO; 327} 328 329#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 330static void 331mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 332{ 333 uint32_t val; 334 unsigned long base, off; 335 char *va, *cfgptr; 336 device_t pdev, mcp55; 337 uint16_t vendor_id, device_id, word; 338 uintptr_t bus, slot, func, ivend, idev; 339 uint32_t *ptr32; 340 341 342 if (!mxge_nvidia_ecrc_enable) 343 return; 344 345 pdev = device_get_parent(device_get_parent(sc->dev)); 346 if (pdev == NULL) { 347 device_printf(sc->dev, "could not find parent?\n"); 348 return; 349 } 350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 352 353 if (vendor_id != 0x10de) 354 return; 355 356 base = 0; 357 358 if (device_id == 0x005d) { 359 /* ck804, base address is magic */ 360 base = 0xe0000000UL; 361 } else if (device_id >= 0x0374 && device_id <= 0x378) { 362 /* mcp55, base address stored in chipset */ 363 mcp55 = pci_find_bsf(0, 0, 0); 364 if (mcp55 && 365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 367 word = pci_read_config(mcp55, 0x90, 2); 368 base = ((unsigned long)word & 0x7ffeU) << 25; 369 } 370 } 371 if (!base) 372 return; 373 374 /* XXXX 375 Test below is commented because it is believed that doing 376 config read/write beyond 0xff will access the config space 377 for the next larger function. Uncomment this and remove 378 the hacky pmap_mapdev() way of accessing config space when 379 FreeBSD grows support for extended pcie config space access 380 */ 381#if 0 382 /* See if we can, by some miracle, access the extended 383 config space */ 384 val = pci_read_config(pdev, 0x178, 4); 385 if (val != 0xffffffff) { 386 val |= 0x40; 387 pci_write_config(pdev, 0x178, val, 4); 388 return; 389 } 390#endif 391 /* Rather than using normal pci config space writes, we must 392 * map the Nvidia config space ourselves. This is because on 393 * opteron/nvidia class machine the 0xe000000 mapping is 394 * handled by the nvidia chipset, that means the internal PCI 395 * device (the on-chip northbridge), or the amd-8131 bridge 396 * and things behind them are not visible by this method. 397 */ 398 399 BUS_READ_IVAR(device_get_parent(pdev), pdev, 400 PCI_IVAR_BUS, &bus); 401 BUS_READ_IVAR(device_get_parent(pdev), pdev, 402 PCI_IVAR_SLOT, &slot); 403 BUS_READ_IVAR(device_get_parent(pdev), pdev, 404 PCI_IVAR_FUNCTION, &func); 405 BUS_READ_IVAR(device_get_parent(pdev), pdev, 406 PCI_IVAR_VENDOR, &ivend); 407 BUS_READ_IVAR(device_get_parent(pdev), pdev, 408 PCI_IVAR_DEVICE, &idev); 409 410 off = base 411 + 0x00100000UL * (unsigned long)bus 412 + 0x00001000UL * (unsigned long)(func 413 + 8 * slot); 414 415 /* map it into the kernel */ 416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 417 418 419 if (va == NULL) { 420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 421 return; 422 } 423 /* get a pointer to the config space mapped into the kernel */ 424 cfgptr = va + (off & PAGE_MASK); 425 426 /* make sure that we can really access it */ 427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 429 if (! (vendor_id == ivend && device_id == idev)) { 430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 431 vendor_id, device_id); 432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 433 return; 434 } 435 436 ptr32 = (uint32_t*)(cfgptr + 0x178); 437 val = *ptr32; 438 439 if (val == 0xffffffff) { 440 device_printf(sc->dev, "extended mapping failed\n"); 441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 442 return; 443 } 444 *ptr32 = val | 0x40; 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 if (mxge_verbose) 447 device_printf(sc->dev, 448 "Enabled ECRC on upstream Nvidia bridge " 449 "at %d:%d:%d\n", 450 (int)bus, (int)slot, (int)func); 451 return; 452} 453#else 454static void 455mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 456{ 457 device_printf(sc->dev, 458 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 459 return; 460} 461#endif 462 463 464static int 465mxge_dma_test(mxge_softc_t *sc, int test_type) 466{ 467 mxge_cmd_t cmd; 468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 469 int status; 470 uint32_t len; 471 char *test = " "; 472 473 474 /* Run a small DMA test. 475 * The magic multipliers to the length tell the firmware 476 * to do DMA read, write, or read+write tests. The 477 * results are returned in cmd.data0. The upper 16 478 * bits of the return is the number of transfers completed. 479 * The lower 16 bits is the time in 0.5us ticks that the 480 * transfers took to complete. 481 */ 482 483 len = sc->tx_boundary; 484 485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 487 cmd.data2 = len * 0x10000; 488 status = mxge_send_cmd(sc, test_type, &cmd); 489 if (status != 0) { 490 test = "read"; 491 goto abort; 492 } 493 sc->read_dma = ((cmd.data0>>16) * len * 2) / 494 (cmd.data0 & 0xffff); 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x1; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "write"; 501 goto abort; 502 } 503 sc->write_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 508 cmd.data2 = len * 0x10001; 509 status = mxge_send_cmd(sc, test_type, &cmd); 510 if (status != 0) { 511 test = "read/write"; 512 goto abort; 513 } 514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 515 (cmd.data0 & 0xffff); 516 517abort: 518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 520 test, status); 521 522 return status; 523} 524 525/* 526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 527 * when the PCI-E Completion packets are aligned on an 8-byte 528 * boundary. Some PCI-E chip sets always align Completion packets; on 529 * the ones that do not, the alignment can be enforced by enabling 530 * ECRC generation (if supported). 531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544static int 545mxge_firmware_probe(mxge_softc_t *sc) 546{ 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. 583 */ 584 585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 586 if (status == 0) 587 return 0; /* keep the aligned firmware */ 588 589 if (status != E2BIG) 590 device_printf(dev, "DMA test failed: %d\n", status); 591 if (status == ENOSYS) 592 device_printf(dev, "Falling back to ethp! " 593 "Please install up to date fw\n"); 594 return status; 595} 596 597static int 598mxge_select_firmware(mxge_softc_t *sc) 599{ 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640} 641 642union qualhack 643{ 644 const char *ro_char; 645 char *rw_char; 646}; 647 648static int 649mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650{ 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677} 678 679static void * 680z_alloc(void *nil, u_int items, u_int size) 681{ 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686} 687 688static void 689z_free(void *nil, void *ptr) 690{ 691 free(ptr, M_TEMP); 692} 693 694 695static int 696mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697{ 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if (status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772abort_with_zs: 773 inflateEnd(&zs); 774abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777} 778 779/* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784static void 785mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786{ 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? */ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? "enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834} 835 836static int 837mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838{ 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 default: 887 device_printf(sc->dev, 888 "mxge: command %d " 889 "failed, result = %d\n", 890 cmd, be32toh(response->result)); 891 err = ENXIO; 892 break; 893 } 894 if (err != EAGAIN) 895 break; 896 } 897 if (err == EAGAIN) 898 device_printf(sc->dev, "mxge: command %d timed out" 899 "result = %d\n", 900 cmd, be32toh(response->result)); 901 mtx_unlock(&sc->cmd_mtx); 902 return err; 903} 904 905static int 906mxge_adopt_running_firmware(mxge_softc_t *sc) 907{ 908 struct mcp_gen_header *hdr; 909 const size_t bytes = sizeof (struct mcp_gen_header); 910 size_t hdr_offset; 911 int status; 912 913 /* find running firmware header */ 914 hdr_offset = htobe32(*(volatile uint32_t *) 915 (sc->sram + MCP_HEADER_PTR_OFFSET)); 916 917 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 918 device_printf(sc->dev, 919 "Running firmware has bad header offset (%d)\n", 920 (int)hdr_offset); 921 return EIO; 922 } 923 924 /* copy header of running firmware from SRAM to host memory to 925 * validate firmware */ 926 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 927 if (hdr == NULL) { 928 device_printf(sc->dev, "could not malloc firmware hdr\n"); 929 return ENOMEM; 930 } 931 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 932 rman_get_bushandle(sc->mem_res), 933 hdr_offset, (char *)hdr, bytes); 934 status = mxge_validate_firmware(sc, hdr); 935 free(hdr, M_DEVBUF); 936 937 /* 938 * check to see if adopted firmware has bug where adopting 939 * it will cause broadcasts to be filtered unless the NIC 940 * is kept in ALLMULTI mode 941 */ 942 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 943 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 944 sc->adopted_rx_filter_bug = 1; 945 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 946 "working around rx filter bug\n", 947 sc->fw_ver_major, sc->fw_ver_minor, 948 sc->fw_ver_tiny); 949 } 950 951 return status; 952} 953 954 955static int 956mxge_load_firmware(mxge_softc_t *sc, int adopt) 957{ 958 volatile uint32_t *confirm; 959 volatile char *submit; 960 char buf_bytes[72]; 961 uint32_t *buf, size, dma_low, dma_high; 962 int status, i; 963 964 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 965 966 size = sc->sram_size; 967 status = mxge_load_firmware_helper(sc, &size); 968 if (status) { 969 if (!adopt) 970 return status; 971 /* Try to use the currently running firmware, if 972 it is new enough */ 973 status = mxge_adopt_running_firmware(sc); 974 if (status) { 975 device_printf(sc->dev, 976 "failed to adopt running firmware\n"); 977 return status; 978 } 979 device_printf(sc->dev, 980 "Successfully adopted running firmware\n"); 981 if (sc->tx_boundary == 4096) { 982 device_printf(sc->dev, 983 "Using firmware currently running on NIC" 984 ". For optimal\n"); 985 device_printf(sc->dev, 986 "performance consider loading optimized " 987 "firmware\n"); 988 } 989 sc->fw_name = mxge_fw_unaligned; 990 sc->tx_boundary = 2048; 991 return 0; 992 } 993 /* clear confirmation addr */ 994 confirm = (volatile uint32_t *)sc->cmd; 995 *confirm = 0; 996 wmb(); 997 /* send a reload command to the bootstrap MCP, and wait for the 998 response in the confirmation address. The firmware should 999 write a -1 there to indicate it is alive and well 1000 */ 1001 1002 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1003 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1004 1005 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1006 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1007 buf[2] = htobe32(0xffffffff); /* confirm data */ 1008 1009 /* FIX: All newest firmware should un-protect the bottom of 1010 the sram before handoff. However, the very first interfaces 1011 do not. Therefore the handoff copy must skip the first 8 bytes 1012 */ 1013 /* where the code starts*/ 1014 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1015 buf[4] = htobe32(size - 8); /* length of code */ 1016 buf[5] = htobe32(8); /* where to copy to */ 1017 buf[6] = htobe32(0); /* where to jump to */ 1018 1019 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1020 mxge_pio_copy(submit, buf, 64); 1021 wmb(); 1022 DELAY(1000); 1023 wmb(); 1024 i = 0; 1025 while (*confirm != 0xffffffff && i < 20) { 1026 DELAY(1000*10); 1027 i++; 1028 bus_dmamap_sync(sc->cmd_dma.dmat, 1029 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1030 } 1031 if (*confirm != 0xffffffff) { 1032 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1033 confirm, *confirm); 1034 1035 return ENXIO; 1036 } 1037 return 0; 1038} 1039 1040static int 1041mxge_update_mac_address(mxge_softc_t *sc) 1042{ 1043 mxge_cmd_t cmd; 1044 uint8_t *addr = sc->mac_addr; 1045 int status; 1046 1047 1048 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1049 | (addr[2] << 8) | addr[3]); 1050 1051 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1052 1053 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1054 return status; 1055} 1056 1057static int 1058mxge_change_pause(mxge_softc_t *sc, int pause) 1059{ 1060 mxge_cmd_t cmd; 1061 int status; 1062 1063 if (pause) 1064 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1065 &cmd); 1066 else 1067 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1068 &cmd); 1069 1070 if (status) { 1071 device_printf(sc->dev, "Failed to set flow control mode\n"); 1072 return ENXIO; 1073 } 1074 sc->pause = pause; 1075 return 0; 1076} 1077 1078static void 1079mxge_change_promisc(mxge_softc_t *sc, int promisc) 1080{ 1081 mxge_cmd_t cmd; 1082 int status; 1083 1084 if (mxge_always_promisc) 1085 promisc = 1; 1086 1087 if (promisc) 1088 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1089 &cmd); 1090 else 1091 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1092 &cmd); 1093 1094 if (status) { 1095 device_printf(sc->dev, "Failed to set promisc mode\n"); 1096 } 1097} 1098 1099static void 1100mxge_set_multicast_list(mxge_softc_t *sc) 1101{ 1102 mxge_cmd_t cmd; 1103 struct ifmultiaddr *ifma; 1104 struct ifnet *ifp = sc->ifp; 1105 int err; 1106 1107 /* This firmware is known to not support multicast */ 1108 if (!sc->fw_multicast_support) 1109 return; 1110 1111 /* Disable multicast filtering while we play with the lists*/ 1112 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1113 if (err != 0) { 1114 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1115 " error status: %d\n", err); 1116 return; 1117 } 1118 1119 if (sc->adopted_rx_filter_bug) 1120 return; 1121 1122 if (ifp->if_flags & IFF_ALLMULTI) 1123 /* request to disable multicast filtering, so quit here */ 1124 return; 1125 1126 /* Flush all the filters */ 1127 1128 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1129 if (err != 0) { 1130 device_printf(sc->dev, 1131 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1132 ", error status: %d\n", err); 1133 return; 1134 } 1135 1136 /* Walk the multicast list, and add each address */ 1137 1138 if_maddr_rlock(ifp); 1139 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1140 if (ifma->ifma_addr->sa_family != AF_LINK) 1141 continue; 1142 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1143 &cmd.data0, 4); 1144 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1145 &cmd.data1, 2); 1146 cmd.data0 = htonl(cmd.data0); 1147 cmd.data1 = htonl(cmd.data1); 1148 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1149 if (err != 0) { 1150 device_printf(sc->dev, "Failed " 1151 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1152 "%d\t", err); 1153 /* abort, leaving multicast filtering off */ 1154 if_maddr_runlock(ifp); 1155 return; 1156 } 1157 } 1158 if_maddr_runlock(ifp); 1159 /* Enable multicast filtering */ 1160 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1161 if (err != 0) { 1162 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1163 ", error status: %d\n", err); 1164 } 1165} 1166 1167static int 1168mxge_max_mtu(mxge_softc_t *sc) 1169{ 1170 mxge_cmd_t cmd; 1171 int status; 1172 1173 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1174 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1175 1176 /* try to set nbufs to see if it we can 1177 use virtually contiguous jumbos */ 1178 cmd.data0 = 0; 1179 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1180 &cmd); 1181 if (status == 0) 1182 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1183 1184 /* otherwise, we're limited to MJUMPAGESIZE */ 1185 return MJUMPAGESIZE - MXGEFW_PAD; 1186} 1187 1188static int 1189mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1190{ 1191 struct mxge_slice_state *ss; 1192 mxge_rx_done_t *rx_done; 1193 volatile uint32_t *irq_claim; 1194 mxge_cmd_t cmd; 1195 int slice, status; 1196 1197 /* try to send a reset command to the card to see if it 1198 is alive */ 1199 memset(&cmd, 0, sizeof (cmd)); 1200 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1201 if (status != 0) { 1202 device_printf(sc->dev, "failed reset\n"); 1203 return ENXIO; 1204 } 1205 1206 mxge_dummy_rdma(sc, 1); 1207 1208 1209 /* set the intrq size */ 1210 cmd.data0 = sc->rx_ring_size; 1211 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1212 1213 /* 1214 * Even though we already know how many slices are supported 1215 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1216 * has magic side effects, and must be called after a reset. 1217 * It must be called prior to calling any RSS related cmds, 1218 * including assigning an interrupt queue for anything but 1219 * slice 0. It must also be called *after* 1220 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1221 * the firmware to compute offsets. 1222 */ 1223 1224 if (sc->num_slices > 1) { 1225 /* ask the maximum number of slices it supports */ 1226 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1227 &cmd); 1228 if (status != 0) { 1229 device_printf(sc->dev, 1230 "failed to get number of slices\n"); 1231 return status; 1232 } 1233 /* 1234 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1235 * to setting up the interrupt queue DMA 1236 */ 1237 cmd.data0 = sc->num_slices; 1238 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1239#ifdef IFNET_BUF_RING 1240 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1241#endif 1242 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1243 &cmd); 1244 if (status != 0) { 1245 device_printf(sc->dev, 1246 "failed to set number of slices\n"); 1247 return status; 1248 } 1249 } 1250 1251 1252 if (interrupts_setup) { 1253 /* Now exchange information about interrupts */ 1254 for (slice = 0; slice < sc->num_slices; slice++) { 1255 rx_done = &sc->ss[slice].rx_done; 1256 memset(rx_done->entry, 0, sc->rx_ring_size); 1257 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1258 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1259 cmd.data2 = slice; 1260 status |= mxge_send_cmd(sc, 1261 MXGEFW_CMD_SET_INTRQ_DMA, 1262 &cmd); 1263 } 1264 } 1265 1266 status |= mxge_send_cmd(sc, 1267 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1268 1269 1270 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1271 1272 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1273 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1274 1275 1276 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1277 &cmd); 1278 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1279 if (status != 0) { 1280 device_printf(sc->dev, "failed set interrupt parameters\n"); 1281 return status; 1282 } 1283 1284 1285 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1286 1287 1288 /* run a DMA benchmark */ 1289 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1290 1291 for (slice = 0; slice < sc->num_slices; slice++) { 1292 ss = &sc->ss[slice]; 1293 1294 ss->irq_claim = irq_claim + (2 * slice); 1295 /* reset mcp/driver shared state back to 0 */ 1296 ss->rx_done.idx = 0; 1297 ss->rx_done.cnt = 0; 1298 ss->tx.req = 0; 1299 ss->tx.done = 0; 1300 ss->tx.pkt_done = 0; 1301 ss->tx.queue_active = 0; 1302 ss->tx.activate = 0; 1303 ss->tx.deactivate = 0; 1304 ss->tx.wake = 0; 1305 ss->tx.defrag = 0; 1306 ss->tx.stall = 0; 1307 ss->rx_big.cnt = 0; 1308 ss->rx_small.cnt = 0; 1309 ss->lro_bad_csum = 0; 1310 ss->lro_queued = 0; 1311 ss->lro_flushed = 0; 1312 if (ss->fw_stats != NULL) { 1313 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1314 } 1315 } 1316 sc->rdma_tags_available = 15; 1317 status = mxge_update_mac_address(sc); 1318 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1319 mxge_change_pause(sc, sc->pause); 1320 mxge_set_multicast_list(sc); 1321 if (sc->throttle) { 1322 cmd.data0 = sc->throttle; 1323 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1324 &cmd)) { 1325 device_printf(sc->dev, 1326 "can't enable throttle\n"); 1327 } 1328 } 1329 return status; 1330} 1331 1332static int 1333mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1334{ 1335 mxge_cmd_t cmd; 1336 mxge_softc_t *sc; 1337 int err; 1338 unsigned int throttle; 1339 1340 sc = arg1; 1341 throttle = sc->throttle; 1342 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1343 if (err != 0) { 1344 return err; 1345 } 1346 1347 if (throttle == sc->throttle) 1348 return 0; 1349 1350 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1351 return EINVAL; 1352 1353 mtx_lock(&sc->driver_mtx); 1354 cmd.data0 = throttle; 1355 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1356 if (err == 0) 1357 sc->throttle = throttle; 1358 mtx_unlock(&sc->driver_mtx); 1359 return err; 1360} 1361 1362static int 1363mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1364{ 1365 mxge_softc_t *sc; 1366 unsigned int intr_coal_delay; 1367 int err; 1368 1369 sc = arg1; 1370 intr_coal_delay = sc->intr_coal_delay; 1371 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1372 if (err != 0) { 1373 return err; 1374 } 1375 if (intr_coal_delay == sc->intr_coal_delay) 1376 return 0; 1377 1378 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1379 return EINVAL; 1380 1381 mtx_lock(&sc->driver_mtx); 1382 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1383 sc->intr_coal_delay = intr_coal_delay; 1384 1385 mtx_unlock(&sc->driver_mtx); 1386 return err; 1387} 1388 1389static int 1390mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1391{ 1392 mxge_softc_t *sc; 1393 unsigned int enabled; 1394 int err; 1395 1396 sc = arg1; 1397 enabled = sc->pause; 1398 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1399 if (err != 0) { 1400 return err; 1401 } 1402 if (enabled == sc->pause) 1403 return 0; 1404 1405 mtx_lock(&sc->driver_mtx); 1406 err = mxge_change_pause(sc, enabled); 1407 mtx_unlock(&sc->driver_mtx); 1408 return err; 1409} 1410 1411static int 1412mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1413{ 1414 struct ifnet *ifp; 1415 int err = 0; 1416 1417 ifp = sc->ifp; 1418 if (lro_cnt == 0) 1419 ifp->if_capenable &= ~IFCAP_LRO; 1420 else 1421 ifp->if_capenable |= IFCAP_LRO; 1422 sc->lro_cnt = lro_cnt; 1423 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1424 mxge_close(sc, 0); 1425 err = mxge_open(sc); 1426 } 1427 return err; 1428} 1429 1430static int 1431mxge_change_lro(SYSCTL_HANDLER_ARGS) 1432{ 1433 mxge_softc_t *sc; 1434 unsigned int lro_cnt; 1435 int err; 1436 1437 sc = arg1; 1438 lro_cnt = sc->lro_cnt; 1439 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1440 if (err != 0) 1441 return err; 1442 1443 if (lro_cnt == sc->lro_cnt) 1444 return 0; 1445 1446 if (lro_cnt > 128) 1447 return EINVAL; 1448 1449 mtx_lock(&sc->driver_mtx); 1450 err = mxge_change_lro_locked(sc, lro_cnt); 1451 mtx_unlock(&sc->driver_mtx); 1452 return err; 1453} 1454 1455static int 1456mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1457{ 1458 int err; 1459 1460 if (arg1 == NULL) 1461 return EFAULT; 1462 arg2 = be32toh(*(int *)arg1); 1463 arg1 = NULL; 1464 err = sysctl_handle_int(oidp, arg1, arg2, req); 1465 1466 return err; 1467} 1468 1469static void 1470mxge_rem_sysctls(mxge_softc_t *sc) 1471{ 1472 struct mxge_slice_state *ss; 1473 int slice; 1474 1475 if (sc->slice_sysctl_tree == NULL) 1476 return; 1477 1478 for (slice = 0; slice < sc->num_slices; slice++) { 1479 ss = &sc->ss[slice]; 1480 if (ss == NULL || ss->sysctl_tree == NULL) 1481 continue; 1482 sysctl_ctx_free(&ss->sysctl_ctx); 1483 ss->sysctl_tree = NULL; 1484 } 1485 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1486 sc->slice_sysctl_tree = NULL; 1487} 1488 1489static void 1490mxge_add_sysctls(mxge_softc_t *sc) 1491{ 1492 struct sysctl_ctx_list *ctx; 1493 struct sysctl_oid_list *children; 1494 mcp_irq_data_t *fw; 1495 struct mxge_slice_state *ss; 1496 int slice; 1497 char slice_num[8]; 1498 1499 ctx = device_get_sysctl_ctx(sc->dev); 1500 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1501 fw = sc->ss[0].fw_stats; 1502 1503 /* random information */ 1504 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1505 "firmware_version", 1506 CTLFLAG_RD, &sc->fw_version, 1507 0, "firmware version"); 1508 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1509 "serial_number", 1510 CTLFLAG_RD, &sc->serial_number_string, 1511 0, "serial number"); 1512 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1513 "product_code", 1514 CTLFLAG_RD, &sc->product_code_string, 1515 0, "product_code"); 1516 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1517 "pcie_link_width", 1518 CTLFLAG_RD, &sc->link_width, 1519 0, "tx_boundary"); 1520 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1521 "tx_boundary", 1522 CTLFLAG_RD, &sc->tx_boundary, 1523 0, "tx_boundary"); 1524 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1525 "write_combine", 1526 CTLFLAG_RD, &sc->wc, 1527 0, "write combining PIO?"); 1528 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1529 "read_dma_MBs", 1530 CTLFLAG_RD, &sc->read_dma, 1531 0, "DMA Read speed in MB/s"); 1532 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1533 "write_dma_MBs", 1534 CTLFLAG_RD, &sc->write_dma, 1535 0, "DMA Write speed in MB/s"); 1536 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1537 "read_write_dma_MBs", 1538 CTLFLAG_RD, &sc->read_write_dma, 1539 0, "DMA concurrent Read/Write speed in MB/s"); 1540 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1541 "watchdog_resets", 1542 CTLFLAG_RD, &sc->watchdog_resets, 1543 0, "Number of times NIC was reset"); 1544 1545 1546 /* performance related tunables */ 1547 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1548 "intr_coal_delay", 1549 CTLTYPE_INT|CTLFLAG_RW, sc, 1550 0, mxge_change_intr_coal, 1551 "I", "interrupt coalescing delay in usecs"); 1552 1553 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1554 "throttle", 1555 CTLTYPE_INT|CTLFLAG_RW, sc, 1556 0, mxge_change_throttle, 1557 "I", "transmit throttling"); 1558 1559 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1560 "flow_control_enabled", 1561 CTLTYPE_INT|CTLFLAG_RW, sc, 1562 0, mxge_change_flow_control, 1563 "I", "interrupt coalescing delay in usecs"); 1564 1565 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1566 "deassert_wait", 1567 CTLFLAG_RW, &mxge_deassert_wait, 1568 0, "Wait for IRQ line to go low in ihandler"); 1569 1570 /* stats block from firmware is in network byte order. 1571 Need to swap it */ 1572 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1573 "link_up", 1574 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1575 0, mxge_handle_be32, 1576 "I", "link up"); 1577 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1578 "rdma_tags_available", 1579 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1580 0, mxge_handle_be32, 1581 "I", "rdma_tags_available"); 1582 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1583 "dropped_bad_crc32", 1584 CTLTYPE_INT|CTLFLAG_RD, 1585 &fw->dropped_bad_crc32, 1586 0, mxge_handle_be32, 1587 "I", "dropped_bad_crc32"); 1588 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1589 "dropped_bad_phy", 1590 CTLTYPE_INT|CTLFLAG_RD, 1591 &fw->dropped_bad_phy, 1592 0, mxge_handle_be32, 1593 "I", "dropped_bad_phy"); 1594 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1595 "dropped_link_error_or_filtered", 1596 CTLTYPE_INT|CTLFLAG_RD, 1597 &fw->dropped_link_error_or_filtered, 1598 0, mxge_handle_be32, 1599 "I", "dropped_link_error_or_filtered"); 1600 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1601 "dropped_link_overflow", 1602 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1603 0, mxge_handle_be32, 1604 "I", "dropped_link_overflow"); 1605 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1606 "dropped_multicast_filtered", 1607 CTLTYPE_INT|CTLFLAG_RD, 1608 &fw->dropped_multicast_filtered, 1609 0, mxge_handle_be32, 1610 "I", "dropped_multicast_filtered"); 1611 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1612 "dropped_no_big_buffer", 1613 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1614 0, mxge_handle_be32, 1615 "I", "dropped_no_big_buffer"); 1616 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1617 "dropped_no_small_buffer", 1618 CTLTYPE_INT|CTLFLAG_RD, 1619 &fw->dropped_no_small_buffer, 1620 0, mxge_handle_be32, 1621 "I", "dropped_no_small_buffer"); 1622 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1623 "dropped_overrun", 1624 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1625 0, mxge_handle_be32, 1626 "I", "dropped_overrun"); 1627 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1628 "dropped_pause", 1629 CTLTYPE_INT|CTLFLAG_RD, 1630 &fw->dropped_pause, 1631 0, mxge_handle_be32, 1632 "I", "dropped_pause"); 1633 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1634 "dropped_runt", 1635 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1636 0, mxge_handle_be32, 1637 "I", "dropped_runt"); 1638 1639 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1640 "dropped_unicast_filtered", 1641 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1642 0, mxge_handle_be32, 1643 "I", "dropped_unicast_filtered"); 1644 1645 /* verbose printing? */ 1646 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1647 "verbose", 1648 CTLFLAG_RW, &mxge_verbose, 1649 0, "verbose printing"); 1650 1651 /* lro */ 1652 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1653 "lro_cnt", 1654 CTLTYPE_INT|CTLFLAG_RW, sc, 1655 0, mxge_change_lro, 1656 "I", "number of lro merge queues"); 1657 1658 1659 /* add counters exported for debugging from all slices */ 1660 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1661 sc->slice_sysctl_tree = 1662 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1663 "slice", CTLFLAG_RD, 0, ""); 1664 1665 for (slice = 0; slice < sc->num_slices; slice++) { 1666 ss = &sc->ss[slice]; 1667 sysctl_ctx_init(&ss->sysctl_ctx); 1668 ctx = &ss->sysctl_ctx; 1669 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1670 sprintf(slice_num, "%d", slice); 1671 ss->sysctl_tree = 1672 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1673 CTLFLAG_RD, 0, ""); 1674 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1675 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1676 "rx_small_cnt", 1677 CTLFLAG_RD, &ss->rx_small.cnt, 1678 0, "rx_small_cnt"); 1679 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1680 "rx_big_cnt", 1681 CTLFLAG_RD, &ss->rx_big.cnt, 1682 0, "rx_small_cnt"); 1683 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1684 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1685 0, "number of lro merge queues flushed"); 1686 1687 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1688 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1689 0, "number of frames appended to lro merge" 1690 "queues"); 1691 1692#ifndef IFNET_BUF_RING 1693 /* only transmit from slice 0 for now */ 1694 if (slice > 0) 1695 continue; 1696#endif 1697 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1698 "tx_req", 1699 CTLFLAG_RD, &ss->tx.req, 1700 0, "tx_req"); 1701 1702 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1703 "tx_done", 1704 CTLFLAG_RD, &ss->tx.done, 1705 0, "tx_done"); 1706 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1707 "tx_pkt_done", 1708 CTLFLAG_RD, &ss->tx.pkt_done, 1709 0, "tx_done"); 1710 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1711 "tx_stall", 1712 CTLFLAG_RD, &ss->tx.stall, 1713 0, "tx_stall"); 1714 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1715 "tx_wake", 1716 CTLFLAG_RD, &ss->tx.wake, 1717 0, "tx_wake"); 1718 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1719 "tx_defrag", 1720 CTLFLAG_RD, &ss->tx.defrag, 1721 0, "tx_defrag"); 1722 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1723 "tx_queue_active", 1724 CTLFLAG_RD, &ss->tx.queue_active, 1725 0, "tx_queue_active"); 1726 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1727 "tx_activate", 1728 CTLFLAG_RD, &ss->tx.activate, 1729 0, "tx_activate"); 1730 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1731 "tx_deactivate", 1732 CTLFLAG_RD, &ss->tx.deactivate, 1733 0, "tx_deactivate"); 1734 } 1735} 1736 1737/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1738 backwards one at a time and handle ring wraps */ 1739 1740static inline void 1741mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1742 mcp_kreq_ether_send_t *src, int cnt) 1743{ 1744 int idx, starting_slot; 1745 starting_slot = tx->req; 1746 while (cnt > 1) { 1747 cnt--; 1748 idx = (starting_slot + cnt) & tx->mask; 1749 mxge_pio_copy(&tx->lanai[idx], 1750 &src[cnt], sizeof(*src)); 1751 wmb(); 1752 } 1753} 1754 1755/* 1756 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1757 * at most 32 bytes at a time, so as to avoid involving the software 1758 * pio handler in the nic. We re-write the first segment's flags 1759 * to mark them valid only after writing the entire chain 1760 */ 1761 1762static inline void 1763mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1764 int cnt) 1765{ 1766 int idx, i; 1767 uint32_t *src_ints; 1768 volatile uint32_t *dst_ints; 1769 mcp_kreq_ether_send_t *srcp; 1770 volatile mcp_kreq_ether_send_t *dstp, *dst; 1771 uint8_t last_flags; 1772 1773 idx = tx->req & tx->mask; 1774 1775 last_flags = src->flags; 1776 src->flags = 0; 1777 wmb(); 1778 dst = dstp = &tx->lanai[idx]; 1779 srcp = src; 1780 1781 if ((idx + cnt) < tx->mask) { 1782 for (i = 0; i < (cnt - 1); i += 2) { 1783 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1784 wmb(); /* force write every 32 bytes */ 1785 srcp += 2; 1786 dstp += 2; 1787 } 1788 } else { 1789 /* submit all but the first request, and ensure 1790 that it is submitted below */ 1791 mxge_submit_req_backwards(tx, src, cnt); 1792 i = 0; 1793 } 1794 if (i < cnt) { 1795 /* submit the first request */ 1796 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1797 wmb(); /* barrier before setting valid flag */ 1798 } 1799 1800 /* re-write the last 32-bits with the valid flags */ 1801 src->flags = last_flags; 1802 src_ints = (uint32_t *)src; 1803 src_ints+=3; 1804 dst_ints = (volatile uint32_t *)dst; 1805 dst_ints+=3; 1806 *dst_ints = *src_ints; 1807 tx->req += cnt; 1808 wmb(); 1809} 1810 1811#if IFCAP_TSO4 1812 1813static void 1814mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1815 int busdma_seg_cnt, int ip_off) 1816{ 1817 mxge_tx_ring_t *tx; 1818 mcp_kreq_ether_send_t *req; 1819 bus_dma_segment_t *seg; 1820 struct ip *ip; 1821 struct tcphdr *tcp; 1822 uint32_t low, high_swapped; 1823 int len, seglen, cum_len, cum_len_next; 1824 int next_is_first, chop, cnt, rdma_count, small; 1825 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1826 uint8_t flags, flags_next; 1827 static int once; 1828 1829 mss = m->m_pkthdr.tso_segsz; 1830 1831 /* negative cum_len signifies to the 1832 * send loop that we are still in the 1833 * header portion of the TSO packet. 1834 */ 1835 1836 /* ensure we have the ethernet, IP and TCP 1837 header together in the first mbuf, copy 1838 it to a scratch buffer if not */ 1839 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1840 m_copydata(m, 0, ip_off + sizeof (*ip), 1841 ss->scratch); 1842 ip = (struct ip *)(ss->scratch + ip_off); 1843 } else { 1844 ip = (struct ip *)(mtod(m, char *) + ip_off); 1845 } 1846 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1847 + sizeof (*tcp))) { 1848 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1849 + sizeof (*tcp), ss->scratch); 1850 ip = (struct ip *)(mtod(m, char *) + ip_off); 1851 } 1852 1853 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1854 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1855 1856 /* TSO implies checksum offload on this hardware */ 1857 cksum_offset = ip_off + (ip->ip_hl << 2); 1858 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1859 1860 1861 /* for TSO, pseudo_hdr_offset holds mss. 1862 * The firmware figures out where to put 1863 * the checksum by parsing the header. */ 1864 pseudo_hdr_offset = htobe16(mss); 1865 1866 tx = &ss->tx; 1867 req = tx->req_list; 1868 seg = tx->seg_list; 1869 cnt = 0; 1870 rdma_count = 0; 1871 /* "rdma_count" is the number of RDMAs belonging to the 1872 * current packet BEFORE the current send request. For 1873 * non-TSO packets, this is equal to "count". 1874 * For TSO packets, rdma_count needs to be reset 1875 * to 0 after a segment cut. 1876 * 1877 * The rdma_count field of the send request is 1878 * the number of RDMAs of the packet starting at 1879 * that request. For TSO send requests with one ore more cuts 1880 * in the middle, this is the number of RDMAs starting 1881 * after the last cut in the request. All previous 1882 * segments before the last cut implicitly have 1 RDMA. 1883 * 1884 * Since the number of RDMAs is not known beforehand, 1885 * it must be filled-in retroactively - after each 1886 * segmentation cut or at the end of the entire packet. 1887 */ 1888 1889 while (busdma_seg_cnt) { 1890 /* Break the busdma segment up into pieces*/ 1891 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1892 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1893 len = seg->ds_len; 1894 1895 while (len) { 1896 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1897 seglen = len; 1898 cum_len_next = cum_len + seglen; 1899 (req-rdma_count)->rdma_count = rdma_count + 1; 1900 if (__predict_true(cum_len >= 0)) { 1901 /* payload */ 1902 chop = (cum_len_next > mss); 1903 cum_len_next = cum_len_next % mss; 1904 next_is_first = (cum_len_next == 0); 1905 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1906 flags_next |= next_is_first * 1907 MXGEFW_FLAGS_FIRST; 1908 rdma_count |= -(chop | next_is_first); 1909 rdma_count += chop & !next_is_first; 1910 } else if (cum_len_next >= 0) { 1911 /* header ends */ 1912 rdma_count = -1; 1913 cum_len_next = 0; 1914 seglen = -cum_len; 1915 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1916 flags_next = MXGEFW_FLAGS_TSO_PLD | 1917 MXGEFW_FLAGS_FIRST | 1918 (small * MXGEFW_FLAGS_SMALL); 1919 } 1920 1921 req->addr_high = high_swapped; 1922 req->addr_low = htobe32(low); 1923 req->pseudo_hdr_offset = pseudo_hdr_offset; 1924 req->pad = 0; 1925 req->rdma_count = 1; 1926 req->length = htobe16(seglen); 1927 req->cksum_offset = cksum_offset; 1928 req->flags = flags | ((cum_len & 1) * 1929 MXGEFW_FLAGS_ALIGN_ODD); 1930 low += seglen; 1931 len -= seglen; 1932 cum_len = cum_len_next; 1933 flags = flags_next; 1934 req++; 1935 cnt++; 1936 rdma_count++; 1937 if (__predict_false(cksum_offset > seglen)) 1938 cksum_offset -= seglen; 1939 else 1940 cksum_offset = 0; 1941 if (__predict_false(cnt > tx->max_desc)) 1942 goto drop; 1943 } 1944 busdma_seg_cnt--; 1945 seg++; 1946 } 1947 (req-rdma_count)->rdma_count = rdma_count; 1948 1949 do { 1950 req--; 1951 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1952 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1953 1954 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1955 mxge_submit_req(tx, tx->req_list, cnt); 1956#ifdef IFNET_BUF_RING 1957 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1958 /* tell the NIC to start polling this slice */ 1959 *tx->send_go = 1; 1960 tx->queue_active = 1; 1961 tx->activate++; 1962 wmb(); 1963 } 1964#endif 1965 return; 1966 1967drop: 1968 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1969 m_freem(m); 1970 ss->oerrors++; 1971 if (!once) { 1972 printf("tx->max_desc exceeded via TSO!\n"); 1973 printf("mss = %d, %ld, %d!\n", mss, 1974 (long)seg - (long)tx->seg_list, tx->max_desc); 1975 once = 1; 1976 } 1977 return; 1978 1979} 1980 1981#endif /* IFCAP_TSO4 */ 1982 1983#ifdef MXGE_NEW_VLAN_API 1984/* 1985 * We reproduce the software vlan tag insertion from 1986 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1987 * vlan tag insertion. We need to advertise this in order to have the 1988 * vlan interface respect our csum offload flags. 1989 */ 1990static struct mbuf * 1991mxge_vlan_tag_insert(struct mbuf *m) 1992{ 1993 struct ether_vlan_header *evl; 1994 1995 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1996 if (__predict_false(m == NULL)) 1997 return NULL; 1998 if (m->m_len < sizeof(*evl)) { 1999 m = m_pullup(m, sizeof(*evl)); 2000 if (__predict_false(m == NULL)) 2001 return NULL; 2002 } 2003 /* 2004 * Transform the Ethernet header into an Ethernet header 2005 * with 802.1Q encapsulation. 2006 */ 2007 evl = mtod(m, struct ether_vlan_header *); 2008 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2009 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2010 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2011 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2012 m->m_flags &= ~M_VLANTAG; 2013 return m; 2014} 2015#endif /* MXGE_NEW_VLAN_API */ 2016 2017static void 2018mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2019{ 2020 mxge_softc_t *sc; 2021 mcp_kreq_ether_send_t *req; 2022 bus_dma_segment_t *seg; 2023 struct mbuf *m_tmp; 2024 struct ifnet *ifp; 2025 mxge_tx_ring_t *tx; 2026 struct ip *ip; 2027 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2028 uint16_t pseudo_hdr_offset; 2029 uint8_t flags, cksum_offset; 2030 2031 2032 sc = ss->sc; 2033 ifp = sc->ifp; 2034 tx = &ss->tx; 2035 2036 ip_off = sizeof (struct ether_header); 2037#ifdef MXGE_NEW_VLAN_API 2038 if (m->m_flags & M_VLANTAG) { 2039 m = mxge_vlan_tag_insert(m); 2040 if (__predict_false(m == NULL)) 2041 goto drop; 2042 ip_off += ETHER_VLAN_ENCAP_LEN; 2043 } 2044#endif 2045 /* (try to) map the frame for DMA */ 2046 idx = tx->req & tx->mask; 2047 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2048 m, tx->seg_list, &cnt, 2049 BUS_DMA_NOWAIT); 2050 if (__predict_false(err == EFBIG)) { 2051 /* Too many segments in the chain. Try 2052 to defrag */ 2053 m_tmp = m_defrag(m, M_NOWAIT); 2054 if (m_tmp == NULL) { 2055 goto drop; 2056 } 2057 ss->tx.defrag++; 2058 m = m_tmp; 2059 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2060 tx->info[idx].map, 2061 m, tx->seg_list, &cnt, 2062 BUS_DMA_NOWAIT); 2063 } 2064 if (__predict_false(err != 0)) { 2065 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2066 " packet len = %d\n", err, m->m_pkthdr.len); 2067 goto drop; 2068 } 2069 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2070 BUS_DMASYNC_PREWRITE); 2071 tx->info[idx].m = m; 2072 2073#if IFCAP_TSO4 2074 /* TSO is different enough, we handle it in another routine */ 2075 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2076 mxge_encap_tso(ss, m, cnt, ip_off); 2077 return; 2078 } 2079#endif 2080 2081 req = tx->req_list; 2082 cksum_offset = 0; 2083 pseudo_hdr_offset = 0; 2084 flags = MXGEFW_FLAGS_NO_TSO; 2085 2086 /* checksum offloading? */ 2087 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2088 /* ensure ip header is in first mbuf, copy 2089 it to a scratch buffer if not */ 2090 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2091 m_copydata(m, 0, ip_off + sizeof (*ip), 2092 ss->scratch); 2093 ip = (struct ip *)(ss->scratch + ip_off); 2094 } else { 2095 ip = (struct ip *)(mtod(m, char *) + ip_off); 2096 } 2097 cksum_offset = ip_off + (ip->ip_hl << 2); 2098 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2099 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2100 req->cksum_offset = cksum_offset; 2101 flags |= MXGEFW_FLAGS_CKSUM; 2102 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2103 } else { 2104 odd_flag = 0; 2105 } 2106 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2107 flags |= MXGEFW_FLAGS_SMALL; 2108 2109 /* convert segments into a request list */ 2110 cum_len = 0; 2111 seg = tx->seg_list; 2112 req->flags = MXGEFW_FLAGS_FIRST; 2113 for (i = 0; i < cnt; i++) { 2114 req->addr_low = 2115 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2116 req->addr_high = 2117 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2118 req->length = htobe16(seg->ds_len); 2119 req->cksum_offset = cksum_offset; 2120 if (cksum_offset > seg->ds_len) 2121 cksum_offset -= seg->ds_len; 2122 else 2123 cksum_offset = 0; 2124 req->pseudo_hdr_offset = pseudo_hdr_offset; 2125 req->pad = 0; /* complete solid 16-byte block */ 2126 req->rdma_count = 1; 2127 req->flags |= flags | ((cum_len & 1) * odd_flag); 2128 cum_len += seg->ds_len; 2129 seg++; 2130 req++; 2131 req->flags = 0; 2132 } 2133 req--; 2134 /* pad runts to 60 bytes */ 2135 if (cum_len < 60) { 2136 req++; 2137 req->addr_low = 2138 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2139 req->addr_high = 2140 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2141 req->length = htobe16(60 - cum_len); 2142 req->cksum_offset = 0; 2143 req->pseudo_hdr_offset = pseudo_hdr_offset; 2144 req->pad = 0; /* complete solid 16-byte block */ 2145 req->rdma_count = 1; 2146 req->flags |= flags | ((cum_len & 1) * odd_flag); 2147 cnt++; 2148 } 2149 2150 tx->req_list[0].rdma_count = cnt; 2151#if 0 2152 /* print what the firmware will see */ 2153 for (i = 0; i < cnt; i++) { 2154 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2155 "cso:%d, flags:0x%x, rdma:%d\n", 2156 i, (int)ntohl(tx->req_list[i].addr_high), 2157 (int)ntohl(tx->req_list[i].addr_low), 2158 (int)ntohs(tx->req_list[i].length), 2159 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2160 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2161 tx->req_list[i].rdma_count); 2162 } 2163 printf("--------------\n"); 2164#endif 2165 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2166 mxge_submit_req(tx, tx->req_list, cnt); 2167#ifdef IFNET_BUF_RING 2168 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2169 /* tell the NIC to start polling this slice */ 2170 *tx->send_go = 1; 2171 tx->queue_active = 1; 2172 tx->activate++; 2173 wmb(); 2174 } 2175#endif 2176 return; 2177 2178drop: 2179 m_freem(m); 2180 ss->oerrors++; 2181 return; 2182} 2183 2184#ifdef IFNET_BUF_RING 2185static void 2186mxge_qflush(struct ifnet *ifp) 2187{ 2188 mxge_softc_t *sc = ifp->if_softc; 2189 mxge_tx_ring_t *tx; 2190 struct mbuf *m; 2191 int slice; 2192 2193 for (slice = 0; slice < sc->num_slices; slice++) { 2194 tx = &sc->ss[slice].tx; 2195 mtx_lock(&tx->mtx); 2196 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2197 m_freem(m); 2198 mtx_unlock(&tx->mtx); 2199 } 2200 if_qflush(ifp); 2201} 2202 2203static inline void 2204mxge_start_locked(struct mxge_slice_state *ss) 2205{ 2206 mxge_softc_t *sc; 2207 struct mbuf *m; 2208 struct ifnet *ifp; 2209 mxge_tx_ring_t *tx; 2210 2211 sc = ss->sc; 2212 ifp = sc->ifp; 2213 tx = &ss->tx; 2214 2215 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2216 m = drbr_dequeue(ifp, tx->br); 2217 if (m == NULL) { 2218 return; 2219 } 2220 /* let BPF see it */ 2221 BPF_MTAP(ifp, m); 2222 2223 /* give it to the nic */ 2224 mxge_encap(ss, m); 2225 } 2226 /* ran out of transmit slots */ 2227 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2228 && (!drbr_empty(ifp, tx->br))) { 2229 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2230 tx->stall++; 2231 } 2232} 2233 2234static int 2235mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2236{ 2237 mxge_softc_t *sc; 2238 struct ifnet *ifp; 2239 mxge_tx_ring_t *tx; 2240 int err; 2241 2242 sc = ss->sc; 2243 ifp = sc->ifp; 2244 tx = &ss->tx; 2245 2246 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2247 IFF_DRV_RUNNING) { 2248 err = drbr_enqueue(ifp, tx->br, m); 2249 return (err); 2250 } 2251 2252 if (drbr_empty(ifp, tx->br) && 2253 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2254 /* let BPF see it */ 2255 BPF_MTAP(ifp, m); 2256 /* give it to the nic */ 2257 mxge_encap(ss, m); 2258 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2259 return (err); 2260 } 2261 if (!drbr_empty(ifp, tx->br)) 2262 mxge_start_locked(ss); 2263 return (0); 2264} 2265 2266static int 2267mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2268{ 2269 mxge_softc_t *sc = ifp->if_softc; 2270 struct mxge_slice_state *ss; 2271 mxge_tx_ring_t *tx; 2272 int err = 0; 2273 int slice; 2274 2275 slice = m->m_pkthdr.flowid; 2276 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2277 2278 ss = &sc->ss[slice]; 2279 tx = &ss->tx; 2280 2281 if (mtx_trylock(&tx->mtx)) { 2282 err = mxge_transmit_locked(ss, m); 2283 mtx_unlock(&tx->mtx); 2284 } else { 2285 err = drbr_enqueue(ifp, tx->br, m); 2286 } 2287 2288 return (err); 2289} 2290 2291#else 2292 2293static inline void 2294mxge_start_locked(struct mxge_slice_state *ss) 2295{ 2296 mxge_softc_t *sc; 2297 struct mbuf *m; 2298 struct ifnet *ifp; 2299 mxge_tx_ring_t *tx; 2300 2301 sc = ss->sc; 2302 ifp = sc->ifp; 2303 tx = &ss->tx; 2304 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2305 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2306 if (m == NULL) { 2307 return; 2308 } 2309 /* let BPF see it */ 2310 BPF_MTAP(ifp, m); 2311 2312 /* give it to the nic */ 2313 mxge_encap(ss, m); 2314 } 2315 /* ran out of transmit slots */ 2316 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2317 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2318 tx->stall++; 2319 } 2320} 2321#endif 2322static void 2323mxge_start(struct ifnet *ifp) 2324{ 2325 mxge_softc_t *sc = ifp->if_softc; 2326 struct mxge_slice_state *ss; 2327 2328 /* only use the first slice for now */ 2329 ss = &sc->ss[0]; 2330 mtx_lock(&ss->tx.mtx); 2331 mxge_start_locked(ss); 2332 mtx_unlock(&ss->tx.mtx); 2333} 2334 2335/* 2336 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2337 * at most 32 bytes at a time, so as to avoid involving the software 2338 * pio handler in the nic. We re-write the first segment's low 2339 * DMA address to mark it valid only after we write the entire chunk 2340 * in a burst 2341 */ 2342static inline void 2343mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2344 mcp_kreq_ether_recv_t *src) 2345{ 2346 uint32_t low; 2347 2348 low = src->addr_low; 2349 src->addr_low = 0xffffffff; 2350 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2351 wmb(); 2352 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2353 wmb(); 2354 src->addr_low = low; 2355 dst->addr_low = low; 2356 wmb(); 2357} 2358 2359static int 2360mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2361{ 2362 bus_dma_segment_t seg; 2363 struct mbuf *m; 2364 mxge_rx_ring_t *rx = &ss->rx_small; 2365 int cnt, err; 2366 2367 m = m_gethdr(M_DONTWAIT, MT_DATA); 2368 if (m == NULL) { 2369 rx->alloc_fail++; 2370 err = ENOBUFS; 2371 goto done; 2372 } 2373 m->m_len = MHLEN; 2374 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2375 &seg, &cnt, BUS_DMA_NOWAIT); 2376 if (err != 0) { 2377 m_free(m); 2378 goto done; 2379 } 2380 rx->info[idx].m = m; 2381 rx->shadow[idx].addr_low = 2382 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2383 rx->shadow[idx].addr_high = 2384 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2385 2386done: 2387 if ((idx & 7) == 7) 2388 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2389 return err; 2390} 2391 2392static int 2393mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2394{ 2395 bus_dma_segment_t seg[3]; 2396 struct mbuf *m; 2397 mxge_rx_ring_t *rx = &ss->rx_big; 2398 int cnt, err, i; 2399 2400 if (rx->cl_size == MCLBYTES) 2401 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2402 else 2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_len = rx->mlen; 2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2411 seg, &cnt, BUS_DMA_NOWAIT); 2412 if (err != 0) { 2413 m_free(m); 2414 goto done; 2415 } 2416 rx->info[idx].m = m; 2417 rx->shadow[idx].addr_low = 2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2419 rx->shadow[idx].addr_high = 2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2421 2422#if MXGE_VIRT_JUMBOS 2423 for (i = 1; i < cnt; i++) { 2424 rx->shadow[idx + i].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2426 rx->shadow[idx + i].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2428 } 2429#endif 2430 2431done: 2432 for (i = 0; i < rx->nbufs; i++) { 2433 if ((idx & 7) == 7) { 2434 mxge_submit_8rx(&rx->lanai[idx - 7], 2435 &rx->shadow[idx - 7]); 2436 } 2437 idx++; 2438 } 2439 return err; 2440} 2441 2442/* 2443 * Myri10GE hardware checksums are not valid if the sender 2444 * padded the frame with non-zero padding. This is because 2445 * the firmware just does a simple 16-bit 1s complement 2446 * checksum across the entire frame, excluding the first 14 2447 * bytes. It is best to simply to check the checksum and 2448 * tell the stack about it only if the checksum is good 2449 */ 2450 2451static inline uint16_t 2452mxge_rx_csum(struct mbuf *m, int csum) 2453{ 2454 struct ether_header *eh; 2455 struct ip *ip; 2456 uint16_t c; 2457 2458 eh = mtod(m, struct ether_header *); 2459 2460 /* only deal with IPv4 TCP & UDP for now */ 2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2462 return 1; 2463 ip = (struct ip *)(eh + 1); 2464 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2465 ip->ip_p != IPPROTO_UDP)) 2466 return 1; 2467#ifdef INET 2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2469 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2470 - (ip->ip_hl << 2) + ip->ip_p)); 2471#else 2472 c = 1; 2473#endif 2474 c ^= 0xffff; 2475 return (c); 2476} 2477 2478static void 2479mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2480{ 2481 struct ether_vlan_header *evl; 2482 struct ether_header *eh; 2483 uint32_t partial; 2484 2485 evl = mtod(m, struct ether_vlan_header *); 2486 eh = mtod(m, struct ether_header *); 2487 2488 /* 2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2490 * after what the firmware thought was the end of the ethernet 2491 * header. 2492 */ 2493 2494 /* put checksum into host byte order */ 2495 *csum = ntohs(*csum); 2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2497 (*csum) += ~partial; 2498 (*csum) += ((*csum) < ~partial); 2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2501 2502 /* restore checksum to network byte order; 2503 later consumers expect this */ 2504 *csum = htons(*csum); 2505 2506 /* save the tag */ 2507#ifdef MXGE_NEW_VLAN_API 2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2509#else 2510 { 2511 struct m_tag *mtag; 2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2513 M_NOWAIT); 2514 if (mtag == NULL) 2515 return; 2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2517 m_tag_prepend(m, mtag); 2518 } 2519 2520#endif 2521 m->m_flags |= M_VLANTAG; 2522 2523 /* 2524 * Remove the 802.1q header by copying the Ethernet 2525 * addresses over it and adjusting the beginning of 2526 * the data in the mbuf. The encapsulated Ethernet 2527 * type field is already in place. 2528 */ 2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2530 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2531 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2532} 2533 2534 2535static inline void 2536mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2537{ 2538 mxge_softc_t *sc; 2539 struct ifnet *ifp; 2540 struct mbuf *m; 2541 struct ether_header *eh; 2542 mxge_rx_ring_t *rx; 2543 bus_dmamap_t old_map; 2544 int idx; 2545 uint16_t tcpudp_csum; 2546 2547 sc = ss->sc; 2548 ifp = sc->ifp; 2549 rx = &ss->rx_big; 2550 idx = rx->cnt & rx->mask; 2551 rx->cnt += rx->nbufs; 2552 /* save a pointer to the received mbuf */ 2553 m = rx->info[idx].m; 2554 /* try to replace the received mbuf */ 2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2556 /* drop the frame -- the old mbuf is re-cycled */ 2557 ifp->if_ierrors++; 2558 return; 2559 } 2560 2561 /* unmap the received buffer */ 2562 old_map = rx->info[idx].map; 2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2564 bus_dmamap_unload(rx->dmat, old_map); 2565 2566 /* swap the bus_dmamap_t's */ 2567 rx->info[idx].map = rx->extra_map; 2568 rx->extra_map = old_map; 2569 2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2571 * aligned */ 2572 m->m_data += MXGEFW_PAD; 2573 2574 m->m_pkthdr.rcvif = ifp; 2575 m->m_len = m->m_pkthdr.len = len; 2576 ss->ipackets++; 2577 eh = mtod(m, struct ether_header *); 2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2579 mxge_vlan_tag_remove(m, &csum); 2580 } 2581 /* if the checksum is valid, mark it in the mbuf header */ 2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2584 return; 2585 /* otherwise, it was a UDP frame, or a TCP frame which 2586 we could not do LRO on. Tell the stack that the 2587 checksum is good */ 2588 m->m_pkthdr.csum_data = 0xffff; 2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2590 } 2591 /* flowid only valid if RSS hashing is enabled */ 2592 if (sc->num_slices > 1) { 2593 m->m_pkthdr.flowid = (ss - sc->ss); 2594 m->m_flags |= M_FLOWID; 2595 } 2596 /* pass the frame up the stack */ 2597 (*ifp->if_input)(ifp, m); 2598} 2599 2600static inline void 2601mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2602{ 2603 mxge_softc_t *sc; 2604 struct ifnet *ifp; 2605 struct ether_header *eh; 2606 struct mbuf *m; 2607 mxge_rx_ring_t *rx; 2608 bus_dmamap_t old_map; 2609 int idx; 2610 uint16_t tcpudp_csum; 2611 2612 sc = ss->sc; 2613 ifp = sc->ifp; 2614 rx = &ss->rx_small; 2615 idx = rx->cnt & rx->mask; 2616 rx->cnt++; 2617 /* save a pointer to the received mbuf */ 2618 m = rx->info[idx].m; 2619 /* try to replace the received mbuf */ 2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2621 /* drop the frame -- the old mbuf is re-cycled */ 2622 ifp->if_ierrors++; 2623 return; 2624 } 2625 2626 /* unmap the received buffer */ 2627 old_map = rx->info[idx].map; 2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2629 bus_dmamap_unload(rx->dmat, old_map); 2630 2631 /* swap the bus_dmamap_t's */ 2632 rx->info[idx].map = rx->extra_map; 2633 rx->extra_map = old_map; 2634 2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2636 * aligned */ 2637 m->m_data += MXGEFW_PAD; 2638 2639 m->m_pkthdr.rcvif = ifp; 2640 m->m_len = m->m_pkthdr.len = len; 2641 ss->ipackets++; 2642 eh = mtod(m, struct ether_header *); 2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2644 mxge_vlan_tag_remove(m, &csum); 2645 } 2646 /* if the checksum is valid, mark it in the mbuf header */ 2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2649 return; 2650 /* otherwise, it was a UDP frame, or a TCP frame which 2651 we could not do LRO on. Tell the stack that the 2652 checksum is good */ 2653 m->m_pkthdr.csum_data = 0xffff; 2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2655 } 2656 /* flowid only valid if RSS hashing is enabled */ 2657 if (sc->num_slices > 1) { 2658 m->m_pkthdr.flowid = (ss - sc->ss); 2659 m->m_flags |= M_FLOWID; 2660 } 2661 /* pass the frame up the stack */ 2662 (*ifp->if_input)(ifp, m); 2663} 2664 2665static inline void 2666mxge_clean_rx_done(struct mxge_slice_state *ss) 2667{ 2668 mxge_rx_done_t *rx_done = &ss->rx_done; 2669 int limit = 0; 2670 uint16_t length; 2671 uint16_t checksum; 2672 2673 2674 while (rx_done->entry[rx_done->idx].length != 0) { 2675 length = ntohs(rx_done->entry[rx_done->idx].length); 2676 rx_done->entry[rx_done->idx].length = 0; 2677 checksum = rx_done->entry[rx_done->idx].checksum; 2678 if (length <= (MHLEN - MXGEFW_PAD)) 2679 mxge_rx_done_small(ss, length, checksum); 2680 else 2681 mxge_rx_done_big(ss, length, checksum); 2682 rx_done->cnt++; 2683 rx_done->idx = rx_done->cnt & rx_done->mask; 2684 2685 /* limit potential for livelock */ 2686 if (__predict_false(++limit > rx_done->mask / 2)) 2687 break; 2688 } 2689#ifdef INET 2690 while (!SLIST_EMPTY(&ss->lro_active)) { 2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2692 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2693 mxge_lro_flush(ss, lro); 2694 } 2695#endif 2696} 2697 2698 2699static inline void 2700mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2701{ 2702 struct ifnet *ifp; 2703 mxge_tx_ring_t *tx; 2704 struct mbuf *m; 2705 bus_dmamap_t map; 2706 int idx; 2707 int *flags; 2708 2709 tx = &ss->tx; 2710 ifp = ss->sc->ifp; 2711 while (tx->pkt_done != mcp_idx) { 2712 idx = tx->done & tx->mask; 2713 tx->done++; 2714 m = tx->info[idx].m; 2715 /* mbuf and DMA map only attached to the first 2716 segment per-mbuf */ 2717 if (m != NULL) { 2718 ss->obytes += m->m_pkthdr.len; 2719 if (m->m_flags & M_MCAST) 2720 ss->omcasts++; 2721 ss->opackets++; 2722 tx->info[idx].m = NULL; 2723 map = tx->info[idx].map; 2724 bus_dmamap_unload(tx->dmat, map); 2725 m_freem(m); 2726 } 2727 if (tx->info[idx].flag) { 2728 tx->info[idx].flag = 0; 2729 tx->pkt_done++; 2730 } 2731 } 2732 2733 /* If we have space, clear IFF_OACTIVE to tell the stack that 2734 its OK to send packets */ 2735#ifdef IFNET_BUF_RING 2736 flags = &ss->if_drv_flags; 2737#else 2738 flags = &ifp->if_drv_flags; 2739#endif 2740 mtx_lock(&ss->tx.mtx); 2741 if ((*flags) & IFF_DRV_OACTIVE && 2742 tx->req - tx->done < (tx->mask + 1)/4) { 2743 *(flags) &= ~IFF_DRV_OACTIVE; 2744 ss->tx.wake++; 2745 mxge_start_locked(ss); 2746 } 2747#ifdef IFNET_BUF_RING 2748 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2749 /* let the NIC stop polling this queue, since there 2750 * are no more transmits pending */ 2751 if (tx->req == tx->done) { 2752 *tx->send_stop = 1; 2753 tx->queue_active = 0; 2754 tx->deactivate++; 2755 wmb(); 2756 } 2757 } 2758#endif 2759 mtx_unlock(&ss->tx.mtx); 2760 2761} 2762 2763static struct mxge_media_type mxge_xfp_media_types[] = 2764{ 2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2768 {0, (1 << 5), "10GBASE-ER"}, 2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2770 {0, (1 << 3), "10GBASE-SW"}, 2771 {0, (1 << 2), "10GBASE-LW"}, 2772 {0, (1 << 1), "10GBASE-EW"}, 2773 {0, (1 << 0), "Reserved"} 2774}; 2775static struct mxge_media_type mxge_sfp_media_types[] = 2776{ 2777 {0, (1 << 7), "Reserved"}, 2778 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2779 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2780 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2781}; 2782 2783static void 2784mxge_set_media(mxge_softc_t *sc, int type) 2785{ 2786 sc->media_flags |= type; 2787 ifmedia_add(&sc->media, sc->media_flags, 0, NULL); 2788 ifmedia_set(&sc->media, sc->media_flags); 2789} 2790 2791 2792/* 2793 * Determine the media type for a NIC. Some XFPs will identify 2794 * themselves only when their link is up, so this is initiated via a 2795 * link up interrupt. However, this can potentially take up to 2796 * several milliseconds, so it is run via the watchdog routine, rather 2797 * than in the interrupt handler itself. This need only be done 2798 * once, not each time the link is up. 2799 */ 2800static void 2801mxge_media_probe(mxge_softc_t *sc) 2802{ 2803 mxge_cmd_t cmd; 2804 char *cage_type; 2805 char *ptr; 2806 struct mxge_media_type *mxge_media_types = NULL; 2807 int i, err, ms, mxge_media_type_entries; 2808 uint32_t byte; 2809 2810 sc->need_media_probe = 0; 2811 2812 /* if we've already set a media type, we're done */ 2813 if (sc->media_flags != (IFM_ETHER | IFM_AUTO)) 2814 return; 2815 2816 /* 2817 * parse the product code to deterimine the interface type 2818 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2819 * after the 3rd dash in the driver's cached copy of the 2820 * EEPROM's product code string. 2821 */ 2822 ptr = sc->product_code_string; 2823 if (ptr == NULL) { 2824 device_printf(sc->dev, "Missing product code\n"); 2825 } 2826 2827 for (i = 0; i < 3; i++, ptr++) { 2828 ptr = index(ptr, '-'); 2829 if (ptr == NULL) { 2830 device_printf(sc->dev, 2831 "only %d dashes in PC?!?\n", i); 2832 return; 2833 } 2834 } 2835 if (*ptr == 'C') { 2836 /* -C is CX4 */ 2837 mxge_set_media(sc, IFM_10G_CX4); 2838 return; 2839 } 2840 else if (*ptr == 'Q') { 2841 /* -Q is Quad Ribbon Fiber */ 2842 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2843 /* FreeBSD has no media type for Quad ribbon fiber */ 2844 return; 2845 } 2846 2847 if (*ptr == 'R') { 2848 /* -R is XFP */ 2849 mxge_media_types = mxge_xfp_media_types; 2850 mxge_media_type_entries = 2851 sizeof (mxge_xfp_media_types) / 2852 sizeof (mxge_xfp_media_types[0]); 2853 byte = MXGE_XFP_COMPLIANCE_BYTE; 2854 cage_type = "XFP"; 2855 } 2856 2857 if (*ptr == 'S' || *(ptr +1) == 'S') { 2858 /* -S or -2S is SFP+ */ 2859 mxge_media_types = mxge_sfp_media_types; 2860 mxge_media_type_entries = 2861 sizeof (mxge_sfp_media_types) / 2862 sizeof (mxge_sfp_media_types[0]); 2863 cage_type = "SFP+"; 2864 byte = 3; 2865 } 2866 2867 if (mxge_media_types == NULL) { 2868 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2869 return; 2870 } 2871 2872 /* 2873 * At this point we know the NIC has an XFP cage, so now we 2874 * try to determine what is in the cage by using the 2875 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2876 * register. We read just one byte, which may take over 2877 * a millisecond 2878 */ 2879 2880 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2881 cmd.data1 = byte; 2882 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2883 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2884 device_printf(sc->dev, "failed to read XFP\n"); 2885 } 2886 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2887 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2888 } 2889 if (err != MXGEFW_CMD_OK) { 2890 return; 2891 } 2892 2893 /* now we wait for the data to be cached */ 2894 cmd.data0 = byte; 2895 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2896 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2897 DELAY(1000); 2898 cmd.data0 = byte; 2899 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2900 } 2901 if (err != MXGEFW_CMD_OK) { 2902 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2903 cage_type, err, ms); 2904 return; 2905 } 2906 2907 if (cmd.data0 == mxge_media_types[0].bitmask) { 2908 if (mxge_verbose) 2909 device_printf(sc->dev, "%s:%s\n", cage_type, 2910 mxge_media_types[0].name); 2911 mxge_set_media(sc, IFM_10G_CX4); 2912 return; 2913 } 2914 for (i = 1; i < mxge_media_type_entries; i++) { 2915 if (cmd.data0 & mxge_media_types[i].bitmask) { 2916 if (mxge_verbose) 2917 device_printf(sc->dev, "%s:%s\n", 2918 cage_type, 2919 mxge_media_types[i].name); 2920 2921 mxge_set_media(sc, mxge_media_types[i].flag); 2922 return; 2923 } 2924 } 2925 device_printf(sc->dev, "%s media 0x%x unknown\n", cage_type, 2926 cmd.data0); 2927 2928 return; 2929} 2930 2931static void 2932mxge_intr(void *arg) 2933{ 2934 struct mxge_slice_state *ss = arg; 2935 mxge_softc_t *sc = ss->sc; 2936 mcp_irq_data_t *stats = ss->fw_stats; 2937 mxge_tx_ring_t *tx = &ss->tx; 2938 mxge_rx_done_t *rx_done = &ss->rx_done; 2939 uint32_t send_done_count; 2940 uint8_t valid; 2941 2942 2943#ifndef IFNET_BUF_RING 2944 /* an interrupt on a non-zero slice is implicitly valid 2945 since MSI-X irqs are not shared */ 2946 if (ss != sc->ss) { 2947 mxge_clean_rx_done(ss); 2948 *ss->irq_claim = be32toh(3); 2949 return; 2950 } 2951#endif 2952 2953 /* make sure the DMA has finished */ 2954 if (!stats->valid) { 2955 return; 2956 } 2957 valid = stats->valid; 2958 2959 if (sc->legacy_irq) { 2960 /* lower legacy IRQ */ 2961 *sc->irq_deassert = 0; 2962 if (!mxge_deassert_wait) 2963 /* don't wait for conf. that irq is low */ 2964 stats->valid = 0; 2965 } else { 2966 stats->valid = 0; 2967 } 2968 2969 /* loop while waiting for legacy irq deassertion */ 2970 do { 2971 /* check for transmit completes and receives */ 2972 send_done_count = be32toh(stats->send_done_count); 2973 while ((send_done_count != tx->pkt_done) || 2974 (rx_done->entry[rx_done->idx].length != 0)) { 2975 if (send_done_count != tx->pkt_done) 2976 mxge_tx_done(ss, (int)send_done_count); 2977 mxge_clean_rx_done(ss); 2978 send_done_count = be32toh(stats->send_done_count); 2979 } 2980 if (sc->legacy_irq && mxge_deassert_wait) 2981 wmb(); 2982 } while (*((volatile uint8_t *) &stats->valid)); 2983 2984 /* fw link & error stats meaningful only on the first slice */ 2985 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 2986 if (sc->link_state != stats->link_up) { 2987 sc->link_state = stats->link_up; 2988 if (sc->link_state) { 2989 if_link_state_change(sc->ifp, LINK_STATE_UP); 2990 if (mxge_verbose) 2991 device_printf(sc->dev, "link up\n"); 2992 } else { 2993 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2994 if (mxge_verbose) 2995 device_printf(sc->dev, "link down\n"); 2996 } 2997 sc->need_media_probe = 1; 2998 } 2999 if (sc->rdma_tags_available != 3000 be32toh(stats->rdma_tags_available)) { 3001 sc->rdma_tags_available = 3002 be32toh(stats->rdma_tags_available); 3003 device_printf(sc->dev, "RDMA timed out! %d tags " 3004 "left\n", sc->rdma_tags_available); 3005 } 3006 3007 if (stats->link_down) { 3008 sc->down_cnt += stats->link_down; 3009 sc->link_state = 0; 3010 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3011 } 3012 } 3013 3014 /* check to see if we have rx token to pass back */ 3015 if (valid & 0x1) 3016 *ss->irq_claim = be32toh(3); 3017 *(ss->irq_claim + 1) = be32toh(3); 3018} 3019 3020static void 3021mxge_init(void *arg) 3022{ 3023} 3024 3025 3026 3027static void 3028mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3029{ 3030 struct lro_entry *lro_entry; 3031 int i; 3032 3033 while (!SLIST_EMPTY(&ss->lro_free)) { 3034 lro_entry = SLIST_FIRST(&ss->lro_free); 3035 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3036 free(lro_entry, M_DEVBUF); 3037 } 3038 3039 for (i = 0; i <= ss->rx_big.mask; i++) { 3040 if (ss->rx_big.info[i].m == NULL) 3041 continue; 3042 bus_dmamap_unload(ss->rx_big.dmat, 3043 ss->rx_big.info[i].map); 3044 m_freem(ss->rx_big.info[i].m); 3045 ss->rx_big.info[i].m = NULL; 3046 } 3047 3048 for (i = 0; i <= ss->rx_small.mask; i++) { 3049 if (ss->rx_small.info[i].m == NULL) 3050 continue; 3051 bus_dmamap_unload(ss->rx_small.dmat, 3052 ss->rx_small.info[i].map); 3053 m_freem(ss->rx_small.info[i].m); 3054 ss->rx_small.info[i].m = NULL; 3055 } 3056 3057 /* transmit ring used only on the first slice */ 3058 if (ss->tx.info == NULL) 3059 return; 3060 3061 for (i = 0; i <= ss->tx.mask; i++) { 3062 ss->tx.info[i].flag = 0; 3063 if (ss->tx.info[i].m == NULL) 3064 continue; 3065 bus_dmamap_unload(ss->tx.dmat, 3066 ss->tx.info[i].map); 3067 m_freem(ss->tx.info[i].m); 3068 ss->tx.info[i].m = NULL; 3069 } 3070} 3071 3072static void 3073mxge_free_mbufs(mxge_softc_t *sc) 3074{ 3075 int slice; 3076 3077 for (slice = 0; slice < sc->num_slices; slice++) 3078 mxge_free_slice_mbufs(&sc->ss[slice]); 3079} 3080 3081static void 3082mxge_free_slice_rings(struct mxge_slice_state *ss) 3083{ 3084 int i; 3085 3086 3087 if (ss->rx_done.entry != NULL) 3088 mxge_dma_free(&ss->rx_done.dma); 3089 ss->rx_done.entry = NULL; 3090 3091 if (ss->tx.req_bytes != NULL) 3092 free(ss->tx.req_bytes, M_DEVBUF); 3093 ss->tx.req_bytes = NULL; 3094 3095 if (ss->tx.seg_list != NULL) 3096 free(ss->tx.seg_list, M_DEVBUF); 3097 ss->tx.seg_list = NULL; 3098 3099 if (ss->rx_small.shadow != NULL) 3100 free(ss->rx_small.shadow, M_DEVBUF); 3101 ss->rx_small.shadow = NULL; 3102 3103 if (ss->rx_big.shadow != NULL) 3104 free(ss->rx_big.shadow, M_DEVBUF); 3105 ss->rx_big.shadow = NULL; 3106 3107 if (ss->tx.info != NULL) { 3108 if (ss->tx.dmat != NULL) { 3109 for (i = 0; i <= ss->tx.mask; i++) { 3110 bus_dmamap_destroy(ss->tx.dmat, 3111 ss->tx.info[i].map); 3112 } 3113 bus_dma_tag_destroy(ss->tx.dmat); 3114 } 3115 free(ss->tx.info, M_DEVBUF); 3116 } 3117 ss->tx.info = NULL; 3118 3119 if (ss->rx_small.info != NULL) { 3120 if (ss->rx_small.dmat != NULL) { 3121 for (i = 0; i <= ss->rx_small.mask; i++) { 3122 bus_dmamap_destroy(ss->rx_small.dmat, 3123 ss->rx_small.info[i].map); 3124 } 3125 bus_dmamap_destroy(ss->rx_small.dmat, 3126 ss->rx_small.extra_map); 3127 bus_dma_tag_destroy(ss->rx_small.dmat); 3128 } 3129 free(ss->rx_small.info, M_DEVBUF); 3130 } 3131 ss->rx_small.info = NULL; 3132 3133 if (ss->rx_big.info != NULL) { 3134 if (ss->rx_big.dmat != NULL) { 3135 for (i = 0; i <= ss->rx_big.mask; i++) { 3136 bus_dmamap_destroy(ss->rx_big.dmat, 3137 ss->rx_big.info[i].map); 3138 } 3139 bus_dmamap_destroy(ss->rx_big.dmat, 3140 ss->rx_big.extra_map); 3141 bus_dma_tag_destroy(ss->rx_big.dmat); 3142 } 3143 free(ss->rx_big.info, M_DEVBUF); 3144 } 3145 ss->rx_big.info = NULL; 3146} 3147 3148static void 3149mxge_free_rings(mxge_softc_t *sc) 3150{ 3151 int slice; 3152 3153 for (slice = 0; slice < sc->num_slices; slice++) 3154 mxge_free_slice_rings(&sc->ss[slice]); 3155} 3156 3157static int 3158mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3159 int tx_ring_entries) 3160{ 3161 mxge_softc_t *sc = ss->sc; 3162 size_t bytes; 3163 int err, i; 3164 3165 err = ENOMEM; 3166 3167 /* allocate per-slice receive resources */ 3168 3169 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3170 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3171 3172 /* allocate the rx shadow rings */ 3173 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3174 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3175 if (ss->rx_small.shadow == NULL) 3176 return err;; 3177 3178 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3179 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3180 if (ss->rx_big.shadow == NULL) 3181 return err;; 3182 3183 /* allocate the rx host info rings */ 3184 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3185 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3186 if (ss->rx_small.info == NULL) 3187 return err;; 3188 3189 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3190 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3191 if (ss->rx_big.info == NULL) 3192 return err;; 3193 3194 /* allocate the rx busdma resources */ 3195 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3196 1, /* alignment */ 3197 4096, /* boundary */ 3198 BUS_SPACE_MAXADDR, /* low */ 3199 BUS_SPACE_MAXADDR, /* high */ 3200 NULL, NULL, /* filter */ 3201 MHLEN, /* maxsize */ 3202 1, /* num segs */ 3203 MHLEN, /* maxsegsize */ 3204 BUS_DMA_ALLOCNOW, /* flags */ 3205 NULL, NULL, /* lock */ 3206 &ss->rx_small.dmat); /* tag */ 3207 if (err != 0) { 3208 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3209 err); 3210 return err;; 3211 } 3212 3213 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3214 1, /* alignment */ 3215#if MXGE_VIRT_JUMBOS 3216 4096, /* boundary */ 3217#else 3218 0, /* boundary */ 3219#endif 3220 BUS_SPACE_MAXADDR, /* low */ 3221 BUS_SPACE_MAXADDR, /* high */ 3222 NULL, NULL, /* filter */ 3223 3*4096, /* maxsize */ 3224#if MXGE_VIRT_JUMBOS 3225 3, /* num segs */ 3226 4096, /* maxsegsize*/ 3227#else 3228 1, /* num segs */ 3229 MJUM9BYTES, /* maxsegsize*/ 3230#endif 3231 BUS_DMA_ALLOCNOW, /* flags */ 3232 NULL, NULL, /* lock */ 3233 &ss->rx_big.dmat); /* tag */ 3234 if (err != 0) { 3235 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3236 err); 3237 return err;; 3238 } 3239 for (i = 0; i <= ss->rx_small.mask; i++) { 3240 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3241 &ss->rx_small.info[i].map); 3242 if (err != 0) { 3243 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3244 err); 3245 return err;; 3246 } 3247 } 3248 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3249 &ss->rx_small.extra_map); 3250 if (err != 0) { 3251 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3252 err); 3253 return err;; 3254 } 3255 3256 for (i = 0; i <= ss->rx_big.mask; i++) { 3257 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3258 &ss->rx_big.info[i].map); 3259 if (err != 0) { 3260 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3261 err); 3262 return err;; 3263 } 3264 } 3265 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3266 &ss->rx_big.extra_map); 3267 if (err != 0) { 3268 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3269 err); 3270 return err;; 3271 } 3272 3273 /* now allocate TX resouces */ 3274 3275#ifndef IFNET_BUF_RING 3276 /* only use a single TX ring for now */ 3277 if (ss != ss->sc->ss) 3278 return 0; 3279#endif 3280 3281 ss->tx.mask = tx_ring_entries - 1; 3282 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3283 3284 3285 /* allocate the tx request copy block */ 3286 bytes = 8 + 3287 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3288 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3289 if (ss->tx.req_bytes == NULL) 3290 return err;; 3291 /* ensure req_list entries are aligned to 8 bytes */ 3292 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3293 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3294 3295 /* allocate the tx busdma segment list */ 3296 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3297 ss->tx.seg_list = (bus_dma_segment_t *) 3298 malloc(bytes, M_DEVBUF, M_WAITOK); 3299 if (ss->tx.seg_list == NULL) 3300 return err;; 3301 3302 /* allocate the tx host info ring */ 3303 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3304 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3305 if (ss->tx.info == NULL) 3306 return err;; 3307 3308 /* allocate the tx busdma resources */ 3309 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3310 1, /* alignment */ 3311 sc->tx_boundary, /* boundary */ 3312 BUS_SPACE_MAXADDR, /* low */ 3313 BUS_SPACE_MAXADDR, /* high */ 3314 NULL, NULL, /* filter */ 3315 65536 + 256, /* maxsize */ 3316 ss->tx.max_desc - 2, /* num segs */ 3317 sc->tx_boundary, /* maxsegsz */ 3318 BUS_DMA_ALLOCNOW, /* flags */ 3319 NULL, NULL, /* lock */ 3320 &ss->tx.dmat); /* tag */ 3321 3322 if (err != 0) { 3323 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3324 err); 3325 return err;; 3326 } 3327 3328 /* now use these tags to setup dmamaps for each slot 3329 in the ring */ 3330 for (i = 0; i <= ss->tx.mask; i++) { 3331 err = bus_dmamap_create(ss->tx.dmat, 0, 3332 &ss->tx.info[i].map); 3333 if (err != 0) { 3334 device_printf(sc->dev, "Err %d tx dmamap\n", 3335 err); 3336 return err;; 3337 } 3338 } 3339 return 0; 3340 3341} 3342 3343static int 3344mxge_alloc_rings(mxge_softc_t *sc) 3345{ 3346 mxge_cmd_t cmd; 3347 int tx_ring_size; 3348 int tx_ring_entries, rx_ring_entries; 3349 int err, slice; 3350 3351 /* get ring sizes */ 3352 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3353 tx_ring_size = cmd.data0; 3354 if (err != 0) { 3355 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3356 goto abort; 3357 } 3358 3359 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3360 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3361 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3362 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3363 IFQ_SET_READY(&sc->ifp->if_snd); 3364 3365 for (slice = 0; slice < sc->num_slices; slice++) { 3366 err = mxge_alloc_slice_rings(&sc->ss[slice], 3367 rx_ring_entries, 3368 tx_ring_entries); 3369 if (err != 0) 3370 goto abort; 3371 } 3372 return 0; 3373 3374abort: 3375 mxge_free_rings(sc); 3376 return err; 3377 3378} 3379 3380 3381static void 3382mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3383{ 3384 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3385 3386 if (bufsize < MCLBYTES) { 3387 /* easy, everything fits in a single buffer */ 3388 *big_buf_size = MCLBYTES; 3389 *cl_size = MCLBYTES; 3390 *nbufs = 1; 3391 return; 3392 } 3393 3394 if (bufsize < MJUMPAGESIZE) { 3395 /* still easy, everything still fits in a single buffer */ 3396 *big_buf_size = MJUMPAGESIZE; 3397 *cl_size = MJUMPAGESIZE; 3398 *nbufs = 1; 3399 return; 3400 } 3401#if MXGE_VIRT_JUMBOS 3402 /* now we need to use virtually contiguous buffers */ 3403 *cl_size = MJUM9BYTES; 3404 *big_buf_size = 4096; 3405 *nbufs = mtu / 4096 + 1; 3406 /* needs to be a power of two, so round up */ 3407 if (*nbufs == 3) 3408 *nbufs = 4; 3409#else 3410 *cl_size = MJUM9BYTES; 3411 *big_buf_size = MJUM9BYTES; 3412 *nbufs = 1; 3413#endif 3414} 3415 3416static int 3417mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3418{ 3419 mxge_softc_t *sc; 3420 mxge_cmd_t cmd; 3421 bus_dmamap_t map; 3422 struct lro_entry *lro_entry; 3423 int err, i, slice; 3424 3425 3426 sc = ss->sc; 3427 slice = ss - sc->ss; 3428 3429 SLIST_INIT(&ss->lro_free); 3430 SLIST_INIT(&ss->lro_active); 3431 3432 for (i = 0; i < sc->lro_cnt; i++) { 3433 lro_entry = (struct lro_entry *) 3434 malloc(sizeof (*lro_entry), M_DEVBUF, 3435 M_NOWAIT | M_ZERO); 3436 if (lro_entry == NULL) { 3437 sc->lro_cnt = i; 3438 break; 3439 } 3440 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3441 } 3442 /* get the lanai pointers to the send and receive rings */ 3443 3444 err = 0; 3445#ifndef IFNET_BUF_RING 3446 /* We currently only send from the first slice */ 3447 if (slice == 0) { 3448#endif 3449 cmd.data0 = slice; 3450 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3451 ss->tx.lanai = 3452 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3453 ss->tx.send_go = (volatile uint32_t *) 3454 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3455 ss->tx.send_stop = (volatile uint32_t *) 3456 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3457#ifndef IFNET_BUF_RING 3458 } 3459#endif 3460 cmd.data0 = slice; 3461 err |= mxge_send_cmd(sc, 3462 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3463 ss->rx_small.lanai = 3464 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3465 cmd.data0 = slice; 3466 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3467 ss->rx_big.lanai = 3468 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3469 3470 if (err != 0) { 3471 device_printf(sc->dev, 3472 "failed to get ring sizes or locations\n"); 3473 return EIO; 3474 } 3475 3476 /* stock receive rings */ 3477 for (i = 0; i <= ss->rx_small.mask; i++) { 3478 map = ss->rx_small.info[i].map; 3479 err = mxge_get_buf_small(ss, map, i); 3480 if (err) { 3481 device_printf(sc->dev, "alloced %d/%d smalls\n", 3482 i, ss->rx_small.mask + 1); 3483 return ENOMEM; 3484 } 3485 } 3486 for (i = 0; i <= ss->rx_big.mask; i++) { 3487 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3488 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3489 } 3490 ss->rx_big.nbufs = nbufs; 3491 ss->rx_big.cl_size = cl_size; 3492 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3493 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3494 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3495 map = ss->rx_big.info[i].map; 3496 err = mxge_get_buf_big(ss, map, i); 3497 if (err) { 3498 device_printf(sc->dev, "alloced %d/%d bigs\n", 3499 i, ss->rx_big.mask + 1); 3500 return ENOMEM; 3501 } 3502 } 3503 return 0; 3504} 3505 3506static int 3507mxge_open(mxge_softc_t *sc) 3508{ 3509 mxge_cmd_t cmd; 3510 int err, big_bytes, nbufs, slice, cl_size, i; 3511 bus_addr_t bus; 3512 volatile uint8_t *itable; 3513 struct mxge_slice_state *ss; 3514 3515 /* Copy the MAC address in case it was overridden */ 3516 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3517 3518 err = mxge_reset(sc, 1); 3519 if (err != 0) { 3520 device_printf(sc->dev, "failed to reset\n"); 3521 return EIO; 3522 } 3523 3524 if (sc->num_slices > 1) { 3525 /* setup the indirection table */ 3526 cmd.data0 = sc->num_slices; 3527 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3528 &cmd); 3529 3530 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3531 &cmd); 3532 if (err != 0) { 3533 device_printf(sc->dev, 3534 "failed to setup rss tables\n"); 3535 return err; 3536 } 3537 3538 /* just enable an identity mapping */ 3539 itable = sc->sram + cmd.data0; 3540 for (i = 0; i < sc->num_slices; i++) 3541 itable[i] = (uint8_t)i; 3542 3543 cmd.data0 = 1; 3544 cmd.data1 = mxge_rss_hash_type; 3545 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3546 if (err != 0) { 3547 device_printf(sc->dev, "failed to enable slices\n"); 3548 return err; 3549 } 3550 } 3551 3552 3553 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3554 3555 cmd.data0 = nbufs; 3556 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3557 &cmd); 3558 /* error is only meaningful if we're trying to set 3559 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3560 if (err && nbufs > 1) { 3561 device_printf(sc->dev, 3562 "Failed to set alway-use-n to %d\n", 3563 nbufs); 3564 return EIO; 3565 } 3566 /* Give the firmware the mtu and the big and small buffer 3567 sizes. The firmware wants the big buf size to be a power 3568 of two. Luckily, FreeBSD's clusters are powers of two */ 3569 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3570 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3571 cmd.data0 = MHLEN - MXGEFW_PAD; 3572 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3573 &cmd); 3574 cmd.data0 = big_bytes; 3575 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3576 3577 if (err != 0) { 3578 device_printf(sc->dev, "failed to setup params\n"); 3579 goto abort; 3580 } 3581 3582 /* Now give him the pointer to the stats block */ 3583 for (slice = 0; 3584#ifdef IFNET_BUF_RING 3585 slice < sc->num_slices; 3586#else 3587 slice < 1; 3588#endif 3589 slice++) { 3590 ss = &sc->ss[slice]; 3591 cmd.data0 = 3592 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3593 cmd.data1 = 3594 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3595 cmd.data2 = sizeof(struct mcp_irq_data); 3596 cmd.data2 |= (slice << 16); 3597 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3598 } 3599 3600 if (err != 0) { 3601 bus = sc->ss->fw_stats_dma.bus_addr; 3602 bus += offsetof(struct mcp_irq_data, send_done_count); 3603 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3604 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3605 err = mxge_send_cmd(sc, 3606 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3607 &cmd); 3608 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3609 sc->fw_multicast_support = 0; 3610 } else { 3611 sc->fw_multicast_support = 1; 3612 } 3613 3614 if (err != 0) { 3615 device_printf(sc->dev, "failed to setup params\n"); 3616 goto abort; 3617 } 3618 3619 for (slice = 0; slice < sc->num_slices; slice++) { 3620 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3621 if (err != 0) { 3622 device_printf(sc->dev, "couldn't open slice %d\n", 3623 slice); 3624 goto abort; 3625 } 3626 } 3627 3628 /* Finally, start the firmware running */ 3629 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3630 if (err) { 3631 device_printf(sc->dev, "Couldn't bring up link\n"); 3632 goto abort; 3633 } 3634#ifdef IFNET_BUF_RING 3635 for (slice = 0; slice < sc->num_slices; slice++) { 3636 ss = &sc->ss[slice]; 3637 ss->if_drv_flags |= IFF_DRV_RUNNING; 3638 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3639 } 3640#endif 3641 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3642 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3643 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3644 3645 return 0; 3646 3647 3648abort: 3649 mxge_free_mbufs(sc); 3650 3651 return err; 3652} 3653 3654static int 3655mxge_close(mxge_softc_t *sc, int down) 3656{ 3657 mxge_cmd_t cmd; 3658 int err, old_down_cnt; 3659#ifdef IFNET_BUF_RING 3660 struct mxge_slice_state *ss; 3661 int slice; 3662#endif 3663 3664 callout_stop(&sc->co_hdl); 3665#ifdef IFNET_BUF_RING 3666 for (slice = 0; slice < sc->num_slices; slice++) { 3667 ss = &sc->ss[slice]; 3668 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3669 } 3670#endif 3671 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3672 if (!down) { 3673 old_down_cnt = sc->down_cnt; 3674 wmb(); 3675 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3676 if (err) { 3677 device_printf(sc->dev, 3678 "Couldn't bring down link\n"); 3679 } 3680 if (old_down_cnt == sc->down_cnt) { 3681 /* wait for down irq */ 3682 DELAY(10 * sc->intr_coal_delay); 3683 } 3684 wmb(); 3685 if (old_down_cnt == sc->down_cnt) { 3686 device_printf(sc->dev, "never got down irq\n"); 3687 } 3688 } 3689 mxge_free_mbufs(sc); 3690 3691 return 0; 3692} 3693 3694static void 3695mxge_setup_cfg_space(mxge_softc_t *sc) 3696{ 3697 device_t dev = sc->dev; 3698 int reg; 3699 uint16_t cmd, lnk, pectl; 3700 3701 /* find the PCIe link width and set max read request to 4KB*/ 3702 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3703 lnk = pci_read_config(dev, reg + 0x12, 2); 3704 sc->link_width = (lnk >> 4) & 0x3f; 3705 3706 if (sc->pectl == 0) { 3707 pectl = pci_read_config(dev, reg + 0x8, 2); 3708 pectl = (pectl & ~0x7000) | (5 << 12); 3709 pci_write_config(dev, reg + 0x8, pectl, 2); 3710 sc->pectl = pectl; 3711 } else { 3712 /* restore saved pectl after watchdog reset */ 3713 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3714 } 3715 } 3716 3717 /* Enable DMA and Memory space access */ 3718 pci_enable_busmaster(dev); 3719 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3720 cmd |= PCIM_CMD_MEMEN; 3721 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3722} 3723 3724static uint32_t 3725mxge_read_reboot(mxge_softc_t *sc) 3726{ 3727 device_t dev = sc->dev; 3728 uint32_t vs; 3729 3730 /* find the vendor specific offset */ 3731 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3732 device_printf(sc->dev, 3733 "could not find vendor specific offset\n"); 3734 return (uint32_t)-1; 3735 } 3736 /* enable read32 mode */ 3737 pci_write_config(dev, vs + 0x10, 0x3, 1); 3738 /* tell NIC which register to read */ 3739 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3740 return (pci_read_config(dev, vs + 0x14, 4)); 3741} 3742 3743static void 3744mxge_watchdog_reset(mxge_softc_t *sc) 3745{ 3746 struct pci_devinfo *dinfo; 3747 struct mxge_slice_state *ss; 3748 int err, running, s, num_tx_slices = 1; 3749 uint32_t reboot; 3750 uint16_t cmd; 3751 3752 err = ENXIO; 3753 3754 device_printf(sc->dev, "Watchdog reset!\n"); 3755 3756 /* 3757 * check to see if the NIC rebooted. If it did, then all of 3758 * PCI config space has been reset, and things like the 3759 * busmaster bit will be zero. If this is the case, then we 3760 * must restore PCI config space before the NIC can be used 3761 * again 3762 */ 3763 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3764 if (cmd == 0xffff) { 3765 /* 3766 * maybe the watchdog caught the NIC rebooting; wait 3767 * up to 100ms for it to finish. If it does not come 3768 * back, then give up 3769 */ 3770 DELAY(1000*100); 3771 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3772 if (cmd == 0xffff) { 3773 device_printf(sc->dev, "NIC disappeared!\n"); 3774 } 3775 } 3776 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3777 /* print the reboot status */ 3778 reboot = mxge_read_reboot(sc); 3779 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3780 reboot); 3781 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3782 if (running) { 3783 3784 /* 3785 * quiesce NIC so that TX routines will not try to 3786 * xmit after restoration of BAR 3787 */ 3788 3789 /* Mark the link as down */ 3790 if (sc->link_state) { 3791 sc->link_state = 0; 3792 if_link_state_change(sc->ifp, 3793 LINK_STATE_DOWN); 3794 } 3795#ifdef IFNET_BUF_RING 3796 num_tx_slices = sc->num_slices; 3797#endif 3798 /* grab all TX locks to ensure no tx */ 3799 for (s = 0; s < num_tx_slices; s++) { 3800 ss = &sc->ss[s]; 3801 mtx_lock(&ss->tx.mtx); 3802 } 3803 mxge_close(sc, 1); 3804 } 3805 /* restore PCI configuration space */ 3806 dinfo = device_get_ivars(sc->dev); 3807 pci_cfg_restore(sc->dev, dinfo); 3808 3809 /* and redo any changes we made to our config space */ 3810 mxge_setup_cfg_space(sc); 3811 3812 /* reload f/w */ 3813 err = mxge_load_firmware(sc, 0); 3814 if (err) { 3815 device_printf(sc->dev, 3816 "Unable to re-load f/w\n"); 3817 } 3818 if (running) { 3819 if (!err) 3820 err = mxge_open(sc); 3821 /* release all TX locks */ 3822 for (s = 0; s < num_tx_slices; s++) { 3823 ss = &sc->ss[s]; 3824#ifdef IFNET_BUF_RING 3825 mxge_start_locked(ss); 3826#endif 3827 mtx_unlock(&ss->tx.mtx); 3828 } 3829 } 3830 sc->watchdog_resets++; 3831 } else { 3832 device_printf(sc->dev, 3833 "NIC did not reboot, not resetting\n"); 3834 err = 0; 3835 } 3836 if (err) { 3837 device_printf(sc->dev, "watchdog reset failed\n"); 3838 } else { 3839 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 3840 callout_reset(&sc->co_hdl, mxge_ticks, 3841 mxge_tick, sc); 3842 } 3843} 3844 3845static void 3846mxge_watchdog_task(void *arg, int pending) 3847{ 3848 mxge_softc_t *sc = arg; 3849 3850 3851 mtx_lock(&sc->driver_mtx); 3852 mxge_watchdog_reset(sc); 3853 mtx_unlock(&sc->driver_mtx); 3854} 3855 3856static void 3857mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3858{ 3859 tx = &sc->ss[slice].tx; 3860 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3861 device_printf(sc->dev, 3862 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3863 tx->req, tx->done, tx->queue_active); 3864 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3865 tx->activate, tx->deactivate); 3866 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3867 tx->pkt_done, 3868 be32toh(sc->ss->fw_stats->send_done_count)); 3869} 3870 3871static int 3872mxge_watchdog(mxge_softc_t *sc) 3873{ 3874 mxge_tx_ring_t *tx; 3875 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3876 int i, err = 0; 3877 3878 /* see if we have outstanding transmits, which 3879 have been pending for more than mxge_ticks */ 3880 for (i = 0; 3881#ifdef IFNET_BUF_RING 3882 (i < sc->num_slices) && (err == 0); 3883#else 3884 (i < 1) && (err == 0); 3885#endif 3886 i++) { 3887 tx = &sc->ss[i].tx; 3888 if (tx->req != tx->done && 3889 tx->watchdog_req != tx->watchdog_done && 3890 tx->done == tx->watchdog_done) { 3891 /* check for pause blocking before resetting */ 3892 if (tx->watchdog_rx_pause == rx_pause) { 3893 mxge_warn_stuck(sc, tx, i); 3894 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3895 return (ENXIO); 3896 } 3897 else 3898 device_printf(sc->dev, "Flow control blocking " 3899 "xmits, check link partner\n"); 3900 } 3901 3902 tx->watchdog_req = tx->req; 3903 tx->watchdog_done = tx->done; 3904 tx->watchdog_rx_pause = rx_pause; 3905 } 3906 3907 if (sc->need_media_probe) 3908 mxge_media_probe(sc); 3909 return (err); 3910} 3911 3912static void 3913mxge_update_stats(mxge_softc_t *sc) 3914{ 3915 struct mxge_slice_state *ss; 3916 u_long ipackets = 0; 3917 u_long opackets = 0; 3918#ifdef IFNET_BUF_RING 3919 u_long obytes = 0; 3920 u_long omcasts = 0; 3921 u_long odrops = 0; 3922#endif 3923 u_long oerrors = 0; 3924 int slice; 3925 3926 for (slice = 0; slice < sc->num_slices; slice++) { 3927 ss = &sc->ss[slice]; 3928 ipackets += ss->ipackets; 3929 opackets += ss->opackets; 3930#ifdef IFNET_BUF_RING 3931 obytes += ss->obytes; 3932 omcasts += ss->omcasts; 3933 odrops += ss->tx.br->br_drops; 3934#endif 3935 oerrors += ss->oerrors; 3936 } 3937 sc->ifp->if_ipackets = ipackets; 3938 sc->ifp->if_opackets = opackets; 3939#ifdef IFNET_BUF_RING 3940 sc->ifp->if_obytes = obytes; 3941 sc->ifp->if_omcasts = omcasts; 3942 sc->ifp->if_snd.ifq_drops = odrops; 3943#endif 3944 sc->ifp->if_oerrors = oerrors; 3945} 3946 3947static void 3948mxge_tick(void *arg) 3949{ 3950 mxge_softc_t *sc = arg; 3951 int err = 0; 3952 3953 /* aggregate stats from different slices */ 3954 mxge_update_stats(sc); 3955 if (!sc->watchdog_countdown) { 3956 err = mxge_watchdog(sc); 3957 sc->watchdog_countdown = 4; 3958 } 3959 sc->watchdog_countdown--; 3960 if (err == 0) 3961 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3962 3963} 3964 3965static int 3966mxge_media_change(struct ifnet *ifp) 3967{ 3968 return EINVAL; 3969} 3970 3971static int 3972mxge_change_mtu(mxge_softc_t *sc, int mtu) 3973{ 3974 struct ifnet *ifp = sc->ifp; 3975 int real_mtu, old_mtu; 3976 int err = 0; 3977 3978 3979 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3980 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 3981 return EINVAL; 3982 mtx_lock(&sc->driver_mtx); 3983 old_mtu = ifp->if_mtu; 3984 ifp->if_mtu = mtu; 3985 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3986 mxge_close(sc, 0); 3987 err = mxge_open(sc); 3988 if (err != 0) { 3989 ifp->if_mtu = old_mtu; 3990 mxge_close(sc, 0); 3991 (void) mxge_open(sc); 3992 } 3993 } 3994 mtx_unlock(&sc->driver_mtx); 3995 return err; 3996} 3997 3998static void 3999mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4000{ 4001 mxge_softc_t *sc = ifp->if_softc; 4002 4003 4004 if (sc == NULL) 4005 return; 4006 ifmr->ifm_status = IFM_AVALID; 4007 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4008 ifmr->ifm_active = IFM_AUTO | IFM_ETHER; 4009 ifmr->ifm_active |= sc->link_state ? IFM_FDX : 0; 4010} 4011 4012static int 4013mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4014{ 4015 mxge_softc_t *sc = ifp->if_softc; 4016 struct ifreq *ifr = (struct ifreq *)data; 4017 int err, mask; 4018 4019 err = 0; 4020 switch (command) { 4021 case SIOCSIFADDR: 4022 case SIOCGIFADDR: 4023 err = ether_ioctl(ifp, command, data); 4024 break; 4025 4026 case SIOCSIFMTU: 4027 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4028 break; 4029 4030 case SIOCSIFFLAGS: 4031 mtx_lock(&sc->driver_mtx); 4032 if (sc->dying) { 4033 mtx_unlock(&sc->driver_mtx); 4034 return EINVAL; 4035 } 4036 if (ifp->if_flags & IFF_UP) { 4037 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4038 err = mxge_open(sc); 4039 } else { 4040 /* take care of promis can allmulti 4041 flag chages */ 4042 mxge_change_promisc(sc, 4043 ifp->if_flags & IFF_PROMISC); 4044 mxge_set_multicast_list(sc); 4045 } 4046 } else { 4047 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4048 mxge_close(sc, 0); 4049 } 4050 } 4051 mtx_unlock(&sc->driver_mtx); 4052 break; 4053 4054 case SIOCADDMULTI: 4055 case SIOCDELMULTI: 4056 mtx_lock(&sc->driver_mtx); 4057 mxge_set_multicast_list(sc); 4058 mtx_unlock(&sc->driver_mtx); 4059 break; 4060 4061 case SIOCSIFCAP: 4062 mtx_lock(&sc->driver_mtx); 4063 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4064 if (mask & IFCAP_TXCSUM) { 4065 if (IFCAP_TXCSUM & ifp->if_capenable) { 4066 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4067 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4068 | CSUM_TSO); 4069 } else { 4070 ifp->if_capenable |= IFCAP_TXCSUM; 4071 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4072 } 4073 } else if (mask & IFCAP_RXCSUM) { 4074 if (IFCAP_RXCSUM & ifp->if_capenable) { 4075 ifp->if_capenable &= ~IFCAP_RXCSUM; 4076 sc->csum_flag = 0; 4077 } else { 4078 ifp->if_capenable |= IFCAP_RXCSUM; 4079 sc->csum_flag = 1; 4080 } 4081 } 4082 if (mask & IFCAP_TSO4) { 4083 if (IFCAP_TSO4 & ifp->if_capenable) { 4084 ifp->if_capenable &= ~IFCAP_TSO4; 4085 ifp->if_hwassist &= ~CSUM_TSO; 4086 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4087 ifp->if_capenable |= IFCAP_TSO4; 4088 ifp->if_hwassist |= CSUM_TSO; 4089 } else { 4090 printf("mxge requires tx checksum offload" 4091 " be enabled to use TSO\n"); 4092 err = EINVAL; 4093 } 4094 } 4095 if (mask & IFCAP_LRO) { 4096 if (IFCAP_LRO & ifp->if_capenable) 4097 err = mxge_change_lro_locked(sc, 0); 4098 else 4099 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4100 } 4101 if (mask & IFCAP_VLAN_HWTAGGING) 4102 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4103 mtx_unlock(&sc->driver_mtx); 4104 VLAN_CAPABILITIES(ifp); 4105 4106 break; 4107 4108 case SIOCGIFMEDIA: 4109 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4110 &sc->media, command); 4111 break; 4112 4113 default: 4114 err = ENOTTY; 4115 } 4116 return err; 4117} 4118 4119static void 4120mxge_fetch_tunables(mxge_softc_t *sc) 4121{ 4122 4123 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4124 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4125 &mxge_flow_control); 4126 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4127 &mxge_intr_coal_delay); 4128 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4129 &mxge_nvidia_ecrc_enable); 4130 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4131 &mxge_force_firmware); 4132 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4133 &mxge_deassert_wait); 4134 TUNABLE_INT_FETCH("hw.mxge.verbose", 4135 &mxge_verbose); 4136 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4137 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4138 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4139 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4140 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4141 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4142 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4143 if (sc->lro_cnt != 0) 4144 mxge_lro_cnt = sc->lro_cnt; 4145 4146 if (bootverbose) 4147 mxge_verbose = 1; 4148 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4149 mxge_intr_coal_delay = 30; 4150 if (mxge_ticks == 0) 4151 mxge_ticks = hz / 2; 4152 sc->pause = mxge_flow_control; 4153 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4154 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4155 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_PORT; 4156 } 4157 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4158 mxge_initial_mtu < ETHER_MIN_LEN) 4159 mxge_initial_mtu = ETHERMTU_JUMBO; 4160 4161 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4162 mxge_throttle = MXGE_MAX_THROTTLE; 4163 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4164 mxge_throttle = MXGE_MIN_THROTTLE; 4165 sc->throttle = mxge_throttle; 4166} 4167 4168 4169static void 4170mxge_free_slices(mxge_softc_t *sc) 4171{ 4172 struct mxge_slice_state *ss; 4173 int i; 4174 4175 4176 if (sc->ss == NULL) 4177 return; 4178 4179 for (i = 0; i < sc->num_slices; i++) { 4180 ss = &sc->ss[i]; 4181 if (ss->fw_stats != NULL) { 4182 mxge_dma_free(&ss->fw_stats_dma); 4183 ss->fw_stats = NULL; 4184#ifdef IFNET_BUF_RING 4185 if (ss->tx.br != NULL) { 4186 drbr_free(ss->tx.br, M_DEVBUF); 4187 ss->tx.br = NULL; 4188 } 4189#endif 4190 mtx_destroy(&ss->tx.mtx); 4191 } 4192 if (ss->rx_done.entry != NULL) { 4193 mxge_dma_free(&ss->rx_done.dma); 4194 ss->rx_done.entry = NULL; 4195 } 4196 } 4197 free(sc->ss, M_DEVBUF); 4198 sc->ss = NULL; 4199} 4200 4201static int 4202mxge_alloc_slices(mxge_softc_t *sc) 4203{ 4204 mxge_cmd_t cmd; 4205 struct mxge_slice_state *ss; 4206 size_t bytes; 4207 int err, i, max_intr_slots; 4208 4209 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4210 if (err != 0) { 4211 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4212 return err; 4213 } 4214 sc->rx_ring_size = cmd.data0; 4215 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4216 4217 bytes = sizeof (*sc->ss) * sc->num_slices; 4218 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4219 if (sc->ss == NULL) 4220 return (ENOMEM); 4221 for (i = 0; i < sc->num_slices; i++) { 4222 ss = &sc->ss[i]; 4223 4224 ss->sc = sc; 4225 4226 /* allocate per-slice rx interrupt queues */ 4227 4228 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4229 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4230 if (err != 0) 4231 goto abort; 4232 ss->rx_done.entry = ss->rx_done.dma.addr; 4233 bzero(ss->rx_done.entry, bytes); 4234 4235 /* 4236 * allocate the per-slice firmware stats; stats 4237 * (including tx) are used used only on the first 4238 * slice for now 4239 */ 4240#ifndef IFNET_BUF_RING 4241 if (i > 0) 4242 continue; 4243#endif 4244 4245 bytes = sizeof (*ss->fw_stats); 4246 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4247 sizeof (*ss->fw_stats), 64); 4248 if (err != 0) 4249 goto abort; 4250 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4251 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4252 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4253 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4254#ifdef IFNET_BUF_RING 4255 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4256 &ss->tx.mtx); 4257#endif 4258 } 4259 4260 return (0); 4261 4262abort: 4263 mxge_free_slices(sc); 4264 return (ENOMEM); 4265} 4266 4267static void 4268mxge_slice_probe(mxge_softc_t *sc) 4269{ 4270 mxge_cmd_t cmd; 4271 char *old_fw; 4272 int msix_cnt, status, max_intr_slots; 4273 4274 sc->num_slices = 1; 4275 /* 4276 * don't enable multiple slices if they are not enabled, 4277 * or if this is not an SMP system 4278 */ 4279 4280 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4281 return; 4282 4283 /* see how many MSI-X interrupts are available */ 4284 msix_cnt = pci_msix_count(sc->dev); 4285 if (msix_cnt < 2) 4286 return; 4287 4288 /* now load the slice aware firmware see what it supports */ 4289 old_fw = sc->fw_name; 4290 if (old_fw == mxge_fw_aligned) 4291 sc->fw_name = mxge_fw_rss_aligned; 4292 else 4293 sc->fw_name = mxge_fw_rss_unaligned; 4294 status = mxge_load_firmware(sc, 0); 4295 if (status != 0) { 4296 device_printf(sc->dev, "Falling back to a single slice\n"); 4297 return; 4298 } 4299 4300 /* try to send a reset command to the card to see if it 4301 is alive */ 4302 memset(&cmd, 0, sizeof (cmd)); 4303 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4304 if (status != 0) { 4305 device_printf(sc->dev, "failed reset\n"); 4306 goto abort_with_fw; 4307 } 4308 4309 /* get rx ring size */ 4310 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4311 if (status != 0) { 4312 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4313 goto abort_with_fw; 4314 } 4315 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4316 4317 /* tell it the size of the interrupt queues */ 4318 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4319 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4320 if (status != 0) { 4321 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4322 goto abort_with_fw; 4323 } 4324 4325 /* ask the maximum number of slices it supports */ 4326 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4327 if (status != 0) { 4328 device_printf(sc->dev, 4329 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4330 goto abort_with_fw; 4331 } 4332 sc->num_slices = cmd.data0; 4333 if (sc->num_slices > msix_cnt) 4334 sc->num_slices = msix_cnt; 4335 4336 if (mxge_max_slices == -1) { 4337 /* cap to number of CPUs in system */ 4338 if (sc->num_slices > mp_ncpus) 4339 sc->num_slices = mp_ncpus; 4340 } else { 4341 if (sc->num_slices > mxge_max_slices) 4342 sc->num_slices = mxge_max_slices; 4343 } 4344 /* make sure it is a power of two */ 4345 while (sc->num_slices & (sc->num_slices - 1)) 4346 sc->num_slices--; 4347 4348 if (mxge_verbose) 4349 device_printf(sc->dev, "using %d slices\n", 4350 sc->num_slices); 4351 4352 return; 4353 4354abort_with_fw: 4355 sc->fw_name = old_fw; 4356 (void) mxge_load_firmware(sc, 0); 4357} 4358 4359static int 4360mxge_add_msix_irqs(mxge_softc_t *sc) 4361{ 4362 size_t bytes; 4363 int count, err, i, rid; 4364 4365 rid = PCIR_BAR(2); 4366 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4367 &rid, RF_ACTIVE); 4368 4369 if (sc->msix_table_res == NULL) { 4370 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4371 return ENXIO; 4372 } 4373 4374 count = sc->num_slices; 4375 err = pci_alloc_msix(sc->dev, &count); 4376 if (err != 0) { 4377 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4378 "err = %d \n", sc->num_slices, err); 4379 goto abort_with_msix_table; 4380 } 4381 if (count < sc->num_slices) { 4382 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4383 count, sc->num_slices); 4384 device_printf(sc->dev, 4385 "Try setting hw.mxge.max_slices to %d\n", 4386 count); 4387 err = ENOSPC; 4388 goto abort_with_msix; 4389 } 4390 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4391 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4392 if (sc->msix_irq_res == NULL) { 4393 err = ENOMEM; 4394 goto abort_with_msix; 4395 } 4396 4397 for (i = 0; i < sc->num_slices; i++) { 4398 rid = i + 1; 4399 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4400 SYS_RES_IRQ, 4401 &rid, RF_ACTIVE); 4402 if (sc->msix_irq_res[i] == NULL) { 4403 device_printf(sc->dev, "couldn't allocate IRQ res" 4404 " for message %d\n", i); 4405 err = ENXIO; 4406 goto abort_with_res; 4407 } 4408 } 4409 4410 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4411 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4412 4413 for (i = 0; i < sc->num_slices; i++) { 4414 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4415 INTR_TYPE_NET | INTR_MPSAFE, 4416#if __FreeBSD_version > 700030 4417 NULL, 4418#endif 4419 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4420 if (err != 0) { 4421 device_printf(sc->dev, "couldn't setup intr for " 4422 "message %d\n", i); 4423 goto abort_with_intr; 4424 } 4425 } 4426 4427 if (mxge_verbose) { 4428 device_printf(sc->dev, "using %d msix IRQs:", 4429 sc->num_slices); 4430 for (i = 0; i < sc->num_slices; i++) 4431 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4432 printf("\n"); 4433 } 4434 return (0); 4435 4436abort_with_intr: 4437 for (i = 0; i < sc->num_slices; i++) { 4438 if (sc->msix_ih[i] != NULL) { 4439 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4440 sc->msix_ih[i]); 4441 sc->msix_ih[i] = NULL; 4442 } 4443 } 4444 free(sc->msix_ih, M_DEVBUF); 4445 4446 4447abort_with_res: 4448 for (i = 0; i < sc->num_slices; i++) { 4449 rid = i + 1; 4450 if (sc->msix_irq_res[i] != NULL) 4451 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4452 sc->msix_irq_res[i]); 4453 sc->msix_irq_res[i] = NULL; 4454 } 4455 free(sc->msix_irq_res, M_DEVBUF); 4456 4457 4458abort_with_msix: 4459 pci_release_msi(sc->dev); 4460 4461abort_with_msix_table: 4462 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4463 sc->msix_table_res); 4464 4465 return err; 4466} 4467 4468static int 4469mxge_add_single_irq(mxge_softc_t *sc) 4470{ 4471 int count, err, rid; 4472 4473 count = pci_msi_count(sc->dev); 4474 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4475 rid = 1; 4476 } else { 4477 rid = 0; 4478 sc->legacy_irq = 1; 4479 } 4480 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4481 1, RF_SHAREABLE | RF_ACTIVE); 4482 if (sc->irq_res == NULL) { 4483 device_printf(sc->dev, "could not alloc interrupt\n"); 4484 return ENXIO; 4485 } 4486 if (mxge_verbose) 4487 device_printf(sc->dev, "using %s irq %ld\n", 4488 sc->legacy_irq ? "INTx" : "MSI", 4489 rman_get_start(sc->irq_res)); 4490 err = bus_setup_intr(sc->dev, sc->irq_res, 4491 INTR_TYPE_NET | INTR_MPSAFE, 4492#if __FreeBSD_version > 700030 4493 NULL, 4494#endif 4495 mxge_intr, &sc->ss[0], &sc->ih); 4496 if (err != 0) { 4497 bus_release_resource(sc->dev, SYS_RES_IRQ, 4498 sc->legacy_irq ? 0 : 1, sc->irq_res); 4499 if (!sc->legacy_irq) 4500 pci_release_msi(sc->dev); 4501 } 4502 return err; 4503} 4504 4505static void 4506mxge_rem_msix_irqs(mxge_softc_t *sc) 4507{ 4508 int i, rid; 4509 4510 for (i = 0; i < sc->num_slices; i++) { 4511 if (sc->msix_ih[i] != NULL) { 4512 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4513 sc->msix_ih[i]); 4514 sc->msix_ih[i] = NULL; 4515 } 4516 } 4517 free(sc->msix_ih, M_DEVBUF); 4518 4519 for (i = 0; i < sc->num_slices; i++) { 4520 rid = i + 1; 4521 if (sc->msix_irq_res[i] != NULL) 4522 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4523 sc->msix_irq_res[i]); 4524 sc->msix_irq_res[i] = NULL; 4525 } 4526 free(sc->msix_irq_res, M_DEVBUF); 4527 4528 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4529 sc->msix_table_res); 4530 4531 pci_release_msi(sc->dev); 4532 return; 4533} 4534 4535static void 4536mxge_rem_single_irq(mxge_softc_t *sc) 4537{ 4538 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4539 bus_release_resource(sc->dev, SYS_RES_IRQ, 4540 sc->legacy_irq ? 0 : 1, sc->irq_res); 4541 if (!sc->legacy_irq) 4542 pci_release_msi(sc->dev); 4543} 4544 4545static void 4546mxge_rem_irq(mxge_softc_t *sc) 4547{ 4548 if (sc->num_slices > 1) 4549 mxge_rem_msix_irqs(sc); 4550 else 4551 mxge_rem_single_irq(sc); 4552} 4553 4554static int 4555mxge_add_irq(mxge_softc_t *sc) 4556{ 4557 int err; 4558 4559 if (sc->num_slices > 1) 4560 err = mxge_add_msix_irqs(sc); 4561 else 4562 err = mxge_add_single_irq(sc); 4563 4564 if (0 && err == 0 && sc->num_slices > 1) { 4565 mxge_rem_msix_irqs(sc); 4566 err = mxge_add_msix_irqs(sc); 4567 } 4568 return err; 4569} 4570 4571 4572static int 4573mxge_attach(device_t dev) 4574{ 4575 mxge_softc_t *sc = device_get_softc(dev); 4576 struct ifnet *ifp; 4577 int err, rid; 4578 4579 sc->dev = dev; 4580 mxge_fetch_tunables(sc); 4581 4582 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4583 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK, 4584 taskqueue_thread_enqueue, 4585 &sc->tq); 4586 if (sc->tq == NULL) { 4587 err = ENOMEM; 4588 goto abort_with_nothing; 4589 } 4590 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4591 device_get_nameunit(sc->dev)); 4592 4593 err = bus_dma_tag_create(NULL, /* parent */ 4594 1, /* alignment */ 4595 0, /* boundary */ 4596 BUS_SPACE_MAXADDR, /* low */ 4597 BUS_SPACE_MAXADDR, /* high */ 4598 NULL, NULL, /* filter */ 4599 65536 + 256, /* maxsize */ 4600 MXGE_MAX_SEND_DESC, /* num segs */ 4601 65536, /* maxsegsize */ 4602 0, /* flags */ 4603 NULL, NULL, /* lock */ 4604 &sc->parent_dmat); /* tag */ 4605 4606 if (err != 0) { 4607 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4608 err); 4609 goto abort_with_tq; 4610 } 4611 4612 ifp = sc->ifp = if_alloc(IFT_ETHER); 4613 if (ifp == NULL) { 4614 device_printf(dev, "can not if_alloc()\n"); 4615 err = ENOSPC; 4616 goto abort_with_parent_dmat; 4617 } 4618 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4619 4620 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4621 device_get_nameunit(dev)); 4622 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4623 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4624 "%s:drv", device_get_nameunit(dev)); 4625 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4626 MTX_NETWORK_LOCK, MTX_DEF); 4627 4628 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4629 4630 mxge_setup_cfg_space(sc); 4631 4632 /* Map the board into the kernel */ 4633 rid = PCIR_BARS; 4634 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4635 ~0, 1, RF_ACTIVE); 4636 if (sc->mem_res == NULL) { 4637 device_printf(dev, "could not map memory\n"); 4638 err = ENXIO; 4639 goto abort_with_lock; 4640 } 4641 sc->sram = rman_get_virtual(sc->mem_res); 4642 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4643 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4644 device_printf(dev, "impossible memory region size %ld\n", 4645 rman_get_size(sc->mem_res)); 4646 err = ENXIO; 4647 goto abort_with_mem_res; 4648 } 4649 4650 /* make NULL terminated copy of the EEPROM strings section of 4651 lanai SRAM */ 4652 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4653 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4654 rman_get_bushandle(sc->mem_res), 4655 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4656 sc->eeprom_strings, 4657 MXGE_EEPROM_STRINGS_SIZE - 2); 4658 err = mxge_parse_strings(sc); 4659 if (err != 0) 4660 goto abort_with_mem_res; 4661 4662 /* Enable write combining for efficient use of PCIe bus */ 4663 mxge_enable_wc(sc); 4664 4665 /* Allocate the out of band dma memory */ 4666 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4667 sizeof (mxge_cmd_t), 64); 4668 if (err != 0) 4669 goto abort_with_mem_res; 4670 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4671 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4672 if (err != 0) 4673 goto abort_with_cmd_dma; 4674 4675 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4676 if (err != 0) 4677 goto abort_with_zeropad_dma; 4678 4679 /* select & load the firmware */ 4680 err = mxge_select_firmware(sc); 4681 if (err != 0) 4682 goto abort_with_dmabench; 4683 sc->intr_coal_delay = mxge_intr_coal_delay; 4684 4685 mxge_slice_probe(sc); 4686 err = mxge_alloc_slices(sc); 4687 if (err != 0) 4688 goto abort_with_dmabench; 4689 4690 err = mxge_reset(sc, 0); 4691 if (err != 0) 4692 goto abort_with_slices; 4693 4694 err = mxge_alloc_rings(sc); 4695 if (err != 0) { 4696 device_printf(sc->dev, "failed to allocate rings\n"); 4697 goto abort_with_dmabench; 4698 } 4699 4700 err = mxge_add_irq(sc); 4701 if (err != 0) { 4702 device_printf(sc->dev, "failed to add irq\n"); 4703 goto abort_with_rings; 4704 } 4705 4706 ifp->if_baudrate = IF_Gbps(10UL); 4707 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4708 IFCAP_VLAN_MTU; 4709#ifdef INET 4710 ifp->if_capabilities |= IFCAP_LRO; 4711#endif 4712 4713#ifdef MXGE_NEW_VLAN_API 4714 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4715#endif 4716 4717 sc->max_mtu = mxge_max_mtu(sc); 4718 if (sc->max_mtu >= 9000) 4719 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4720 else 4721 device_printf(dev, "MTU limited to %d. Install " 4722 "latest firmware for 9000 byte jumbo support\n", 4723 sc->max_mtu - ETHER_HDR_LEN); 4724 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4725 ifp->if_capenable = ifp->if_capabilities; 4726 if (sc->lro_cnt == 0) 4727 ifp->if_capenable &= ~IFCAP_LRO; 4728 sc->csum_flag = 1; 4729 ifp->if_init = mxge_init; 4730 ifp->if_softc = sc; 4731 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4732 ifp->if_ioctl = mxge_ioctl; 4733 ifp->if_start = mxge_start; 4734 /* Initialise the ifmedia structure */ 4735 ifmedia_init(&sc->media, 0, mxge_media_change, 4736 mxge_media_status); 4737 mxge_set_media(sc, IFM_ETHER | IFM_AUTO); 4738 mxge_media_probe(sc); 4739 sc->dying = 0; 4740 ether_ifattach(ifp, sc->mac_addr); 4741 /* ether_ifattach sets mtu to ETHERMTU */ 4742 if (mxge_initial_mtu != ETHERMTU) 4743 mxge_change_mtu(sc, mxge_initial_mtu); 4744 4745 mxge_add_sysctls(sc); 4746#ifdef IFNET_BUF_RING 4747 ifp->if_transmit = mxge_transmit; 4748 ifp->if_qflush = mxge_qflush; 4749#endif 4750 return 0; 4751 4752abort_with_rings: 4753 mxge_free_rings(sc); 4754abort_with_slices: 4755 mxge_free_slices(sc); 4756abort_with_dmabench: 4757 mxge_dma_free(&sc->dmabench_dma); 4758abort_with_zeropad_dma: 4759 mxge_dma_free(&sc->zeropad_dma); 4760abort_with_cmd_dma: 4761 mxge_dma_free(&sc->cmd_dma); 4762abort_with_mem_res: 4763 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4764abort_with_lock: 4765 pci_disable_busmaster(dev); 4766 mtx_destroy(&sc->cmd_mtx); 4767 mtx_destroy(&sc->driver_mtx); 4768 if_free(ifp); 4769abort_with_parent_dmat: 4770 bus_dma_tag_destroy(sc->parent_dmat); 4771abort_with_tq: 4772 if (sc->tq != NULL) { 4773 taskqueue_drain(sc->tq, &sc->watchdog_task); 4774 taskqueue_free(sc->tq); 4775 sc->tq = NULL; 4776 } 4777abort_with_nothing: 4778 return err; 4779} 4780 4781static int 4782mxge_detach(device_t dev) 4783{ 4784 mxge_softc_t *sc = device_get_softc(dev); 4785 4786 if (mxge_vlans_active(sc)) { 4787 device_printf(sc->dev, 4788 "Detach vlans before removing module\n"); 4789 return EBUSY; 4790 } 4791 mtx_lock(&sc->driver_mtx); 4792 sc->dying = 1; 4793 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4794 mxge_close(sc, 0); 4795 mtx_unlock(&sc->driver_mtx); 4796 ether_ifdetach(sc->ifp); 4797 if (sc->tq != NULL) { 4798 taskqueue_drain(sc->tq, &sc->watchdog_task); 4799 taskqueue_free(sc->tq); 4800 sc->tq = NULL; 4801 } 4802 callout_drain(&sc->co_hdl); 4803 ifmedia_removeall(&sc->media); 4804 mxge_dummy_rdma(sc, 0); 4805 mxge_rem_sysctls(sc); 4806 mxge_rem_irq(sc); 4807 mxge_free_rings(sc); 4808 mxge_free_slices(sc); 4809 mxge_dma_free(&sc->dmabench_dma); 4810 mxge_dma_free(&sc->zeropad_dma); 4811 mxge_dma_free(&sc->cmd_dma); 4812 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4813 pci_disable_busmaster(dev); 4814 mtx_destroy(&sc->cmd_mtx); 4815 mtx_destroy(&sc->driver_mtx); 4816 if_free(sc->ifp); 4817 bus_dma_tag_destroy(sc->parent_dmat); 4818 return 0; 4819} 4820 4821static int 4822mxge_shutdown(device_t dev) 4823{ 4824 return 0; 4825} 4826 4827/* 4828 This file uses Myri10GE driver indentation. 4829 4830 Local Variables: 4831 c-file-style:"linux" 4832 tab-width:8 4833 End: 4834*/ 4835