if_mxge.c revision 207761
1/****************************************************************************** 2 3Copyright (c) 2006-2009, Myricom Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 207761 2010-05-07 22:09:17Z fabient $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50/* count xmits ourselves, rather than via drbr */ 51#define NO_SLOW_STATS 52#include <net/if.h> 53#include <net/if_arp.h> 54#include <net/ethernet.h> 55#include <net/if_dl.h> 56#include <net/if_media.h> 57 58#include <net/bpf.h> 59 60#include <net/if_types.h> 61#include <net/if_vlan_var.h> 62#include <net/zlib.h> 63 64#include <netinet/in_systm.h> 65#include <netinet/in.h> 66#include <netinet/ip.h> 67#include <netinet/tcp.h> 68 69#include <machine/bus.h> 70#include <machine/in_cksum.h> 71#include <machine/resource.h> 72#include <sys/bus.h> 73#include <sys/rman.h> 74#include <sys/smp.h> 75 76#include <dev/pci/pcireg.h> 77#include <dev/pci/pcivar.h> 78#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 79 80#include <vm/vm.h> /* for pmap_mapdev() */ 81#include <vm/pmap.h> 82 83#if defined(__i386) || defined(__amd64) 84#include <machine/specialreg.h> 85#endif 86 87#include <dev/mxge/mxge_mcp.h> 88#include <dev/mxge/mcp_gen_header.h> 89/*#define MXGE_FAKE_IFP*/ 90#include <dev/mxge/if_mxge_var.h> 91#ifdef IFNET_BUF_RING 92#include <sys/buf_ring.h> 93#endif 94 95#include "opt_inet.h" 96 97/* tunable params */ 98static int mxge_nvidia_ecrc_enable = 1; 99static int mxge_force_firmware = 0; 100static int mxge_intr_coal_delay = 30; 101static int mxge_deassert_wait = 1; 102static int mxge_flow_control = 1; 103static int mxge_verbose = 0; 104static int mxge_lro_cnt = 8; 105static int mxge_ticks; 106static int mxge_max_slices = 1; 107static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 108static int mxge_always_promisc = 0; 109static int mxge_initial_mtu = ETHERMTU_JUMBO; 110static int mxge_throttle = 0; 111static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 112static char *mxge_fw_aligned = "mxge_eth_z8e"; 113static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 114static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 115 116static int mxge_probe(device_t dev); 117static int mxge_attach(device_t dev); 118static int mxge_detach(device_t dev); 119static int mxge_shutdown(device_t dev); 120static void mxge_intr(void *arg); 121 122static device_method_t mxge_methods[] = 123{ 124 /* Device interface */ 125 DEVMETHOD(device_probe, mxge_probe), 126 DEVMETHOD(device_attach, mxge_attach), 127 DEVMETHOD(device_detach, mxge_detach), 128 DEVMETHOD(device_shutdown, mxge_shutdown), 129 {0, 0} 130}; 131 132static driver_t mxge_driver = 133{ 134 "mxge", 135 mxge_methods, 136 sizeof(mxge_softc_t), 137}; 138 139static devclass_t mxge_devclass; 140 141/* Declare ourselves to be a child of the PCI bus.*/ 142DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 143MODULE_DEPEND(mxge, firmware, 1, 1, 1); 144MODULE_DEPEND(mxge, zlib, 1, 1, 1); 145 146static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 147static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 148static int mxge_close(mxge_softc_t *sc, int down); 149static int mxge_open(mxge_softc_t *sc); 150static void mxge_tick(void *arg); 151 152static int 153mxge_probe(device_t dev) 154{ 155 int rev; 156 157 158 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 159 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 160 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 161 rev = pci_get_revid(dev); 162 switch (rev) { 163 case MXGE_PCI_REV_Z8E: 164 device_set_desc(dev, "Myri10G-PCIE-8A"); 165 break; 166 case MXGE_PCI_REV_Z8ES: 167 device_set_desc(dev, "Myri10G-PCIE-8B"); 168 break; 169 default: 170 device_set_desc(dev, "Myri10G-PCIE-8??"); 171 device_printf(dev, "Unrecognized rev %d NIC\n", 172 rev); 173 break; 174 } 175 return 0; 176 } 177 return ENXIO; 178} 179 180static void 181mxge_enable_wc(mxge_softc_t *sc) 182{ 183#if defined(__i386) || defined(__amd64) 184 vm_offset_t len; 185 int err; 186 187 sc->wc = 1; 188 len = rman_get_size(sc->mem_res); 189 err = pmap_change_attr((vm_offset_t) sc->sram, 190 len, PAT_WRITE_COMBINING); 191 if (err != 0) { 192 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 193 err); 194 sc->wc = 0; 195 } 196#endif 197} 198 199 200/* callback to get our DMA address */ 201static void 202mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 203 int error) 204{ 205 if (error == 0) { 206 *(bus_addr_t *) arg = segs->ds_addr; 207 } 208} 209 210static int 211mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 212 bus_size_t alignment) 213{ 214 int err; 215 device_t dev = sc->dev; 216 bus_size_t boundary, maxsegsize; 217 218 if (bytes > 4096 && alignment == 4096) { 219 boundary = 0; 220 maxsegsize = bytes; 221 } else { 222 boundary = 4096; 223 maxsegsize = 4096; 224 } 225 226 /* allocate DMAable memory tags */ 227 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 228 alignment, /* alignment */ 229 boundary, /* boundary */ 230 BUS_SPACE_MAXADDR, /* low */ 231 BUS_SPACE_MAXADDR, /* high */ 232 NULL, NULL, /* filter */ 233 bytes, /* maxsize */ 234 1, /* num segs */ 235 maxsegsize, /* maxsegsize */ 236 BUS_DMA_COHERENT, /* flags */ 237 NULL, NULL, /* lock */ 238 &dma->dmat); /* tag */ 239 if (err != 0) { 240 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 241 return err; 242 } 243 244 /* allocate DMAable memory & map */ 245 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 246 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 247 | BUS_DMA_ZERO), &dma->map); 248 if (err != 0) { 249 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 250 goto abort_with_dmat; 251 } 252 253 /* load the memory */ 254 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 255 mxge_dmamap_callback, 256 (void *)&dma->bus_addr, 0); 257 if (err != 0) { 258 device_printf(dev, "couldn't load map (err = %d)\n", err); 259 goto abort_with_mem; 260 } 261 return 0; 262 263abort_with_mem: 264 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 265abort_with_dmat: 266 (void)bus_dma_tag_destroy(dma->dmat); 267 return err; 268} 269 270 271static void 272mxge_dma_free(mxge_dma_t *dma) 273{ 274 bus_dmamap_unload(dma->dmat, dma->map); 275 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 276 (void)bus_dma_tag_destroy(dma->dmat); 277} 278 279/* 280 * The eeprom strings on the lanaiX have the format 281 * SN=x\0 282 * MAC=x:x:x:x:x:x\0 283 * PC=text\0 284 */ 285 286static int 287mxge_parse_strings(mxge_softc_t *sc) 288{ 289#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 290 291 char *ptr, *limit; 292 int i, found_mac; 293 294 ptr = sc->eeprom_strings; 295 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 296 found_mac = 0; 297 while (ptr < limit && *ptr != '\0') { 298 if (memcmp(ptr, "MAC=", 4) == 0) { 299 ptr += 1; 300 sc->mac_addr_string = ptr; 301 for (i = 0; i < 6; i++) { 302 ptr += 3; 303 if ((ptr + 2) > limit) 304 goto abort; 305 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 306 found_mac = 1; 307 } 308 } else if (memcmp(ptr, "PC=", 3) == 0) { 309 ptr += 3; 310 strncpy(sc->product_code_string, ptr, 311 sizeof (sc->product_code_string) - 1); 312 } else if (memcmp(ptr, "SN=", 3) == 0) { 313 ptr += 3; 314 strncpy(sc->serial_number_string, ptr, 315 sizeof (sc->serial_number_string) - 1); 316 } 317 MXGE_NEXT_STRING(ptr); 318 } 319 320 if (found_mac) 321 return 0; 322 323 abort: 324 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 325 326 return ENXIO; 327} 328 329#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 330static void 331mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 332{ 333 uint32_t val; 334 unsigned long base, off; 335 char *va, *cfgptr; 336 device_t pdev, mcp55; 337 uint16_t vendor_id, device_id, word; 338 uintptr_t bus, slot, func, ivend, idev; 339 uint32_t *ptr32; 340 341 342 if (!mxge_nvidia_ecrc_enable) 343 return; 344 345 pdev = device_get_parent(device_get_parent(sc->dev)); 346 if (pdev == NULL) { 347 device_printf(sc->dev, "could not find parent?\n"); 348 return; 349 } 350 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 351 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 352 353 if (vendor_id != 0x10de) 354 return; 355 356 base = 0; 357 358 if (device_id == 0x005d) { 359 /* ck804, base address is magic */ 360 base = 0xe0000000UL; 361 } else if (device_id >= 0x0374 && device_id <= 0x378) { 362 /* mcp55, base address stored in chipset */ 363 mcp55 = pci_find_bsf(0, 0, 0); 364 if (mcp55 && 365 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 366 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 367 word = pci_read_config(mcp55, 0x90, 2); 368 base = ((unsigned long)word & 0x7ffeU) << 25; 369 } 370 } 371 if (!base) 372 return; 373 374 /* XXXX 375 Test below is commented because it is believed that doing 376 config read/write beyond 0xff will access the config space 377 for the next larger function. Uncomment this and remove 378 the hacky pmap_mapdev() way of accessing config space when 379 FreeBSD grows support for extended pcie config space access 380 */ 381#if 0 382 /* See if we can, by some miracle, access the extended 383 config space */ 384 val = pci_read_config(pdev, 0x178, 4); 385 if (val != 0xffffffff) { 386 val |= 0x40; 387 pci_write_config(pdev, 0x178, val, 4); 388 return; 389 } 390#endif 391 /* Rather than using normal pci config space writes, we must 392 * map the Nvidia config space ourselves. This is because on 393 * opteron/nvidia class machine the 0xe000000 mapping is 394 * handled by the nvidia chipset, that means the internal PCI 395 * device (the on-chip northbridge), or the amd-8131 bridge 396 * and things behind them are not visible by this method. 397 */ 398 399 BUS_READ_IVAR(device_get_parent(pdev), pdev, 400 PCI_IVAR_BUS, &bus); 401 BUS_READ_IVAR(device_get_parent(pdev), pdev, 402 PCI_IVAR_SLOT, &slot); 403 BUS_READ_IVAR(device_get_parent(pdev), pdev, 404 PCI_IVAR_FUNCTION, &func); 405 BUS_READ_IVAR(device_get_parent(pdev), pdev, 406 PCI_IVAR_VENDOR, &ivend); 407 BUS_READ_IVAR(device_get_parent(pdev), pdev, 408 PCI_IVAR_DEVICE, &idev); 409 410 off = base 411 + 0x00100000UL * (unsigned long)bus 412 + 0x00001000UL * (unsigned long)(func 413 + 8 * slot); 414 415 /* map it into the kernel */ 416 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 417 418 419 if (va == NULL) { 420 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 421 return; 422 } 423 /* get a pointer to the config space mapped into the kernel */ 424 cfgptr = va + (off & PAGE_MASK); 425 426 /* make sure that we can really access it */ 427 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 428 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 429 if (! (vendor_id == ivend && device_id == idev)) { 430 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 431 vendor_id, device_id); 432 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 433 return; 434 } 435 436 ptr32 = (uint32_t*)(cfgptr + 0x178); 437 val = *ptr32; 438 439 if (val == 0xffffffff) { 440 device_printf(sc->dev, "extended mapping failed\n"); 441 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 442 return; 443 } 444 *ptr32 = val | 0x40; 445 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 446 if (mxge_verbose) 447 device_printf(sc->dev, 448 "Enabled ECRC on upstream Nvidia bridge " 449 "at %d:%d:%d\n", 450 (int)bus, (int)slot, (int)func); 451 return; 452} 453#else 454static void 455mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 456{ 457 device_printf(sc->dev, 458 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 459 return; 460} 461#endif 462 463 464static int 465mxge_dma_test(mxge_softc_t *sc, int test_type) 466{ 467 mxge_cmd_t cmd; 468 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 469 int status; 470 uint32_t len; 471 char *test = " "; 472 473 474 /* Run a small DMA test. 475 * The magic multipliers to the length tell the firmware 476 * to do DMA read, write, or read+write tests. The 477 * results are returned in cmd.data0. The upper 16 478 * bits of the return is the number of transfers completed. 479 * The lower 16 bits is the time in 0.5us ticks that the 480 * transfers took to complete. 481 */ 482 483 len = sc->tx_boundary; 484 485 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 486 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 487 cmd.data2 = len * 0x10000; 488 status = mxge_send_cmd(sc, test_type, &cmd); 489 if (status != 0) { 490 test = "read"; 491 goto abort; 492 } 493 sc->read_dma = ((cmd.data0>>16) * len * 2) / 494 (cmd.data0 & 0xffff); 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x1; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "write"; 501 goto abort; 502 } 503 sc->write_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 506 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 507 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 508 cmd.data2 = len * 0x10001; 509 status = mxge_send_cmd(sc, test_type, &cmd); 510 if (status != 0) { 511 test = "read/write"; 512 goto abort; 513 } 514 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 515 (cmd.data0 & 0xffff); 516 517abort: 518 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 519 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 520 test, status); 521 522 return status; 523} 524 525/* 526 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 527 * when the PCI-E Completion packets are aligned on an 8-byte 528 * boundary. Some PCI-E chip sets always align Completion packets; on 529 * the ones that do not, the alignment can be enforced by enabling 530 * ECRC generation (if supported). 531 * 532 * When PCI-E Completion packets are not aligned, it is actually more 533 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 534 * 535 * If the driver can neither enable ECRC nor verify that it has 536 * already been enabled, then it must use a firmware image which works 537 * around unaligned completion packets (ethp_z8e.dat), and it should 538 * also ensure that it never gives the device a Read-DMA which is 539 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 540 * enabled, then the driver should use the aligned (eth_z8e.dat) 541 * firmware image, and set tx_boundary to 4KB. 542 */ 543 544static int 545mxge_firmware_probe(mxge_softc_t *sc) 546{ 547 device_t dev = sc->dev; 548 int reg, status; 549 uint16_t pectl; 550 551 sc->tx_boundary = 4096; 552 /* 553 * Verify the max read request size was set to 4KB 554 * before trying the test with 4KB. 555 */ 556 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 557 pectl = pci_read_config(dev, reg + 0x8, 2); 558 if ((pectl & (5 << 12)) != (5 << 12)) { 559 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 560 pectl); 561 sc->tx_boundary = 2048; 562 } 563 } 564 565 /* 566 * load the optimized firmware (which assumes aligned PCIe 567 * completions) in order to see if it works on this host. 568 */ 569 sc->fw_name = mxge_fw_aligned; 570 status = mxge_load_firmware(sc, 1); 571 if (status != 0) { 572 return status; 573 } 574 575 /* 576 * Enable ECRC if possible 577 */ 578 mxge_enable_nvidia_ecrc(sc); 579 580 /* 581 * Run a DMA test which watches for unaligned completions and 582 * aborts on the first one seen. 583 */ 584 585 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 586 if (status == 0) 587 return 0; /* keep the aligned firmware */ 588 589 if (status != E2BIG) 590 device_printf(dev, "DMA test failed: %d\n", status); 591 if (status == ENOSYS) 592 device_printf(dev, "Falling back to ethp! " 593 "Please install up to date fw\n"); 594 return status; 595} 596 597static int 598mxge_select_firmware(mxge_softc_t *sc) 599{ 600 int aligned = 0; 601 int force_firmware = mxge_force_firmware; 602 603 if (sc->throttle) 604 force_firmware = sc->throttle; 605 606 if (force_firmware != 0) { 607 if (force_firmware == 1) 608 aligned = 1; 609 else 610 aligned = 0; 611 if (mxge_verbose) 612 device_printf(sc->dev, 613 "Assuming %s completions (forced)\n", 614 aligned ? "aligned" : "unaligned"); 615 goto abort; 616 } 617 618 /* if the PCIe link width is 4 or less, we can use the aligned 619 firmware and skip any checks */ 620 if (sc->link_width != 0 && sc->link_width <= 4) { 621 device_printf(sc->dev, 622 "PCIe x%d Link, expect reduced performance\n", 623 sc->link_width); 624 aligned = 1; 625 goto abort; 626 } 627 628 if (0 == mxge_firmware_probe(sc)) 629 return 0; 630 631abort: 632 if (aligned) { 633 sc->fw_name = mxge_fw_aligned; 634 sc->tx_boundary = 4096; 635 } else { 636 sc->fw_name = mxge_fw_unaligned; 637 sc->tx_boundary = 2048; 638 } 639 return (mxge_load_firmware(sc, 0)); 640} 641 642union qualhack 643{ 644 const char *ro_char; 645 char *rw_char; 646}; 647 648static int 649mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 650{ 651 652 653 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 654 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 655 be32toh(hdr->mcp_type)); 656 return EIO; 657 } 658 659 /* save firmware version for sysctl */ 660 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 661 if (mxge_verbose) 662 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 663 664 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 665 &sc->fw_ver_minor, &sc->fw_ver_tiny); 666 667 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 668 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 669 device_printf(sc->dev, "Found firmware version %s\n", 670 sc->fw_version); 671 device_printf(sc->dev, "Driver needs %d.%d\n", 672 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 673 return EINVAL; 674 } 675 return 0; 676 677} 678 679static void * 680z_alloc(void *nil, u_int items, u_int size) 681{ 682 void *ptr; 683 684 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 685 return ptr; 686} 687 688static void 689z_free(void *nil, void *ptr) 690{ 691 free(ptr, M_TEMP); 692} 693 694 695static int 696mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 697{ 698 z_stream zs; 699 char *inflate_buffer; 700 const struct firmware *fw; 701 const mcp_gen_header_t *hdr; 702 unsigned hdr_offset; 703 int status; 704 unsigned int i; 705 char dummy; 706 size_t fw_len; 707 708 fw = firmware_get(sc->fw_name); 709 if (fw == NULL) { 710 device_printf(sc->dev, "Could not find firmware image %s\n", 711 sc->fw_name); 712 return ENOENT; 713 } 714 715 716 717 /* setup zlib and decompress f/w */ 718 bzero(&zs, sizeof (zs)); 719 zs.zalloc = z_alloc; 720 zs.zfree = z_free; 721 status = inflateInit(&zs); 722 if (status != Z_OK) { 723 status = EIO; 724 goto abort_with_fw; 725 } 726 727 /* the uncompressed size is stored as the firmware version, 728 which would otherwise go unused */ 729 fw_len = (size_t) fw->version; 730 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 731 if (inflate_buffer == NULL) 732 goto abort_with_zs; 733 zs.avail_in = fw->datasize; 734 zs.next_in = __DECONST(char *, fw->data); 735 zs.avail_out = fw_len; 736 zs.next_out = inflate_buffer; 737 status = inflate(&zs, Z_FINISH); 738 if (status != Z_STREAM_END) { 739 device_printf(sc->dev, "zlib %d\n", status); 740 status = EIO; 741 goto abort_with_buffer; 742 } 743 744 /* check id */ 745 hdr_offset = htobe32(*(const uint32_t *) 746 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 747 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 748 device_printf(sc->dev, "Bad firmware file"); 749 status = EIO; 750 goto abort_with_buffer; 751 } 752 hdr = (const void*)(inflate_buffer + hdr_offset); 753 754 status = mxge_validate_firmware(sc, hdr); 755 if (status != 0) 756 goto abort_with_buffer; 757 758 /* Copy the inflated firmware to NIC SRAM. */ 759 for (i = 0; i < fw_len; i += 256) { 760 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 761 inflate_buffer + i, 762 min(256U, (unsigned)(fw_len - i))); 763 wmb(); 764 dummy = *sc->sram; 765 wmb(); 766 } 767 768 *limit = fw_len; 769 status = 0; 770abort_with_buffer: 771 free(inflate_buffer, M_TEMP); 772abort_with_zs: 773 inflateEnd(&zs); 774abort_with_fw: 775 firmware_put(fw, FIRMWARE_UNLOAD); 776 return status; 777} 778 779/* 780 * Enable or disable periodic RDMAs from the host to make certain 781 * chipsets resend dropped PCIe messages 782 */ 783 784static void 785mxge_dummy_rdma(mxge_softc_t *sc, int enable) 786{ 787 char buf_bytes[72]; 788 volatile uint32_t *confirm; 789 volatile char *submit; 790 uint32_t *buf, dma_low, dma_high; 791 int i; 792 793 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 794 795 /* clear confirmation addr */ 796 confirm = (volatile uint32_t *)sc->cmd; 797 *confirm = 0; 798 wmb(); 799 800 /* send an rdma command to the PCIe engine, and wait for the 801 response in the confirmation address. The firmware should 802 write a -1 there to indicate it is alive and well 803 */ 804 805 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 806 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 807 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 808 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 809 buf[2] = htobe32(0xffffffff); /* confirm data */ 810 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 812 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 813 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 814 buf[5] = htobe32(enable); /* enable? */ 815 816 817 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 818 819 mxge_pio_copy(submit, buf, 64); 820 wmb(); 821 DELAY(1000); 822 wmb(); 823 i = 0; 824 while (*confirm != 0xffffffff && i < 20) { 825 DELAY(1000); 826 i++; 827 } 828 if (*confirm != 0xffffffff) { 829 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 830 (enable ? "enable" : "disable"), confirm, 831 *confirm); 832 } 833 return; 834} 835 836static int 837mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 838{ 839 mcp_cmd_t *buf; 840 char buf_bytes[sizeof(*buf) + 8]; 841 volatile mcp_cmd_response_t *response = sc->cmd; 842 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 843 uint32_t dma_low, dma_high; 844 int err, sleep_total = 0; 845 846 /* ensure buf is aligned to 8 bytes */ 847 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 848 849 buf->data0 = htobe32(data->data0); 850 buf->data1 = htobe32(data->data1); 851 buf->data2 = htobe32(data->data2); 852 buf->cmd = htobe32(cmd); 853 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 854 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 855 856 buf->response_addr.low = htobe32(dma_low); 857 buf->response_addr.high = htobe32(dma_high); 858 mtx_lock(&sc->cmd_mtx); 859 response->result = 0xffffffff; 860 wmb(); 861 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 862 863 /* wait up to 20ms */ 864 err = EAGAIN; 865 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 866 bus_dmamap_sync(sc->cmd_dma.dmat, 867 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 868 wmb(); 869 switch (be32toh(response->result)) { 870 case 0: 871 data->data0 = be32toh(response->data); 872 err = 0; 873 break; 874 case 0xffffffff: 875 DELAY(1000); 876 break; 877 case MXGEFW_CMD_UNKNOWN: 878 err = ENOSYS; 879 break; 880 case MXGEFW_CMD_ERROR_UNALIGNED: 881 err = E2BIG; 882 break; 883 case MXGEFW_CMD_ERROR_BUSY: 884 err = EBUSY; 885 break; 886 case MXGEFW_CMD_ERROR_I2C_ABSENT: 887 err = ENXIO; 888 break; 889 default: 890 device_printf(sc->dev, 891 "mxge: command %d " 892 "failed, result = %d\n", 893 cmd, be32toh(response->result)); 894 err = ENXIO; 895 break; 896 } 897 if (err != EAGAIN) 898 break; 899 } 900 if (err == EAGAIN) 901 device_printf(sc->dev, "mxge: command %d timed out" 902 "result = %d\n", 903 cmd, be32toh(response->result)); 904 mtx_unlock(&sc->cmd_mtx); 905 return err; 906} 907 908static int 909mxge_adopt_running_firmware(mxge_softc_t *sc) 910{ 911 struct mcp_gen_header *hdr; 912 const size_t bytes = sizeof (struct mcp_gen_header); 913 size_t hdr_offset; 914 int status; 915 916 /* find running firmware header */ 917 hdr_offset = htobe32(*(volatile uint32_t *) 918 (sc->sram + MCP_HEADER_PTR_OFFSET)); 919 920 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 921 device_printf(sc->dev, 922 "Running firmware has bad header offset (%d)\n", 923 (int)hdr_offset); 924 return EIO; 925 } 926 927 /* copy header of running firmware from SRAM to host memory to 928 * validate firmware */ 929 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 930 if (hdr == NULL) { 931 device_printf(sc->dev, "could not malloc firmware hdr\n"); 932 return ENOMEM; 933 } 934 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 935 rman_get_bushandle(sc->mem_res), 936 hdr_offset, (char *)hdr, bytes); 937 status = mxge_validate_firmware(sc, hdr); 938 free(hdr, M_DEVBUF); 939 940 /* 941 * check to see if adopted firmware has bug where adopting 942 * it will cause broadcasts to be filtered unless the NIC 943 * is kept in ALLMULTI mode 944 */ 945 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 946 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 947 sc->adopted_rx_filter_bug = 1; 948 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 949 "working around rx filter bug\n", 950 sc->fw_ver_major, sc->fw_ver_minor, 951 sc->fw_ver_tiny); 952 } 953 954 return status; 955} 956 957 958static int 959mxge_load_firmware(mxge_softc_t *sc, int adopt) 960{ 961 volatile uint32_t *confirm; 962 volatile char *submit; 963 char buf_bytes[72]; 964 uint32_t *buf, size, dma_low, dma_high; 965 int status, i; 966 967 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 968 969 size = sc->sram_size; 970 status = mxge_load_firmware_helper(sc, &size); 971 if (status) { 972 if (!adopt) 973 return status; 974 /* Try to use the currently running firmware, if 975 it is new enough */ 976 status = mxge_adopt_running_firmware(sc); 977 if (status) { 978 device_printf(sc->dev, 979 "failed to adopt running firmware\n"); 980 return status; 981 } 982 device_printf(sc->dev, 983 "Successfully adopted running firmware\n"); 984 if (sc->tx_boundary == 4096) { 985 device_printf(sc->dev, 986 "Using firmware currently running on NIC" 987 ". For optimal\n"); 988 device_printf(sc->dev, 989 "performance consider loading optimized " 990 "firmware\n"); 991 } 992 sc->fw_name = mxge_fw_unaligned; 993 sc->tx_boundary = 2048; 994 return 0; 995 } 996 /* clear confirmation addr */ 997 confirm = (volatile uint32_t *)sc->cmd; 998 *confirm = 0; 999 wmb(); 1000 /* send a reload command to the bootstrap MCP, and wait for the 1001 response in the confirmation address. The firmware should 1002 write a -1 there to indicate it is alive and well 1003 */ 1004 1005 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1006 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1007 1008 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1009 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1010 buf[2] = htobe32(0xffffffff); /* confirm data */ 1011 1012 /* FIX: All newest firmware should un-protect the bottom of 1013 the sram before handoff. However, the very first interfaces 1014 do not. Therefore the handoff copy must skip the first 8 bytes 1015 */ 1016 /* where the code starts*/ 1017 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1018 buf[4] = htobe32(size - 8); /* length of code */ 1019 buf[5] = htobe32(8); /* where to copy to */ 1020 buf[6] = htobe32(0); /* where to jump to */ 1021 1022 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1023 mxge_pio_copy(submit, buf, 64); 1024 wmb(); 1025 DELAY(1000); 1026 wmb(); 1027 i = 0; 1028 while (*confirm != 0xffffffff && i < 20) { 1029 DELAY(1000*10); 1030 i++; 1031 bus_dmamap_sync(sc->cmd_dma.dmat, 1032 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1033 } 1034 if (*confirm != 0xffffffff) { 1035 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1036 confirm, *confirm); 1037 1038 return ENXIO; 1039 } 1040 return 0; 1041} 1042 1043static int 1044mxge_update_mac_address(mxge_softc_t *sc) 1045{ 1046 mxge_cmd_t cmd; 1047 uint8_t *addr = sc->mac_addr; 1048 int status; 1049 1050 1051 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1052 | (addr[2] << 8) | addr[3]); 1053 1054 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1055 1056 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1057 return status; 1058} 1059 1060static int 1061mxge_change_pause(mxge_softc_t *sc, int pause) 1062{ 1063 mxge_cmd_t cmd; 1064 int status; 1065 1066 if (pause) 1067 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1068 &cmd); 1069 else 1070 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1071 &cmd); 1072 1073 if (status) { 1074 device_printf(sc->dev, "Failed to set flow control mode\n"); 1075 return ENXIO; 1076 } 1077 sc->pause = pause; 1078 return 0; 1079} 1080 1081static void 1082mxge_change_promisc(mxge_softc_t *sc, int promisc) 1083{ 1084 mxge_cmd_t cmd; 1085 int status; 1086 1087 if (mxge_always_promisc) 1088 promisc = 1; 1089 1090 if (promisc) 1091 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1092 &cmd); 1093 else 1094 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1095 &cmd); 1096 1097 if (status) { 1098 device_printf(sc->dev, "Failed to set promisc mode\n"); 1099 } 1100} 1101 1102static void 1103mxge_set_multicast_list(mxge_softc_t *sc) 1104{ 1105 mxge_cmd_t cmd; 1106 struct ifmultiaddr *ifma; 1107 struct ifnet *ifp = sc->ifp; 1108 int err; 1109 1110 /* This firmware is known to not support multicast */ 1111 if (!sc->fw_multicast_support) 1112 return; 1113 1114 /* Disable multicast filtering while we play with the lists*/ 1115 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1116 if (err != 0) { 1117 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1118 " error status: %d\n", err); 1119 return; 1120 } 1121 1122 if (sc->adopted_rx_filter_bug) 1123 return; 1124 1125 if (ifp->if_flags & IFF_ALLMULTI) 1126 /* request to disable multicast filtering, so quit here */ 1127 return; 1128 1129 /* Flush all the filters */ 1130 1131 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1132 if (err != 0) { 1133 device_printf(sc->dev, 1134 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1135 ", error status: %d\n", err); 1136 return; 1137 } 1138 1139 /* Walk the multicast list, and add each address */ 1140 1141 if_maddr_rlock(ifp); 1142 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1143 if (ifma->ifma_addr->sa_family != AF_LINK) 1144 continue; 1145 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1146 &cmd.data0, 4); 1147 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1148 &cmd.data1, 2); 1149 cmd.data0 = htonl(cmd.data0); 1150 cmd.data1 = htonl(cmd.data1); 1151 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1152 if (err != 0) { 1153 device_printf(sc->dev, "Failed " 1154 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1155 "%d\t", err); 1156 /* abort, leaving multicast filtering off */ 1157 if_maddr_runlock(ifp); 1158 return; 1159 } 1160 } 1161 if_maddr_runlock(ifp); 1162 /* Enable multicast filtering */ 1163 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1164 if (err != 0) { 1165 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1166 ", error status: %d\n", err); 1167 } 1168} 1169 1170static int 1171mxge_max_mtu(mxge_softc_t *sc) 1172{ 1173 mxge_cmd_t cmd; 1174 int status; 1175 1176 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1177 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1178 1179 /* try to set nbufs to see if it we can 1180 use virtually contiguous jumbos */ 1181 cmd.data0 = 0; 1182 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1183 &cmd); 1184 if (status == 0) 1185 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1186 1187 /* otherwise, we're limited to MJUMPAGESIZE */ 1188 return MJUMPAGESIZE - MXGEFW_PAD; 1189} 1190 1191static int 1192mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1193{ 1194 struct mxge_slice_state *ss; 1195 mxge_rx_done_t *rx_done; 1196 volatile uint32_t *irq_claim; 1197 mxge_cmd_t cmd; 1198 int slice, status; 1199 1200 /* try to send a reset command to the card to see if it 1201 is alive */ 1202 memset(&cmd, 0, sizeof (cmd)); 1203 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1204 if (status != 0) { 1205 device_printf(sc->dev, "failed reset\n"); 1206 return ENXIO; 1207 } 1208 1209 mxge_dummy_rdma(sc, 1); 1210 1211 1212 /* set the intrq size */ 1213 cmd.data0 = sc->rx_ring_size; 1214 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1215 1216 /* 1217 * Even though we already know how many slices are supported 1218 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1219 * has magic side effects, and must be called after a reset. 1220 * It must be called prior to calling any RSS related cmds, 1221 * including assigning an interrupt queue for anything but 1222 * slice 0. It must also be called *after* 1223 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1224 * the firmware to compute offsets. 1225 */ 1226 1227 if (sc->num_slices > 1) { 1228 /* ask the maximum number of slices it supports */ 1229 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1230 &cmd); 1231 if (status != 0) { 1232 device_printf(sc->dev, 1233 "failed to get number of slices\n"); 1234 return status; 1235 } 1236 /* 1237 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1238 * to setting up the interrupt queue DMA 1239 */ 1240 cmd.data0 = sc->num_slices; 1241 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1242#ifdef IFNET_BUF_RING 1243 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1244#endif 1245 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1246 &cmd); 1247 if (status != 0) { 1248 device_printf(sc->dev, 1249 "failed to set number of slices\n"); 1250 return status; 1251 } 1252 } 1253 1254 1255 if (interrupts_setup) { 1256 /* Now exchange information about interrupts */ 1257 for (slice = 0; slice < sc->num_slices; slice++) { 1258 rx_done = &sc->ss[slice].rx_done; 1259 memset(rx_done->entry, 0, sc->rx_ring_size); 1260 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1261 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1262 cmd.data2 = slice; 1263 status |= mxge_send_cmd(sc, 1264 MXGEFW_CMD_SET_INTRQ_DMA, 1265 &cmd); 1266 } 1267 } 1268 1269 status |= mxge_send_cmd(sc, 1270 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1271 1272 1273 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1274 1275 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1276 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1277 1278 1279 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1280 &cmd); 1281 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1282 if (status != 0) { 1283 device_printf(sc->dev, "failed set interrupt parameters\n"); 1284 return status; 1285 } 1286 1287 1288 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1289 1290 1291 /* run a DMA benchmark */ 1292 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1293 1294 for (slice = 0; slice < sc->num_slices; slice++) { 1295 ss = &sc->ss[slice]; 1296 1297 ss->irq_claim = irq_claim + (2 * slice); 1298 /* reset mcp/driver shared state back to 0 */ 1299 ss->rx_done.idx = 0; 1300 ss->rx_done.cnt = 0; 1301 ss->tx.req = 0; 1302 ss->tx.done = 0; 1303 ss->tx.pkt_done = 0; 1304 ss->tx.queue_active = 0; 1305 ss->tx.activate = 0; 1306 ss->tx.deactivate = 0; 1307 ss->tx.wake = 0; 1308 ss->tx.defrag = 0; 1309 ss->tx.stall = 0; 1310 ss->rx_big.cnt = 0; 1311 ss->rx_small.cnt = 0; 1312 ss->lro_bad_csum = 0; 1313 ss->lro_queued = 0; 1314 ss->lro_flushed = 0; 1315 if (ss->fw_stats != NULL) { 1316 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1317 } 1318 } 1319 sc->rdma_tags_available = 15; 1320 status = mxge_update_mac_address(sc); 1321 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1322 mxge_change_pause(sc, sc->pause); 1323 mxge_set_multicast_list(sc); 1324 if (sc->throttle) { 1325 cmd.data0 = sc->throttle; 1326 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1327 &cmd)) { 1328 device_printf(sc->dev, 1329 "can't enable throttle\n"); 1330 } 1331 } 1332 return status; 1333} 1334 1335static int 1336mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1337{ 1338 mxge_cmd_t cmd; 1339 mxge_softc_t *sc; 1340 int err; 1341 unsigned int throttle; 1342 1343 sc = arg1; 1344 throttle = sc->throttle; 1345 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1346 if (err != 0) { 1347 return err; 1348 } 1349 1350 if (throttle == sc->throttle) 1351 return 0; 1352 1353 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1354 return EINVAL; 1355 1356 mtx_lock(&sc->driver_mtx); 1357 cmd.data0 = throttle; 1358 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1359 if (err == 0) 1360 sc->throttle = throttle; 1361 mtx_unlock(&sc->driver_mtx); 1362 return err; 1363} 1364 1365static int 1366mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1367{ 1368 mxge_softc_t *sc; 1369 unsigned int intr_coal_delay; 1370 int err; 1371 1372 sc = arg1; 1373 intr_coal_delay = sc->intr_coal_delay; 1374 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1375 if (err != 0) { 1376 return err; 1377 } 1378 if (intr_coal_delay == sc->intr_coal_delay) 1379 return 0; 1380 1381 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1382 return EINVAL; 1383 1384 mtx_lock(&sc->driver_mtx); 1385 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1386 sc->intr_coal_delay = intr_coal_delay; 1387 1388 mtx_unlock(&sc->driver_mtx); 1389 return err; 1390} 1391 1392static int 1393mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1394{ 1395 mxge_softc_t *sc; 1396 unsigned int enabled; 1397 int err; 1398 1399 sc = arg1; 1400 enabled = sc->pause; 1401 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1402 if (err != 0) { 1403 return err; 1404 } 1405 if (enabled == sc->pause) 1406 return 0; 1407 1408 mtx_lock(&sc->driver_mtx); 1409 err = mxge_change_pause(sc, enabled); 1410 mtx_unlock(&sc->driver_mtx); 1411 return err; 1412} 1413 1414static int 1415mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt) 1416{ 1417 struct ifnet *ifp; 1418 int err = 0; 1419 1420 ifp = sc->ifp; 1421 if (lro_cnt == 0) 1422 ifp->if_capenable &= ~IFCAP_LRO; 1423 else 1424 ifp->if_capenable |= IFCAP_LRO; 1425 sc->lro_cnt = lro_cnt; 1426 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1427 mxge_close(sc, 0); 1428 err = mxge_open(sc); 1429 } 1430 return err; 1431} 1432 1433static int 1434mxge_change_lro(SYSCTL_HANDLER_ARGS) 1435{ 1436 mxge_softc_t *sc; 1437 unsigned int lro_cnt; 1438 int err; 1439 1440 sc = arg1; 1441 lro_cnt = sc->lro_cnt; 1442 err = sysctl_handle_int(oidp, &lro_cnt, arg2, req); 1443 if (err != 0) 1444 return err; 1445 1446 if (lro_cnt == sc->lro_cnt) 1447 return 0; 1448 1449 if (lro_cnt > 128) 1450 return EINVAL; 1451 1452 mtx_lock(&sc->driver_mtx); 1453 err = mxge_change_lro_locked(sc, lro_cnt); 1454 mtx_unlock(&sc->driver_mtx); 1455 return err; 1456} 1457 1458static int 1459mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1460{ 1461 int err; 1462 1463 if (arg1 == NULL) 1464 return EFAULT; 1465 arg2 = be32toh(*(int *)arg1); 1466 arg1 = NULL; 1467 err = sysctl_handle_int(oidp, arg1, arg2, req); 1468 1469 return err; 1470} 1471 1472static void 1473mxge_rem_sysctls(mxge_softc_t *sc) 1474{ 1475 struct mxge_slice_state *ss; 1476 int slice; 1477 1478 if (sc->slice_sysctl_tree == NULL) 1479 return; 1480 1481 for (slice = 0; slice < sc->num_slices; slice++) { 1482 ss = &sc->ss[slice]; 1483 if (ss == NULL || ss->sysctl_tree == NULL) 1484 continue; 1485 sysctl_ctx_free(&ss->sysctl_ctx); 1486 ss->sysctl_tree = NULL; 1487 } 1488 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1489 sc->slice_sysctl_tree = NULL; 1490} 1491 1492static void 1493mxge_add_sysctls(mxge_softc_t *sc) 1494{ 1495 struct sysctl_ctx_list *ctx; 1496 struct sysctl_oid_list *children; 1497 mcp_irq_data_t *fw; 1498 struct mxge_slice_state *ss; 1499 int slice; 1500 char slice_num[8]; 1501 1502 ctx = device_get_sysctl_ctx(sc->dev); 1503 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1504 fw = sc->ss[0].fw_stats; 1505 1506 /* random information */ 1507 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1508 "firmware_version", 1509 CTLFLAG_RD, &sc->fw_version, 1510 0, "firmware version"); 1511 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1512 "serial_number", 1513 CTLFLAG_RD, &sc->serial_number_string, 1514 0, "serial number"); 1515 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1516 "product_code", 1517 CTLFLAG_RD, &sc->product_code_string, 1518 0, "product_code"); 1519 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1520 "pcie_link_width", 1521 CTLFLAG_RD, &sc->link_width, 1522 0, "tx_boundary"); 1523 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1524 "tx_boundary", 1525 CTLFLAG_RD, &sc->tx_boundary, 1526 0, "tx_boundary"); 1527 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1528 "write_combine", 1529 CTLFLAG_RD, &sc->wc, 1530 0, "write combining PIO?"); 1531 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1532 "read_dma_MBs", 1533 CTLFLAG_RD, &sc->read_dma, 1534 0, "DMA Read speed in MB/s"); 1535 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1536 "write_dma_MBs", 1537 CTLFLAG_RD, &sc->write_dma, 1538 0, "DMA Write speed in MB/s"); 1539 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1540 "read_write_dma_MBs", 1541 CTLFLAG_RD, &sc->read_write_dma, 1542 0, "DMA concurrent Read/Write speed in MB/s"); 1543 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1544 "watchdog_resets", 1545 CTLFLAG_RD, &sc->watchdog_resets, 1546 0, "Number of times NIC was reset"); 1547 1548 1549 /* performance related tunables */ 1550 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1551 "intr_coal_delay", 1552 CTLTYPE_INT|CTLFLAG_RW, sc, 1553 0, mxge_change_intr_coal, 1554 "I", "interrupt coalescing delay in usecs"); 1555 1556 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1557 "throttle", 1558 CTLTYPE_INT|CTLFLAG_RW, sc, 1559 0, mxge_change_throttle, 1560 "I", "transmit throttling"); 1561 1562 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1563 "flow_control_enabled", 1564 CTLTYPE_INT|CTLFLAG_RW, sc, 1565 0, mxge_change_flow_control, 1566 "I", "interrupt coalescing delay in usecs"); 1567 1568 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1569 "deassert_wait", 1570 CTLFLAG_RW, &mxge_deassert_wait, 1571 0, "Wait for IRQ line to go low in ihandler"); 1572 1573 /* stats block from firmware is in network byte order. 1574 Need to swap it */ 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "link_up", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1578 0, mxge_handle_be32, 1579 "I", "link up"); 1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1581 "rdma_tags_available", 1582 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1583 0, mxge_handle_be32, 1584 "I", "rdma_tags_available"); 1585 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1586 "dropped_bad_crc32", 1587 CTLTYPE_INT|CTLFLAG_RD, 1588 &fw->dropped_bad_crc32, 1589 0, mxge_handle_be32, 1590 "I", "dropped_bad_crc32"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_bad_phy", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_bad_phy, 1595 0, mxge_handle_be32, 1596 "I", "dropped_bad_phy"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 "dropped_link_error_or_filtered", 1599 CTLTYPE_INT|CTLFLAG_RD, 1600 &fw->dropped_link_error_or_filtered, 1601 0, mxge_handle_be32, 1602 "I", "dropped_link_error_or_filtered"); 1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1604 "dropped_link_overflow", 1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1606 0, mxge_handle_be32, 1607 "I", "dropped_link_overflow"); 1608 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1609 "dropped_multicast_filtered", 1610 CTLTYPE_INT|CTLFLAG_RD, 1611 &fw->dropped_multicast_filtered, 1612 0, mxge_handle_be32, 1613 "I", "dropped_multicast_filtered"); 1614 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1615 "dropped_no_big_buffer", 1616 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1617 0, mxge_handle_be32, 1618 "I", "dropped_no_big_buffer"); 1619 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1620 "dropped_no_small_buffer", 1621 CTLTYPE_INT|CTLFLAG_RD, 1622 &fw->dropped_no_small_buffer, 1623 0, mxge_handle_be32, 1624 "I", "dropped_no_small_buffer"); 1625 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1626 "dropped_overrun", 1627 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1628 0, mxge_handle_be32, 1629 "I", "dropped_overrun"); 1630 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1631 "dropped_pause", 1632 CTLTYPE_INT|CTLFLAG_RD, 1633 &fw->dropped_pause, 1634 0, mxge_handle_be32, 1635 "I", "dropped_pause"); 1636 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1637 "dropped_runt", 1638 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1639 0, mxge_handle_be32, 1640 "I", "dropped_runt"); 1641 1642 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1643 "dropped_unicast_filtered", 1644 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1645 0, mxge_handle_be32, 1646 "I", "dropped_unicast_filtered"); 1647 1648 /* verbose printing? */ 1649 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1650 "verbose", 1651 CTLFLAG_RW, &mxge_verbose, 1652 0, "verbose printing"); 1653 1654 /* lro */ 1655 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1656 "lro_cnt", 1657 CTLTYPE_INT|CTLFLAG_RW, sc, 1658 0, mxge_change_lro, 1659 "I", "number of lro merge queues"); 1660 1661 1662 /* add counters exported for debugging from all slices */ 1663 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1664 sc->slice_sysctl_tree = 1665 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1666 "slice", CTLFLAG_RD, 0, ""); 1667 1668 for (slice = 0; slice < sc->num_slices; slice++) { 1669 ss = &sc->ss[slice]; 1670 sysctl_ctx_init(&ss->sysctl_ctx); 1671 ctx = &ss->sysctl_ctx; 1672 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1673 sprintf(slice_num, "%d", slice); 1674 ss->sysctl_tree = 1675 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1676 CTLFLAG_RD, 0, ""); 1677 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "rx_small_cnt", 1680 CTLFLAG_RD, &ss->rx_small.cnt, 1681 0, "rx_small_cnt"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "rx_big_cnt", 1684 CTLFLAG_RD, &ss->rx_big.cnt, 1685 0, "rx_small_cnt"); 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "lro_flushed", CTLFLAG_RD, &ss->lro_flushed, 1688 0, "number of lro merge queues flushed"); 1689 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "lro_queued", CTLFLAG_RD, &ss->lro_queued, 1692 0, "number of frames appended to lro merge" 1693 "queues"); 1694 1695#ifndef IFNET_BUF_RING 1696 /* only transmit from slice 0 for now */ 1697 if (slice > 0) 1698 continue; 1699#endif 1700 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1701 "tx_req", 1702 CTLFLAG_RD, &ss->tx.req, 1703 0, "tx_req"); 1704 1705 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1706 "tx_done", 1707 CTLFLAG_RD, &ss->tx.done, 1708 0, "tx_done"); 1709 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1710 "tx_pkt_done", 1711 CTLFLAG_RD, &ss->tx.pkt_done, 1712 0, "tx_done"); 1713 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1714 "tx_stall", 1715 CTLFLAG_RD, &ss->tx.stall, 1716 0, "tx_stall"); 1717 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1718 "tx_wake", 1719 CTLFLAG_RD, &ss->tx.wake, 1720 0, "tx_wake"); 1721 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1722 "tx_defrag", 1723 CTLFLAG_RD, &ss->tx.defrag, 1724 0, "tx_defrag"); 1725 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1726 "tx_queue_active", 1727 CTLFLAG_RD, &ss->tx.queue_active, 1728 0, "tx_queue_active"); 1729 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1730 "tx_activate", 1731 CTLFLAG_RD, &ss->tx.activate, 1732 0, "tx_activate"); 1733 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1734 "tx_deactivate", 1735 CTLFLAG_RD, &ss->tx.deactivate, 1736 0, "tx_deactivate"); 1737 } 1738} 1739 1740/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1741 backwards one at a time and handle ring wraps */ 1742 1743static inline void 1744mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1745 mcp_kreq_ether_send_t *src, int cnt) 1746{ 1747 int idx, starting_slot; 1748 starting_slot = tx->req; 1749 while (cnt > 1) { 1750 cnt--; 1751 idx = (starting_slot + cnt) & tx->mask; 1752 mxge_pio_copy(&tx->lanai[idx], 1753 &src[cnt], sizeof(*src)); 1754 wmb(); 1755 } 1756} 1757 1758/* 1759 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1760 * at most 32 bytes at a time, so as to avoid involving the software 1761 * pio handler in the nic. We re-write the first segment's flags 1762 * to mark them valid only after writing the entire chain 1763 */ 1764 1765static inline void 1766mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1767 int cnt) 1768{ 1769 int idx, i; 1770 uint32_t *src_ints; 1771 volatile uint32_t *dst_ints; 1772 mcp_kreq_ether_send_t *srcp; 1773 volatile mcp_kreq_ether_send_t *dstp, *dst; 1774 uint8_t last_flags; 1775 1776 idx = tx->req & tx->mask; 1777 1778 last_flags = src->flags; 1779 src->flags = 0; 1780 wmb(); 1781 dst = dstp = &tx->lanai[idx]; 1782 srcp = src; 1783 1784 if ((idx + cnt) < tx->mask) { 1785 for (i = 0; i < (cnt - 1); i += 2) { 1786 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1787 wmb(); /* force write every 32 bytes */ 1788 srcp += 2; 1789 dstp += 2; 1790 } 1791 } else { 1792 /* submit all but the first request, and ensure 1793 that it is submitted below */ 1794 mxge_submit_req_backwards(tx, src, cnt); 1795 i = 0; 1796 } 1797 if (i < cnt) { 1798 /* submit the first request */ 1799 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1800 wmb(); /* barrier before setting valid flag */ 1801 } 1802 1803 /* re-write the last 32-bits with the valid flags */ 1804 src->flags = last_flags; 1805 src_ints = (uint32_t *)src; 1806 src_ints+=3; 1807 dst_ints = (volatile uint32_t *)dst; 1808 dst_ints+=3; 1809 *dst_ints = *src_ints; 1810 tx->req += cnt; 1811 wmb(); 1812} 1813 1814#if IFCAP_TSO4 1815 1816static void 1817mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1818 int busdma_seg_cnt, int ip_off) 1819{ 1820 mxge_tx_ring_t *tx; 1821 mcp_kreq_ether_send_t *req; 1822 bus_dma_segment_t *seg; 1823 struct ip *ip; 1824 struct tcphdr *tcp; 1825 uint32_t low, high_swapped; 1826 int len, seglen, cum_len, cum_len_next; 1827 int next_is_first, chop, cnt, rdma_count, small; 1828 uint16_t pseudo_hdr_offset, cksum_offset, mss; 1829 uint8_t flags, flags_next; 1830 static int once; 1831 1832 mss = m->m_pkthdr.tso_segsz; 1833 1834 /* negative cum_len signifies to the 1835 * send loop that we are still in the 1836 * header portion of the TSO packet. 1837 */ 1838 1839 /* ensure we have the ethernet, IP and TCP 1840 header together in the first mbuf, copy 1841 it to a scratch buffer if not */ 1842 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 1843 m_copydata(m, 0, ip_off + sizeof (*ip), 1844 ss->scratch); 1845 ip = (struct ip *)(ss->scratch + ip_off); 1846 } else { 1847 ip = (struct ip *)(mtod(m, char *) + ip_off); 1848 } 1849 if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) 1850 + sizeof (*tcp))) { 1851 m_copydata(m, 0, ip_off + (ip->ip_hl << 2) 1852 + sizeof (*tcp), ss->scratch); 1853 ip = (struct ip *)(mtod(m, char *) + ip_off); 1854 } 1855 1856 tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); 1857 cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); 1858 1859 /* TSO implies checksum offload on this hardware */ 1860 cksum_offset = ip_off + (ip->ip_hl << 2); 1861 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1862 1863 1864 /* for TSO, pseudo_hdr_offset holds mss. 1865 * The firmware figures out where to put 1866 * the checksum by parsing the header. */ 1867 pseudo_hdr_offset = htobe16(mss); 1868 1869 tx = &ss->tx; 1870 req = tx->req_list; 1871 seg = tx->seg_list; 1872 cnt = 0; 1873 rdma_count = 0; 1874 /* "rdma_count" is the number of RDMAs belonging to the 1875 * current packet BEFORE the current send request. For 1876 * non-TSO packets, this is equal to "count". 1877 * For TSO packets, rdma_count needs to be reset 1878 * to 0 after a segment cut. 1879 * 1880 * The rdma_count field of the send request is 1881 * the number of RDMAs of the packet starting at 1882 * that request. For TSO send requests with one ore more cuts 1883 * in the middle, this is the number of RDMAs starting 1884 * after the last cut in the request. All previous 1885 * segments before the last cut implicitly have 1 RDMA. 1886 * 1887 * Since the number of RDMAs is not known beforehand, 1888 * it must be filled-in retroactively - after each 1889 * segmentation cut or at the end of the entire packet. 1890 */ 1891 1892 while (busdma_seg_cnt) { 1893 /* Break the busdma segment up into pieces*/ 1894 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1895 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1896 len = seg->ds_len; 1897 1898 while (len) { 1899 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1900 seglen = len; 1901 cum_len_next = cum_len + seglen; 1902 (req-rdma_count)->rdma_count = rdma_count + 1; 1903 if (__predict_true(cum_len >= 0)) { 1904 /* payload */ 1905 chop = (cum_len_next > mss); 1906 cum_len_next = cum_len_next % mss; 1907 next_is_first = (cum_len_next == 0); 1908 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1909 flags_next |= next_is_first * 1910 MXGEFW_FLAGS_FIRST; 1911 rdma_count |= -(chop | next_is_first); 1912 rdma_count += chop & !next_is_first; 1913 } else if (cum_len_next >= 0) { 1914 /* header ends */ 1915 rdma_count = -1; 1916 cum_len_next = 0; 1917 seglen = -cum_len; 1918 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1919 flags_next = MXGEFW_FLAGS_TSO_PLD | 1920 MXGEFW_FLAGS_FIRST | 1921 (small * MXGEFW_FLAGS_SMALL); 1922 } 1923 1924 req->addr_high = high_swapped; 1925 req->addr_low = htobe32(low); 1926 req->pseudo_hdr_offset = pseudo_hdr_offset; 1927 req->pad = 0; 1928 req->rdma_count = 1; 1929 req->length = htobe16(seglen); 1930 req->cksum_offset = cksum_offset; 1931 req->flags = flags | ((cum_len & 1) * 1932 MXGEFW_FLAGS_ALIGN_ODD); 1933 low += seglen; 1934 len -= seglen; 1935 cum_len = cum_len_next; 1936 flags = flags_next; 1937 req++; 1938 cnt++; 1939 rdma_count++; 1940 if (__predict_false(cksum_offset > seglen)) 1941 cksum_offset -= seglen; 1942 else 1943 cksum_offset = 0; 1944 if (__predict_false(cnt > tx->max_desc)) 1945 goto drop; 1946 } 1947 busdma_seg_cnt--; 1948 seg++; 1949 } 1950 (req-rdma_count)->rdma_count = rdma_count; 1951 1952 do { 1953 req--; 1954 req->flags |= MXGEFW_FLAGS_TSO_LAST; 1955 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 1956 1957 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 1958 mxge_submit_req(tx, tx->req_list, cnt); 1959#ifdef IFNET_BUF_RING 1960 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 1961 /* tell the NIC to start polling this slice */ 1962 *tx->send_go = 1; 1963 tx->queue_active = 1; 1964 tx->activate++; 1965 wmb(); 1966 } 1967#endif 1968 return; 1969 1970drop: 1971 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 1972 m_freem(m); 1973 ss->oerrors++; 1974 if (!once) { 1975 printf("tx->max_desc exceeded via TSO!\n"); 1976 printf("mss = %d, %ld, %d!\n", mss, 1977 (long)seg - (long)tx->seg_list, tx->max_desc); 1978 once = 1; 1979 } 1980 return; 1981 1982} 1983 1984#endif /* IFCAP_TSO4 */ 1985 1986#ifdef MXGE_NEW_VLAN_API 1987/* 1988 * We reproduce the software vlan tag insertion from 1989 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 1990 * vlan tag insertion. We need to advertise this in order to have the 1991 * vlan interface respect our csum offload flags. 1992 */ 1993static struct mbuf * 1994mxge_vlan_tag_insert(struct mbuf *m) 1995{ 1996 struct ether_vlan_header *evl; 1997 1998 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_DONTWAIT); 1999 if (__predict_false(m == NULL)) 2000 return NULL; 2001 if (m->m_len < sizeof(*evl)) { 2002 m = m_pullup(m, sizeof(*evl)); 2003 if (__predict_false(m == NULL)) 2004 return NULL; 2005 } 2006 /* 2007 * Transform the Ethernet header into an Ethernet header 2008 * with 802.1Q encapsulation. 2009 */ 2010 evl = mtod(m, struct ether_vlan_header *); 2011 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2012 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2013 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2014 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2015 m->m_flags &= ~M_VLANTAG; 2016 return m; 2017} 2018#endif /* MXGE_NEW_VLAN_API */ 2019 2020static void 2021mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2022{ 2023 mxge_softc_t *sc; 2024 mcp_kreq_ether_send_t *req; 2025 bus_dma_segment_t *seg; 2026 struct mbuf *m_tmp; 2027 struct ifnet *ifp; 2028 mxge_tx_ring_t *tx; 2029 struct ip *ip; 2030 int cnt, cum_len, err, i, idx, odd_flag, ip_off; 2031 uint16_t pseudo_hdr_offset; 2032 uint8_t flags, cksum_offset; 2033 2034 2035 sc = ss->sc; 2036 ifp = sc->ifp; 2037 tx = &ss->tx; 2038 2039 ip_off = sizeof (struct ether_header); 2040#ifdef MXGE_NEW_VLAN_API 2041 if (m->m_flags & M_VLANTAG) { 2042 m = mxge_vlan_tag_insert(m); 2043 if (__predict_false(m == NULL)) 2044 goto drop; 2045 ip_off += ETHER_VLAN_ENCAP_LEN; 2046 } 2047#endif 2048 /* (try to) map the frame for DMA */ 2049 idx = tx->req & tx->mask; 2050 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2051 m, tx->seg_list, &cnt, 2052 BUS_DMA_NOWAIT); 2053 if (__predict_false(err == EFBIG)) { 2054 /* Too many segments in the chain. Try 2055 to defrag */ 2056 m_tmp = m_defrag(m, M_NOWAIT); 2057 if (m_tmp == NULL) { 2058 goto drop; 2059 } 2060 ss->tx.defrag++; 2061 m = m_tmp; 2062 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2063 tx->info[idx].map, 2064 m, tx->seg_list, &cnt, 2065 BUS_DMA_NOWAIT); 2066 } 2067 if (__predict_false(err != 0)) { 2068 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2069 " packet len = %d\n", err, m->m_pkthdr.len); 2070 goto drop; 2071 } 2072 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2073 BUS_DMASYNC_PREWRITE); 2074 tx->info[idx].m = m; 2075 2076#if IFCAP_TSO4 2077 /* TSO is different enough, we handle it in another routine */ 2078 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2079 mxge_encap_tso(ss, m, cnt, ip_off); 2080 return; 2081 } 2082#endif 2083 2084 req = tx->req_list; 2085 cksum_offset = 0; 2086 pseudo_hdr_offset = 0; 2087 flags = MXGEFW_FLAGS_NO_TSO; 2088 2089 /* checksum offloading? */ 2090 if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { 2091 /* ensure ip header is in first mbuf, copy 2092 it to a scratch buffer if not */ 2093 if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { 2094 m_copydata(m, 0, ip_off + sizeof (*ip), 2095 ss->scratch); 2096 ip = (struct ip *)(ss->scratch + ip_off); 2097 } else { 2098 ip = (struct ip *)(mtod(m, char *) + ip_off); 2099 } 2100 cksum_offset = ip_off + (ip->ip_hl << 2); 2101 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2102 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2103 req->cksum_offset = cksum_offset; 2104 flags |= MXGEFW_FLAGS_CKSUM; 2105 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2106 } else { 2107 odd_flag = 0; 2108 } 2109 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2110 flags |= MXGEFW_FLAGS_SMALL; 2111 2112 /* convert segments into a request list */ 2113 cum_len = 0; 2114 seg = tx->seg_list; 2115 req->flags = MXGEFW_FLAGS_FIRST; 2116 for (i = 0; i < cnt; i++) { 2117 req->addr_low = 2118 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2119 req->addr_high = 2120 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2121 req->length = htobe16(seg->ds_len); 2122 req->cksum_offset = cksum_offset; 2123 if (cksum_offset > seg->ds_len) 2124 cksum_offset -= seg->ds_len; 2125 else 2126 cksum_offset = 0; 2127 req->pseudo_hdr_offset = pseudo_hdr_offset; 2128 req->pad = 0; /* complete solid 16-byte block */ 2129 req->rdma_count = 1; 2130 req->flags |= flags | ((cum_len & 1) * odd_flag); 2131 cum_len += seg->ds_len; 2132 seg++; 2133 req++; 2134 req->flags = 0; 2135 } 2136 req--; 2137 /* pad runts to 60 bytes */ 2138 if (cum_len < 60) { 2139 req++; 2140 req->addr_low = 2141 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2142 req->addr_high = 2143 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2144 req->length = htobe16(60 - cum_len); 2145 req->cksum_offset = 0; 2146 req->pseudo_hdr_offset = pseudo_hdr_offset; 2147 req->pad = 0; /* complete solid 16-byte block */ 2148 req->rdma_count = 1; 2149 req->flags |= flags | ((cum_len & 1) * odd_flag); 2150 cnt++; 2151 } 2152 2153 tx->req_list[0].rdma_count = cnt; 2154#if 0 2155 /* print what the firmware will see */ 2156 for (i = 0; i < cnt; i++) { 2157 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2158 "cso:%d, flags:0x%x, rdma:%d\n", 2159 i, (int)ntohl(tx->req_list[i].addr_high), 2160 (int)ntohl(tx->req_list[i].addr_low), 2161 (int)ntohs(tx->req_list[i].length), 2162 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2163 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2164 tx->req_list[i].rdma_count); 2165 } 2166 printf("--------------\n"); 2167#endif 2168 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2169 mxge_submit_req(tx, tx->req_list, cnt); 2170#ifdef IFNET_BUF_RING 2171 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2172 /* tell the NIC to start polling this slice */ 2173 *tx->send_go = 1; 2174 tx->queue_active = 1; 2175 tx->activate++; 2176 wmb(); 2177 } 2178#endif 2179 return; 2180 2181drop: 2182 m_freem(m); 2183 ss->oerrors++; 2184 return; 2185} 2186 2187#ifdef IFNET_BUF_RING 2188static void 2189mxge_qflush(struct ifnet *ifp) 2190{ 2191 mxge_softc_t *sc = ifp->if_softc; 2192 mxge_tx_ring_t *tx; 2193 struct mbuf *m; 2194 int slice; 2195 2196 for (slice = 0; slice < sc->num_slices; slice++) { 2197 tx = &sc->ss[slice].tx; 2198 mtx_lock(&tx->mtx); 2199 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2200 m_freem(m); 2201 mtx_unlock(&tx->mtx); 2202 } 2203 if_qflush(ifp); 2204} 2205 2206static inline void 2207mxge_start_locked(struct mxge_slice_state *ss) 2208{ 2209 mxge_softc_t *sc; 2210 struct mbuf *m; 2211 struct ifnet *ifp; 2212 mxge_tx_ring_t *tx; 2213 2214 sc = ss->sc; 2215 ifp = sc->ifp; 2216 tx = &ss->tx; 2217 2218 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2219 m = drbr_dequeue(ifp, tx->br); 2220 if (m == NULL) { 2221 return; 2222 } 2223 /* let BPF see it */ 2224 BPF_MTAP(ifp, m); 2225 2226 /* give it to the nic */ 2227 mxge_encap(ss, m); 2228 } 2229 /* ran out of transmit slots */ 2230 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2231 && (!drbr_empty(ifp, tx->br))) { 2232 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2233 tx->stall++; 2234 } 2235} 2236 2237static int 2238mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2239{ 2240 mxge_softc_t *sc; 2241 struct ifnet *ifp; 2242 mxge_tx_ring_t *tx; 2243 int err; 2244 2245 sc = ss->sc; 2246 ifp = sc->ifp; 2247 tx = &ss->tx; 2248 2249 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2250 IFF_DRV_RUNNING) { 2251 err = drbr_enqueue(ifp, tx->br, m); 2252 return (err); 2253 } 2254 2255 if (!drbr_needs_enqueue(ifp, tx->br) && 2256 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2257 /* let BPF see it */ 2258 BPF_MTAP(ifp, m); 2259 /* give it to the nic */ 2260 mxge_encap(ss, m); 2261 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2262 return (err); 2263 } 2264 if (!drbr_empty(ifp, tx->br)) 2265 mxge_start_locked(ss); 2266 return (0); 2267} 2268 2269static int 2270mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2271{ 2272 mxge_softc_t *sc = ifp->if_softc; 2273 struct mxge_slice_state *ss; 2274 mxge_tx_ring_t *tx; 2275 int err = 0; 2276 int slice; 2277 2278 slice = m->m_pkthdr.flowid; 2279 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2280 2281 ss = &sc->ss[slice]; 2282 tx = &ss->tx; 2283 2284 if (mtx_trylock(&tx->mtx)) { 2285 err = mxge_transmit_locked(ss, m); 2286 mtx_unlock(&tx->mtx); 2287 } else { 2288 err = drbr_enqueue(ifp, tx->br, m); 2289 } 2290 2291 return (err); 2292} 2293 2294#else 2295 2296static inline void 2297mxge_start_locked(struct mxge_slice_state *ss) 2298{ 2299 mxge_softc_t *sc; 2300 struct mbuf *m; 2301 struct ifnet *ifp; 2302 mxge_tx_ring_t *tx; 2303 2304 sc = ss->sc; 2305 ifp = sc->ifp; 2306 tx = &ss->tx; 2307 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2308 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2309 if (m == NULL) { 2310 return; 2311 } 2312 /* let BPF see it */ 2313 BPF_MTAP(ifp, m); 2314 2315 /* give it to the nic */ 2316 mxge_encap(ss, m); 2317 } 2318 /* ran out of transmit slots */ 2319 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2320 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2321 tx->stall++; 2322 } 2323} 2324#endif 2325static void 2326mxge_start(struct ifnet *ifp) 2327{ 2328 mxge_softc_t *sc = ifp->if_softc; 2329 struct mxge_slice_state *ss; 2330 2331 /* only use the first slice for now */ 2332 ss = &sc->ss[0]; 2333 mtx_lock(&ss->tx.mtx); 2334 mxge_start_locked(ss); 2335 mtx_unlock(&ss->tx.mtx); 2336} 2337 2338/* 2339 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2340 * at most 32 bytes at a time, so as to avoid involving the software 2341 * pio handler in the nic. We re-write the first segment's low 2342 * DMA address to mark it valid only after we write the entire chunk 2343 * in a burst 2344 */ 2345static inline void 2346mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2347 mcp_kreq_ether_recv_t *src) 2348{ 2349 uint32_t low; 2350 2351 low = src->addr_low; 2352 src->addr_low = 0xffffffff; 2353 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2354 wmb(); 2355 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2356 wmb(); 2357 src->addr_low = low; 2358 dst->addr_low = low; 2359 wmb(); 2360} 2361 2362static int 2363mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2364{ 2365 bus_dma_segment_t seg; 2366 struct mbuf *m; 2367 mxge_rx_ring_t *rx = &ss->rx_small; 2368 int cnt, err; 2369 2370 m = m_gethdr(M_DONTWAIT, MT_DATA); 2371 if (m == NULL) { 2372 rx->alloc_fail++; 2373 err = ENOBUFS; 2374 goto done; 2375 } 2376 m->m_len = MHLEN; 2377 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2378 &seg, &cnt, BUS_DMA_NOWAIT); 2379 if (err != 0) { 2380 m_free(m); 2381 goto done; 2382 } 2383 rx->info[idx].m = m; 2384 rx->shadow[idx].addr_low = 2385 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2386 rx->shadow[idx].addr_high = 2387 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2388 2389done: 2390 if ((idx & 7) == 7) 2391 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2392 return err; 2393} 2394 2395static int 2396mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2397{ 2398 bus_dma_segment_t seg[3]; 2399 struct mbuf *m; 2400 mxge_rx_ring_t *rx = &ss->rx_big; 2401 int cnt, err, i; 2402 2403 m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2404 if (m == NULL) { 2405 rx->alloc_fail++; 2406 err = ENOBUFS; 2407 goto done; 2408 } 2409 m->m_len = rx->mlen; 2410 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2411 seg, &cnt, BUS_DMA_NOWAIT); 2412 if (err != 0) { 2413 m_free(m); 2414 goto done; 2415 } 2416 rx->info[idx].m = m; 2417 rx->shadow[idx].addr_low = 2418 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2419 rx->shadow[idx].addr_high = 2420 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2421 2422#if MXGE_VIRT_JUMBOS 2423 for (i = 1; i < cnt; i++) { 2424 rx->shadow[idx + i].addr_low = 2425 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2426 rx->shadow[idx + i].addr_high = 2427 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2428 } 2429#endif 2430 2431done: 2432 for (i = 0; i < rx->nbufs; i++) { 2433 if ((idx & 7) == 7) { 2434 mxge_submit_8rx(&rx->lanai[idx - 7], 2435 &rx->shadow[idx - 7]); 2436 } 2437 idx++; 2438 } 2439 return err; 2440} 2441 2442/* 2443 * Myri10GE hardware checksums are not valid if the sender 2444 * padded the frame with non-zero padding. This is because 2445 * the firmware just does a simple 16-bit 1s complement 2446 * checksum across the entire frame, excluding the first 14 2447 * bytes. It is best to simply to check the checksum and 2448 * tell the stack about it only if the checksum is good 2449 */ 2450 2451static inline uint16_t 2452mxge_rx_csum(struct mbuf *m, int csum) 2453{ 2454 struct ether_header *eh; 2455 struct ip *ip; 2456 uint16_t c; 2457 2458 eh = mtod(m, struct ether_header *); 2459 2460 /* only deal with IPv4 TCP & UDP for now */ 2461 if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP))) 2462 return 1; 2463 ip = (struct ip *)(eh + 1); 2464 if (__predict_false(ip->ip_p != IPPROTO_TCP && 2465 ip->ip_p != IPPROTO_UDP)) 2466 return 1; 2467#ifdef INET 2468 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2469 htonl(ntohs(csum) + ntohs(ip->ip_len) + 2470 - (ip->ip_hl << 2) + ip->ip_p)); 2471#else 2472 c = 1; 2473#endif 2474 c ^= 0xffff; 2475 return (c); 2476} 2477 2478static void 2479mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2480{ 2481 struct ether_vlan_header *evl; 2482 struct ether_header *eh; 2483 uint32_t partial; 2484 2485 evl = mtod(m, struct ether_vlan_header *); 2486 eh = mtod(m, struct ether_header *); 2487 2488 /* 2489 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2490 * after what the firmware thought was the end of the ethernet 2491 * header. 2492 */ 2493 2494 /* put checksum into host byte order */ 2495 *csum = ntohs(*csum); 2496 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2497 (*csum) += ~partial; 2498 (*csum) += ((*csum) < ~partial); 2499 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2500 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2501 2502 /* restore checksum to network byte order; 2503 later consumers expect this */ 2504 *csum = htons(*csum); 2505 2506 /* save the tag */ 2507#ifdef MXGE_NEW_VLAN_API 2508 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2509#else 2510 { 2511 struct m_tag *mtag; 2512 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2513 M_NOWAIT); 2514 if (mtag == NULL) 2515 return; 2516 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2517 m_tag_prepend(m, mtag); 2518 } 2519 2520#endif 2521 m->m_flags |= M_VLANTAG; 2522 2523 /* 2524 * Remove the 802.1q header by copying the Ethernet 2525 * addresses over it and adjusting the beginning of 2526 * the data in the mbuf. The encapsulated Ethernet 2527 * type field is already in place. 2528 */ 2529 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2530 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2531 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2532} 2533 2534 2535static inline void 2536mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2537{ 2538 mxge_softc_t *sc; 2539 struct ifnet *ifp; 2540 struct mbuf *m; 2541 struct ether_header *eh; 2542 mxge_rx_ring_t *rx; 2543 bus_dmamap_t old_map; 2544 int idx; 2545 uint16_t tcpudp_csum; 2546 2547 sc = ss->sc; 2548 ifp = sc->ifp; 2549 rx = &ss->rx_big; 2550 idx = rx->cnt & rx->mask; 2551 rx->cnt += rx->nbufs; 2552 /* save a pointer to the received mbuf */ 2553 m = rx->info[idx].m; 2554 /* try to replace the received mbuf */ 2555 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2556 /* drop the frame -- the old mbuf is re-cycled */ 2557 ifp->if_ierrors++; 2558 return; 2559 } 2560 2561 /* unmap the received buffer */ 2562 old_map = rx->info[idx].map; 2563 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2564 bus_dmamap_unload(rx->dmat, old_map); 2565 2566 /* swap the bus_dmamap_t's */ 2567 rx->info[idx].map = rx->extra_map; 2568 rx->extra_map = old_map; 2569 2570 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2571 * aligned */ 2572 m->m_data += MXGEFW_PAD; 2573 2574 m->m_pkthdr.rcvif = ifp; 2575 m->m_len = m->m_pkthdr.len = len; 2576 ss->ipackets++; 2577 eh = mtod(m, struct ether_header *); 2578 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2579 mxge_vlan_tag_remove(m, &csum); 2580 } 2581 /* if the checksum is valid, mark it in the mbuf header */ 2582 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2583 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2584 return; 2585 /* otherwise, it was a UDP frame, or a TCP frame which 2586 we could not do LRO on. Tell the stack that the 2587 checksum is good */ 2588 m->m_pkthdr.csum_data = 0xffff; 2589 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2590 } 2591 /* flowid only valid if RSS hashing is enabled */ 2592 if (sc->num_slices > 1) { 2593 m->m_pkthdr.flowid = (ss - sc->ss); 2594 m->m_flags |= M_FLOWID; 2595 } 2596 /* pass the frame up the stack */ 2597 (*ifp->if_input)(ifp, m); 2598} 2599 2600static inline void 2601mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum) 2602{ 2603 mxge_softc_t *sc; 2604 struct ifnet *ifp; 2605 struct ether_header *eh; 2606 struct mbuf *m; 2607 mxge_rx_ring_t *rx; 2608 bus_dmamap_t old_map; 2609 int idx; 2610 uint16_t tcpudp_csum; 2611 2612 sc = ss->sc; 2613 ifp = sc->ifp; 2614 rx = &ss->rx_small; 2615 idx = rx->cnt & rx->mask; 2616 rx->cnt++; 2617 /* save a pointer to the received mbuf */ 2618 m = rx->info[idx].m; 2619 /* try to replace the received mbuf */ 2620 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2621 /* drop the frame -- the old mbuf is re-cycled */ 2622 ifp->if_ierrors++; 2623 return; 2624 } 2625 2626 /* unmap the received buffer */ 2627 old_map = rx->info[idx].map; 2628 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2629 bus_dmamap_unload(rx->dmat, old_map); 2630 2631 /* swap the bus_dmamap_t's */ 2632 rx->info[idx].map = rx->extra_map; 2633 rx->extra_map = old_map; 2634 2635 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2636 * aligned */ 2637 m->m_data += MXGEFW_PAD; 2638 2639 m->m_pkthdr.rcvif = ifp; 2640 m->m_len = m->m_pkthdr.len = len; 2641 ss->ipackets++; 2642 eh = mtod(m, struct ether_header *); 2643 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2644 mxge_vlan_tag_remove(m, &csum); 2645 } 2646 /* if the checksum is valid, mark it in the mbuf header */ 2647 if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) { 2648 if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum))) 2649 return; 2650 /* otherwise, it was a UDP frame, or a TCP frame which 2651 we could not do LRO on. Tell the stack that the 2652 checksum is good */ 2653 m->m_pkthdr.csum_data = 0xffff; 2654 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID; 2655 } 2656 /* flowid only valid if RSS hashing is enabled */ 2657 if (sc->num_slices > 1) { 2658 m->m_pkthdr.flowid = (ss - sc->ss); 2659 m->m_flags |= M_FLOWID; 2660 } 2661 /* pass the frame up the stack */ 2662 (*ifp->if_input)(ifp, m); 2663} 2664 2665static inline void 2666mxge_clean_rx_done(struct mxge_slice_state *ss) 2667{ 2668 mxge_rx_done_t *rx_done = &ss->rx_done; 2669 int limit = 0; 2670 uint16_t length; 2671 uint16_t checksum; 2672 2673 2674 while (rx_done->entry[rx_done->idx].length != 0) { 2675 length = ntohs(rx_done->entry[rx_done->idx].length); 2676 rx_done->entry[rx_done->idx].length = 0; 2677 checksum = rx_done->entry[rx_done->idx].checksum; 2678 if (length <= (MHLEN - MXGEFW_PAD)) 2679 mxge_rx_done_small(ss, length, checksum); 2680 else 2681 mxge_rx_done_big(ss, length, checksum); 2682 rx_done->cnt++; 2683 rx_done->idx = rx_done->cnt & rx_done->mask; 2684 2685 /* limit potential for livelock */ 2686 if (__predict_false(++limit > rx_done->mask / 2)) 2687 break; 2688 } 2689#ifdef INET 2690 while (!SLIST_EMPTY(&ss->lro_active)) { 2691 struct lro_entry *lro = SLIST_FIRST(&ss->lro_active); 2692 SLIST_REMOVE_HEAD(&ss->lro_active, next); 2693 mxge_lro_flush(ss, lro); 2694 } 2695#endif 2696} 2697 2698 2699static inline void 2700mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2701{ 2702 struct ifnet *ifp; 2703 mxge_tx_ring_t *tx; 2704 struct mbuf *m; 2705 bus_dmamap_t map; 2706 int idx; 2707 int *flags; 2708 2709 tx = &ss->tx; 2710 ifp = ss->sc->ifp; 2711 while (tx->pkt_done != mcp_idx) { 2712 idx = tx->done & tx->mask; 2713 tx->done++; 2714 m = tx->info[idx].m; 2715 /* mbuf and DMA map only attached to the first 2716 segment per-mbuf */ 2717 if (m != NULL) { 2718 ss->obytes += m->m_pkthdr.len; 2719 if (m->m_flags & M_MCAST) 2720 ss->omcasts++; 2721 ss->opackets++; 2722 tx->info[idx].m = NULL; 2723 map = tx->info[idx].map; 2724 bus_dmamap_unload(tx->dmat, map); 2725 m_freem(m); 2726 } 2727 if (tx->info[idx].flag) { 2728 tx->info[idx].flag = 0; 2729 tx->pkt_done++; 2730 } 2731 } 2732 2733 /* If we have space, clear IFF_OACTIVE to tell the stack that 2734 its OK to send packets */ 2735#ifdef IFNET_BUF_RING 2736 flags = &ss->if_drv_flags; 2737#else 2738 flags = &ifp->if_drv_flags; 2739#endif 2740 mtx_lock(&ss->tx.mtx); 2741 if ((*flags) & IFF_DRV_OACTIVE && 2742 tx->req - tx->done < (tx->mask + 1)/4) { 2743 *(flags) &= ~IFF_DRV_OACTIVE; 2744 ss->tx.wake++; 2745 mxge_start_locked(ss); 2746 } 2747#ifdef IFNET_BUF_RING 2748 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2749 /* let the NIC stop polling this queue, since there 2750 * are no more transmits pending */ 2751 if (tx->req == tx->done) { 2752 *tx->send_stop = 1; 2753 tx->queue_active = 0; 2754 tx->deactivate++; 2755 wmb(); 2756 } 2757 } 2758#endif 2759 mtx_unlock(&ss->tx.mtx); 2760 2761} 2762 2763static struct mxge_media_type mxge_xfp_media_types[] = 2764{ 2765 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2766 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2767 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2768 {0, (1 << 5), "10GBASE-ER"}, 2769 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2770 {0, (1 << 3), "10GBASE-SW"}, 2771 {0, (1 << 2), "10GBASE-LW"}, 2772 {0, (1 << 1), "10GBASE-EW"}, 2773 {0, (1 << 0), "Reserved"} 2774}; 2775static struct mxge_media_type mxge_sfp_media_types[] = 2776{ 2777 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2778 {0, (1 << 7), "Reserved"}, 2779 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2780 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2781 {IFM_10G_SR, (1 << 4), "10GBASE-SR"} 2782}; 2783 2784static void 2785mxge_media_set(mxge_softc_t *sc, int media_type) 2786{ 2787 2788 2789 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2790 0, NULL); 2791 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2792 sc->current_media = media_type; 2793 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2794} 2795 2796static void 2797mxge_media_init(mxge_softc_t *sc) 2798{ 2799 char *ptr; 2800 int i; 2801 2802 ifmedia_removeall(&sc->media); 2803 mxge_media_set(sc, IFM_AUTO); 2804 2805 /* 2806 * parse the product code to deterimine the interface type 2807 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2808 * after the 3rd dash in the driver's cached copy of the 2809 * EEPROM's product code string. 2810 */ 2811 ptr = sc->product_code_string; 2812 if (ptr == NULL) { 2813 device_printf(sc->dev, "Missing product code\n"); 2814 return; 2815 } 2816 2817 for (i = 0; i < 3; i++, ptr++) { 2818 ptr = index(ptr, '-'); 2819 if (ptr == NULL) { 2820 device_printf(sc->dev, 2821 "only %d dashes in PC?!?\n", i); 2822 return; 2823 } 2824 } 2825 if (*ptr == 'C') { 2826 /* -C is CX4 */ 2827 sc->connector = MXGE_CX4; 2828 mxge_media_set(sc, IFM_10G_CX4); 2829 } else if (*ptr == 'Q') { 2830 /* -Q is Quad Ribbon Fiber */ 2831 sc->connector = MXGE_QRF; 2832 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2833 /* FreeBSD has no media type for Quad ribbon fiber */ 2834 } else if (*ptr == 'R') { 2835 /* -R is XFP */ 2836 sc->connector = MXGE_XFP; 2837 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2838 /* -S or -2S is SFP+ */ 2839 sc->connector = MXGE_SFP; 2840 } else { 2841 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2842 } 2843} 2844 2845/* 2846 * Determine the media type for a NIC. Some XFPs will identify 2847 * themselves only when their link is up, so this is initiated via a 2848 * link up interrupt. However, this can potentially take up to 2849 * several milliseconds, so it is run via the watchdog routine, rather 2850 * than in the interrupt handler itself. 2851 */ 2852static void 2853mxge_media_probe(mxge_softc_t *sc) 2854{ 2855 mxge_cmd_t cmd; 2856 char *cage_type; 2857 2858 struct mxge_media_type *mxge_media_types = NULL; 2859 int i, err, ms, mxge_media_type_entries; 2860 uint32_t byte; 2861 2862 sc->need_media_probe = 0; 2863 2864 if (sc->connector == MXGE_XFP) { 2865 /* -R is XFP */ 2866 mxge_media_types = mxge_xfp_media_types; 2867 mxge_media_type_entries = 2868 sizeof (mxge_xfp_media_types) / 2869 sizeof (mxge_xfp_media_types[0]); 2870 byte = MXGE_XFP_COMPLIANCE_BYTE; 2871 cage_type = "XFP"; 2872 } else if (sc->connector == MXGE_SFP) { 2873 /* -S or -2S is SFP+ */ 2874 mxge_media_types = mxge_sfp_media_types; 2875 mxge_media_type_entries = 2876 sizeof (mxge_sfp_media_types) / 2877 sizeof (mxge_sfp_media_types[0]); 2878 cage_type = "SFP+"; 2879 byte = 3; 2880 } else { 2881 /* nothing to do; media type cannot change */ 2882 return; 2883 } 2884 2885 /* 2886 * At this point we know the NIC has an XFP cage, so now we 2887 * try to determine what is in the cage by using the 2888 * firmware's XFP I2C commands to read the XFP 10GbE compilance 2889 * register. We read just one byte, which may take over 2890 * a millisecond 2891 */ 2892 2893 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 2894 cmd.data1 = byte; 2895 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 2896 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 2897 device_printf(sc->dev, "failed to read XFP\n"); 2898 } 2899 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 2900 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 2901 } 2902 if (err != MXGEFW_CMD_OK) { 2903 return; 2904 } 2905 2906 /* now we wait for the data to be cached */ 2907 cmd.data0 = byte; 2908 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2909 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 2910 DELAY(1000); 2911 cmd.data0 = byte; 2912 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 2913 } 2914 if (err != MXGEFW_CMD_OK) { 2915 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 2916 cage_type, err, ms); 2917 return; 2918 } 2919 2920 if (cmd.data0 == mxge_media_types[0].bitmask) { 2921 if (mxge_verbose) 2922 device_printf(sc->dev, "%s:%s\n", cage_type, 2923 mxge_media_types[0].name); 2924 if (sc->current_media != mxge_media_types[0].flag) { 2925 mxge_media_init(sc); 2926 mxge_media_set(sc, mxge_media_types[0].flag); 2927 } 2928 return; 2929 } 2930 for (i = 1; i < mxge_media_type_entries; i++) { 2931 if (cmd.data0 & mxge_media_types[i].bitmask) { 2932 if (mxge_verbose) 2933 device_printf(sc->dev, "%s:%s\n", 2934 cage_type, 2935 mxge_media_types[i].name); 2936 2937 if (sc->current_media != mxge_media_types[i].flag) { 2938 mxge_media_init(sc); 2939 mxge_media_set(sc, mxge_media_types[i].flag); 2940 } 2941 return; 2942 } 2943 } 2944 if (mxge_verbose) 2945 device_printf(sc->dev, "%s media 0x%x unknown\n", 2946 cage_type, cmd.data0); 2947 2948 return; 2949} 2950 2951static void 2952mxge_intr(void *arg) 2953{ 2954 struct mxge_slice_state *ss = arg; 2955 mxge_softc_t *sc = ss->sc; 2956 mcp_irq_data_t *stats = ss->fw_stats; 2957 mxge_tx_ring_t *tx = &ss->tx; 2958 mxge_rx_done_t *rx_done = &ss->rx_done; 2959 uint32_t send_done_count; 2960 uint8_t valid; 2961 2962 2963#ifndef IFNET_BUF_RING 2964 /* an interrupt on a non-zero slice is implicitly valid 2965 since MSI-X irqs are not shared */ 2966 if (ss != sc->ss) { 2967 mxge_clean_rx_done(ss); 2968 *ss->irq_claim = be32toh(3); 2969 return; 2970 } 2971#endif 2972 2973 /* make sure the DMA has finished */ 2974 if (!stats->valid) { 2975 return; 2976 } 2977 valid = stats->valid; 2978 2979 if (sc->legacy_irq) { 2980 /* lower legacy IRQ */ 2981 *sc->irq_deassert = 0; 2982 if (!mxge_deassert_wait) 2983 /* don't wait for conf. that irq is low */ 2984 stats->valid = 0; 2985 } else { 2986 stats->valid = 0; 2987 } 2988 2989 /* loop while waiting for legacy irq deassertion */ 2990 do { 2991 /* check for transmit completes and receives */ 2992 send_done_count = be32toh(stats->send_done_count); 2993 while ((send_done_count != tx->pkt_done) || 2994 (rx_done->entry[rx_done->idx].length != 0)) { 2995 if (send_done_count != tx->pkt_done) 2996 mxge_tx_done(ss, (int)send_done_count); 2997 mxge_clean_rx_done(ss); 2998 send_done_count = be32toh(stats->send_done_count); 2999 } 3000 if (sc->legacy_irq && mxge_deassert_wait) 3001 wmb(); 3002 } while (*((volatile uint8_t *) &stats->valid)); 3003 3004 /* fw link & error stats meaningful only on the first slice */ 3005 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3006 if (sc->link_state != stats->link_up) { 3007 sc->link_state = stats->link_up; 3008 if (sc->link_state) { 3009 if_link_state_change(sc->ifp, LINK_STATE_UP); 3010 sc->ifp->if_baudrate = IF_Gbps(10UL); 3011 if (mxge_verbose) 3012 device_printf(sc->dev, "link up\n"); 3013 } else { 3014 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3015 sc->ifp->if_baudrate = 0; 3016 if (mxge_verbose) 3017 device_printf(sc->dev, "link down\n"); 3018 } 3019 sc->need_media_probe = 1; 3020 } 3021 if (sc->rdma_tags_available != 3022 be32toh(stats->rdma_tags_available)) { 3023 sc->rdma_tags_available = 3024 be32toh(stats->rdma_tags_available); 3025 device_printf(sc->dev, "RDMA timed out! %d tags " 3026 "left\n", sc->rdma_tags_available); 3027 } 3028 3029 if (stats->link_down) { 3030 sc->down_cnt += stats->link_down; 3031 sc->link_state = 0; 3032 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3033 } 3034 } 3035 3036 /* check to see if we have rx token to pass back */ 3037 if (valid & 0x1) 3038 *ss->irq_claim = be32toh(3); 3039 *(ss->irq_claim + 1) = be32toh(3); 3040} 3041 3042static void 3043mxge_init(void *arg) 3044{ 3045} 3046 3047 3048 3049static void 3050mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3051{ 3052 struct lro_entry *lro_entry; 3053 int i; 3054 3055 while (!SLIST_EMPTY(&ss->lro_free)) { 3056 lro_entry = SLIST_FIRST(&ss->lro_free); 3057 SLIST_REMOVE_HEAD(&ss->lro_free, next); 3058 free(lro_entry, M_DEVBUF); 3059 } 3060 3061 for (i = 0; i <= ss->rx_big.mask; i++) { 3062 if (ss->rx_big.info[i].m == NULL) 3063 continue; 3064 bus_dmamap_unload(ss->rx_big.dmat, 3065 ss->rx_big.info[i].map); 3066 m_freem(ss->rx_big.info[i].m); 3067 ss->rx_big.info[i].m = NULL; 3068 } 3069 3070 for (i = 0; i <= ss->rx_small.mask; i++) { 3071 if (ss->rx_small.info[i].m == NULL) 3072 continue; 3073 bus_dmamap_unload(ss->rx_small.dmat, 3074 ss->rx_small.info[i].map); 3075 m_freem(ss->rx_small.info[i].m); 3076 ss->rx_small.info[i].m = NULL; 3077 } 3078 3079 /* transmit ring used only on the first slice */ 3080 if (ss->tx.info == NULL) 3081 return; 3082 3083 for (i = 0; i <= ss->tx.mask; i++) { 3084 ss->tx.info[i].flag = 0; 3085 if (ss->tx.info[i].m == NULL) 3086 continue; 3087 bus_dmamap_unload(ss->tx.dmat, 3088 ss->tx.info[i].map); 3089 m_freem(ss->tx.info[i].m); 3090 ss->tx.info[i].m = NULL; 3091 } 3092} 3093 3094static void 3095mxge_free_mbufs(mxge_softc_t *sc) 3096{ 3097 int slice; 3098 3099 for (slice = 0; slice < sc->num_slices; slice++) 3100 mxge_free_slice_mbufs(&sc->ss[slice]); 3101} 3102 3103static void 3104mxge_free_slice_rings(struct mxge_slice_state *ss) 3105{ 3106 int i; 3107 3108 3109 if (ss->rx_done.entry != NULL) 3110 mxge_dma_free(&ss->rx_done.dma); 3111 ss->rx_done.entry = NULL; 3112 3113 if (ss->tx.req_bytes != NULL) 3114 free(ss->tx.req_bytes, M_DEVBUF); 3115 ss->tx.req_bytes = NULL; 3116 3117 if (ss->tx.seg_list != NULL) 3118 free(ss->tx.seg_list, M_DEVBUF); 3119 ss->tx.seg_list = NULL; 3120 3121 if (ss->rx_small.shadow != NULL) 3122 free(ss->rx_small.shadow, M_DEVBUF); 3123 ss->rx_small.shadow = NULL; 3124 3125 if (ss->rx_big.shadow != NULL) 3126 free(ss->rx_big.shadow, M_DEVBUF); 3127 ss->rx_big.shadow = NULL; 3128 3129 if (ss->tx.info != NULL) { 3130 if (ss->tx.dmat != NULL) { 3131 for (i = 0; i <= ss->tx.mask; i++) { 3132 bus_dmamap_destroy(ss->tx.dmat, 3133 ss->tx.info[i].map); 3134 } 3135 bus_dma_tag_destroy(ss->tx.dmat); 3136 } 3137 free(ss->tx.info, M_DEVBUF); 3138 } 3139 ss->tx.info = NULL; 3140 3141 if (ss->rx_small.info != NULL) { 3142 if (ss->rx_small.dmat != NULL) { 3143 for (i = 0; i <= ss->rx_small.mask; i++) { 3144 bus_dmamap_destroy(ss->rx_small.dmat, 3145 ss->rx_small.info[i].map); 3146 } 3147 bus_dmamap_destroy(ss->rx_small.dmat, 3148 ss->rx_small.extra_map); 3149 bus_dma_tag_destroy(ss->rx_small.dmat); 3150 } 3151 free(ss->rx_small.info, M_DEVBUF); 3152 } 3153 ss->rx_small.info = NULL; 3154 3155 if (ss->rx_big.info != NULL) { 3156 if (ss->rx_big.dmat != NULL) { 3157 for (i = 0; i <= ss->rx_big.mask; i++) { 3158 bus_dmamap_destroy(ss->rx_big.dmat, 3159 ss->rx_big.info[i].map); 3160 } 3161 bus_dmamap_destroy(ss->rx_big.dmat, 3162 ss->rx_big.extra_map); 3163 bus_dma_tag_destroy(ss->rx_big.dmat); 3164 } 3165 free(ss->rx_big.info, M_DEVBUF); 3166 } 3167 ss->rx_big.info = NULL; 3168} 3169 3170static void 3171mxge_free_rings(mxge_softc_t *sc) 3172{ 3173 int slice; 3174 3175 for (slice = 0; slice < sc->num_slices; slice++) 3176 mxge_free_slice_rings(&sc->ss[slice]); 3177} 3178 3179static int 3180mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3181 int tx_ring_entries) 3182{ 3183 mxge_softc_t *sc = ss->sc; 3184 size_t bytes; 3185 int err, i; 3186 3187 err = ENOMEM; 3188 3189 /* allocate per-slice receive resources */ 3190 3191 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3192 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3193 3194 /* allocate the rx shadow rings */ 3195 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3196 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3197 if (ss->rx_small.shadow == NULL) 3198 return err; 3199 3200 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3201 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3202 if (ss->rx_big.shadow == NULL) 3203 return err; 3204 3205 /* allocate the rx host info rings */ 3206 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3207 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3208 if (ss->rx_small.info == NULL) 3209 return err; 3210 3211 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3212 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3213 if (ss->rx_big.info == NULL) 3214 return err; 3215 3216 /* allocate the rx busdma resources */ 3217 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3218 1, /* alignment */ 3219 4096, /* boundary */ 3220 BUS_SPACE_MAXADDR, /* low */ 3221 BUS_SPACE_MAXADDR, /* high */ 3222 NULL, NULL, /* filter */ 3223 MHLEN, /* maxsize */ 3224 1, /* num segs */ 3225 MHLEN, /* maxsegsize */ 3226 BUS_DMA_ALLOCNOW, /* flags */ 3227 NULL, NULL, /* lock */ 3228 &ss->rx_small.dmat); /* tag */ 3229 if (err != 0) { 3230 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3231 err); 3232 return err; 3233 } 3234 3235 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3236 1, /* alignment */ 3237#if MXGE_VIRT_JUMBOS 3238 4096, /* boundary */ 3239#else 3240 0, /* boundary */ 3241#endif 3242 BUS_SPACE_MAXADDR, /* low */ 3243 BUS_SPACE_MAXADDR, /* high */ 3244 NULL, NULL, /* filter */ 3245 3*4096, /* maxsize */ 3246#if MXGE_VIRT_JUMBOS 3247 3, /* num segs */ 3248 4096, /* maxsegsize*/ 3249#else 3250 1, /* num segs */ 3251 MJUM9BYTES, /* maxsegsize*/ 3252#endif 3253 BUS_DMA_ALLOCNOW, /* flags */ 3254 NULL, NULL, /* lock */ 3255 &ss->rx_big.dmat); /* tag */ 3256 if (err != 0) { 3257 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3258 err); 3259 return err; 3260 } 3261 for (i = 0; i <= ss->rx_small.mask; i++) { 3262 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3263 &ss->rx_small.info[i].map); 3264 if (err != 0) { 3265 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3266 err); 3267 return err; 3268 } 3269 } 3270 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3271 &ss->rx_small.extra_map); 3272 if (err != 0) { 3273 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3274 err); 3275 return err; 3276 } 3277 3278 for (i = 0; i <= ss->rx_big.mask; i++) { 3279 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3280 &ss->rx_big.info[i].map); 3281 if (err != 0) { 3282 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3283 err); 3284 return err; 3285 } 3286 } 3287 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3288 &ss->rx_big.extra_map); 3289 if (err != 0) { 3290 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3291 err); 3292 return err; 3293 } 3294 3295 /* now allocate TX resouces */ 3296 3297#ifndef IFNET_BUF_RING 3298 /* only use a single TX ring for now */ 3299 if (ss != ss->sc->ss) 3300 return 0; 3301#endif 3302 3303 ss->tx.mask = tx_ring_entries - 1; 3304 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3305 3306 3307 /* allocate the tx request copy block */ 3308 bytes = 8 + 3309 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3310 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3311 if (ss->tx.req_bytes == NULL) 3312 return err; 3313 /* ensure req_list entries are aligned to 8 bytes */ 3314 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3315 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3316 3317 /* allocate the tx busdma segment list */ 3318 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3319 ss->tx.seg_list = (bus_dma_segment_t *) 3320 malloc(bytes, M_DEVBUF, M_WAITOK); 3321 if (ss->tx.seg_list == NULL) 3322 return err; 3323 3324 /* allocate the tx host info ring */ 3325 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3326 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3327 if (ss->tx.info == NULL) 3328 return err; 3329 3330 /* allocate the tx busdma resources */ 3331 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3332 1, /* alignment */ 3333 sc->tx_boundary, /* boundary */ 3334 BUS_SPACE_MAXADDR, /* low */ 3335 BUS_SPACE_MAXADDR, /* high */ 3336 NULL, NULL, /* filter */ 3337 65536 + 256, /* maxsize */ 3338 ss->tx.max_desc - 2, /* num segs */ 3339 sc->tx_boundary, /* maxsegsz */ 3340 BUS_DMA_ALLOCNOW, /* flags */ 3341 NULL, NULL, /* lock */ 3342 &ss->tx.dmat); /* tag */ 3343 3344 if (err != 0) { 3345 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3346 err); 3347 return err; 3348 } 3349 3350 /* now use these tags to setup dmamaps for each slot 3351 in the ring */ 3352 for (i = 0; i <= ss->tx.mask; i++) { 3353 err = bus_dmamap_create(ss->tx.dmat, 0, 3354 &ss->tx.info[i].map); 3355 if (err != 0) { 3356 device_printf(sc->dev, "Err %d tx dmamap\n", 3357 err); 3358 return err; 3359 } 3360 } 3361 return 0; 3362 3363} 3364 3365static int 3366mxge_alloc_rings(mxge_softc_t *sc) 3367{ 3368 mxge_cmd_t cmd; 3369 int tx_ring_size; 3370 int tx_ring_entries, rx_ring_entries; 3371 int err, slice; 3372 3373 /* get ring sizes */ 3374 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3375 tx_ring_size = cmd.data0; 3376 if (err != 0) { 3377 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3378 goto abort; 3379 } 3380 3381 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3382 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3383 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3384 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3385 IFQ_SET_READY(&sc->ifp->if_snd); 3386 3387 for (slice = 0; slice < sc->num_slices; slice++) { 3388 err = mxge_alloc_slice_rings(&sc->ss[slice], 3389 rx_ring_entries, 3390 tx_ring_entries); 3391 if (err != 0) 3392 goto abort; 3393 } 3394 return 0; 3395 3396abort: 3397 mxge_free_rings(sc); 3398 return err; 3399 3400} 3401 3402 3403static void 3404mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3405{ 3406 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3407 3408 if (bufsize < MCLBYTES) { 3409 /* easy, everything fits in a single buffer */ 3410 *big_buf_size = MCLBYTES; 3411 *cl_size = MCLBYTES; 3412 *nbufs = 1; 3413 return; 3414 } 3415 3416 if (bufsize < MJUMPAGESIZE) { 3417 /* still easy, everything still fits in a single buffer */ 3418 *big_buf_size = MJUMPAGESIZE; 3419 *cl_size = MJUMPAGESIZE; 3420 *nbufs = 1; 3421 return; 3422 } 3423#if MXGE_VIRT_JUMBOS 3424 /* now we need to use virtually contiguous buffers */ 3425 *cl_size = MJUM9BYTES; 3426 *big_buf_size = 4096; 3427 *nbufs = mtu / 4096 + 1; 3428 /* needs to be a power of two, so round up */ 3429 if (*nbufs == 3) 3430 *nbufs = 4; 3431#else 3432 *cl_size = MJUM9BYTES; 3433 *big_buf_size = MJUM9BYTES; 3434 *nbufs = 1; 3435#endif 3436} 3437 3438static int 3439mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3440{ 3441 mxge_softc_t *sc; 3442 mxge_cmd_t cmd; 3443 bus_dmamap_t map; 3444 struct lro_entry *lro_entry; 3445 int err, i, slice; 3446 3447 3448 sc = ss->sc; 3449 slice = ss - sc->ss; 3450 3451 SLIST_INIT(&ss->lro_free); 3452 SLIST_INIT(&ss->lro_active); 3453 3454 for (i = 0; i < sc->lro_cnt; i++) { 3455 lro_entry = (struct lro_entry *) 3456 malloc(sizeof (*lro_entry), M_DEVBUF, 3457 M_NOWAIT | M_ZERO); 3458 if (lro_entry == NULL) { 3459 sc->lro_cnt = i; 3460 break; 3461 } 3462 SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next); 3463 } 3464 /* get the lanai pointers to the send and receive rings */ 3465 3466 err = 0; 3467#ifndef IFNET_BUF_RING 3468 /* We currently only send from the first slice */ 3469 if (slice == 0) { 3470#endif 3471 cmd.data0 = slice; 3472 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3473 ss->tx.lanai = 3474 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3475 ss->tx.send_go = (volatile uint32_t *) 3476 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3477 ss->tx.send_stop = (volatile uint32_t *) 3478 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3479#ifndef IFNET_BUF_RING 3480 } 3481#endif 3482 cmd.data0 = slice; 3483 err |= mxge_send_cmd(sc, 3484 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3485 ss->rx_small.lanai = 3486 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3487 cmd.data0 = slice; 3488 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3489 ss->rx_big.lanai = 3490 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3491 3492 if (err != 0) { 3493 device_printf(sc->dev, 3494 "failed to get ring sizes or locations\n"); 3495 return EIO; 3496 } 3497 3498 /* stock receive rings */ 3499 for (i = 0; i <= ss->rx_small.mask; i++) { 3500 map = ss->rx_small.info[i].map; 3501 err = mxge_get_buf_small(ss, map, i); 3502 if (err) { 3503 device_printf(sc->dev, "alloced %d/%d smalls\n", 3504 i, ss->rx_small.mask + 1); 3505 return ENOMEM; 3506 } 3507 } 3508 for (i = 0; i <= ss->rx_big.mask; i++) { 3509 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3510 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3511 } 3512 ss->rx_big.nbufs = nbufs; 3513 ss->rx_big.cl_size = cl_size; 3514 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3515 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3516 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3517 map = ss->rx_big.info[i].map; 3518 err = mxge_get_buf_big(ss, map, i); 3519 if (err) { 3520 device_printf(sc->dev, "alloced %d/%d bigs\n", 3521 i, ss->rx_big.mask + 1); 3522 return ENOMEM; 3523 } 3524 } 3525 return 0; 3526} 3527 3528static int 3529mxge_open(mxge_softc_t *sc) 3530{ 3531 mxge_cmd_t cmd; 3532 int err, big_bytes, nbufs, slice, cl_size, i; 3533 bus_addr_t bus; 3534 volatile uint8_t *itable; 3535 struct mxge_slice_state *ss; 3536 3537 /* Copy the MAC address in case it was overridden */ 3538 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3539 3540 err = mxge_reset(sc, 1); 3541 if (err != 0) { 3542 device_printf(sc->dev, "failed to reset\n"); 3543 return EIO; 3544 } 3545 3546 if (sc->num_slices > 1) { 3547 /* setup the indirection table */ 3548 cmd.data0 = sc->num_slices; 3549 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3550 &cmd); 3551 3552 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3553 &cmd); 3554 if (err != 0) { 3555 device_printf(sc->dev, 3556 "failed to setup rss tables\n"); 3557 return err; 3558 } 3559 3560 /* just enable an identity mapping */ 3561 itable = sc->sram + cmd.data0; 3562 for (i = 0; i < sc->num_slices; i++) 3563 itable[i] = (uint8_t)i; 3564 3565 cmd.data0 = 1; 3566 cmd.data1 = mxge_rss_hash_type; 3567 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3568 if (err != 0) { 3569 device_printf(sc->dev, "failed to enable slices\n"); 3570 return err; 3571 } 3572 } 3573 3574 3575 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3576 3577 cmd.data0 = nbufs; 3578 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3579 &cmd); 3580 /* error is only meaningful if we're trying to set 3581 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3582 if (err && nbufs > 1) { 3583 device_printf(sc->dev, 3584 "Failed to set alway-use-n to %d\n", 3585 nbufs); 3586 return EIO; 3587 } 3588 /* Give the firmware the mtu and the big and small buffer 3589 sizes. The firmware wants the big buf size to be a power 3590 of two. Luckily, FreeBSD's clusters are powers of two */ 3591 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3592 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3593 cmd.data0 = MHLEN - MXGEFW_PAD; 3594 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3595 &cmd); 3596 cmd.data0 = big_bytes; 3597 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3598 3599 if (err != 0) { 3600 device_printf(sc->dev, "failed to setup params\n"); 3601 goto abort; 3602 } 3603 3604 /* Now give him the pointer to the stats block */ 3605 for (slice = 0; 3606#ifdef IFNET_BUF_RING 3607 slice < sc->num_slices; 3608#else 3609 slice < 1; 3610#endif 3611 slice++) { 3612 ss = &sc->ss[slice]; 3613 cmd.data0 = 3614 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3615 cmd.data1 = 3616 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3617 cmd.data2 = sizeof(struct mcp_irq_data); 3618 cmd.data2 |= (slice << 16); 3619 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3620 } 3621 3622 if (err != 0) { 3623 bus = sc->ss->fw_stats_dma.bus_addr; 3624 bus += offsetof(struct mcp_irq_data, send_done_count); 3625 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3626 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3627 err = mxge_send_cmd(sc, 3628 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3629 &cmd); 3630 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3631 sc->fw_multicast_support = 0; 3632 } else { 3633 sc->fw_multicast_support = 1; 3634 } 3635 3636 if (err != 0) { 3637 device_printf(sc->dev, "failed to setup params\n"); 3638 goto abort; 3639 } 3640 3641 for (slice = 0; slice < sc->num_slices; slice++) { 3642 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3643 if (err != 0) { 3644 device_printf(sc->dev, "couldn't open slice %d\n", 3645 slice); 3646 goto abort; 3647 } 3648 } 3649 3650 /* Finally, start the firmware running */ 3651 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3652 if (err) { 3653 device_printf(sc->dev, "Couldn't bring up link\n"); 3654 goto abort; 3655 } 3656#ifdef IFNET_BUF_RING 3657 for (slice = 0; slice < sc->num_slices; slice++) { 3658 ss = &sc->ss[slice]; 3659 ss->if_drv_flags |= IFF_DRV_RUNNING; 3660 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3661 } 3662#endif 3663 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3664 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3665 3666 return 0; 3667 3668 3669abort: 3670 mxge_free_mbufs(sc); 3671 3672 return err; 3673} 3674 3675static int 3676mxge_close(mxge_softc_t *sc, int down) 3677{ 3678 mxge_cmd_t cmd; 3679 int err, old_down_cnt; 3680#ifdef IFNET_BUF_RING 3681 struct mxge_slice_state *ss; 3682 int slice; 3683#endif 3684 3685#ifdef IFNET_BUF_RING 3686 for (slice = 0; slice < sc->num_slices; slice++) { 3687 ss = &sc->ss[slice]; 3688 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3689 } 3690#endif 3691 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3692 if (!down) { 3693 old_down_cnt = sc->down_cnt; 3694 wmb(); 3695 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3696 if (err) { 3697 device_printf(sc->dev, 3698 "Couldn't bring down link\n"); 3699 } 3700 if (old_down_cnt == sc->down_cnt) { 3701 /* wait for down irq */ 3702 DELAY(10 * sc->intr_coal_delay); 3703 } 3704 wmb(); 3705 if (old_down_cnt == sc->down_cnt) { 3706 device_printf(sc->dev, "never got down irq\n"); 3707 } 3708 } 3709 mxge_free_mbufs(sc); 3710 3711 return 0; 3712} 3713 3714static void 3715mxge_setup_cfg_space(mxge_softc_t *sc) 3716{ 3717 device_t dev = sc->dev; 3718 int reg; 3719 uint16_t cmd, lnk, pectl; 3720 3721 /* find the PCIe link width and set max read request to 4KB*/ 3722 if (pci_find_extcap(dev, PCIY_EXPRESS, ®) == 0) { 3723 lnk = pci_read_config(dev, reg + 0x12, 2); 3724 sc->link_width = (lnk >> 4) & 0x3f; 3725 3726 if (sc->pectl == 0) { 3727 pectl = pci_read_config(dev, reg + 0x8, 2); 3728 pectl = (pectl & ~0x7000) | (5 << 12); 3729 pci_write_config(dev, reg + 0x8, pectl, 2); 3730 sc->pectl = pectl; 3731 } else { 3732 /* restore saved pectl after watchdog reset */ 3733 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3734 } 3735 } 3736 3737 /* Enable DMA and Memory space access */ 3738 pci_enable_busmaster(dev); 3739 cmd = pci_read_config(dev, PCIR_COMMAND, 2); 3740 cmd |= PCIM_CMD_MEMEN; 3741 pci_write_config(dev, PCIR_COMMAND, cmd, 2); 3742} 3743 3744static uint32_t 3745mxge_read_reboot(mxge_softc_t *sc) 3746{ 3747 device_t dev = sc->dev; 3748 uint32_t vs; 3749 3750 /* find the vendor specific offset */ 3751 if (pci_find_extcap(dev, PCIY_VENDOR, &vs) != 0) { 3752 device_printf(sc->dev, 3753 "could not find vendor specific offset\n"); 3754 return (uint32_t)-1; 3755 } 3756 /* enable read32 mode */ 3757 pci_write_config(dev, vs + 0x10, 0x3, 1); 3758 /* tell NIC which register to read */ 3759 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3760 return (pci_read_config(dev, vs + 0x14, 4)); 3761} 3762 3763static void 3764mxge_watchdog_reset(mxge_softc_t *sc) 3765{ 3766 struct pci_devinfo *dinfo; 3767 struct mxge_slice_state *ss; 3768 int err, running, s, num_tx_slices = 1; 3769 uint32_t reboot; 3770 uint16_t cmd; 3771 3772 err = ENXIO; 3773 3774 device_printf(sc->dev, "Watchdog reset!\n"); 3775 3776 /* 3777 * check to see if the NIC rebooted. If it did, then all of 3778 * PCI config space has been reset, and things like the 3779 * busmaster bit will be zero. If this is the case, then we 3780 * must restore PCI config space before the NIC can be used 3781 * again 3782 */ 3783 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3784 if (cmd == 0xffff) { 3785 /* 3786 * maybe the watchdog caught the NIC rebooting; wait 3787 * up to 100ms for it to finish. If it does not come 3788 * back, then give up 3789 */ 3790 DELAY(1000*100); 3791 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3792 if (cmd == 0xffff) { 3793 device_printf(sc->dev, "NIC disappeared!\n"); 3794 } 3795 } 3796 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3797 /* print the reboot status */ 3798 reboot = mxge_read_reboot(sc); 3799 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3800 reboot); 3801 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3802 if (running) { 3803 3804 /* 3805 * quiesce NIC so that TX routines will not try to 3806 * xmit after restoration of BAR 3807 */ 3808 3809 /* Mark the link as down */ 3810 if (sc->link_state) { 3811 sc->link_state = 0; 3812 if_link_state_change(sc->ifp, 3813 LINK_STATE_DOWN); 3814 } 3815#ifdef IFNET_BUF_RING 3816 num_tx_slices = sc->num_slices; 3817#endif 3818 /* grab all TX locks to ensure no tx */ 3819 for (s = 0; s < num_tx_slices; s++) { 3820 ss = &sc->ss[s]; 3821 mtx_lock(&ss->tx.mtx); 3822 } 3823 mxge_close(sc, 1); 3824 } 3825 /* restore PCI configuration space */ 3826 dinfo = device_get_ivars(sc->dev); 3827 pci_cfg_restore(sc->dev, dinfo); 3828 3829 /* and redo any changes we made to our config space */ 3830 mxge_setup_cfg_space(sc); 3831 3832 /* reload f/w */ 3833 err = mxge_load_firmware(sc, 0); 3834 if (err) { 3835 device_printf(sc->dev, 3836 "Unable to re-load f/w\n"); 3837 } 3838 if (running) { 3839 if (!err) 3840 err = mxge_open(sc); 3841 /* release all TX locks */ 3842 for (s = 0; s < num_tx_slices; s++) { 3843 ss = &sc->ss[s]; 3844#ifdef IFNET_BUF_RING 3845 mxge_start_locked(ss); 3846#endif 3847 mtx_unlock(&ss->tx.mtx); 3848 } 3849 } 3850 sc->watchdog_resets++; 3851 } else { 3852 device_printf(sc->dev, 3853 "NIC did not reboot, not resetting\n"); 3854 err = 0; 3855 } 3856 if (err) { 3857 device_printf(sc->dev, "watchdog reset failed\n"); 3858 } else { 3859 if (sc->dying == 2) 3860 sc->dying = 0; 3861 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3862 } 3863} 3864 3865static void 3866mxge_watchdog_task(void *arg, int pending) 3867{ 3868 mxge_softc_t *sc = arg; 3869 3870 3871 mtx_lock(&sc->driver_mtx); 3872 mxge_watchdog_reset(sc); 3873 mtx_unlock(&sc->driver_mtx); 3874} 3875 3876static void 3877mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3878{ 3879 tx = &sc->ss[slice].tx; 3880 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3881 device_printf(sc->dev, 3882 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3883 tx->req, tx->done, tx->queue_active); 3884 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3885 tx->activate, tx->deactivate); 3886 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3887 tx->pkt_done, 3888 be32toh(sc->ss->fw_stats->send_done_count)); 3889} 3890 3891static int 3892mxge_watchdog(mxge_softc_t *sc) 3893{ 3894 mxge_tx_ring_t *tx; 3895 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 3896 int i, err = 0; 3897 3898 /* see if we have outstanding transmits, which 3899 have been pending for more than mxge_ticks */ 3900 for (i = 0; 3901#ifdef IFNET_BUF_RING 3902 (i < sc->num_slices) && (err == 0); 3903#else 3904 (i < 1) && (err == 0); 3905#endif 3906 i++) { 3907 tx = &sc->ss[i].tx; 3908 if (tx->req != tx->done && 3909 tx->watchdog_req != tx->watchdog_done && 3910 tx->done == tx->watchdog_done) { 3911 /* check for pause blocking before resetting */ 3912 if (tx->watchdog_rx_pause == rx_pause) { 3913 mxge_warn_stuck(sc, tx, i); 3914 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3915 return (ENXIO); 3916 } 3917 else 3918 device_printf(sc->dev, "Flow control blocking " 3919 "xmits, check link partner\n"); 3920 } 3921 3922 tx->watchdog_req = tx->req; 3923 tx->watchdog_done = tx->done; 3924 tx->watchdog_rx_pause = rx_pause; 3925 } 3926 3927 if (sc->need_media_probe) 3928 mxge_media_probe(sc); 3929 return (err); 3930} 3931 3932static u_long 3933mxge_update_stats(mxge_softc_t *sc) 3934{ 3935 struct mxge_slice_state *ss; 3936 u_long pkts = 0; 3937 u_long ipackets = 0; 3938 u_long opackets = 0; 3939#ifdef IFNET_BUF_RING 3940 u_long obytes = 0; 3941 u_long omcasts = 0; 3942 u_long odrops = 0; 3943#endif 3944 u_long oerrors = 0; 3945 int slice; 3946 3947 for (slice = 0; slice < sc->num_slices; slice++) { 3948 ss = &sc->ss[slice]; 3949 ipackets += ss->ipackets; 3950 opackets += ss->opackets; 3951#ifdef IFNET_BUF_RING 3952 obytes += ss->obytes; 3953 omcasts += ss->omcasts; 3954 odrops += ss->tx.br->br_drops; 3955#endif 3956 oerrors += ss->oerrors; 3957 } 3958 pkts = (ipackets - sc->ifp->if_ipackets); 3959 pkts += (opackets - sc->ifp->if_opackets); 3960 sc->ifp->if_ipackets = ipackets; 3961 sc->ifp->if_opackets = opackets; 3962#ifdef IFNET_BUF_RING 3963 sc->ifp->if_obytes = obytes; 3964 sc->ifp->if_omcasts = omcasts; 3965 sc->ifp->if_snd.ifq_drops = odrops; 3966#endif 3967 sc->ifp->if_oerrors = oerrors; 3968 return pkts; 3969} 3970 3971static void 3972mxge_tick(void *arg) 3973{ 3974 mxge_softc_t *sc = arg; 3975 u_long pkts = 0; 3976 int err = 0; 3977 int running, ticks; 3978 uint16_t cmd; 3979 3980 ticks = mxge_ticks; 3981 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3982 if (running) { 3983 /* aggregate stats from different slices */ 3984 pkts = mxge_update_stats(sc); 3985 if (!sc->watchdog_countdown) { 3986 err = mxge_watchdog(sc); 3987 sc->watchdog_countdown = 4; 3988 } 3989 sc->watchdog_countdown--; 3990 } 3991 if (pkts == 0) { 3992 /* ensure NIC did not suffer h/w fault while idle */ 3993 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3994 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3995 sc->dying = 2; 3996 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 3997 err = ENXIO; 3998 } 3999 /* look less often if NIC is idle */ 4000 ticks *= 4; 4001 } 4002 4003 if (err == 0) 4004 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4005 4006} 4007 4008static int 4009mxge_media_change(struct ifnet *ifp) 4010{ 4011 return EINVAL; 4012} 4013 4014static int 4015mxge_change_mtu(mxge_softc_t *sc, int mtu) 4016{ 4017 struct ifnet *ifp = sc->ifp; 4018 int real_mtu, old_mtu; 4019 int err = 0; 4020 4021 4022 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4023 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4024 return EINVAL; 4025 mtx_lock(&sc->driver_mtx); 4026 old_mtu = ifp->if_mtu; 4027 ifp->if_mtu = mtu; 4028 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4029 mxge_close(sc, 0); 4030 err = mxge_open(sc); 4031 if (err != 0) { 4032 ifp->if_mtu = old_mtu; 4033 mxge_close(sc, 0); 4034 (void) mxge_open(sc); 4035 } 4036 } 4037 mtx_unlock(&sc->driver_mtx); 4038 return err; 4039} 4040 4041static void 4042mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4043{ 4044 mxge_softc_t *sc = ifp->if_softc; 4045 4046 4047 if (sc == NULL) 4048 return; 4049 ifmr->ifm_status = IFM_AVALID; 4050 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4051 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4052 ifmr->ifm_active |= sc->current_media; 4053} 4054 4055static int 4056mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4057{ 4058 mxge_softc_t *sc = ifp->if_softc; 4059 struct ifreq *ifr = (struct ifreq *)data; 4060 int err, mask; 4061 4062 err = 0; 4063 switch (command) { 4064 case SIOCSIFADDR: 4065 case SIOCGIFADDR: 4066 err = ether_ioctl(ifp, command, data); 4067 break; 4068 4069 case SIOCSIFMTU: 4070 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4071 break; 4072 4073 case SIOCSIFFLAGS: 4074 mtx_lock(&sc->driver_mtx); 4075 if (sc->dying) { 4076 mtx_unlock(&sc->driver_mtx); 4077 return EINVAL; 4078 } 4079 if (ifp->if_flags & IFF_UP) { 4080 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4081 err = mxge_open(sc); 4082 } else { 4083 /* take care of promis can allmulti 4084 flag chages */ 4085 mxge_change_promisc(sc, 4086 ifp->if_flags & IFF_PROMISC); 4087 mxge_set_multicast_list(sc); 4088 } 4089 } else { 4090 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4091 mxge_close(sc, 0); 4092 } 4093 } 4094 mtx_unlock(&sc->driver_mtx); 4095 break; 4096 4097 case SIOCADDMULTI: 4098 case SIOCDELMULTI: 4099 mtx_lock(&sc->driver_mtx); 4100 mxge_set_multicast_list(sc); 4101 mtx_unlock(&sc->driver_mtx); 4102 break; 4103 4104 case SIOCSIFCAP: 4105 mtx_lock(&sc->driver_mtx); 4106 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4107 if (mask & IFCAP_TXCSUM) { 4108 if (IFCAP_TXCSUM & ifp->if_capenable) { 4109 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4110 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP 4111 | CSUM_TSO); 4112 } else { 4113 ifp->if_capenable |= IFCAP_TXCSUM; 4114 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4115 } 4116 } else if (mask & IFCAP_RXCSUM) { 4117 if (IFCAP_RXCSUM & ifp->if_capenable) { 4118 ifp->if_capenable &= ~IFCAP_RXCSUM; 4119 sc->csum_flag = 0; 4120 } else { 4121 ifp->if_capenable |= IFCAP_RXCSUM; 4122 sc->csum_flag = 1; 4123 } 4124 } 4125 if (mask & IFCAP_TSO4) { 4126 if (IFCAP_TSO4 & ifp->if_capenable) { 4127 ifp->if_capenable &= ~IFCAP_TSO4; 4128 ifp->if_hwassist &= ~CSUM_TSO; 4129 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4130 ifp->if_capenable |= IFCAP_TSO4; 4131 ifp->if_hwassist |= CSUM_TSO; 4132 } else { 4133 printf("mxge requires tx checksum offload" 4134 " be enabled to use TSO\n"); 4135 err = EINVAL; 4136 } 4137 } 4138 if (mask & IFCAP_LRO) { 4139 if (IFCAP_LRO & ifp->if_capenable) 4140 err = mxge_change_lro_locked(sc, 0); 4141 else 4142 err = mxge_change_lro_locked(sc, mxge_lro_cnt); 4143 } 4144 if (mask & IFCAP_VLAN_HWTAGGING) 4145 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4146 if (mask & IFCAP_VLAN_HWTSO) 4147 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4148 4149 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4150 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4151 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4152 4153 mtx_unlock(&sc->driver_mtx); 4154 VLAN_CAPABILITIES(ifp); 4155 4156 break; 4157 4158 case SIOCGIFMEDIA: 4159 mtx_lock(&sc->driver_mtx); 4160 mxge_media_probe(sc); 4161 mtx_unlock(&sc->driver_mtx); 4162 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4163 &sc->media, command); 4164 break; 4165 4166 default: 4167 err = ENOTTY; 4168 } 4169 return err; 4170} 4171 4172static void 4173mxge_fetch_tunables(mxge_softc_t *sc) 4174{ 4175 4176 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4177 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4178 &mxge_flow_control); 4179 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4180 &mxge_intr_coal_delay); 4181 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4182 &mxge_nvidia_ecrc_enable); 4183 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4184 &mxge_force_firmware); 4185 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4186 &mxge_deassert_wait); 4187 TUNABLE_INT_FETCH("hw.mxge.verbose", 4188 &mxge_verbose); 4189 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4190 TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt); 4191 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4192 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4193 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4194 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4195 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4196 if (sc->lro_cnt != 0) 4197 mxge_lro_cnt = sc->lro_cnt; 4198 4199 if (bootverbose) 4200 mxge_verbose = 1; 4201 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4202 mxge_intr_coal_delay = 30; 4203 if (mxge_ticks == 0) 4204 mxge_ticks = hz / 2; 4205 sc->pause = mxge_flow_control; 4206 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4207 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4208 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4209 } 4210 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4211 mxge_initial_mtu < ETHER_MIN_LEN) 4212 mxge_initial_mtu = ETHERMTU_JUMBO; 4213 4214 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4215 mxge_throttle = MXGE_MAX_THROTTLE; 4216 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4217 mxge_throttle = MXGE_MIN_THROTTLE; 4218 sc->throttle = mxge_throttle; 4219} 4220 4221 4222static void 4223mxge_free_slices(mxge_softc_t *sc) 4224{ 4225 struct mxge_slice_state *ss; 4226 int i; 4227 4228 4229 if (sc->ss == NULL) 4230 return; 4231 4232 for (i = 0; i < sc->num_slices; i++) { 4233 ss = &sc->ss[i]; 4234 if (ss->fw_stats != NULL) { 4235 mxge_dma_free(&ss->fw_stats_dma); 4236 ss->fw_stats = NULL; 4237#ifdef IFNET_BUF_RING 4238 if (ss->tx.br != NULL) { 4239 drbr_free(ss->tx.br, M_DEVBUF); 4240 ss->tx.br = NULL; 4241 } 4242#endif 4243 mtx_destroy(&ss->tx.mtx); 4244 } 4245 if (ss->rx_done.entry != NULL) { 4246 mxge_dma_free(&ss->rx_done.dma); 4247 ss->rx_done.entry = NULL; 4248 } 4249 } 4250 free(sc->ss, M_DEVBUF); 4251 sc->ss = NULL; 4252} 4253 4254static int 4255mxge_alloc_slices(mxge_softc_t *sc) 4256{ 4257 mxge_cmd_t cmd; 4258 struct mxge_slice_state *ss; 4259 size_t bytes; 4260 int err, i, max_intr_slots; 4261 4262 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4263 if (err != 0) { 4264 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4265 return err; 4266 } 4267 sc->rx_ring_size = cmd.data0; 4268 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4269 4270 bytes = sizeof (*sc->ss) * sc->num_slices; 4271 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4272 if (sc->ss == NULL) 4273 return (ENOMEM); 4274 for (i = 0; i < sc->num_slices; i++) { 4275 ss = &sc->ss[i]; 4276 4277 ss->sc = sc; 4278 4279 /* allocate per-slice rx interrupt queues */ 4280 4281 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4282 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4283 if (err != 0) 4284 goto abort; 4285 ss->rx_done.entry = ss->rx_done.dma.addr; 4286 bzero(ss->rx_done.entry, bytes); 4287 4288 /* 4289 * allocate the per-slice firmware stats; stats 4290 * (including tx) are used used only on the first 4291 * slice for now 4292 */ 4293#ifndef IFNET_BUF_RING 4294 if (i > 0) 4295 continue; 4296#endif 4297 4298 bytes = sizeof (*ss->fw_stats); 4299 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4300 sizeof (*ss->fw_stats), 64); 4301 if (err != 0) 4302 goto abort; 4303 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4304 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4305 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4306 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4307#ifdef IFNET_BUF_RING 4308 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4309 &ss->tx.mtx); 4310#endif 4311 } 4312 4313 return (0); 4314 4315abort: 4316 mxge_free_slices(sc); 4317 return (ENOMEM); 4318} 4319 4320static void 4321mxge_slice_probe(mxge_softc_t *sc) 4322{ 4323 mxge_cmd_t cmd; 4324 char *old_fw; 4325 int msix_cnt, status, max_intr_slots; 4326 4327 sc->num_slices = 1; 4328 /* 4329 * don't enable multiple slices if they are not enabled, 4330 * or if this is not an SMP system 4331 */ 4332 4333 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4334 return; 4335 4336 /* see how many MSI-X interrupts are available */ 4337 msix_cnt = pci_msix_count(sc->dev); 4338 if (msix_cnt < 2) 4339 return; 4340 4341 /* now load the slice aware firmware see what it supports */ 4342 old_fw = sc->fw_name; 4343 if (old_fw == mxge_fw_aligned) 4344 sc->fw_name = mxge_fw_rss_aligned; 4345 else 4346 sc->fw_name = mxge_fw_rss_unaligned; 4347 status = mxge_load_firmware(sc, 0); 4348 if (status != 0) { 4349 device_printf(sc->dev, "Falling back to a single slice\n"); 4350 return; 4351 } 4352 4353 /* try to send a reset command to the card to see if it 4354 is alive */ 4355 memset(&cmd, 0, sizeof (cmd)); 4356 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4357 if (status != 0) { 4358 device_printf(sc->dev, "failed reset\n"); 4359 goto abort_with_fw; 4360 } 4361 4362 /* get rx ring size */ 4363 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4364 if (status != 0) { 4365 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4366 goto abort_with_fw; 4367 } 4368 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4369 4370 /* tell it the size of the interrupt queues */ 4371 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4372 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4373 if (status != 0) { 4374 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4375 goto abort_with_fw; 4376 } 4377 4378 /* ask the maximum number of slices it supports */ 4379 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4380 if (status != 0) { 4381 device_printf(sc->dev, 4382 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4383 goto abort_with_fw; 4384 } 4385 sc->num_slices = cmd.data0; 4386 if (sc->num_slices > msix_cnt) 4387 sc->num_slices = msix_cnt; 4388 4389 if (mxge_max_slices == -1) { 4390 /* cap to number of CPUs in system */ 4391 if (sc->num_slices > mp_ncpus) 4392 sc->num_slices = mp_ncpus; 4393 } else { 4394 if (sc->num_slices > mxge_max_slices) 4395 sc->num_slices = mxge_max_slices; 4396 } 4397 /* make sure it is a power of two */ 4398 while (sc->num_slices & (sc->num_slices - 1)) 4399 sc->num_slices--; 4400 4401 if (mxge_verbose) 4402 device_printf(sc->dev, "using %d slices\n", 4403 sc->num_slices); 4404 4405 return; 4406 4407abort_with_fw: 4408 sc->fw_name = old_fw; 4409 (void) mxge_load_firmware(sc, 0); 4410} 4411 4412static int 4413mxge_add_msix_irqs(mxge_softc_t *sc) 4414{ 4415 size_t bytes; 4416 int count, err, i, rid; 4417 4418 rid = PCIR_BAR(2); 4419 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4420 &rid, RF_ACTIVE); 4421 4422 if (sc->msix_table_res == NULL) { 4423 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4424 return ENXIO; 4425 } 4426 4427 count = sc->num_slices; 4428 err = pci_alloc_msix(sc->dev, &count); 4429 if (err != 0) { 4430 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4431 "err = %d \n", sc->num_slices, err); 4432 goto abort_with_msix_table; 4433 } 4434 if (count < sc->num_slices) { 4435 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4436 count, sc->num_slices); 4437 device_printf(sc->dev, 4438 "Try setting hw.mxge.max_slices to %d\n", 4439 count); 4440 err = ENOSPC; 4441 goto abort_with_msix; 4442 } 4443 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4444 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4445 if (sc->msix_irq_res == NULL) { 4446 err = ENOMEM; 4447 goto abort_with_msix; 4448 } 4449 4450 for (i = 0; i < sc->num_slices; i++) { 4451 rid = i + 1; 4452 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4453 SYS_RES_IRQ, 4454 &rid, RF_ACTIVE); 4455 if (sc->msix_irq_res[i] == NULL) { 4456 device_printf(sc->dev, "couldn't allocate IRQ res" 4457 " for message %d\n", i); 4458 err = ENXIO; 4459 goto abort_with_res; 4460 } 4461 } 4462 4463 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4464 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4465 4466 for (i = 0; i < sc->num_slices; i++) { 4467 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4468 INTR_TYPE_NET | INTR_MPSAFE, 4469#if __FreeBSD_version > 700030 4470 NULL, 4471#endif 4472 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4473 if (err != 0) { 4474 device_printf(sc->dev, "couldn't setup intr for " 4475 "message %d\n", i); 4476 goto abort_with_intr; 4477 } 4478 } 4479 4480 if (mxge_verbose) { 4481 device_printf(sc->dev, "using %d msix IRQs:", 4482 sc->num_slices); 4483 for (i = 0; i < sc->num_slices; i++) 4484 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4485 printf("\n"); 4486 } 4487 return (0); 4488 4489abort_with_intr: 4490 for (i = 0; i < sc->num_slices; i++) { 4491 if (sc->msix_ih[i] != NULL) { 4492 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4493 sc->msix_ih[i]); 4494 sc->msix_ih[i] = NULL; 4495 } 4496 } 4497 free(sc->msix_ih, M_DEVBUF); 4498 4499 4500abort_with_res: 4501 for (i = 0; i < sc->num_slices; i++) { 4502 rid = i + 1; 4503 if (sc->msix_irq_res[i] != NULL) 4504 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4505 sc->msix_irq_res[i]); 4506 sc->msix_irq_res[i] = NULL; 4507 } 4508 free(sc->msix_irq_res, M_DEVBUF); 4509 4510 4511abort_with_msix: 4512 pci_release_msi(sc->dev); 4513 4514abort_with_msix_table: 4515 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4516 sc->msix_table_res); 4517 4518 return err; 4519} 4520 4521static int 4522mxge_add_single_irq(mxge_softc_t *sc) 4523{ 4524 int count, err, rid; 4525 4526 count = pci_msi_count(sc->dev); 4527 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4528 rid = 1; 4529 } else { 4530 rid = 0; 4531 sc->legacy_irq = 1; 4532 } 4533 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4534 1, RF_SHAREABLE | RF_ACTIVE); 4535 if (sc->irq_res == NULL) { 4536 device_printf(sc->dev, "could not alloc interrupt\n"); 4537 return ENXIO; 4538 } 4539 if (mxge_verbose) 4540 device_printf(sc->dev, "using %s irq %ld\n", 4541 sc->legacy_irq ? "INTx" : "MSI", 4542 rman_get_start(sc->irq_res)); 4543 err = bus_setup_intr(sc->dev, sc->irq_res, 4544 INTR_TYPE_NET | INTR_MPSAFE, 4545#if __FreeBSD_version > 700030 4546 NULL, 4547#endif 4548 mxge_intr, &sc->ss[0], &sc->ih); 4549 if (err != 0) { 4550 bus_release_resource(sc->dev, SYS_RES_IRQ, 4551 sc->legacy_irq ? 0 : 1, sc->irq_res); 4552 if (!sc->legacy_irq) 4553 pci_release_msi(sc->dev); 4554 } 4555 return err; 4556} 4557 4558static void 4559mxge_rem_msix_irqs(mxge_softc_t *sc) 4560{ 4561 int i, rid; 4562 4563 for (i = 0; i < sc->num_slices; i++) { 4564 if (sc->msix_ih[i] != NULL) { 4565 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4566 sc->msix_ih[i]); 4567 sc->msix_ih[i] = NULL; 4568 } 4569 } 4570 free(sc->msix_ih, M_DEVBUF); 4571 4572 for (i = 0; i < sc->num_slices; i++) { 4573 rid = i + 1; 4574 if (sc->msix_irq_res[i] != NULL) 4575 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4576 sc->msix_irq_res[i]); 4577 sc->msix_irq_res[i] = NULL; 4578 } 4579 free(sc->msix_irq_res, M_DEVBUF); 4580 4581 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4582 sc->msix_table_res); 4583 4584 pci_release_msi(sc->dev); 4585 return; 4586} 4587 4588static void 4589mxge_rem_single_irq(mxge_softc_t *sc) 4590{ 4591 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4592 bus_release_resource(sc->dev, SYS_RES_IRQ, 4593 sc->legacy_irq ? 0 : 1, sc->irq_res); 4594 if (!sc->legacy_irq) 4595 pci_release_msi(sc->dev); 4596} 4597 4598static void 4599mxge_rem_irq(mxge_softc_t *sc) 4600{ 4601 if (sc->num_slices > 1) 4602 mxge_rem_msix_irqs(sc); 4603 else 4604 mxge_rem_single_irq(sc); 4605} 4606 4607static int 4608mxge_add_irq(mxge_softc_t *sc) 4609{ 4610 int err; 4611 4612 if (sc->num_slices > 1) 4613 err = mxge_add_msix_irqs(sc); 4614 else 4615 err = mxge_add_single_irq(sc); 4616 4617 if (0 && err == 0 && sc->num_slices > 1) { 4618 mxge_rem_msix_irqs(sc); 4619 err = mxge_add_msix_irqs(sc); 4620 } 4621 return err; 4622} 4623 4624 4625static int 4626mxge_attach(device_t dev) 4627{ 4628 mxge_softc_t *sc = device_get_softc(dev); 4629 struct ifnet *ifp; 4630 int err, rid; 4631 4632 sc->dev = dev; 4633 mxge_fetch_tunables(sc); 4634 4635 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4636 sc->tq = taskqueue_create_fast("mxge_taskq", M_WAITOK, 4637 taskqueue_thread_enqueue, 4638 &sc->tq); 4639 if (sc->tq == NULL) { 4640 err = ENOMEM; 4641 goto abort_with_nothing; 4642 } 4643 4644 err = bus_dma_tag_create(NULL, /* parent */ 4645 1, /* alignment */ 4646 0, /* boundary */ 4647 BUS_SPACE_MAXADDR, /* low */ 4648 BUS_SPACE_MAXADDR, /* high */ 4649 NULL, NULL, /* filter */ 4650 65536 + 256, /* maxsize */ 4651 MXGE_MAX_SEND_DESC, /* num segs */ 4652 65536, /* maxsegsize */ 4653 0, /* flags */ 4654 NULL, NULL, /* lock */ 4655 &sc->parent_dmat); /* tag */ 4656 4657 if (err != 0) { 4658 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4659 err); 4660 goto abort_with_tq; 4661 } 4662 4663 ifp = sc->ifp = if_alloc(IFT_ETHER); 4664 if (ifp == NULL) { 4665 device_printf(dev, "can not if_alloc()\n"); 4666 err = ENOSPC; 4667 goto abort_with_parent_dmat; 4668 } 4669 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4670 4671 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4672 device_get_nameunit(dev)); 4673 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4674 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4675 "%s:drv", device_get_nameunit(dev)); 4676 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4677 MTX_NETWORK_LOCK, MTX_DEF); 4678 4679 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4680 4681 mxge_setup_cfg_space(sc); 4682 4683 /* Map the board into the kernel */ 4684 rid = PCIR_BARS; 4685 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4686 ~0, 1, RF_ACTIVE); 4687 if (sc->mem_res == NULL) { 4688 device_printf(dev, "could not map memory\n"); 4689 err = ENXIO; 4690 goto abort_with_lock; 4691 } 4692 sc->sram = rman_get_virtual(sc->mem_res); 4693 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4694 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4695 device_printf(dev, "impossible memory region size %ld\n", 4696 rman_get_size(sc->mem_res)); 4697 err = ENXIO; 4698 goto abort_with_mem_res; 4699 } 4700 4701 /* make NULL terminated copy of the EEPROM strings section of 4702 lanai SRAM */ 4703 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4704 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4705 rman_get_bushandle(sc->mem_res), 4706 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4707 sc->eeprom_strings, 4708 MXGE_EEPROM_STRINGS_SIZE - 2); 4709 err = mxge_parse_strings(sc); 4710 if (err != 0) 4711 goto abort_with_mem_res; 4712 4713 /* Enable write combining for efficient use of PCIe bus */ 4714 mxge_enable_wc(sc); 4715 4716 /* Allocate the out of band dma memory */ 4717 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4718 sizeof (mxge_cmd_t), 64); 4719 if (err != 0) 4720 goto abort_with_mem_res; 4721 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4722 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4723 if (err != 0) 4724 goto abort_with_cmd_dma; 4725 4726 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4727 if (err != 0) 4728 goto abort_with_zeropad_dma; 4729 4730 /* select & load the firmware */ 4731 err = mxge_select_firmware(sc); 4732 if (err != 0) 4733 goto abort_with_dmabench; 4734 sc->intr_coal_delay = mxge_intr_coal_delay; 4735 4736 mxge_slice_probe(sc); 4737 err = mxge_alloc_slices(sc); 4738 if (err != 0) 4739 goto abort_with_dmabench; 4740 4741 err = mxge_reset(sc, 0); 4742 if (err != 0) 4743 goto abort_with_slices; 4744 4745 err = mxge_alloc_rings(sc); 4746 if (err != 0) { 4747 device_printf(sc->dev, "failed to allocate rings\n"); 4748 goto abort_with_slices; 4749 } 4750 4751 err = mxge_add_irq(sc); 4752 if (err != 0) { 4753 device_printf(sc->dev, "failed to add irq\n"); 4754 goto abort_with_rings; 4755 } 4756 4757 ifp->if_baudrate = IF_Gbps(10UL); 4758 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4759 IFCAP_VLAN_MTU | IFCAP_LINKSTATE; 4760#ifdef INET 4761 ifp->if_capabilities |= IFCAP_LRO; 4762#endif 4763 4764#ifdef MXGE_NEW_VLAN_API 4765 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4766 4767 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4768 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4769 sc->fw_ver_tiny >= 32) 4770 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4771#endif 4772 4773 sc->max_mtu = mxge_max_mtu(sc); 4774 if (sc->max_mtu >= 9000) 4775 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4776 else 4777 device_printf(dev, "MTU limited to %d. Install " 4778 "latest firmware for 9000 byte jumbo support\n", 4779 sc->max_mtu - ETHER_HDR_LEN); 4780 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4781 ifp->if_capenable = ifp->if_capabilities; 4782 if (sc->lro_cnt == 0) 4783 ifp->if_capenable &= ~IFCAP_LRO; 4784 sc->csum_flag = 1; 4785 ifp->if_init = mxge_init; 4786 ifp->if_softc = sc; 4787 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4788 ifp->if_ioctl = mxge_ioctl; 4789 ifp->if_start = mxge_start; 4790 /* Initialise the ifmedia structure */ 4791 ifmedia_init(&sc->media, 0, mxge_media_change, 4792 mxge_media_status); 4793 mxge_media_init(sc); 4794 mxge_media_probe(sc); 4795 sc->dying = 0; 4796 ether_ifattach(ifp, sc->mac_addr); 4797 /* ether_ifattach sets mtu to ETHERMTU */ 4798 if (mxge_initial_mtu != ETHERMTU) 4799 mxge_change_mtu(sc, mxge_initial_mtu); 4800 4801 mxge_add_sysctls(sc); 4802#ifdef IFNET_BUF_RING 4803 ifp->if_transmit = mxge_transmit; 4804 ifp->if_qflush = mxge_qflush; 4805#endif 4806 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4807 device_get_nameunit(sc->dev)); 4808 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4809 return 0; 4810 4811abort_with_rings: 4812 mxge_free_rings(sc); 4813abort_with_slices: 4814 mxge_free_slices(sc); 4815abort_with_dmabench: 4816 mxge_dma_free(&sc->dmabench_dma); 4817abort_with_zeropad_dma: 4818 mxge_dma_free(&sc->zeropad_dma); 4819abort_with_cmd_dma: 4820 mxge_dma_free(&sc->cmd_dma); 4821abort_with_mem_res: 4822 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4823abort_with_lock: 4824 pci_disable_busmaster(dev); 4825 mtx_destroy(&sc->cmd_mtx); 4826 mtx_destroy(&sc->driver_mtx); 4827 if_free(ifp); 4828abort_with_parent_dmat: 4829 bus_dma_tag_destroy(sc->parent_dmat); 4830abort_with_tq: 4831 if (sc->tq != NULL) { 4832 taskqueue_drain(sc->tq, &sc->watchdog_task); 4833 taskqueue_free(sc->tq); 4834 sc->tq = NULL; 4835 } 4836abort_with_nothing: 4837 return err; 4838} 4839 4840static int 4841mxge_detach(device_t dev) 4842{ 4843 mxge_softc_t *sc = device_get_softc(dev); 4844 4845 if (mxge_vlans_active(sc)) { 4846 device_printf(sc->dev, 4847 "Detach vlans before removing module\n"); 4848 return EBUSY; 4849 } 4850 mtx_lock(&sc->driver_mtx); 4851 sc->dying = 1; 4852 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4853 mxge_close(sc, 0); 4854 mtx_unlock(&sc->driver_mtx); 4855 ether_ifdetach(sc->ifp); 4856 if (sc->tq != NULL) { 4857 taskqueue_drain(sc->tq, &sc->watchdog_task); 4858 taskqueue_free(sc->tq); 4859 sc->tq = NULL; 4860 } 4861 callout_drain(&sc->co_hdl); 4862 ifmedia_removeall(&sc->media); 4863 mxge_dummy_rdma(sc, 0); 4864 mxge_rem_sysctls(sc); 4865 mxge_rem_irq(sc); 4866 mxge_free_rings(sc); 4867 mxge_free_slices(sc); 4868 mxge_dma_free(&sc->dmabench_dma); 4869 mxge_dma_free(&sc->zeropad_dma); 4870 mxge_dma_free(&sc->cmd_dma); 4871 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4872 pci_disable_busmaster(dev); 4873 mtx_destroy(&sc->cmd_mtx); 4874 mtx_destroy(&sc->driver_mtx); 4875 if_free(sc->ifp); 4876 bus_dma_tag_destroy(sc->parent_dmat); 4877 return 0; 4878} 4879 4880static int 4881mxge_shutdown(device_t dev) 4882{ 4883 return 0; 4884} 4885 4886/* 4887 This file uses Myri10GE driver indentation. 4888 4889 Local Variables: 4890 c-file-style:"linux" 4891 tab-width:8 4892 End: 4893*/ 4894