1/****************************************************************************** 2 3Copyright (c) 2006-2013, Myricom Inc. 4All rights reserved. 5 6Redistribution and use in source and binary forms, with or without 7modification, are permitted provided that the following conditions are met: 8 9 1. Redistributions of source code must retain the above copyright notice, 10 this list of conditions and the following disclaimer. 11 12 2. Neither the name of the Myricom Inc, nor the names of its 13 contributors may be used to endorse or promote products derived from 14 this software without specific prior written permission. 15 16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26POSSIBILITY OF SUCH DAMAGE. 27 28***************************************************************************/ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: stable/10/sys/dev/mxge/if_mxge.c 329834 2018-02-22 19:40:03Z rpokala $"); 32 33#include <sys/param.h> 34#include <sys/systm.h> 35#include <sys/linker.h> 36#include <sys/firmware.h> 37#include <sys/endian.h> 38#include <sys/sockio.h> 39#include <sys/mbuf.h> 40#include <sys/malloc.h> 41#include <sys/kdb.h> 42#include <sys/kernel.h> 43#include <sys/lock.h> 44#include <sys/module.h> 45#include <sys/socket.h> 46#include <sys/sysctl.h> 47#include <sys/sx.h> 48#include <sys/taskqueue.h> 49 50#include <net/if.h> 51#include <net/if_arp.h> 52#include <net/ethernet.h> 53#include <net/if_dl.h> 54#include <net/if_media.h> 55 56#include <net/bpf.h> 57 58#include <net/if_types.h> 59#include <net/if_vlan_var.h> 60#include <net/zlib.h> 61 62#include <netinet/in_systm.h> 63#include <netinet/in.h> 64#include <netinet/ip.h> 65#include <netinet/ip6.h> 66#include <netinet/tcp.h> 67#include <netinet/tcp_lro.h> 68#include <netinet6/ip6_var.h> 69 70#include <machine/bus.h> 71#include <machine/in_cksum.h> 72#include <machine/resource.h> 73#include <sys/bus.h> 74#include <sys/rman.h> 75#include <sys/smp.h> 76 77#include <dev/pci/pcireg.h> 78#include <dev/pci/pcivar.h> 79#include <dev/pci/pci_private.h> /* XXX for pci_cfg_restore */ 80 81#include <vm/vm.h> /* for pmap_mapdev() */ 82#include <vm/pmap.h> 83 84#if defined(__i386) || defined(__amd64) 85#include <machine/specialreg.h> 86#endif 87 88#include <dev/mxge/mxge_mcp.h> 89#include <dev/mxge/mcp_gen_header.h> 90/*#define MXGE_FAKE_IFP*/ 91#include <dev/mxge/if_mxge_var.h> 92#ifdef IFNET_BUF_RING 93#include <sys/buf_ring.h> 94#endif 95 96#include "opt_inet.h" 97#include "opt_inet6.h" 98 99/* tunable params */ 100static int mxge_nvidia_ecrc_enable = 1; 101static int mxge_force_firmware = 0; 102static int mxge_intr_coal_delay = 30; 103static int mxge_deassert_wait = 1; 104static int mxge_flow_control = 1; 105static int mxge_verbose = 0; 106static int mxge_ticks; 107static int mxge_max_slices = 1; 108static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 109static int mxge_always_promisc = 0; 110static int mxge_initial_mtu = ETHERMTU_JUMBO; 111static int mxge_throttle = 0; 112static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 113static char *mxge_fw_aligned = "mxge_eth_z8e"; 114static char *mxge_fw_rss_aligned = "mxge_rss_eth_z8e"; 115static char *mxge_fw_rss_unaligned = "mxge_rss_ethp_z8e"; 116 117static int mxge_probe(device_t dev); 118static int mxge_attach(device_t dev); 119static int mxge_detach(device_t dev); 120static int mxge_shutdown(device_t dev); 121static void mxge_intr(void *arg); 122 123static device_method_t mxge_methods[] = 124{ 125 /* Device interface */ 126 DEVMETHOD(device_probe, mxge_probe), 127 DEVMETHOD(device_attach, mxge_attach), 128 DEVMETHOD(device_detach, mxge_detach), 129 DEVMETHOD(device_shutdown, mxge_shutdown), 130 131 DEVMETHOD_END 132}; 133 134static driver_t mxge_driver = 135{ 136 "mxge", 137 mxge_methods, 138 sizeof(mxge_softc_t), 139}; 140 141static devclass_t mxge_devclass; 142 143/* Declare ourselves to be a child of the PCI bus.*/ 144DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 145MODULE_DEPEND(mxge, firmware, 1, 1, 1); 146MODULE_DEPEND(mxge, zlib, 1, 1, 1); 147 148static int mxge_load_firmware(mxge_softc_t *sc, int adopt); 149static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); 150static int mxge_close(mxge_softc_t *sc, int down); 151static int mxge_open(mxge_softc_t *sc); 152static void mxge_tick(void *arg); 153 154static int 155mxge_probe(device_t dev) 156{ 157 int rev; 158 159 160 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 161 ((pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E) || 162 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E_9))) { 163 rev = pci_get_revid(dev); 164 switch (rev) { 165 case MXGE_PCI_REV_Z8E: 166 device_set_desc(dev, "Myri10G-PCIE-8A"); 167 break; 168 case MXGE_PCI_REV_Z8ES: 169 device_set_desc(dev, "Myri10G-PCIE-8B"); 170 break; 171 default: 172 device_set_desc(dev, "Myri10G-PCIE-8??"); 173 device_printf(dev, "Unrecognized rev %d NIC\n", 174 rev); 175 break; 176 } 177 return 0; 178 } 179 return ENXIO; 180} 181 182static void 183mxge_enable_wc(mxge_softc_t *sc) 184{ 185#if defined(__i386) || defined(__amd64) 186 vm_offset_t len; 187 int err; 188 189 sc->wc = 1; 190 len = rman_get_size(sc->mem_res); 191 err = pmap_change_attr((vm_offset_t) sc->sram, 192 len, PAT_WRITE_COMBINING); 193 if (err != 0) { 194 device_printf(sc->dev, "pmap_change_attr failed, %d\n", 195 err); 196 sc->wc = 0; 197 } 198#endif 199} 200 201 202/* callback to get our DMA address */ 203static void 204mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 205 int error) 206{ 207 if (error == 0) { 208 *(bus_addr_t *) arg = segs->ds_addr; 209 } 210} 211 212static int 213mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 214 bus_size_t alignment) 215{ 216 int err; 217 device_t dev = sc->dev; 218 bus_size_t boundary, maxsegsize; 219 220 if (bytes > 4096 && alignment == 4096) { 221 boundary = 0; 222 maxsegsize = bytes; 223 } else { 224 boundary = 4096; 225 maxsegsize = 4096; 226 } 227 228 /* allocate DMAable memory tags */ 229 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 230 alignment, /* alignment */ 231 boundary, /* boundary */ 232 BUS_SPACE_MAXADDR, /* low */ 233 BUS_SPACE_MAXADDR, /* high */ 234 NULL, NULL, /* filter */ 235 bytes, /* maxsize */ 236 1, /* num segs */ 237 maxsegsize, /* maxsegsize */ 238 BUS_DMA_COHERENT, /* flags */ 239 NULL, NULL, /* lock */ 240 &dma->dmat); /* tag */ 241 if (err != 0) { 242 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 243 return err; 244 } 245 246 /* allocate DMAable memory & map */ 247 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 248 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 249 | BUS_DMA_ZERO), &dma->map); 250 if (err != 0) { 251 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 252 goto abort_with_dmat; 253 } 254 255 /* load the memory */ 256 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 257 mxge_dmamap_callback, 258 (void *)&dma->bus_addr, 0); 259 if (err != 0) { 260 device_printf(dev, "couldn't load map (err = %d)\n", err); 261 goto abort_with_mem; 262 } 263 return 0; 264 265abort_with_mem: 266 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 267abort_with_dmat: 268 (void)bus_dma_tag_destroy(dma->dmat); 269 return err; 270} 271 272 273static void 274mxge_dma_free(mxge_dma_t *dma) 275{ 276 bus_dmamap_unload(dma->dmat, dma->map); 277 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 278 (void)bus_dma_tag_destroy(dma->dmat); 279} 280 281/* 282 * The eeprom strings on the lanaiX have the format 283 * SN=x\0 284 * MAC=x:x:x:x:x:x\0 285 * PC=text\0 286 */ 287 288static int 289mxge_parse_strings(mxge_softc_t *sc) 290{ 291 char *ptr; 292 int i, found_mac, found_sn2; 293 char *endptr; 294 295 ptr = sc->eeprom_strings; 296 found_mac = 0; 297 found_sn2 = 0; 298 while (*ptr != '\0') { 299 if (strncmp(ptr, "MAC=", 4) == 0) { 300 ptr += 4; 301 for (i = 0;;) { 302 sc->mac_addr[i] = strtoul(ptr, &endptr, 16); 303 if (endptr - ptr != 2) 304 goto abort; 305 ptr = endptr; 306 if (++i == 6) 307 break; 308 if (*ptr++ != ':') 309 goto abort; 310 } 311 found_mac = 1; 312 } else if (strncmp(ptr, "PC=", 3) == 0) { 313 ptr += 3; 314 strlcpy(sc->product_code_string, ptr, 315 sizeof(sc->product_code_string)); 316 } else if (!found_sn2 && (strncmp(ptr, "SN=", 3) == 0)) { 317 ptr += 3; 318 strlcpy(sc->serial_number_string, ptr, 319 sizeof(sc->serial_number_string)); 320 } else if (strncmp(ptr, "SN2=", 4) == 0) { 321 /* SN2 takes precedence over SN */ 322 ptr += 4; 323 found_sn2 = 1; 324 strlcpy(sc->serial_number_string, ptr, 325 sizeof(sc->serial_number_string)); 326 } 327 while (*ptr++ != '\0') {} 328 } 329 330 if (found_mac) 331 return 0; 332 333 abort: 334 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 335 336 return ENXIO; 337} 338 339#if defined __i386 || defined i386 || defined __i386__ || defined __x86_64__ 340static void 341mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 342{ 343 uint32_t val; 344 unsigned long base, off; 345 char *va, *cfgptr; 346 device_t pdev, mcp55; 347 uint16_t vendor_id, device_id, word; 348 uintptr_t bus, slot, func, ivend, idev; 349 uint32_t *ptr32; 350 351 352 if (!mxge_nvidia_ecrc_enable) 353 return; 354 355 pdev = device_get_parent(device_get_parent(sc->dev)); 356 if (pdev == NULL) { 357 device_printf(sc->dev, "could not find parent?\n"); 358 return; 359 } 360 vendor_id = pci_read_config(pdev, PCIR_VENDOR, 2); 361 device_id = pci_read_config(pdev, PCIR_DEVICE, 2); 362 363 if (vendor_id != 0x10de) 364 return; 365 366 base = 0; 367 368 if (device_id == 0x005d) { 369 /* ck804, base address is magic */ 370 base = 0xe0000000UL; 371 } else if (device_id >= 0x0374 && device_id <= 0x378) { 372 /* mcp55, base address stored in chipset */ 373 mcp55 = pci_find_bsf(0, 0, 0); 374 if (mcp55 && 375 0x10de == pci_read_config(mcp55, PCIR_VENDOR, 2) && 376 0x0369 == pci_read_config(mcp55, PCIR_DEVICE, 2)) { 377 word = pci_read_config(mcp55, 0x90, 2); 378 base = ((unsigned long)word & 0x7ffeU) << 25; 379 } 380 } 381 if (!base) 382 return; 383 384 /* XXXX 385 Test below is commented because it is believed that doing 386 config read/write beyond 0xff will access the config space 387 for the next larger function. Uncomment this and remove 388 the hacky pmap_mapdev() way of accessing config space when 389 FreeBSD grows support for extended pcie config space access 390 */ 391#if 0 392 /* See if we can, by some miracle, access the extended 393 config space */ 394 val = pci_read_config(pdev, 0x178, 4); 395 if (val != 0xffffffff) { 396 val |= 0x40; 397 pci_write_config(pdev, 0x178, val, 4); 398 return; 399 } 400#endif 401 /* Rather than using normal pci config space writes, we must 402 * map the Nvidia config space ourselves. This is because on 403 * opteron/nvidia class machine the 0xe000000 mapping is 404 * handled by the nvidia chipset, that means the internal PCI 405 * device (the on-chip northbridge), or the amd-8131 bridge 406 * and things behind them are not visible by this method. 407 */ 408 409 BUS_READ_IVAR(device_get_parent(pdev), pdev, 410 PCI_IVAR_BUS, &bus); 411 BUS_READ_IVAR(device_get_parent(pdev), pdev, 412 PCI_IVAR_SLOT, &slot); 413 BUS_READ_IVAR(device_get_parent(pdev), pdev, 414 PCI_IVAR_FUNCTION, &func); 415 BUS_READ_IVAR(device_get_parent(pdev), pdev, 416 PCI_IVAR_VENDOR, &ivend); 417 BUS_READ_IVAR(device_get_parent(pdev), pdev, 418 PCI_IVAR_DEVICE, &idev); 419 420 off = base 421 + 0x00100000UL * (unsigned long)bus 422 + 0x00001000UL * (unsigned long)(func 423 + 8 * slot); 424 425 /* map it into the kernel */ 426 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 427 428 429 if (va == NULL) { 430 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 431 return; 432 } 433 /* get a pointer to the config space mapped into the kernel */ 434 cfgptr = va + (off & PAGE_MASK); 435 436 /* make sure that we can really access it */ 437 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 438 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 439 if (! (vendor_id == ivend && device_id == idev)) { 440 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 441 vendor_id, device_id); 442 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 443 return; 444 } 445 446 ptr32 = (uint32_t*)(cfgptr + 0x178); 447 val = *ptr32; 448 449 if (val == 0xffffffff) { 450 device_printf(sc->dev, "extended mapping failed\n"); 451 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 452 return; 453 } 454 *ptr32 = val | 0x40; 455 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 456 if (mxge_verbose) 457 device_printf(sc->dev, 458 "Enabled ECRC on upstream Nvidia bridge " 459 "at %d:%d:%d\n", 460 (int)bus, (int)slot, (int)func); 461 return; 462} 463#else 464static void 465mxge_enable_nvidia_ecrc(mxge_softc_t *sc) 466{ 467 device_printf(sc->dev, 468 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 469 return; 470} 471#endif 472 473 474static int 475mxge_dma_test(mxge_softc_t *sc, int test_type) 476{ 477 mxge_cmd_t cmd; 478 bus_addr_t dmatest_bus = sc->dmabench_dma.bus_addr; 479 int status; 480 uint32_t len; 481 char *test = " "; 482 483 484 /* Run a small DMA test. 485 * The magic multipliers to the length tell the firmware 486 * to do DMA read, write, or read+write tests. The 487 * results are returned in cmd.data0. The upper 16 488 * bits of the return is the number of transfers completed. 489 * The lower 16 bits is the time in 0.5us ticks that the 490 * transfers took to complete. 491 */ 492 493 len = sc->tx_boundary; 494 495 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 496 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 497 cmd.data2 = len * 0x10000; 498 status = mxge_send_cmd(sc, test_type, &cmd); 499 if (status != 0) { 500 test = "read"; 501 goto abort; 502 } 503 sc->read_dma = ((cmd.data0>>16) * len * 2) / 504 (cmd.data0 & 0xffff); 505 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 506 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 507 cmd.data2 = len * 0x1; 508 status = mxge_send_cmd(sc, test_type, &cmd); 509 if (status != 0) { 510 test = "write"; 511 goto abort; 512 } 513 sc->write_dma = ((cmd.data0>>16) * len * 2) / 514 (cmd.data0 & 0xffff); 515 516 cmd.data0 = MXGE_LOWPART_TO_U32(dmatest_bus); 517 cmd.data1 = MXGE_HIGHPART_TO_U32(dmatest_bus); 518 cmd.data2 = len * 0x10001; 519 status = mxge_send_cmd(sc, test_type, &cmd); 520 if (status != 0) { 521 test = "read/write"; 522 goto abort; 523 } 524 sc->read_write_dma = ((cmd.data0>>16) * len * 2 * 2) / 525 (cmd.data0 & 0xffff); 526 527abort: 528 if (status != 0 && test_type != MXGEFW_CMD_UNALIGNED_TEST) 529 device_printf(sc->dev, "DMA %s benchmark failed: %d\n", 530 test, status); 531 532 return status; 533} 534 535/* 536 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 537 * when the PCI-E Completion packets are aligned on an 8-byte 538 * boundary. Some PCI-E chip sets always align Completion packets; on 539 * the ones that do not, the alignment can be enforced by enabling 540 * ECRC generation (if supported). 541 * 542 * When PCI-E Completion packets are not aligned, it is actually more 543 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 544 * 545 * If the driver can neither enable ECRC nor verify that it has 546 * already been enabled, then it must use a firmware image which works 547 * around unaligned completion packets (ethp_z8e.dat), and it should 548 * also ensure that it never gives the device a Read-DMA which is 549 * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is 550 * enabled, then the driver should use the aligned (eth_z8e.dat) 551 * firmware image, and set tx_boundary to 4KB. 552 */ 553 554static int 555mxge_firmware_probe(mxge_softc_t *sc) 556{ 557 device_t dev = sc->dev; 558 int reg, status; 559 uint16_t pectl; 560 561 sc->tx_boundary = 4096; 562 /* 563 * Verify the max read request size was set to 4KB 564 * before trying the test with 4KB. 565 */ 566 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 567 pectl = pci_read_config(dev, reg + 0x8, 2); 568 if ((pectl & (5 << 12)) != (5 << 12)) { 569 device_printf(dev, "Max Read Req. size != 4k (0x%x\n", 570 pectl); 571 sc->tx_boundary = 2048; 572 } 573 } 574 575 /* 576 * load the optimized firmware (which assumes aligned PCIe 577 * completions) in order to see if it works on this host. 578 */ 579 sc->fw_name = mxge_fw_aligned; 580 status = mxge_load_firmware(sc, 1); 581 if (status != 0) { 582 return status; 583 } 584 585 /* 586 * Enable ECRC if possible 587 */ 588 mxge_enable_nvidia_ecrc(sc); 589 590 /* 591 * Run a DMA test which watches for unaligned completions and 592 * aborts on the first one seen. Not required on Z8ES or newer. 593 */ 594 if (pci_get_revid(sc->dev) >= MXGE_PCI_REV_Z8ES) 595 return 0; 596 status = mxge_dma_test(sc, MXGEFW_CMD_UNALIGNED_TEST); 597 if (status == 0) 598 return 0; /* keep the aligned firmware */ 599 600 if (status != E2BIG) 601 device_printf(dev, "DMA test failed: %d\n", status); 602 if (status == ENOSYS) 603 device_printf(dev, "Falling back to ethp! " 604 "Please install up to date fw\n"); 605 return status; 606} 607 608static int 609mxge_select_firmware(mxge_softc_t *sc) 610{ 611 int aligned = 0; 612 int force_firmware = mxge_force_firmware; 613 614 if (sc->throttle) 615 force_firmware = sc->throttle; 616 617 if (force_firmware != 0) { 618 if (force_firmware == 1) 619 aligned = 1; 620 else 621 aligned = 0; 622 if (mxge_verbose) 623 device_printf(sc->dev, 624 "Assuming %s completions (forced)\n", 625 aligned ? "aligned" : "unaligned"); 626 goto abort; 627 } 628 629 /* if the PCIe link width is 4 or less, we can use the aligned 630 firmware and skip any checks */ 631 if (sc->link_width != 0 && sc->link_width <= 4) { 632 device_printf(sc->dev, 633 "PCIe x%d Link, expect reduced performance\n", 634 sc->link_width); 635 aligned = 1; 636 goto abort; 637 } 638 639 if (0 == mxge_firmware_probe(sc)) 640 return 0; 641 642abort: 643 if (aligned) { 644 sc->fw_name = mxge_fw_aligned; 645 sc->tx_boundary = 4096; 646 } else { 647 sc->fw_name = mxge_fw_unaligned; 648 sc->tx_boundary = 2048; 649 } 650 return (mxge_load_firmware(sc, 0)); 651} 652 653static int 654mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 655{ 656 657 658 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 659 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 660 be32toh(hdr->mcp_type)); 661 return EIO; 662 } 663 664 /* save firmware version for sysctl */ 665 strlcpy(sc->fw_version, hdr->version, sizeof(sc->fw_version)); 666 if (mxge_verbose) 667 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 668 669 sscanf(sc->fw_version, "%d.%d.%d", &sc->fw_ver_major, 670 &sc->fw_ver_minor, &sc->fw_ver_tiny); 671 672 if (!(sc->fw_ver_major == MXGEFW_VERSION_MAJOR 673 && sc->fw_ver_minor == MXGEFW_VERSION_MINOR)) { 674 device_printf(sc->dev, "Found firmware version %s\n", 675 sc->fw_version); 676 device_printf(sc->dev, "Driver needs %d.%d\n", 677 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 678 return EINVAL; 679 } 680 return 0; 681 682} 683 684static void * 685z_alloc(void *nil, u_int items, u_int size) 686{ 687 void *ptr; 688 689 ptr = malloc(items * size, M_TEMP, M_NOWAIT); 690 return ptr; 691} 692 693static void 694z_free(void *nil, void *ptr) 695{ 696 free(ptr, M_TEMP); 697} 698 699 700static int 701mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 702{ 703 z_stream zs; 704 char *inflate_buffer; 705 const struct firmware *fw; 706 const mcp_gen_header_t *hdr; 707 unsigned hdr_offset; 708 int status; 709 unsigned int i; 710 char dummy; 711 size_t fw_len; 712 713 fw = firmware_get(sc->fw_name); 714 if (fw == NULL) { 715 device_printf(sc->dev, "Could not find firmware image %s\n", 716 sc->fw_name); 717 return ENOENT; 718 } 719 720 721 722 /* setup zlib and decompress f/w */ 723 bzero(&zs, sizeof (zs)); 724 zs.zalloc = z_alloc; 725 zs.zfree = z_free; 726 status = inflateInit(&zs); 727 if (status != Z_OK) { 728 status = EIO; 729 goto abort_with_fw; 730 } 731 732 /* the uncompressed size is stored as the firmware version, 733 which would otherwise go unused */ 734 fw_len = (size_t) fw->version; 735 inflate_buffer = malloc(fw_len, M_TEMP, M_NOWAIT); 736 if (inflate_buffer == NULL) 737 goto abort_with_zs; 738 zs.avail_in = fw->datasize; 739 zs.next_in = __DECONST(char *, fw->data); 740 zs.avail_out = fw_len; 741 zs.next_out = inflate_buffer; 742 status = inflate(&zs, Z_FINISH); 743 if (status != Z_STREAM_END) { 744 device_printf(sc->dev, "zlib %d\n", status); 745 status = EIO; 746 goto abort_with_buffer; 747 } 748 749 /* check id */ 750 hdr_offset = htobe32(*(const uint32_t *) 751 (inflate_buffer + MCP_HEADER_PTR_OFFSET)); 752 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw_len) { 753 device_printf(sc->dev, "Bad firmware file"); 754 status = EIO; 755 goto abort_with_buffer; 756 } 757 hdr = (const void*)(inflate_buffer + hdr_offset); 758 759 status = mxge_validate_firmware(sc, hdr); 760 if (status != 0) 761 goto abort_with_buffer; 762 763 /* Copy the inflated firmware to NIC SRAM. */ 764 for (i = 0; i < fw_len; i += 256) { 765 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 766 inflate_buffer + i, 767 min(256U, (unsigned)(fw_len - i))); 768 wmb(); 769 dummy = *sc->sram; 770 wmb(); 771 } 772 773 *limit = fw_len; 774 status = 0; 775abort_with_buffer: 776 free(inflate_buffer, M_TEMP); 777abort_with_zs: 778 inflateEnd(&zs); 779abort_with_fw: 780 firmware_put(fw, FIRMWARE_UNLOAD); 781 return status; 782} 783 784/* 785 * Enable or disable periodic RDMAs from the host to make certain 786 * chipsets resend dropped PCIe messages 787 */ 788 789static void 790mxge_dummy_rdma(mxge_softc_t *sc, int enable) 791{ 792 char buf_bytes[72]; 793 volatile uint32_t *confirm; 794 volatile char *submit; 795 uint32_t *buf, dma_low, dma_high; 796 int i; 797 798 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 799 800 /* clear confirmation addr */ 801 confirm = (volatile uint32_t *)sc->cmd; 802 *confirm = 0; 803 wmb(); 804 805 /* send an rdma command to the PCIe engine, and wait for the 806 response in the confirmation address. The firmware should 807 write a -1 there to indicate it is alive and well 808 */ 809 810 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 811 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 812 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 813 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 814 buf[2] = htobe32(0xffffffff); /* confirm data */ 815 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 816 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 817 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 818 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 819 buf[5] = htobe32(enable); /* enable? */ 820 821 822 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 823 824 mxge_pio_copy(submit, buf, 64); 825 wmb(); 826 DELAY(1000); 827 wmb(); 828 i = 0; 829 while (*confirm != 0xffffffff && i < 20) { 830 DELAY(1000); 831 i++; 832 } 833 if (*confirm != 0xffffffff) { 834 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 835 (enable ? "enable" : "disable"), confirm, 836 *confirm); 837 } 838 return; 839} 840 841static int 842mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 843{ 844 mcp_cmd_t *buf; 845 char buf_bytes[sizeof(*buf) + 8]; 846 volatile mcp_cmd_response_t *response = sc->cmd; 847 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 848 uint32_t dma_low, dma_high; 849 int err, sleep_total = 0; 850 851 /* ensure buf is aligned to 8 bytes */ 852 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 853 854 buf->data0 = htobe32(data->data0); 855 buf->data1 = htobe32(data->data1); 856 buf->data2 = htobe32(data->data2); 857 buf->cmd = htobe32(cmd); 858 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 859 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 860 861 buf->response_addr.low = htobe32(dma_low); 862 buf->response_addr.high = htobe32(dma_high); 863 mtx_lock(&sc->cmd_mtx); 864 response->result = 0xffffffff; 865 wmb(); 866 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 867 868 /* wait up to 20ms */ 869 err = EAGAIN; 870 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 871 bus_dmamap_sync(sc->cmd_dma.dmat, 872 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 873 wmb(); 874 switch (be32toh(response->result)) { 875 case 0: 876 data->data0 = be32toh(response->data); 877 err = 0; 878 break; 879 case 0xffffffff: 880 DELAY(1000); 881 break; 882 case MXGEFW_CMD_UNKNOWN: 883 err = ENOSYS; 884 break; 885 case MXGEFW_CMD_ERROR_UNALIGNED: 886 err = E2BIG; 887 break; 888 case MXGEFW_CMD_ERROR_BUSY: 889 err = EBUSY; 890 break; 891 case MXGEFW_CMD_ERROR_I2C_ABSENT: 892 err = ENXIO; 893 break; 894 default: 895 device_printf(sc->dev, 896 "mxge: command %d " 897 "failed, result = %d\n", 898 cmd, be32toh(response->result)); 899 err = ENXIO; 900 break; 901 } 902 if (err != EAGAIN) 903 break; 904 } 905 if (err == EAGAIN) 906 device_printf(sc->dev, "mxge: command %d timed out" 907 "result = %d\n", 908 cmd, be32toh(response->result)); 909 mtx_unlock(&sc->cmd_mtx); 910 return err; 911} 912 913static int 914mxge_adopt_running_firmware(mxge_softc_t *sc) 915{ 916 struct mcp_gen_header *hdr; 917 const size_t bytes = sizeof (struct mcp_gen_header); 918 size_t hdr_offset; 919 int status; 920 921 /* find running firmware header */ 922 hdr_offset = htobe32(*(volatile uint32_t *) 923 (sc->sram + MCP_HEADER_PTR_OFFSET)); 924 925 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 926 device_printf(sc->dev, 927 "Running firmware has bad header offset (%d)\n", 928 (int)hdr_offset); 929 return EIO; 930 } 931 932 /* copy header of running firmware from SRAM to host memory to 933 * validate firmware */ 934 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 935 if (hdr == NULL) { 936 device_printf(sc->dev, "could not malloc firmware hdr\n"); 937 return ENOMEM; 938 } 939 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 940 rman_get_bushandle(sc->mem_res), 941 hdr_offset, (char *)hdr, bytes); 942 status = mxge_validate_firmware(sc, hdr); 943 free(hdr, M_DEVBUF); 944 945 /* 946 * check to see if adopted firmware has bug where adopting 947 * it will cause broadcasts to be filtered unless the NIC 948 * is kept in ALLMULTI mode 949 */ 950 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 951 sc->fw_ver_tiny >= 4 && sc->fw_ver_tiny <= 11) { 952 sc->adopted_rx_filter_bug = 1; 953 device_printf(sc->dev, "Adopting fw %d.%d.%d: " 954 "working around rx filter bug\n", 955 sc->fw_ver_major, sc->fw_ver_minor, 956 sc->fw_ver_tiny); 957 } 958 959 return status; 960} 961 962 963static int 964mxge_load_firmware(mxge_softc_t *sc, int adopt) 965{ 966 volatile uint32_t *confirm; 967 volatile char *submit; 968 char buf_bytes[72]; 969 uint32_t *buf, size, dma_low, dma_high; 970 int status, i; 971 972 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 973 974 size = sc->sram_size; 975 status = mxge_load_firmware_helper(sc, &size); 976 if (status) { 977 if (!adopt) 978 return status; 979 /* Try to use the currently running firmware, if 980 it is new enough */ 981 status = mxge_adopt_running_firmware(sc); 982 if (status) { 983 device_printf(sc->dev, 984 "failed to adopt running firmware\n"); 985 return status; 986 } 987 device_printf(sc->dev, 988 "Successfully adopted running firmware\n"); 989 if (sc->tx_boundary == 4096) { 990 device_printf(sc->dev, 991 "Using firmware currently running on NIC" 992 ". For optimal\n"); 993 device_printf(sc->dev, 994 "performance consider loading optimized " 995 "firmware\n"); 996 } 997 sc->fw_name = mxge_fw_unaligned; 998 sc->tx_boundary = 2048; 999 return 0; 1000 } 1001 /* clear confirmation addr */ 1002 confirm = (volatile uint32_t *)sc->cmd; 1003 *confirm = 0; 1004 wmb(); 1005 /* send a reload command to the bootstrap MCP, and wait for the 1006 response in the confirmation address. The firmware should 1007 write a -1 there to indicate it is alive and well 1008 */ 1009 1010 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 1011 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 1012 1013 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 1014 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 1015 buf[2] = htobe32(0xffffffff); /* confirm data */ 1016 1017 /* FIX: All newest firmware should un-protect the bottom of 1018 the sram before handoff. However, the very first interfaces 1019 do not. Therefore the handoff copy must skip the first 8 bytes 1020 */ 1021 /* where the code starts*/ 1022 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 1023 buf[4] = htobe32(size - 8); /* length of code */ 1024 buf[5] = htobe32(8); /* where to copy to */ 1025 buf[6] = htobe32(0); /* where to jump to */ 1026 1027 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 1028 mxge_pio_copy(submit, buf, 64); 1029 wmb(); 1030 DELAY(1000); 1031 wmb(); 1032 i = 0; 1033 while (*confirm != 0xffffffff && i < 20) { 1034 DELAY(1000*10); 1035 i++; 1036 bus_dmamap_sync(sc->cmd_dma.dmat, 1037 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 1038 } 1039 if (*confirm != 0xffffffff) { 1040 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 1041 confirm, *confirm); 1042 1043 return ENXIO; 1044 } 1045 return 0; 1046} 1047 1048static int 1049mxge_update_mac_address(mxge_softc_t *sc) 1050{ 1051 mxge_cmd_t cmd; 1052 uint8_t *addr = sc->mac_addr; 1053 int status; 1054 1055 1056 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 1057 | (addr[2] << 8) | addr[3]); 1058 1059 cmd.data1 = ((addr[4] << 8) | (addr[5])); 1060 1061 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 1062 return status; 1063} 1064 1065static int 1066mxge_change_pause(mxge_softc_t *sc, int pause) 1067{ 1068 mxge_cmd_t cmd; 1069 int status; 1070 1071 if (pause) 1072 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 1073 &cmd); 1074 else 1075 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 1076 &cmd); 1077 1078 if (status) { 1079 device_printf(sc->dev, "Failed to set flow control mode\n"); 1080 return ENXIO; 1081 } 1082 sc->pause = pause; 1083 return 0; 1084} 1085 1086static void 1087mxge_change_promisc(mxge_softc_t *sc, int promisc) 1088{ 1089 mxge_cmd_t cmd; 1090 int status; 1091 1092 if (mxge_always_promisc) 1093 promisc = 1; 1094 1095 if (promisc) 1096 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 1097 &cmd); 1098 else 1099 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 1100 &cmd); 1101 1102 if (status) { 1103 device_printf(sc->dev, "Failed to set promisc mode\n"); 1104 } 1105} 1106 1107static void 1108mxge_set_multicast_list(mxge_softc_t *sc) 1109{ 1110 mxge_cmd_t cmd; 1111 struct ifmultiaddr *ifma; 1112 struct ifnet *ifp = sc->ifp; 1113 int err; 1114 1115 /* This firmware is known to not support multicast */ 1116 if (!sc->fw_multicast_support) 1117 return; 1118 1119 /* Disable multicast filtering while we play with the lists*/ 1120 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 1121 if (err != 0) { 1122 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 1123 " error status: %d\n", err); 1124 return; 1125 } 1126 1127 if (sc->adopted_rx_filter_bug) 1128 return; 1129 1130 if (ifp->if_flags & IFF_ALLMULTI) 1131 /* request to disable multicast filtering, so quit here */ 1132 return; 1133 1134 /* Flush all the filters */ 1135 1136 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 1137 if (err != 0) { 1138 device_printf(sc->dev, 1139 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 1140 ", error status: %d\n", err); 1141 return; 1142 } 1143 1144 /* Walk the multicast list, and add each address */ 1145 1146 if_maddr_rlock(ifp); 1147 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1148 if (ifma->ifma_addr->sa_family != AF_LINK) 1149 continue; 1150 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 1151 &cmd.data0, 4); 1152 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 1153 &cmd.data1, 2); 1154 cmd.data0 = htonl(cmd.data0); 1155 cmd.data1 = htonl(cmd.data1); 1156 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 1157 if (err != 0) { 1158 device_printf(sc->dev, "Failed " 1159 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 1160 "%d\t", err); 1161 /* abort, leaving multicast filtering off */ 1162 if_maddr_runlock(ifp); 1163 return; 1164 } 1165 } 1166 if_maddr_runlock(ifp); 1167 /* Enable multicast filtering */ 1168 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 1169 if (err != 0) { 1170 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 1171 ", error status: %d\n", err); 1172 } 1173} 1174 1175static int 1176mxge_max_mtu(mxge_softc_t *sc) 1177{ 1178 mxge_cmd_t cmd; 1179 int status; 1180 1181 if (MJUMPAGESIZE - MXGEFW_PAD > MXGEFW_MAX_MTU) 1182 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1183 1184 /* try to set nbufs to see if it we can 1185 use virtually contiguous jumbos */ 1186 cmd.data0 = 0; 1187 status = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 1188 &cmd); 1189 if (status == 0) 1190 return MXGEFW_MAX_MTU - MXGEFW_PAD; 1191 1192 /* otherwise, we're limited to MJUMPAGESIZE */ 1193 return MJUMPAGESIZE - MXGEFW_PAD; 1194} 1195 1196static int 1197mxge_reset(mxge_softc_t *sc, int interrupts_setup) 1198{ 1199 struct mxge_slice_state *ss; 1200 mxge_rx_done_t *rx_done; 1201 volatile uint32_t *irq_claim; 1202 mxge_cmd_t cmd; 1203 int slice, status; 1204 1205 /* try to send a reset command to the card to see if it 1206 is alive */ 1207 memset(&cmd, 0, sizeof (cmd)); 1208 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 1209 if (status != 0) { 1210 device_printf(sc->dev, "failed reset\n"); 1211 return ENXIO; 1212 } 1213 1214 mxge_dummy_rdma(sc, 1); 1215 1216 1217 /* set the intrq size */ 1218 cmd.data0 = sc->rx_ring_size; 1219 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 1220 1221 /* 1222 * Even though we already know how many slices are supported 1223 * via mxge_slice_probe(), MXGEFW_CMD_GET_MAX_RSS_QUEUES 1224 * has magic side effects, and must be called after a reset. 1225 * It must be called prior to calling any RSS related cmds, 1226 * including assigning an interrupt queue for anything but 1227 * slice 0. It must also be called *after* 1228 * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by 1229 * the firmware to compute offsets. 1230 */ 1231 1232 if (sc->num_slices > 1) { 1233 /* ask the maximum number of slices it supports */ 1234 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, 1235 &cmd); 1236 if (status != 0) { 1237 device_printf(sc->dev, 1238 "failed to get number of slices\n"); 1239 return status; 1240 } 1241 /* 1242 * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior 1243 * to setting up the interrupt queue DMA 1244 */ 1245 cmd.data0 = sc->num_slices; 1246 cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE; 1247#ifdef IFNET_BUF_RING 1248 cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES; 1249#endif 1250 status = mxge_send_cmd(sc, MXGEFW_CMD_ENABLE_RSS_QUEUES, 1251 &cmd); 1252 if (status != 0) { 1253 device_printf(sc->dev, 1254 "failed to set number of slices\n"); 1255 return status; 1256 } 1257 } 1258 1259 1260 if (interrupts_setup) { 1261 /* Now exchange information about interrupts */ 1262 for (slice = 0; slice < sc->num_slices; slice++) { 1263 rx_done = &sc->ss[slice].rx_done; 1264 memset(rx_done->entry, 0, sc->rx_ring_size); 1265 cmd.data0 = MXGE_LOWPART_TO_U32(rx_done->dma.bus_addr); 1266 cmd.data1 = MXGE_HIGHPART_TO_U32(rx_done->dma.bus_addr); 1267 cmd.data2 = slice; 1268 status |= mxge_send_cmd(sc, 1269 MXGEFW_CMD_SET_INTRQ_DMA, 1270 &cmd); 1271 } 1272 } 1273 1274 status |= mxge_send_cmd(sc, 1275 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 1276 1277 1278 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 1279 1280 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 1281 irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 1282 1283 1284 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 1285 &cmd); 1286 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 1287 if (status != 0) { 1288 device_printf(sc->dev, "failed set interrupt parameters\n"); 1289 return status; 1290 } 1291 1292 1293 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 1294 1295 1296 /* run a DMA benchmark */ 1297 (void) mxge_dma_test(sc, MXGEFW_DMA_TEST); 1298 1299 for (slice = 0; slice < sc->num_slices; slice++) { 1300 ss = &sc->ss[slice]; 1301 1302 ss->irq_claim = irq_claim + (2 * slice); 1303 /* reset mcp/driver shared state back to 0 */ 1304 ss->rx_done.idx = 0; 1305 ss->rx_done.cnt = 0; 1306 ss->tx.req = 0; 1307 ss->tx.done = 0; 1308 ss->tx.pkt_done = 0; 1309 ss->tx.queue_active = 0; 1310 ss->tx.activate = 0; 1311 ss->tx.deactivate = 0; 1312 ss->tx.wake = 0; 1313 ss->tx.defrag = 0; 1314 ss->tx.stall = 0; 1315 ss->rx_big.cnt = 0; 1316 ss->rx_small.cnt = 0; 1317 ss->lc.lro_bad_csum = 0; 1318 ss->lc.lro_queued = 0; 1319 ss->lc.lro_flushed = 0; 1320 if (ss->fw_stats != NULL) { 1321 bzero(ss->fw_stats, sizeof *ss->fw_stats); 1322 } 1323 } 1324 sc->rdma_tags_available = 15; 1325 status = mxge_update_mac_address(sc); 1326 mxge_change_promisc(sc, sc->ifp->if_flags & IFF_PROMISC); 1327 mxge_change_pause(sc, sc->pause); 1328 mxge_set_multicast_list(sc); 1329 if (sc->throttle) { 1330 cmd.data0 = sc->throttle; 1331 if (mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, 1332 &cmd)) { 1333 device_printf(sc->dev, 1334 "can't enable throttle\n"); 1335 } 1336 } 1337 return status; 1338} 1339 1340static int 1341mxge_change_throttle(SYSCTL_HANDLER_ARGS) 1342{ 1343 mxge_cmd_t cmd; 1344 mxge_softc_t *sc; 1345 int err; 1346 unsigned int throttle; 1347 1348 sc = arg1; 1349 throttle = sc->throttle; 1350 err = sysctl_handle_int(oidp, &throttle, arg2, req); 1351 if (err != 0) { 1352 return err; 1353 } 1354 1355 if (throttle == sc->throttle) 1356 return 0; 1357 1358 if (throttle < MXGE_MIN_THROTTLE || throttle > MXGE_MAX_THROTTLE) 1359 return EINVAL; 1360 1361 mtx_lock(&sc->driver_mtx); 1362 cmd.data0 = throttle; 1363 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_THROTTLE_FACTOR, &cmd); 1364 if (err == 0) 1365 sc->throttle = throttle; 1366 mtx_unlock(&sc->driver_mtx); 1367 return err; 1368} 1369 1370static int 1371mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1372{ 1373 mxge_softc_t *sc; 1374 unsigned int intr_coal_delay; 1375 int err; 1376 1377 sc = arg1; 1378 intr_coal_delay = sc->intr_coal_delay; 1379 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1380 if (err != 0) { 1381 return err; 1382 } 1383 if (intr_coal_delay == sc->intr_coal_delay) 1384 return 0; 1385 1386 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1387 return EINVAL; 1388 1389 mtx_lock(&sc->driver_mtx); 1390 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1391 sc->intr_coal_delay = intr_coal_delay; 1392 1393 mtx_unlock(&sc->driver_mtx); 1394 return err; 1395} 1396 1397static int 1398mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1399{ 1400 mxge_softc_t *sc; 1401 unsigned int enabled; 1402 int err; 1403 1404 sc = arg1; 1405 enabled = sc->pause; 1406 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1407 if (err != 0) { 1408 return err; 1409 } 1410 if (enabled == sc->pause) 1411 return 0; 1412 1413 mtx_lock(&sc->driver_mtx); 1414 err = mxge_change_pause(sc, enabled); 1415 mtx_unlock(&sc->driver_mtx); 1416 return err; 1417} 1418 1419static int 1420mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1421{ 1422 int err; 1423 1424 if (arg1 == NULL) 1425 return EFAULT; 1426 arg2 = be32toh(*(int *)arg1); 1427 arg1 = NULL; 1428 err = sysctl_handle_int(oidp, arg1, arg2, req); 1429 1430 return err; 1431} 1432 1433static void 1434mxge_rem_sysctls(mxge_softc_t *sc) 1435{ 1436 struct mxge_slice_state *ss; 1437 int slice; 1438 1439 if (sc->slice_sysctl_tree == NULL) 1440 return; 1441 1442 for (slice = 0; slice < sc->num_slices; slice++) { 1443 ss = &sc->ss[slice]; 1444 if (ss == NULL || ss->sysctl_tree == NULL) 1445 continue; 1446 sysctl_ctx_free(&ss->sysctl_ctx); 1447 ss->sysctl_tree = NULL; 1448 } 1449 sysctl_ctx_free(&sc->slice_sysctl_ctx); 1450 sc->slice_sysctl_tree = NULL; 1451} 1452 1453static void 1454mxge_add_sysctls(mxge_softc_t *sc) 1455{ 1456 struct sysctl_ctx_list *ctx; 1457 struct sysctl_oid_list *children; 1458 mcp_irq_data_t *fw; 1459 struct mxge_slice_state *ss; 1460 int slice; 1461 char slice_num[8]; 1462 1463 ctx = device_get_sysctl_ctx(sc->dev); 1464 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1465 fw = sc->ss[0].fw_stats; 1466 1467 /* random information */ 1468 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1469 "firmware_version", 1470 CTLFLAG_RD, sc->fw_version, 1471 0, "firmware version"); 1472 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1473 "serial_number", 1474 CTLFLAG_RD, sc->serial_number_string, 1475 0, "serial number"); 1476 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1477 "product_code", 1478 CTLFLAG_RD, sc->product_code_string, 1479 0, "product_code"); 1480 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1481 "pcie_link_width", 1482 CTLFLAG_RD, &sc->link_width, 1483 0, "tx_boundary"); 1484 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1485 "tx_boundary", 1486 CTLFLAG_RD, &sc->tx_boundary, 1487 0, "tx_boundary"); 1488 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1489 "write_combine", 1490 CTLFLAG_RD, &sc->wc, 1491 0, "write combining PIO?"); 1492 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1493 "read_dma_MBs", 1494 CTLFLAG_RD, &sc->read_dma, 1495 0, "DMA Read speed in MB/s"); 1496 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1497 "write_dma_MBs", 1498 CTLFLAG_RD, &sc->write_dma, 1499 0, "DMA Write speed in MB/s"); 1500 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1501 "read_write_dma_MBs", 1502 CTLFLAG_RD, &sc->read_write_dma, 1503 0, "DMA concurrent Read/Write speed in MB/s"); 1504 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1505 "watchdog_resets", 1506 CTLFLAG_RD, &sc->watchdog_resets, 1507 0, "Number of times NIC was reset"); 1508 1509 1510 /* performance related tunables */ 1511 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1512 "intr_coal_delay", 1513 CTLTYPE_INT|CTLFLAG_RW, sc, 1514 0, mxge_change_intr_coal, 1515 "I", "interrupt coalescing delay in usecs"); 1516 1517 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1518 "throttle", 1519 CTLTYPE_INT|CTLFLAG_RW, sc, 1520 0, mxge_change_throttle, 1521 "I", "transmit throttling"); 1522 1523 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1524 "flow_control_enabled", 1525 CTLTYPE_INT|CTLFLAG_RW, sc, 1526 0, mxge_change_flow_control, 1527 "I", "interrupt coalescing delay in usecs"); 1528 1529 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1530 "deassert_wait", 1531 CTLFLAG_RW, &mxge_deassert_wait, 1532 0, "Wait for IRQ line to go low in ihandler"); 1533 1534 /* stats block from firmware is in network byte order. 1535 Need to swap it */ 1536 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1537 "link_up", 1538 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1539 0, mxge_handle_be32, 1540 "I", "link up"); 1541 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1542 "rdma_tags_available", 1543 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1544 0, mxge_handle_be32, 1545 "I", "rdma_tags_available"); 1546 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1547 "dropped_bad_crc32", 1548 CTLTYPE_INT|CTLFLAG_RD, 1549 &fw->dropped_bad_crc32, 1550 0, mxge_handle_be32, 1551 "I", "dropped_bad_crc32"); 1552 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1553 "dropped_bad_phy", 1554 CTLTYPE_INT|CTLFLAG_RD, 1555 &fw->dropped_bad_phy, 1556 0, mxge_handle_be32, 1557 "I", "dropped_bad_phy"); 1558 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1559 "dropped_link_error_or_filtered", 1560 CTLTYPE_INT|CTLFLAG_RD, 1561 &fw->dropped_link_error_or_filtered, 1562 0, mxge_handle_be32, 1563 "I", "dropped_link_error_or_filtered"); 1564 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1565 "dropped_link_overflow", 1566 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1567 0, mxge_handle_be32, 1568 "I", "dropped_link_overflow"); 1569 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1570 "dropped_multicast_filtered", 1571 CTLTYPE_INT|CTLFLAG_RD, 1572 &fw->dropped_multicast_filtered, 1573 0, mxge_handle_be32, 1574 "I", "dropped_multicast_filtered"); 1575 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1576 "dropped_no_big_buffer", 1577 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1578 0, mxge_handle_be32, 1579 "I", "dropped_no_big_buffer"); 1580 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1581 "dropped_no_small_buffer", 1582 CTLTYPE_INT|CTLFLAG_RD, 1583 &fw->dropped_no_small_buffer, 1584 0, mxge_handle_be32, 1585 "I", "dropped_no_small_buffer"); 1586 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1587 "dropped_overrun", 1588 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1589 0, mxge_handle_be32, 1590 "I", "dropped_overrun"); 1591 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1592 "dropped_pause", 1593 CTLTYPE_INT|CTLFLAG_RD, 1594 &fw->dropped_pause, 1595 0, mxge_handle_be32, 1596 "I", "dropped_pause"); 1597 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1598 "dropped_runt", 1599 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1600 0, mxge_handle_be32, 1601 "I", "dropped_runt"); 1602 1603 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1604 "dropped_unicast_filtered", 1605 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_unicast_filtered, 1606 0, mxge_handle_be32, 1607 "I", "dropped_unicast_filtered"); 1608 1609 /* verbose printing? */ 1610 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1611 "verbose", 1612 CTLFLAG_RW, &mxge_verbose, 1613 0, "verbose printing"); 1614 1615 /* add counters exported for debugging from all slices */ 1616 sysctl_ctx_init(&sc->slice_sysctl_ctx); 1617 sc->slice_sysctl_tree = 1618 SYSCTL_ADD_NODE(&sc->slice_sysctl_ctx, children, OID_AUTO, 1619 "slice", CTLFLAG_RD, 0, ""); 1620 1621 for (slice = 0; slice < sc->num_slices; slice++) { 1622 ss = &sc->ss[slice]; 1623 sysctl_ctx_init(&ss->sysctl_ctx); 1624 ctx = &ss->sysctl_ctx; 1625 children = SYSCTL_CHILDREN(sc->slice_sysctl_tree); 1626 sprintf(slice_num, "%d", slice); 1627 ss->sysctl_tree = 1628 SYSCTL_ADD_NODE(ctx, children, OID_AUTO, slice_num, 1629 CTLFLAG_RD, 0, ""); 1630 children = SYSCTL_CHILDREN(ss->sysctl_tree); 1631 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1632 "rx_small_cnt", 1633 CTLFLAG_RD, &ss->rx_small.cnt, 1634 0, "rx_small_cnt"); 1635 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1636 "rx_big_cnt", 1637 CTLFLAG_RD, &ss->rx_big.cnt, 1638 0, "rx_small_cnt"); 1639 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1640 "lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed, 1641 0, "number of lro merge queues flushed"); 1642 1643 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1644 "lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum, 1645 0, "number of bad csums preventing LRO"); 1646 1647 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1648 "lro_queued", CTLFLAG_RD, &ss->lc.lro_queued, 1649 0, "number of frames appended to lro merge" 1650 "queues"); 1651 1652#ifndef IFNET_BUF_RING 1653 /* only transmit from slice 0 for now */ 1654 if (slice > 0) 1655 continue; 1656#endif 1657 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1658 "tx_req", 1659 CTLFLAG_RD, &ss->tx.req, 1660 0, "tx_req"); 1661 1662 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1663 "tx_done", 1664 CTLFLAG_RD, &ss->tx.done, 1665 0, "tx_done"); 1666 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1667 "tx_pkt_done", 1668 CTLFLAG_RD, &ss->tx.pkt_done, 1669 0, "tx_done"); 1670 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1671 "tx_stall", 1672 CTLFLAG_RD, &ss->tx.stall, 1673 0, "tx_stall"); 1674 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1675 "tx_wake", 1676 CTLFLAG_RD, &ss->tx.wake, 1677 0, "tx_wake"); 1678 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1679 "tx_defrag", 1680 CTLFLAG_RD, &ss->tx.defrag, 1681 0, "tx_defrag"); 1682 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1683 "tx_queue_active", 1684 CTLFLAG_RD, &ss->tx.queue_active, 1685 0, "tx_queue_active"); 1686 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1687 "tx_activate", 1688 CTLFLAG_RD, &ss->tx.activate, 1689 0, "tx_activate"); 1690 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1691 "tx_deactivate", 1692 CTLFLAG_RD, &ss->tx.deactivate, 1693 0, "tx_deactivate"); 1694 } 1695} 1696 1697/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1698 backwards one at a time and handle ring wraps */ 1699 1700static inline void 1701mxge_submit_req_backwards(mxge_tx_ring_t *tx, 1702 mcp_kreq_ether_send_t *src, int cnt) 1703{ 1704 int idx, starting_slot; 1705 starting_slot = tx->req; 1706 while (cnt > 1) { 1707 cnt--; 1708 idx = (starting_slot + cnt) & tx->mask; 1709 mxge_pio_copy(&tx->lanai[idx], 1710 &src[cnt], sizeof(*src)); 1711 wmb(); 1712 } 1713} 1714 1715/* 1716 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1717 * at most 32 bytes at a time, so as to avoid involving the software 1718 * pio handler in the nic. We re-write the first segment's flags 1719 * to mark them valid only after writing the entire chain 1720 */ 1721 1722static inline void 1723mxge_submit_req(mxge_tx_ring_t *tx, mcp_kreq_ether_send_t *src, 1724 int cnt) 1725{ 1726 int idx, i; 1727 uint32_t *src_ints; 1728 volatile uint32_t *dst_ints; 1729 mcp_kreq_ether_send_t *srcp; 1730 volatile mcp_kreq_ether_send_t *dstp, *dst; 1731 uint8_t last_flags; 1732 1733 idx = tx->req & tx->mask; 1734 1735 last_flags = src->flags; 1736 src->flags = 0; 1737 wmb(); 1738 dst = dstp = &tx->lanai[idx]; 1739 srcp = src; 1740 1741 if ((idx + cnt) < tx->mask) { 1742 for (i = 0; i < (cnt - 1); i += 2) { 1743 mxge_pio_copy(dstp, srcp, 2 * sizeof(*src)); 1744 wmb(); /* force write every 32 bytes */ 1745 srcp += 2; 1746 dstp += 2; 1747 } 1748 } else { 1749 /* submit all but the first request, and ensure 1750 that it is submitted below */ 1751 mxge_submit_req_backwards(tx, src, cnt); 1752 i = 0; 1753 } 1754 if (i < cnt) { 1755 /* submit the first request */ 1756 mxge_pio_copy(dstp, srcp, sizeof(*src)); 1757 wmb(); /* barrier before setting valid flag */ 1758 } 1759 1760 /* re-write the last 32-bits with the valid flags */ 1761 src->flags = last_flags; 1762 src_ints = (uint32_t *)src; 1763 src_ints+=3; 1764 dst_ints = (volatile uint32_t *)dst; 1765 dst_ints+=3; 1766 *dst_ints = *src_ints; 1767 tx->req += cnt; 1768 wmb(); 1769} 1770 1771static int 1772mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, 1773 struct mxge_pkt_info *pi) 1774{ 1775 struct ether_vlan_header *eh; 1776 uint16_t etype; 1777 int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); 1778#if IFCAP_TSO6 && defined(INET6) 1779 int nxt; 1780#endif 1781 1782 eh = mtod(m, struct ether_vlan_header *); 1783 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1784 etype = ntohs(eh->evl_proto); 1785 pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 1786 } else { 1787 etype = ntohs(eh->evl_encap_proto); 1788 pi->ip_off = ETHER_HDR_LEN; 1789 } 1790 1791 switch (etype) { 1792 case ETHERTYPE_IP: 1793 /* 1794 * ensure ip header is in first mbuf, copy it to a 1795 * scratch buffer if not 1796 */ 1797 pi->ip = (struct ip *)(m->m_data + pi->ip_off); 1798 pi->ip6 = NULL; 1799 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { 1800 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), 1801 ss->scratch); 1802 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1803 } 1804 pi->ip_hlen = pi->ip->ip_hl << 2; 1805 if (!tso) 1806 return 0; 1807 1808 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1809 sizeof(struct tcphdr))) { 1810 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1811 sizeof(struct tcphdr), ss->scratch); 1812 pi->ip = (struct ip *)(ss->scratch + pi->ip_off); 1813 } 1814 pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); 1815 break; 1816#if IFCAP_TSO6 && defined(INET6) 1817 case ETHERTYPE_IPV6: 1818 pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); 1819 if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { 1820 m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), 1821 ss->scratch); 1822 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1823 } 1824 nxt = 0; 1825 pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); 1826 pi->ip_hlen -= pi->ip_off; 1827 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 1828 return EINVAL; 1829 1830 if (!tso) 1831 return 0; 1832 1833 if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) 1834 return EINVAL; 1835 1836 if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + 1837 sizeof(struct tcphdr))) { 1838 m_copydata(m, 0, pi->ip_off + pi->ip_hlen + 1839 sizeof(struct tcphdr), ss->scratch); 1840 pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); 1841 } 1842 pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); 1843 break; 1844#endif 1845 default: 1846 return EINVAL; 1847 } 1848 return 0; 1849} 1850 1851#if IFCAP_TSO4 1852 1853static void 1854mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, 1855 int busdma_seg_cnt, struct mxge_pkt_info *pi) 1856{ 1857 mxge_tx_ring_t *tx; 1858 mcp_kreq_ether_send_t *req; 1859 bus_dma_segment_t *seg; 1860 uint32_t low, high_swapped; 1861 int len, seglen, cum_len, cum_len_next; 1862 int next_is_first, chop, cnt, rdma_count, small; 1863 uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; 1864 uint8_t flags, flags_next; 1865 static int once; 1866 1867 mss = m->m_pkthdr.tso_segsz; 1868 1869 /* negative cum_len signifies to the 1870 * send loop that we are still in the 1871 * header portion of the TSO packet. 1872 */ 1873 1874 cksum_offset = pi->ip_off + pi->ip_hlen; 1875 cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); 1876 1877 /* TSO implies checksum offload on this hardware */ 1878 if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { 1879 /* 1880 * If packet has full TCP csum, replace it with pseudo hdr 1881 * sum that the NIC expects, otherwise the NIC will emit 1882 * packets with bad TCP checksums. 1883 */ 1884 m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); 1885 if (pi->ip6) { 1886#if (CSUM_TCP_IPV6 != 0) && defined(INET6) 1887 m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; 1888 sum = in6_cksum_pseudo(pi->ip6, 1889 m->m_pkthdr.len - cksum_offset, 1890 IPPROTO_TCP, 0); 1891#endif 1892 } else { 1893#ifdef INET 1894 m->m_pkthdr.csum_flags |= CSUM_TCP; 1895 sum = in_pseudo(pi->ip->ip_src.s_addr, 1896 pi->ip->ip_dst.s_addr, 1897 htons(IPPROTO_TCP + (m->m_pkthdr.len - 1898 cksum_offset))); 1899#endif 1900 } 1901 m_copyback(m, offsetof(struct tcphdr, th_sum) + 1902 cksum_offset, sizeof(sum), (caddr_t)&sum); 1903 } 1904 flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; 1905 1906 1907 /* for TSO, pseudo_hdr_offset holds mss. 1908 * The firmware figures out where to put 1909 * the checksum by parsing the header. */ 1910 pseudo_hdr_offset = htobe16(mss); 1911 1912 if (pi->ip6) { 1913 /* 1914 * for IPv6 TSO, the "checksum offset" is re-purposed 1915 * to store the TCP header len 1916 */ 1917 cksum_offset = (pi->tcp->th_off << 2); 1918 } 1919 1920 tx = &ss->tx; 1921 req = tx->req_list; 1922 seg = tx->seg_list; 1923 cnt = 0; 1924 rdma_count = 0; 1925 /* "rdma_count" is the number of RDMAs belonging to the 1926 * current packet BEFORE the current send request. For 1927 * non-TSO packets, this is equal to "count". 1928 * For TSO packets, rdma_count needs to be reset 1929 * to 0 after a segment cut. 1930 * 1931 * The rdma_count field of the send request is 1932 * the number of RDMAs of the packet starting at 1933 * that request. For TSO send requests with one ore more cuts 1934 * in the middle, this is the number of RDMAs starting 1935 * after the last cut in the request. All previous 1936 * segments before the last cut implicitly have 1 RDMA. 1937 * 1938 * Since the number of RDMAs is not known beforehand, 1939 * it must be filled-in retroactively - after each 1940 * segmentation cut or at the end of the entire packet. 1941 */ 1942 1943 while (busdma_seg_cnt) { 1944 /* Break the busdma segment up into pieces*/ 1945 low = MXGE_LOWPART_TO_U32(seg->ds_addr); 1946 high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 1947 len = seg->ds_len; 1948 1949 while (len) { 1950 flags_next = flags & ~MXGEFW_FLAGS_FIRST; 1951 seglen = len; 1952 cum_len_next = cum_len + seglen; 1953 (req-rdma_count)->rdma_count = rdma_count + 1; 1954 if (__predict_true(cum_len >= 0)) { 1955 /* payload */ 1956 chop = (cum_len_next > mss); 1957 cum_len_next = cum_len_next % mss; 1958 next_is_first = (cum_len_next == 0); 1959 flags |= chop * MXGEFW_FLAGS_TSO_CHOP; 1960 flags_next |= next_is_first * 1961 MXGEFW_FLAGS_FIRST; 1962 rdma_count |= -(chop | next_is_first); 1963 rdma_count += chop & !next_is_first; 1964 } else if (cum_len_next >= 0) { 1965 /* header ends */ 1966 rdma_count = -1; 1967 cum_len_next = 0; 1968 seglen = -cum_len; 1969 small = (mss <= MXGEFW_SEND_SMALL_SIZE); 1970 flags_next = MXGEFW_FLAGS_TSO_PLD | 1971 MXGEFW_FLAGS_FIRST | 1972 (small * MXGEFW_FLAGS_SMALL); 1973 } 1974 1975 req->addr_high = high_swapped; 1976 req->addr_low = htobe32(low); 1977 req->pseudo_hdr_offset = pseudo_hdr_offset; 1978 req->pad = 0; 1979 req->rdma_count = 1; 1980 req->length = htobe16(seglen); 1981 req->cksum_offset = cksum_offset; 1982 req->flags = flags | ((cum_len & 1) * 1983 MXGEFW_FLAGS_ALIGN_ODD); 1984 low += seglen; 1985 len -= seglen; 1986 cum_len = cum_len_next; 1987 flags = flags_next; 1988 req++; 1989 cnt++; 1990 rdma_count++; 1991 if (cksum_offset != 0 && !pi->ip6) { 1992 if (__predict_false(cksum_offset > seglen)) 1993 cksum_offset -= seglen; 1994 else 1995 cksum_offset = 0; 1996 } 1997 if (__predict_false(cnt > tx->max_desc)) 1998 goto drop; 1999 } 2000 busdma_seg_cnt--; 2001 seg++; 2002 } 2003 (req-rdma_count)->rdma_count = rdma_count; 2004 2005 do { 2006 req--; 2007 req->flags |= MXGEFW_FLAGS_TSO_LAST; 2008 } while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST))); 2009 2010 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2011 mxge_submit_req(tx, tx->req_list, cnt); 2012#ifdef IFNET_BUF_RING 2013 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2014 /* tell the NIC to start polling this slice */ 2015 *tx->send_go = 1; 2016 tx->queue_active = 1; 2017 tx->activate++; 2018 wmb(); 2019 } 2020#endif 2021 return; 2022 2023drop: 2024 bus_dmamap_unload(tx->dmat, tx->info[tx->req & tx->mask].map); 2025 m_freem(m); 2026 ss->oerrors++; 2027 if (!once) { 2028 printf("tx->max_desc exceeded via TSO!\n"); 2029 printf("mss = %d, %ld, %d!\n", mss, 2030 (long)seg - (long)tx->seg_list, tx->max_desc); 2031 once = 1; 2032 } 2033 return; 2034 2035} 2036 2037#endif /* IFCAP_TSO4 */ 2038 2039#ifdef MXGE_NEW_VLAN_API 2040/* 2041 * We reproduce the software vlan tag insertion from 2042 * net/if_vlan.c:vlan_start() here so that we can advertise "hardware" 2043 * vlan tag insertion. We need to advertise this in order to have the 2044 * vlan interface respect our csum offload flags. 2045 */ 2046static struct mbuf * 2047mxge_vlan_tag_insert(struct mbuf *m) 2048{ 2049 struct ether_vlan_header *evl; 2050 2051 M_PREPEND(m, ETHER_VLAN_ENCAP_LEN, M_NOWAIT); 2052 if (__predict_false(m == NULL)) 2053 return NULL; 2054 if (m->m_len < sizeof(*evl)) { 2055 m = m_pullup(m, sizeof(*evl)); 2056 if (__predict_false(m == NULL)) 2057 return NULL; 2058 } 2059 /* 2060 * Transform the Ethernet header into an Ethernet header 2061 * with 802.1Q encapsulation. 2062 */ 2063 evl = mtod(m, struct ether_vlan_header *); 2064 bcopy((char *)evl + ETHER_VLAN_ENCAP_LEN, 2065 (char *)evl, ETHER_HDR_LEN - ETHER_TYPE_LEN); 2066 evl->evl_encap_proto = htons(ETHERTYPE_VLAN); 2067 evl->evl_tag = htons(m->m_pkthdr.ether_vtag); 2068 m->m_flags &= ~M_VLANTAG; 2069 return m; 2070} 2071#endif /* MXGE_NEW_VLAN_API */ 2072 2073static void 2074mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) 2075{ 2076 struct mxge_pkt_info pi = {0,0,0,0}; 2077 mxge_softc_t *sc; 2078 mcp_kreq_ether_send_t *req; 2079 bus_dma_segment_t *seg; 2080 struct mbuf *m_tmp; 2081 struct ifnet *ifp; 2082 mxge_tx_ring_t *tx; 2083 int cnt, cum_len, err, i, idx, odd_flag; 2084 uint16_t pseudo_hdr_offset; 2085 uint8_t flags, cksum_offset; 2086 2087 2088 sc = ss->sc; 2089 ifp = sc->ifp; 2090 tx = &ss->tx; 2091 2092#ifdef MXGE_NEW_VLAN_API 2093 if (m->m_flags & M_VLANTAG) { 2094 m = mxge_vlan_tag_insert(m); 2095 if (__predict_false(m == NULL)) 2096 goto drop_without_m; 2097 } 2098#endif 2099 if (m->m_pkthdr.csum_flags & 2100 (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2101 if (mxge_parse_tx(ss, m, &pi)) 2102 goto drop; 2103 } 2104 2105 /* (try to) map the frame for DMA */ 2106 idx = tx->req & tx->mask; 2107 err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, 2108 m, tx->seg_list, &cnt, 2109 BUS_DMA_NOWAIT); 2110 if (__predict_false(err == EFBIG)) { 2111 /* Too many segments in the chain. Try 2112 to defrag */ 2113 m_tmp = m_defrag(m, M_NOWAIT); 2114 if (m_tmp == NULL) { 2115 goto drop; 2116 } 2117 ss->tx.defrag++; 2118 m = m_tmp; 2119 err = bus_dmamap_load_mbuf_sg(tx->dmat, 2120 tx->info[idx].map, 2121 m, tx->seg_list, &cnt, 2122 BUS_DMA_NOWAIT); 2123 } 2124 if (__predict_false(err != 0)) { 2125 device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d" 2126 " packet len = %d\n", err, m->m_pkthdr.len); 2127 goto drop; 2128 } 2129 bus_dmamap_sync(tx->dmat, tx->info[idx].map, 2130 BUS_DMASYNC_PREWRITE); 2131 tx->info[idx].m = m; 2132 2133#if IFCAP_TSO4 2134 /* TSO is different enough, we handle it in another routine */ 2135 if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { 2136 mxge_encap_tso(ss, m, cnt, &pi); 2137 return; 2138 } 2139#endif 2140 2141 req = tx->req_list; 2142 cksum_offset = 0; 2143 pseudo_hdr_offset = 0; 2144 flags = MXGEFW_FLAGS_NO_TSO; 2145 2146 /* checksum offloading? */ 2147 if (m->m_pkthdr.csum_flags & 2148 (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { 2149 /* ensure ip header is in first mbuf, copy 2150 it to a scratch buffer if not */ 2151 cksum_offset = pi.ip_off + pi.ip_hlen; 2152 pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; 2153 pseudo_hdr_offset = htobe16(pseudo_hdr_offset); 2154 req->cksum_offset = cksum_offset; 2155 flags |= MXGEFW_FLAGS_CKSUM; 2156 odd_flag = MXGEFW_FLAGS_ALIGN_ODD; 2157 } else { 2158 odd_flag = 0; 2159 } 2160 if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE) 2161 flags |= MXGEFW_FLAGS_SMALL; 2162 2163 /* convert segments into a request list */ 2164 cum_len = 0; 2165 seg = tx->seg_list; 2166 req->flags = MXGEFW_FLAGS_FIRST; 2167 for (i = 0; i < cnt; i++) { 2168 req->addr_low = 2169 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2170 req->addr_high = 2171 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2172 req->length = htobe16(seg->ds_len); 2173 req->cksum_offset = cksum_offset; 2174 if (cksum_offset > seg->ds_len) 2175 cksum_offset -= seg->ds_len; 2176 else 2177 cksum_offset = 0; 2178 req->pseudo_hdr_offset = pseudo_hdr_offset; 2179 req->pad = 0; /* complete solid 16-byte block */ 2180 req->rdma_count = 1; 2181 req->flags |= flags | ((cum_len & 1) * odd_flag); 2182 cum_len += seg->ds_len; 2183 seg++; 2184 req++; 2185 req->flags = 0; 2186 } 2187 req--; 2188 /* pad runts to 60 bytes */ 2189 if (cum_len < 60) { 2190 req++; 2191 req->addr_low = 2192 htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr)); 2193 req->addr_high = 2194 htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr)); 2195 req->length = htobe16(60 - cum_len); 2196 req->cksum_offset = 0; 2197 req->pseudo_hdr_offset = pseudo_hdr_offset; 2198 req->pad = 0; /* complete solid 16-byte block */ 2199 req->rdma_count = 1; 2200 req->flags |= flags | ((cum_len & 1) * odd_flag); 2201 cnt++; 2202 } 2203 2204 tx->req_list[0].rdma_count = cnt; 2205#if 0 2206 /* print what the firmware will see */ 2207 for (i = 0; i < cnt; i++) { 2208 printf("%d: addr: 0x%x 0x%x len:%d pso%d," 2209 "cso:%d, flags:0x%x, rdma:%d\n", 2210 i, (int)ntohl(tx->req_list[i].addr_high), 2211 (int)ntohl(tx->req_list[i].addr_low), 2212 (int)ntohs(tx->req_list[i].length), 2213 (int)ntohs(tx->req_list[i].pseudo_hdr_offset), 2214 tx->req_list[i].cksum_offset, tx->req_list[i].flags, 2215 tx->req_list[i].rdma_count); 2216 } 2217 printf("--------------\n"); 2218#endif 2219 tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1; 2220 mxge_submit_req(tx, tx->req_list, cnt); 2221#ifdef IFNET_BUF_RING 2222 if ((ss->sc->num_slices > 1) && tx->queue_active == 0) { 2223 /* tell the NIC to start polling this slice */ 2224 *tx->send_go = 1; 2225 tx->queue_active = 1; 2226 tx->activate++; 2227 wmb(); 2228 } 2229#endif 2230 return; 2231 2232drop: 2233 m_freem(m); 2234drop_without_m: 2235 ss->oerrors++; 2236 return; 2237} 2238 2239#ifdef IFNET_BUF_RING 2240static void 2241mxge_qflush(struct ifnet *ifp) 2242{ 2243 mxge_softc_t *sc = ifp->if_softc; 2244 mxge_tx_ring_t *tx; 2245 struct mbuf *m; 2246 int slice; 2247 2248 for (slice = 0; slice < sc->num_slices; slice++) { 2249 tx = &sc->ss[slice].tx; 2250 mtx_lock(&tx->mtx); 2251 while ((m = buf_ring_dequeue_sc(tx->br)) != NULL) 2252 m_freem(m); 2253 mtx_unlock(&tx->mtx); 2254 } 2255 if_qflush(ifp); 2256} 2257 2258static inline void 2259mxge_start_locked(struct mxge_slice_state *ss) 2260{ 2261 mxge_softc_t *sc; 2262 struct mbuf *m; 2263 struct ifnet *ifp; 2264 mxge_tx_ring_t *tx; 2265 2266 sc = ss->sc; 2267 ifp = sc->ifp; 2268 tx = &ss->tx; 2269 2270 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2271 m = drbr_dequeue(ifp, tx->br); 2272 if (m == NULL) { 2273 return; 2274 } 2275 /* let BPF see it */ 2276 BPF_MTAP(ifp, m); 2277 2278 /* give it to the nic */ 2279 mxge_encap(ss, m); 2280 } 2281 /* ran out of transmit slots */ 2282 if (((ss->if_drv_flags & IFF_DRV_OACTIVE) == 0) 2283 && (!drbr_empty(ifp, tx->br))) { 2284 ss->if_drv_flags |= IFF_DRV_OACTIVE; 2285 tx->stall++; 2286 } 2287} 2288 2289static int 2290mxge_transmit_locked(struct mxge_slice_state *ss, struct mbuf *m) 2291{ 2292 mxge_softc_t *sc; 2293 struct ifnet *ifp; 2294 mxge_tx_ring_t *tx; 2295 int err; 2296 2297 sc = ss->sc; 2298 ifp = sc->ifp; 2299 tx = &ss->tx; 2300 2301 if ((ss->if_drv_flags & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) != 2302 IFF_DRV_RUNNING) { 2303 err = drbr_enqueue(ifp, tx->br, m); 2304 return (err); 2305 } 2306 2307 if (!drbr_needs_enqueue(ifp, tx->br) && 2308 ((tx->mask - (tx->req - tx->done)) > tx->max_desc)) { 2309 /* let BPF see it */ 2310 BPF_MTAP(ifp, m); 2311 /* give it to the nic */ 2312 mxge_encap(ss, m); 2313 } else if ((err = drbr_enqueue(ifp, tx->br, m)) != 0) { 2314 return (err); 2315 } 2316 if (!drbr_empty(ifp, tx->br)) 2317 mxge_start_locked(ss); 2318 return (0); 2319} 2320 2321static int 2322mxge_transmit(struct ifnet *ifp, struct mbuf *m) 2323{ 2324 mxge_softc_t *sc = ifp->if_softc; 2325 struct mxge_slice_state *ss; 2326 mxge_tx_ring_t *tx; 2327 int err = 0; 2328 int slice; 2329 2330 slice = m->m_pkthdr.flowid; 2331 slice &= (sc->num_slices - 1); /* num_slices always power of 2 */ 2332 2333 ss = &sc->ss[slice]; 2334 tx = &ss->tx; 2335 2336 if (mtx_trylock(&tx->mtx)) { 2337 err = mxge_transmit_locked(ss, m); 2338 mtx_unlock(&tx->mtx); 2339 } else { 2340 err = drbr_enqueue(ifp, tx->br, m); 2341 } 2342 2343 return (err); 2344} 2345 2346#else 2347 2348static inline void 2349mxge_start_locked(struct mxge_slice_state *ss) 2350{ 2351 mxge_softc_t *sc; 2352 struct mbuf *m; 2353 struct ifnet *ifp; 2354 mxge_tx_ring_t *tx; 2355 2356 sc = ss->sc; 2357 ifp = sc->ifp; 2358 tx = &ss->tx; 2359 while ((tx->mask - (tx->req - tx->done)) > tx->max_desc) { 2360 IFQ_DRV_DEQUEUE(&ifp->if_snd, m); 2361 if (m == NULL) { 2362 return; 2363 } 2364 /* let BPF see it */ 2365 BPF_MTAP(ifp, m); 2366 2367 /* give it to the nic */ 2368 mxge_encap(ss, m); 2369 } 2370 /* ran out of transmit slots */ 2371 if ((sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) { 2372 sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; 2373 tx->stall++; 2374 } 2375} 2376#endif 2377static void 2378mxge_start(struct ifnet *ifp) 2379{ 2380 mxge_softc_t *sc = ifp->if_softc; 2381 struct mxge_slice_state *ss; 2382 2383 /* only use the first slice for now */ 2384 ss = &sc->ss[0]; 2385 mtx_lock(&ss->tx.mtx); 2386 mxge_start_locked(ss); 2387 mtx_unlock(&ss->tx.mtx); 2388} 2389 2390/* 2391 * copy an array of mcp_kreq_ether_recv_t's to the mcp. Copy 2392 * at most 32 bytes at a time, so as to avoid involving the software 2393 * pio handler in the nic. We re-write the first segment's low 2394 * DMA address to mark it valid only after we write the entire chunk 2395 * in a burst 2396 */ 2397static inline void 2398mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst, 2399 mcp_kreq_ether_recv_t *src) 2400{ 2401 uint32_t low; 2402 2403 low = src->addr_low; 2404 src->addr_low = 0xffffffff; 2405 mxge_pio_copy(dst, src, 4 * sizeof (*src)); 2406 wmb(); 2407 mxge_pio_copy(dst + 4, src + 4, 4 * sizeof (*src)); 2408 wmb(); 2409 src->addr_low = low; 2410 dst->addr_low = low; 2411 wmb(); 2412} 2413 2414static int 2415mxge_get_buf_small(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2416{ 2417 bus_dma_segment_t seg; 2418 struct mbuf *m; 2419 mxge_rx_ring_t *rx = &ss->rx_small; 2420 int cnt, err; 2421 2422 m = m_gethdr(M_NOWAIT, MT_DATA); 2423 if (m == NULL) { 2424 rx->alloc_fail++; 2425 err = ENOBUFS; 2426 goto done; 2427 } 2428 m->m_len = MHLEN; 2429 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2430 &seg, &cnt, BUS_DMA_NOWAIT); 2431 if (err != 0) { 2432 m_free(m); 2433 goto done; 2434 } 2435 rx->info[idx].m = m; 2436 rx->shadow[idx].addr_low = 2437 htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr)); 2438 rx->shadow[idx].addr_high = 2439 htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr)); 2440 2441done: 2442 if ((idx & 7) == 7) 2443 mxge_submit_8rx(&rx->lanai[idx - 7], &rx->shadow[idx - 7]); 2444 return err; 2445} 2446 2447static int 2448mxge_get_buf_big(struct mxge_slice_state *ss, bus_dmamap_t map, int idx) 2449{ 2450 bus_dma_segment_t seg[3]; 2451 struct mbuf *m; 2452 mxge_rx_ring_t *rx = &ss->rx_big; 2453 int cnt, err, i; 2454 2455 m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx->cl_size); 2456 if (m == NULL) { 2457 rx->alloc_fail++; 2458 err = ENOBUFS; 2459 goto done; 2460 } 2461 m->m_len = rx->mlen; 2462 err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m, 2463 seg, &cnt, BUS_DMA_NOWAIT); 2464 if (err != 0) { 2465 m_free(m); 2466 goto done; 2467 } 2468 rx->info[idx].m = m; 2469 rx->shadow[idx].addr_low = 2470 htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr)); 2471 rx->shadow[idx].addr_high = 2472 htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr)); 2473 2474#if MXGE_VIRT_JUMBOS 2475 for (i = 1; i < cnt; i++) { 2476 rx->shadow[idx + i].addr_low = 2477 htobe32(MXGE_LOWPART_TO_U32(seg[i].ds_addr)); 2478 rx->shadow[idx + i].addr_high = 2479 htobe32(MXGE_HIGHPART_TO_U32(seg[i].ds_addr)); 2480 } 2481#endif 2482 2483done: 2484 for (i = 0; i < rx->nbufs; i++) { 2485 if ((idx & 7) == 7) { 2486 mxge_submit_8rx(&rx->lanai[idx - 7], 2487 &rx->shadow[idx - 7]); 2488 } 2489 idx++; 2490 } 2491 return err; 2492} 2493 2494#ifdef INET6 2495 2496static uint16_t 2497mxge_csum_generic(uint16_t *raw, int len) 2498{ 2499 uint32_t csum; 2500 2501 2502 csum = 0; 2503 while (len > 0) { 2504 csum += *raw; 2505 raw++; 2506 len -= 2; 2507 } 2508 csum = (csum >> 16) + (csum & 0xffff); 2509 csum = (csum >> 16) + (csum & 0xffff); 2510 return (uint16_t)csum; 2511} 2512 2513static inline uint16_t 2514mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum) 2515{ 2516 uint32_t partial; 2517 int nxt, cksum_offset; 2518 struct ip6_hdr *ip6 = p; 2519 uint16_t c; 2520 2521 nxt = ip6->ip6_nxt; 2522 cksum_offset = sizeof (*ip6) + ETHER_HDR_LEN; 2523 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) { 2524 cksum_offset = ip6_lasthdr(m, ETHER_HDR_LEN, 2525 IPPROTO_IPV6, &nxt); 2526 if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) 2527 return (1); 2528 } 2529 2530 /* 2531 * IPv6 headers do not contain a checksum, and hence 2532 * do not checksum to zero, so they don't "fall out" 2533 * of the partial checksum calculation like IPv4 2534 * headers do. We need to fix the partial checksum by 2535 * subtracting the checksum of the IPv6 header. 2536 */ 2537 2538 partial = mxge_csum_generic((uint16_t *)ip6, cksum_offset - 2539 ETHER_HDR_LEN); 2540 csum += ~partial; 2541 csum += (csum < ~partial); 2542 csum = (csum >> 16) + (csum & 0xFFFF); 2543 csum = (csum >> 16) + (csum & 0xFFFF); 2544 c = in6_cksum_pseudo(ip6, m->m_pkthdr.len - cksum_offset, nxt, 2545 csum); 2546 c ^= 0xffff; 2547 return (c); 2548} 2549#endif /* INET6 */ 2550/* 2551 * Myri10GE hardware checksums are not valid if the sender 2552 * padded the frame with non-zero padding. This is because 2553 * the firmware just does a simple 16-bit 1s complement 2554 * checksum across the entire frame, excluding the first 14 2555 * bytes. It is best to simply to check the checksum and 2556 * tell the stack about it only if the checksum is good 2557 */ 2558 2559static inline uint16_t 2560mxge_rx_csum(struct mbuf *m, int csum) 2561{ 2562 struct ether_header *eh; 2563#ifdef INET 2564 struct ip *ip; 2565#endif 2566#if defined(INET) || defined(INET6) 2567 int cap = m->m_pkthdr.rcvif->if_capenable; 2568#endif 2569 uint16_t c, etype; 2570 2571 2572 eh = mtod(m, struct ether_header *); 2573 etype = ntohs(eh->ether_type); 2574 switch (etype) { 2575#ifdef INET 2576 case ETHERTYPE_IP: 2577 if ((cap & IFCAP_RXCSUM) == 0) 2578 return (1); 2579 ip = (struct ip *)(eh + 1); 2580 if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP) 2581 return (1); 2582 c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, 2583 htonl(ntohs(csum) + ntohs(ip->ip_len) - 2584 (ip->ip_hl << 2) + ip->ip_p)); 2585 c ^= 0xffff; 2586 break; 2587#endif 2588#ifdef INET6 2589 case ETHERTYPE_IPV6: 2590 if ((cap & IFCAP_RXCSUM_IPV6) == 0) 2591 return (1); 2592 c = mxge_rx_csum6((eh + 1), m, csum); 2593 break; 2594#endif 2595 default: 2596 c = 1; 2597 } 2598 return (c); 2599} 2600 2601static void 2602mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum) 2603{ 2604 struct ether_vlan_header *evl; 2605 struct ether_header *eh; 2606 uint32_t partial; 2607 2608 evl = mtod(m, struct ether_vlan_header *); 2609 eh = mtod(m, struct ether_header *); 2610 2611 /* 2612 * fix checksum by subtracting ETHER_VLAN_ENCAP_LEN bytes 2613 * after what the firmware thought was the end of the ethernet 2614 * header. 2615 */ 2616 2617 /* put checksum into host byte order */ 2618 *csum = ntohs(*csum); 2619 partial = ntohl(*(uint32_t *)(mtod(m, char *) + ETHER_HDR_LEN)); 2620 (*csum) += ~partial; 2621 (*csum) += ((*csum) < ~partial); 2622 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2623 (*csum) = ((*csum) >> 16) + ((*csum) & 0xFFFF); 2624 2625 /* restore checksum to network byte order; 2626 later consumers expect this */ 2627 *csum = htons(*csum); 2628 2629 /* save the tag */ 2630#ifdef MXGE_NEW_VLAN_API 2631 m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag); 2632#else 2633 { 2634 struct m_tag *mtag; 2635 mtag = m_tag_alloc(MTAG_VLAN, MTAG_VLAN_TAG, sizeof(u_int), 2636 M_NOWAIT); 2637 if (mtag == NULL) 2638 return; 2639 VLAN_TAG_VALUE(mtag) = ntohs(evl->evl_tag); 2640 m_tag_prepend(m, mtag); 2641 } 2642 2643#endif 2644 m->m_flags |= M_VLANTAG; 2645 2646 /* 2647 * Remove the 802.1q header by copying the Ethernet 2648 * addresses over it and adjusting the beginning of 2649 * the data in the mbuf. The encapsulated Ethernet 2650 * type field is already in place. 2651 */ 2652 bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, 2653 ETHER_HDR_LEN - ETHER_TYPE_LEN); 2654 m_adj(m, ETHER_VLAN_ENCAP_LEN); 2655} 2656 2657 2658static inline void 2659mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, 2660 uint32_t csum, int lro) 2661{ 2662 mxge_softc_t *sc; 2663 struct ifnet *ifp; 2664 struct mbuf *m; 2665 struct ether_header *eh; 2666 mxge_rx_ring_t *rx; 2667 bus_dmamap_t old_map; 2668 int idx; 2669 2670 sc = ss->sc; 2671 ifp = sc->ifp; 2672 rx = &ss->rx_big; 2673 idx = rx->cnt & rx->mask; 2674 rx->cnt += rx->nbufs; 2675 /* save a pointer to the received mbuf */ 2676 m = rx->info[idx].m; 2677 /* try to replace the received mbuf */ 2678 if (mxge_get_buf_big(ss, rx->extra_map, idx)) { 2679 /* drop the frame -- the old mbuf is re-cycled */ 2680 ifp->if_ierrors++; 2681 return; 2682 } 2683 2684 /* unmap the received buffer */ 2685 old_map = rx->info[idx].map; 2686 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2687 bus_dmamap_unload(rx->dmat, old_map); 2688 2689 /* swap the bus_dmamap_t's */ 2690 rx->info[idx].map = rx->extra_map; 2691 rx->extra_map = old_map; 2692 2693 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2694 * aligned */ 2695 m->m_data += MXGEFW_PAD; 2696 2697 m->m_pkthdr.rcvif = ifp; 2698 m->m_len = m->m_pkthdr.len = len; 2699 ss->ipackets++; 2700 eh = mtod(m, struct ether_header *); 2701 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2702 mxge_vlan_tag_remove(m, &csum); 2703 } 2704 /* flowid only valid if RSS hashing is enabled */ 2705 if (sc->num_slices > 1) { 2706 m->m_pkthdr.flowid = (ss - sc->ss); 2707 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2708 } 2709 /* if the checksum is valid, mark it in the mbuf header */ 2710 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2711 (0 == mxge_rx_csum(m, csum))) { 2712 /* Tell the stack that the checksum is good */ 2713 m->m_pkthdr.csum_data = 0xffff; 2714 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2715 CSUM_DATA_VALID; 2716 2717#if defined(INET) || defined (INET6) 2718 if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0))) 2719 return; 2720#endif 2721 } 2722 /* pass the frame up the stack */ 2723 (*ifp->if_input)(ifp, m); 2724} 2725 2726static inline void 2727mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, 2728 uint32_t csum, int lro) 2729{ 2730 mxge_softc_t *sc; 2731 struct ifnet *ifp; 2732 struct ether_header *eh; 2733 struct mbuf *m; 2734 mxge_rx_ring_t *rx; 2735 bus_dmamap_t old_map; 2736 int idx; 2737 2738 sc = ss->sc; 2739 ifp = sc->ifp; 2740 rx = &ss->rx_small; 2741 idx = rx->cnt & rx->mask; 2742 rx->cnt++; 2743 /* save a pointer to the received mbuf */ 2744 m = rx->info[idx].m; 2745 /* try to replace the received mbuf */ 2746 if (mxge_get_buf_small(ss, rx->extra_map, idx)) { 2747 /* drop the frame -- the old mbuf is re-cycled */ 2748 ifp->if_ierrors++; 2749 return; 2750 } 2751 2752 /* unmap the received buffer */ 2753 old_map = rx->info[idx].map; 2754 bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD); 2755 bus_dmamap_unload(rx->dmat, old_map); 2756 2757 /* swap the bus_dmamap_t's */ 2758 rx->info[idx].map = rx->extra_map; 2759 rx->extra_map = old_map; 2760 2761 /* mcp implicitly skips 1st 2 bytes so that packet is properly 2762 * aligned */ 2763 m->m_data += MXGEFW_PAD; 2764 2765 m->m_pkthdr.rcvif = ifp; 2766 m->m_len = m->m_pkthdr.len = len; 2767 ss->ipackets++; 2768 eh = mtod(m, struct ether_header *); 2769 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2770 mxge_vlan_tag_remove(m, &csum); 2771 } 2772 /* flowid only valid if RSS hashing is enabled */ 2773 if (sc->num_slices > 1) { 2774 m->m_pkthdr.flowid = (ss - sc->ss); 2775 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2776 } 2777 /* if the checksum is valid, mark it in the mbuf header */ 2778 if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) && 2779 (0 == mxge_rx_csum(m, csum))) { 2780 /* Tell the stack that the checksum is good */ 2781 m->m_pkthdr.csum_data = 0xffff; 2782 m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | 2783 CSUM_DATA_VALID; 2784 2785#if defined(INET) || defined (INET6) 2786 if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum))) 2787 return; 2788#endif 2789 } 2790 /* pass the frame up the stack */ 2791 (*ifp->if_input)(ifp, m); 2792} 2793 2794static inline void 2795mxge_clean_rx_done(struct mxge_slice_state *ss) 2796{ 2797 mxge_rx_done_t *rx_done = &ss->rx_done; 2798 int limit = 0; 2799 uint16_t length; 2800 uint16_t checksum; 2801 int lro; 2802 2803 lro = ss->sc->ifp->if_capenable & IFCAP_LRO; 2804 while (rx_done->entry[rx_done->idx].length != 0) { 2805 length = ntohs(rx_done->entry[rx_done->idx].length); 2806 rx_done->entry[rx_done->idx].length = 0; 2807 checksum = rx_done->entry[rx_done->idx].checksum; 2808 if (length <= (MHLEN - MXGEFW_PAD)) 2809 mxge_rx_done_small(ss, length, checksum, lro); 2810 else 2811 mxge_rx_done_big(ss, length, checksum, lro); 2812 rx_done->cnt++; 2813 rx_done->idx = rx_done->cnt & rx_done->mask; 2814 2815 /* limit potential for livelock */ 2816 if (__predict_false(++limit > rx_done->mask / 2)) 2817 break; 2818 } 2819#if defined(INET) || defined (INET6) 2820 while (!SLIST_EMPTY(&ss->lc.lro_active)) { 2821 struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active); 2822 SLIST_REMOVE_HEAD(&ss->lc.lro_active, next); 2823 tcp_lro_flush(&ss->lc, lro); 2824 } 2825#endif 2826} 2827 2828 2829static inline void 2830mxge_tx_done(struct mxge_slice_state *ss, uint32_t mcp_idx) 2831{ 2832 struct ifnet *ifp; 2833 mxge_tx_ring_t *tx; 2834 struct mbuf *m; 2835 bus_dmamap_t map; 2836 int idx; 2837 int *flags; 2838 2839 tx = &ss->tx; 2840 ifp = ss->sc->ifp; 2841 while (tx->pkt_done != mcp_idx) { 2842 idx = tx->done & tx->mask; 2843 tx->done++; 2844 m = tx->info[idx].m; 2845 /* mbuf and DMA map only attached to the first 2846 segment per-mbuf */ 2847 if (m != NULL) { 2848 ss->obytes += m->m_pkthdr.len; 2849 if (m->m_flags & M_MCAST) 2850 ss->omcasts++; 2851 ss->opackets++; 2852 tx->info[idx].m = NULL; 2853 map = tx->info[idx].map; 2854 bus_dmamap_unload(tx->dmat, map); 2855 m_freem(m); 2856 } 2857 if (tx->info[idx].flag) { 2858 tx->info[idx].flag = 0; 2859 tx->pkt_done++; 2860 } 2861 } 2862 2863 /* If we have space, clear IFF_OACTIVE to tell the stack that 2864 its OK to send packets */ 2865#ifdef IFNET_BUF_RING 2866 flags = &ss->if_drv_flags; 2867#else 2868 flags = &ifp->if_drv_flags; 2869#endif 2870 mtx_lock(&ss->tx.mtx); 2871 if ((*flags) & IFF_DRV_OACTIVE && 2872 tx->req - tx->done < (tx->mask + 1)/4) { 2873 *(flags) &= ~IFF_DRV_OACTIVE; 2874 ss->tx.wake++; 2875 mxge_start_locked(ss); 2876 } 2877#ifdef IFNET_BUF_RING 2878 if ((ss->sc->num_slices > 1) && (tx->req == tx->done)) { 2879 /* let the NIC stop polling this queue, since there 2880 * are no more transmits pending */ 2881 if (tx->req == tx->done) { 2882 *tx->send_stop = 1; 2883 tx->queue_active = 0; 2884 tx->deactivate++; 2885 wmb(); 2886 } 2887 } 2888#endif 2889 mtx_unlock(&ss->tx.mtx); 2890 2891} 2892 2893static struct mxge_media_type mxge_xfp_media_types[] = 2894{ 2895 {IFM_10G_CX4, 0x7f, "10GBASE-CX4 (module)"}, 2896 {IFM_10G_SR, (1 << 7), "10GBASE-SR"}, 2897 {IFM_10G_LR, (1 << 6), "10GBASE-LR"}, 2898 {0, (1 << 5), "10GBASE-ER"}, 2899 {IFM_10G_LRM, (1 << 4), "10GBASE-LRM"}, 2900 {0, (1 << 3), "10GBASE-SW"}, 2901 {0, (1 << 2), "10GBASE-LW"}, 2902 {0, (1 << 1), "10GBASE-EW"}, 2903 {0, (1 << 0), "Reserved"} 2904}; 2905static struct mxge_media_type mxge_sfp_media_types[] = 2906{ 2907 {IFM_10G_TWINAX, 0, "10GBASE-Twinax"}, 2908 {0, (1 << 7), "Reserved"}, 2909 {IFM_10G_LRM, (1 << 6), "10GBASE-LRM"}, 2910 {IFM_10G_LR, (1 << 5), "10GBASE-LR"}, 2911 {IFM_10G_SR, (1 << 4), "10GBASE-SR"}, 2912 {IFM_10G_TWINAX,(1 << 0), "10GBASE-Twinax"} 2913}; 2914 2915static void 2916mxge_media_set(mxge_softc_t *sc, int media_type) 2917{ 2918 2919 2920 ifmedia_add(&sc->media, IFM_ETHER | IFM_FDX | media_type, 2921 0, NULL); 2922 ifmedia_set(&sc->media, IFM_ETHER | IFM_FDX | media_type); 2923 sc->current_media = media_type; 2924 sc->media.ifm_media = sc->media.ifm_cur->ifm_media; 2925} 2926 2927static void 2928mxge_media_init(mxge_softc_t *sc) 2929{ 2930 char *ptr; 2931 int i; 2932 2933 ifmedia_removeall(&sc->media); 2934 mxge_media_set(sc, IFM_AUTO); 2935 2936 /* 2937 * parse the product code to deterimine the interface type 2938 * (CX4, XFP, Quad Ribbon Fiber) by looking at the character 2939 * after the 3rd dash in the driver's cached copy of the 2940 * EEPROM's product code string. 2941 */ 2942 ptr = sc->product_code_string; 2943 if (ptr == NULL) { 2944 device_printf(sc->dev, "Missing product code\n"); 2945 return; 2946 } 2947 2948 for (i = 0; i < 3; i++, ptr++) { 2949 ptr = strchr(ptr, '-'); 2950 if (ptr == NULL) { 2951 device_printf(sc->dev, 2952 "only %d dashes in PC?!?\n", i); 2953 return; 2954 } 2955 } 2956 if (*ptr == 'C' || *(ptr +1) == 'C') { 2957 /* -C is CX4 */ 2958 sc->connector = MXGE_CX4; 2959 mxge_media_set(sc, IFM_10G_CX4); 2960 } else if (*ptr == 'Q') { 2961 /* -Q is Quad Ribbon Fiber */ 2962 sc->connector = MXGE_QRF; 2963 device_printf(sc->dev, "Quad Ribbon Fiber Media\n"); 2964 /* FreeBSD has no media type for Quad ribbon fiber */ 2965 } else if (*ptr == 'R') { 2966 /* -R is XFP */ 2967 sc->connector = MXGE_XFP; 2968 } else if (*ptr == 'S' || *(ptr +1) == 'S') { 2969 /* -S or -2S is SFP+ */ 2970 sc->connector = MXGE_SFP; 2971 } else { 2972 device_printf(sc->dev, "Unknown media type: %c\n", *ptr); 2973 } 2974} 2975 2976/* 2977 * Determine the media type for a NIC. Some XFPs will identify 2978 * themselves only when their link is up, so this is initiated via a 2979 * link up interrupt. However, this can potentially take up to 2980 * several milliseconds, so it is run via the watchdog routine, rather 2981 * than in the interrupt handler itself. 2982 */ 2983static void 2984mxge_media_probe(mxge_softc_t *sc) 2985{ 2986 mxge_cmd_t cmd; 2987 char *cage_type; 2988 2989 struct mxge_media_type *mxge_media_types = NULL; 2990 int i, err, ms, mxge_media_type_entries; 2991 uint32_t byte; 2992 2993 sc->need_media_probe = 0; 2994 2995 if (sc->connector == MXGE_XFP) { 2996 /* -R is XFP */ 2997 mxge_media_types = mxge_xfp_media_types; 2998 mxge_media_type_entries = 2999 sizeof (mxge_xfp_media_types) / 3000 sizeof (mxge_xfp_media_types[0]); 3001 byte = MXGE_XFP_COMPLIANCE_BYTE; 3002 cage_type = "XFP"; 3003 } else if (sc->connector == MXGE_SFP) { 3004 /* -S or -2S is SFP+ */ 3005 mxge_media_types = mxge_sfp_media_types; 3006 mxge_media_type_entries = 3007 sizeof (mxge_sfp_media_types) / 3008 sizeof (mxge_sfp_media_types[0]); 3009 cage_type = "SFP+"; 3010 byte = 3; 3011 } else { 3012 /* nothing to do; media type cannot change */ 3013 return; 3014 } 3015 3016 /* 3017 * At this point we know the NIC has an XFP cage, so now we 3018 * try to determine what is in the cage by using the 3019 * firmware's XFP I2C commands to read the XFP 10GbE compilance 3020 * register. We read just one byte, which may take over 3021 * a millisecond 3022 */ 3023 3024 cmd.data0 = 0; /* just fetch 1 byte, not all 256 */ 3025 cmd.data1 = byte; 3026 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_READ, &cmd); 3027 if (err == MXGEFW_CMD_ERROR_I2C_FAILURE) { 3028 device_printf(sc->dev, "failed to read XFP\n"); 3029 } 3030 if (err == MXGEFW_CMD_ERROR_I2C_ABSENT) { 3031 device_printf(sc->dev, "Type R/S with no XFP!?!?\n"); 3032 } 3033 if (err != MXGEFW_CMD_OK) { 3034 return; 3035 } 3036 3037 /* now we wait for the data to be cached */ 3038 cmd.data0 = byte; 3039 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3040 for (ms = 0; (err == EBUSY) && (ms < 50); ms++) { 3041 DELAY(1000); 3042 cmd.data0 = byte; 3043 err = mxge_send_cmd(sc, MXGEFW_CMD_I2C_BYTE, &cmd); 3044 } 3045 if (err != MXGEFW_CMD_OK) { 3046 device_printf(sc->dev, "failed to read %s (%d, %dms)\n", 3047 cage_type, err, ms); 3048 return; 3049 } 3050 3051 if (cmd.data0 == mxge_media_types[0].bitmask) { 3052 if (mxge_verbose) 3053 device_printf(sc->dev, "%s:%s\n", cage_type, 3054 mxge_media_types[0].name); 3055 if (sc->current_media != mxge_media_types[0].flag) { 3056 mxge_media_init(sc); 3057 mxge_media_set(sc, mxge_media_types[0].flag); 3058 } 3059 return; 3060 } 3061 for (i = 1; i < mxge_media_type_entries; i++) { 3062 if (cmd.data0 & mxge_media_types[i].bitmask) { 3063 if (mxge_verbose) 3064 device_printf(sc->dev, "%s:%s\n", 3065 cage_type, 3066 mxge_media_types[i].name); 3067 3068 if (sc->current_media != mxge_media_types[i].flag) { 3069 mxge_media_init(sc); 3070 mxge_media_set(sc, mxge_media_types[i].flag); 3071 } 3072 return; 3073 } 3074 } 3075 if (mxge_verbose) 3076 device_printf(sc->dev, "%s media 0x%x unknown\n", 3077 cage_type, cmd.data0); 3078 3079 return; 3080} 3081 3082static void 3083mxge_intr(void *arg) 3084{ 3085 struct mxge_slice_state *ss = arg; 3086 mxge_softc_t *sc = ss->sc; 3087 mcp_irq_data_t *stats = ss->fw_stats; 3088 mxge_tx_ring_t *tx = &ss->tx; 3089 mxge_rx_done_t *rx_done = &ss->rx_done; 3090 uint32_t send_done_count; 3091 uint8_t valid; 3092 3093 3094#ifndef IFNET_BUF_RING 3095 /* an interrupt on a non-zero slice is implicitly valid 3096 since MSI-X irqs are not shared */ 3097 if (ss != sc->ss) { 3098 mxge_clean_rx_done(ss); 3099 *ss->irq_claim = be32toh(3); 3100 return; 3101 } 3102#endif 3103 3104 /* make sure the DMA has finished */ 3105 if (!stats->valid) { 3106 return; 3107 } 3108 valid = stats->valid; 3109 3110 if (sc->legacy_irq) { 3111 /* lower legacy IRQ */ 3112 *sc->irq_deassert = 0; 3113 if (!mxge_deassert_wait) 3114 /* don't wait for conf. that irq is low */ 3115 stats->valid = 0; 3116 } else { 3117 stats->valid = 0; 3118 } 3119 3120 /* loop while waiting for legacy irq deassertion */ 3121 do { 3122 /* check for transmit completes and receives */ 3123 send_done_count = be32toh(stats->send_done_count); 3124 while ((send_done_count != tx->pkt_done) || 3125 (rx_done->entry[rx_done->idx].length != 0)) { 3126 if (send_done_count != tx->pkt_done) 3127 mxge_tx_done(ss, (int)send_done_count); 3128 mxge_clean_rx_done(ss); 3129 send_done_count = be32toh(stats->send_done_count); 3130 } 3131 if (sc->legacy_irq && mxge_deassert_wait) 3132 wmb(); 3133 } while (*((volatile uint8_t *) &stats->valid)); 3134 3135 /* fw link & error stats meaningful only on the first slice */ 3136 if (__predict_false((ss == sc->ss) && stats->stats_updated)) { 3137 if (sc->link_state != stats->link_up) { 3138 sc->link_state = stats->link_up; 3139 if (sc->link_state) { 3140 if_link_state_change(sc->ifp, LINK_STATE_UP); 3141 if_initbaudrate(sc->ifp, IF_Gbps(10)); 3142 if (mxge_verbose) 3143 device_printf(sc->dev, "link up\n"); 3144 } else { 3145 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3146 sc->ifp->if_baudrate = 0; 3147 if (mxge_verbose) 3148 device_printf(sc->dev, "link down\n"); 3149 } 3150 sc->need_media_probe = 1; 3151 } 3152 if (sc->rdma_tags_available != 3153 be32toh(stats->rdma_tags_available)) { 3154 sc->rdma_tags_available = 3155 be32toh(stats->rdma_tags_available); 3156 device_printf(sc->dev, "RDMA timed out! %d tags " 3157 "left\n", sc->rdma_tags_available); 3158 } 3159 3160 if (stats->link_down) { 3161 sc->down_cnt += stats->link_down; 3162 sc->link_state = 0; 3163 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 3164 } 3165 } 3166 3167 /* check to see if we have rx token to pass back */ 3168 if (valid & 0x1) 3169 *ss->irq_claim = be32toh(3); 3170 *(ss->irq_claim + 1) = be32toh(3); 3171} 3172 3173static void 3174mxge_init(void *arg) 3175{ 3176 mxge_softc_t *sc = arg; 3177 struct ifnet *ifp = sc->ifp; 3178 3179 3180 mtx_lock(&sc->driver_mtx); 3181 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 3182 (void) mxge_open(sc); 3183 mtx_unlock(&sc->driver_mtx); 3184} 3185 3186 3187 3188static void 3189mxge_free_slice_mbufs(struct mxge_slice_state *ss) 3190{ 3191 int i; 3192 3193#if defined(INET) || defined(INET6) 3194 tcp_lro_free(&ss->lc); 3195#endif 3196 for (i = 0; i <= ss->rx_big.mask; i++) { 3197 if (ss->rx_big.info[i].m == NULL) 3198 continue; 3199 bus_dmamap_unload(ss->rx_big.dmat, 3200 ss->rx_big.info[i].map); 3201 m_freem(ss->rx_big.info[i].m); 3202 ss->rx_big.info[i].m = NULL; 3203 } 3204 3205 for (i = 0; i <= ss->rx_small.mask; i++) { 3206 if (ss->rx_small.info[i].m == NULL) 3207 continue; 3208 bus_dmamap_unload(ss->rx_small.dmat, 3209 ss->rx_small.info[i].map); 3210 m_freem(ss->rx_small.info[i].m); 3211 ss->rx_small.info[i].m = NULL; 3212 } 3213 3214 /* transmit ring used only on the first slice */ 3215 if (ss->tx.info == NULL) 3216 return; 3217 3218 for (i = 0; i <= ss->tx.mask; i++) { 3219 ss->tx.info[i].flag = 0; 3220 if (ss->tx.info[i].m == NULL) 3221 continue; 3222 bus_dmamap_unload(ss->tx.dmat, 3223 ss->tx.info[i].map); 3224 m_freem(ss->tx.info[i].m); 3225 ss->tx.info[i].m = NULL; 3226 } 3227} 3228 3229static void 3230mxge_free_mbufs(mxge_softc_t *sc) 3231{ 3232 int slice; 3233 3234 for (slice = 0; slice < sc->num_slices; slice++) 3235 mxge_free_slice_mbufs(&sc->ss[slice]); 3236} 3237 3238static void 3239mxge_free_slice_rings(struct mxge_slice_state *ss) 3240{ 3241 int i; 3242 3243 3244 if (ss->rx_done.entry != NULL) 3245 mxge_dma_free(&ss->rx_done.dma); 3246 ss->rx_done.entry = NULL; 3247 3248 if (ss->tx.req_bytes != NULL) 3249 free(ss->tx.req_bytes, M_DEVBUF); 3250 ss->tx.req_bytes = NULL; 3251 3252 if (ss->tx.seg_list != NULL) 3253 free(ss->tx.seg_list, M_DEVBUF); 3254 ss->tx.seg_list = NULL; 3255 3256 if (ss->rx_small.shadow != NULL) 3257 free(ss->rx_small.shadow, M_DEVBUF); 3258 ss->rx_small.shadow = NULL; 3259 3260 if (ss->rx_big.shadow != NULL) 3261 free(ss->rx_big.shadow, M_DEVBUF); 3262 ss->rx_big.shadow = NULL; 3263 3264 if (ss->tx.info != NULL) { 3265 if (ss->tx.dmat != NULL) { 3266 for (i = 0; i <= ss->tx.mask; i++) { 3267 bus_dmamap_destroy(ss->tx.dmat, 3268 ss->tx.info[i].map); 3269 } 3270 bus_dma_tag_destroy(ss->tx.dmat); 3271 } 3272 free(ss->tx.info, M_DEVBUF); 3273 } 3274 ss->tx.info = NULL; 3275 3276 if (ss->rx_small.info != NULL) { 3277 if (ss->rx_small.dmat != NULL) { 3278 for (i = 0; i <= ss->rx_small.mask; i++) { 3279 bus_dmamap_destroy(ss->rx_small.dmat, 3280 ss->rx_small.info[i].map); 3281 } 3282 bus_dmamap_destroy(ss->rx_small.dmat, 3283 ss->rx_small.extra_map); 3284 bus_dma_tag_destroy(ss->rx_small.dmat); 3285 } 3286 free(ss->rx_small.info, M_DEVBUF); 3287 } 3288 ss->rx_small.info = NULL; 3289 3290 if (ss->rx_big.info != NULL) { 3291 if (ss->rx_big.dmat != NULL) { 3292 for (i = 0; i <= ss->rx_big.mask; i++) { 3293 bus_dmamap_destroy(ss->rx_big.dmat, 3294 ss->rx_big.info[i].map); 3295 } 3296 bus_dmamap_destroy(ss->rx_big.dmat, 3297 ss->rx_big.extra_map); 3298 bus_dma_tag_destroy(ss->rx_big.dmat); 3299 } 3300 free(ss->rx_big.info, M_DEVBUF); 3301 } 3302 ss->rx_big.info = NULL; 3303} 3304 3305static void 3306mxge_free_rings(mxge_softc_t *sc) 3307{ 3308 int slice; 3309 3310 for (slice = 0; slice < sc->num_slices; slice++) 3311 mxge_free_slice_rings(&sc->ss[slice]); 3312} 3313 3314static int 3315mxge_alloc_slice_rings(struct mxge_slice_state *ss, int rx_ring_entries, 3316 int tx_ring_entries) 3317{ 3318 mxge_softc_t *sc = ss->sc; 3319 size_t bytes; 3320 int err, i; 3321 3322 /* allocate per-slice receive resources */ 3323 3324 ss->rx_small.mask = ss->rx_big.mask = rx_ring_entries - 1; 3325 ss->rx_done.mask = (2 * rx_ring_entries) - 1; 3326 3327 /* allocate the rx shadow rings */ 3328 bytes = rx_ring_entries * sizeof (*ss->rx_small.shadow); 3329 ss->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3330 3331 bytes = rx_ring_entries * sizeof (*ss->rx_big.shadow); 3332 ss->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3333 3334 /* allocate the rx host info rings */ 3335 bytes = rx_ring_entries * sizeof (*ss->rx_small.info); 3336 ss->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3337 3338 bytes = rx_ring_entries * sizeof (*ss->rx_big.info); 3339 ss->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3340 3341 /* allocate the rx busdma resources */ 3342 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3343 1, /* alignment */ 3344 4096, /* boundary */ 3345 BUS_SPACE_MAXADDR, /* low */ 3346 BUS_SPACE_MAXADDR, /* high */ 3347 NULL, NULL, /* filter */ 3348 MHLEN, /* maxsize */ 3349 1, /* num segs */ 3350 MHLEN, /* maxsegsize */ 3351 BUS_DMA_ALLOCNOW, /* flags */ 3352 NULL, NULL, /* lock */ 3353 &ss->rx_small.dmat); /* tag */ 3354 if (err != 0) { 3355 device_printf(sc->dev, "Err %d allocating rx_small dmat\n", 3356 err); 3357 return err; 3358 } 3359 3360 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3361 1, /* alignment */ 3362#if MXGE_VIRT_JUMBOS 3363 4096, /* boundary */ 3364#else 3365 0, /* boundary */ 3366#endif 3367 BUS_SPACE_MAXADDR, /* low */ 3368 BUS_SPACE_MAXADDR, /* high */ 3369 NULL, NULL, /* filter */ 3370 3*4096, /* maxsize */ 3371#if MXGE_VIRT_JUMBOS 3372 3, /* num segs */ 3373 4096, /* maxsegsize*/ 3374#else 3375 1, /* num segs */ 3376 MJUM9BYTES, /* maxsegsize*/ 3377#endif 3378 BUS_DMA_ALLOCNOW, /* flags */ 3379 NULL, NULL, /* lock */ 3380 &ss->rx_big.dmat); /* tag */ 3381 if (err != 0) { 3382 device_printf(sc->dev, "Err %d allocating rx_big dmat\n", 3383 err); 3384 return err; 3385 } 3386 for (i = 0; i <= ss->rx_small.mask; i++) { 3387 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3388 &ss->rx_small.info[i].map); 3389 if (err != 0) { 3390 device_printf(sc->dev, "Err %d rx_small dmamap\n", 3391 err); 3392 return err; 3393 } 3394 } 3395 err = bus_dmamap_create(ss->rx_small.dmat, 0, 3396 &ss->rx_small.extra_map); 3397 if (err != 0) { 3398 device_printf(sc->dev, "Err %d extra rx_small dmamap\n", 3399 err); 3400 return err; 3401 } 3402 3403 for (i = 0; i <= ss->rx_big.mask; i++) { 3404 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3405 &ss->rx_big.info[i].map); 3406 if (err != 0) { 3407 device_printf(sc->dev, "Err %d rx_big dmamap\n", 3408 err); 3409 return err; 3410 } 3411 } 3412 err = bus_dmamap_create(ss->rx_big.dmat, 0, 3413 &ss->rx_big.extra_map); 3414 if (err != 0) { 3415 device_printf(sc->dev, "Err %d extra rx_big dmamap\n", 3416 err); 3417 return err; 3418 } 3419 3420 /* now allocate TX resources */ 3421 3422#ifndef IFNET_BUF_RING 3423 /* only use a single TX ring for now */ 3424 if (ss != ss->sc->ss) 3425 return 0; 3426#endif 3427 3428 ss->tx.mask = tx_ring_entries - 1; 3429 ss->tx.max_desc = MIN(MXGE_MAX_SEND_DESC, tx_ring_entries / 4); 3430 3431 3432 /* allocate the tx request copy block */ 3433 bytes = 8 + 3434 sizeof (*ss->tx.req_list) * (ss->tx.max_desc + 4); 3435 ss->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK); 3436 /* ensure req_list entries are aligned to 8 bytes */ 3437 ss->tx.req_list = (mcp_kreq_ether_send_t *) 3438 ((unsigned long)(ss->tx.req_bytes + 7) & ~7UL); 3439 3440 /* allocate the tx busdma segment list */ 3441 bytes = sizeof (*ss->tx.seg_list) * ss->tx.max_desc; 3442 ss->tx.seg_list = (bus_dma_segment_t *) 3443 malloc(bytes, M_DEVBUF, M_WAITOK); 3444 3445 /* allocate the tx host info ring */ 3446 bytes = tx_ring_entries * sizeof (*ss->tx.info); 3447 ss->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK); 3448 3449 /* allocate the tx busdma resources */ 3450 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 3451 1, /* alignment */ 3452 sc->tx_boundary, /* boundary */ 3453 BUS_SPACE_MAXADDR, /* low */ 3454 BUS_SPACE_MAXADDR, /* high */ 3455 NULL, NULL, /* filter */ 3456 65536 + 256, /* maxsize */ 3457 ss->tx.max_desc - 2, /* num segs */ 3458 sc->tx_boundary, /* maxsegsz */ 3459 BUS_DMA_ALLOCNOW, /* flags */ 3460 NULL, NULL, /* lock */ 3461 &ss->tx.dmat); /* tag */ 3462 3463 if (err != 0) { 3464 device_printf(sc->dev, "Err %d allocating tx dmat\n", 3465 err); 3466 return err; 3467 } 3468 3469 /* now use these tags to setup dmamaps for each slot 3470 in the ring */ 3471 for (i = 0; i <= ss->tx.mask; i++) { 3472 err = bus_dmamap_create(ss->tx.dmat, 0, 3473 &ss->tx.info[i].map); 3474 if (err != 0) { 3475 device_printf(sc->dev, "Err %d tx dmamap\n", 3476 err); 3477 return err; 3478 } 3479 } 3480 return 0; 3481 3482} 3483 3484static int 3485mxge_alloc_rings(mxge_softc_t *sc) 3486{ 3487 mxge_cmd_t cmd; 3488 int tx_ring_size; 3489 int tx_ring_entries, rx_ring_entries; 3490 int err, slice; 3491 3492 /* get ring sizes */ 3493 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd); 3494 tx_ring_size = cmd.data0; 3495 if (err != 0) { 3496 device_printf(sc->dev, "Cannot determine tx ring sizes\n"); 3497 goto abort; 3498 } 3499 3500 tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t); 3501 rx_ring_entries = sc->rx_ring_size / sizeof (mcp_dma_addr_t); 3502 IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1); 3503 sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen; 3504 IFQ_SET_READY(&sc->ifp->if_snd); 3505 3506 for (slice = 0; slice < sc->num_slices; slice++) { 3507 err = mxge_alloc_slice_rings(&sc->ss[slice], 3508 rx_ring_entries, 3509 tx_ring_entries); 3510 if (err != 0) 3511 goto abort; 3512 } 3513 return 0; 3514 3515abort: 3516 mxge_free_rings(sc); 3517 return err; 3518 3519} 3520 3521 3522static void 3523mxge_choose_params(int mtu, int *big_buf_size, int *cl_size, int *nbufs) 3524{ 3525 int bufsize = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3526 3527 if (bufsize < MCLBYTES) { 3528 /* easy, everything fits in a single buffer */ 3529 *big_buf_size = MCLBYTES; 3530 *cl_size = MCLBYTES; 3531 *nbufs = 1; 3532 return; 3533 } 3534 3535 if (bufsize < MJUMPAGESIZE) { 3536 /* still easy, everything still fits in a single buffer */ 3537 *big_buf_size = MJUMPAGESIZE; 3538 *cl_size = MJUMPAGESIZE; 3539 *nbufs = 1; 3540 return; 3541 } 3542#if MXGE_VIRT_JUMBOS 3543 /* now we need to use virtually contiguous buffers */ 3544 *cl_size = MJUM9BYTES; 3545 *big_buf_size = 4096; 3546 *nbufs = mtu / 4096 + 1; 3547 /* needs to be a power of two, so round up */ 3548 if (*nbufs == 3) 3549 *nbufs = 4; 3550#else 3551 *cl_size = MJUM9BYTES; 3552 *big_buf_size = MJUM9BYTES; 3553 *nbufs = 1; 3554#endif 3555} 3556 3557static int 3558mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size) 3559{ 3560 mxge_softc_t *sc; 3561 mxge_cmd_t cmd; 3562 bus_dmamap_t map; 3563 int err, i, slice; 3564 3565 3566 sc = ss->sc; 3567 slice = ss - sc->ss; 3568 3569#if defined(INET) || defined(INET6) 3570 (void)tcp_lro_init(&ss->lc); 3571#endif 3572 ss->lc.ifp = sc->ifp; 3573 3574 /* get the lanai pointers to the send and receive rings */ 3575 3576 err = 0; 3577#ifndef IFNET_BUF_RING 3578 /* We currently only send from the first slice */ 3579 if (slice == 0) { 3580#endif 3581 cmd.data0 = slice; 3582 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd); 3583 ss->tx.lanai = 3584 (volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0); 3585 ss->tx.send_go = (volatile uint32_t *) 3586 (sc->sram + MXGEFW_ETH_SEND_GO + 64 * slice); 3587 ss->tx.send_stop = (volatile uint32_t *) 3588 (sc->sram + MXGEFW_ETH_SEND_STOP + 64 * slice); 3589#ifndef IFNET_BUF_RING 3590 } 3591#endif 3592 cmd.data0 = slice; 3593 err |= mxge_send_cmd(sc, 3594 MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd); 3595 ss->rx_small.lanai = 3596 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3597 cmd.data0 = slice; 3598 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd); 3599 ss->rx_big.lanai = 3600 (volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0); 3601 3602 if (err != 0) { 3603 device_printf(sc->dev, 3604 "failed to get ring sizes or locations\n"); 3605 return EIO; 3606 } 3607 3608 /* stock receive rings */ 3609 for (i = 0; i <= ss->rx_small.mask; i++) { 3610 map = ss->rx_small.info[i].map; 3611 err = mxge_get_buf_small(ss, map, i); 3612 if (err) { 3613 device_printf(sc->dev, "alloced %d/%d smalls\n", 3614 i, ss->rx_small.mask + 1); 3615 return ENOMEM; 3616 } 3617 } 3618 for (i = 0; i <= ss->rx_big.mask; i++) { 3619 ss->rx_big.shadow[i].addr_low = 0xffffffff; 3620 ss->rx_big.shadow[i].addr_high = 0xffffffff; 3621 } 3622 ss->rx_big.nbufs = nbufs; 3623 ss->rx_big.cl_size = cl_size; 3624 ss->rx_big.mlen = ss->sc->ifp->if_mtu + ETHER_HDR_LEN + 3625 ETHER_VLAN_ENCAP_LEN + MXGEFW_PAD; 3626 for (i = 0; i <= ss->rx_big.mask; i += ss->rx_big.nbufs) { 3627 map = ss->rx_big.info[i].map; 3628 err = mxge_get_buf_big(ss, map, i); 3629 if (err) { 3630 device_printf(sc->dev, "alloced %d/%d bigs\n", 3631 i, ss->rx_big.mask + 1); 3632 return ENOMEM; 3633 } 3634 } 3635 return 0; 3636} 3637 3638static int 3639mxge_open(mxge_softc_t *sc) 3640{ 3641 mxge_cmd_t cmd; 3642 int err, big_bytes, nbufs, slice, cl_size, i; 3643 bus_addr_t bus; 3644 volatile uint8_t *itable; 3645 struct mxge_slice_state *ss; 3646 3647 /* Copy the MAC address in case it was overridden */ 3648 bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN); 3649 3650 err = mxge_reset(sc, 1); 3651 if (err != 0) { 3652 device_printf(sc->dev, "failed to reset\n"); 3653 return EIO; 3654 } 3655 3656 if (sc->num_slices > 1) { 3657 /* setup the indirection table */ 3658 cmd.data0 = sc->num_slices; 3659 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_TABLE_SIZE, 3660 &cmd); 3661 3662 err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RSS_TABLE_OFFSET, 3663 &cmd); 3664 if (err != 0) { 3665 device_printf(sc->dev, 3666 "failed to setup rss tables\n"); 3667 return err; 3668 } 3669 3670 /* just enable an identity mapping */ 3671 itable = sc->sram + cmd.data0; 3672 for (i = 0; i < sc->num_slices; i++) 3673 itable[i] = (uint8_t)i; 3674 3675 cmd.data0 = 1; 3676 cmd.data1 = mxge_rss_hash_type; 3677 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_RSS_ENABLE, &cmd); 3678 if (err != 0) { 3679 device_printf(sc->dev, "failed to enable slices\n"); 3680 return err; 3681 } 3682 } 3683 3684 3685 mxge_choose_params(sc->ifp->if_mtu, &big_bytes, &cl_size, &nbufs); 3686 3687 cmd.data0 = nbufs; 3688 err = mxge_send_cmd(sc, MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, 3689 &cmd); 3690 /* error is only meaningful if we're trying to set 3691 MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS > 1 */ 3692 if (err && nbufs > 1) { 3693 device_printf(sc->dev, 3694 "Failed to set alway-use-n to %d\n", 3695 nbufs); 3696 return EIO; 3697 } 3698 /* Give the firmware the mtu and the big and small buffer 3699 sizes. The firmware wants the big buf size to be a power 3700 of two. Luckily, FreeBSD's clusters are powers of two */ 3701 cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3702 err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd); 3703 cmd.data0 = MHLEN - MXGEFW_PAD; 3704 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE, 3705 &cmd); 3706 cmd.data0 = big_bytes; 3707 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd); 3708 3709 if (err != 0) { 3710 device_printf(sc->dev, "failed to setup params\n"); 3711 goto abort; 3712 } 3713 3714 /* Now give him the pointer to the stats block */ 3715 for (slice = 0; 3716#ifdef IFNET_BUF_RING 3717 slice < sc->num_slices; 3718#else 3719 slice < 1; 3720#endif 3721 slice++) { 3722 ss = &sc->ss[slice]; 3723 cmd.data0 = 3724 MXGE_LOWPART_TO_U32(ss->fw_stats_dma.bus_addr); 3725 cmd.data1 = 3726 MXGE_HIGHPART_TO_U32(ss->fw_stats_dma.bus_addr); 3727 cmd.data2 = sizeof(struct mcp_irq_data); 3728 cmd.data2 |= (slice << 16); 3729 err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd); 3730 } 3731 3732 if (err != 0) { 3733 bus = sc->ss->fw_stats_dma.bus_addr; 3734 bus += offsetof(struct mcp_irq_data, send_done_count); 3735 cmd.data0 = MXGE_LOWPART_TO_U32(bus); 3736 cmd.data1 = MXGE_HIGHPART_TO_U32(bus); 3737 err = mxge_send_cmd(sc, 3738 MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, 3739 &cmd); 3740 /* Firmware cannot support multicast without STATS_DMA_V2 */ 3741 sc->fw_multicast_support = 0; 3742 } else { 3743 sc->fw_multicast_support = 1; 3744 } 3745 3746 if (err != 0) { 3747 device_printf(sc->dev, "failed to setup params\n"); 3748 goto abort; 3749 } 3750 3751 for (slice = 0; slice < sc->num_slices; slice++) { 3752 err = mxge_slice_open(&sc->ss[slice], nbufs, cl_size); 3753 if (err != 0) { 3754 device_printf(sc->dev, "couldn't open slice %d\n", 3755 slice); 3756 goto abort; 3757 } 3758 } 3759 3760 /* Finally, start the firmware running */ 3761 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd); 3762 if (err) { 3763 device_printf(sc->dev, "Couldn't bring up link\n"); 3764 goto abort; 3765 } 3766#ifdef IFNET_BUF_RING 3767 for (slice = 0; slice < sc->num_slices; slice++) { 3768 ss = &sc->ss[slice]; 3769 ss->if_drv_flags |= IFF_DRV_RUNNING; 3770 ss->if_drv_flags &= ~IFF_DRV_OACTIVE; 3771 } 3772#endif 3773 sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; 3774 sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 3775 3776 return 0; 3777 3778 3779abort: 3780 mxge_free_mbufs(sc); 3781 3782 return err; 3783} 3784 3785static int 3786mxge_close(mxge_softc_t *sc, int down) 3787{ 3788 mxge_cmd_t cmd; 3789 int err, old_down_cnt; 3790#ifdef IFNET_BUF_RING 3791 struct mxge_slice_state *ss; 3792 int slice; 3793#endif 3794 3795#ifdef IFNET_BUF_RING 3796 for (slice = 0; slice < sc->num_slices; slice++) { 3797 ss = &sc->ss[slice]; 3798 ss->if_drv_flags &= ~IFF_DRV_RUNNING; 3799 } 3800#endif 3801 sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3802 if (!down) { 3803 old_down_cnt = sc->down_cnt; 3804 wmb(); 3805 err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); 3806 if (err) { 3807 device_printf(sc->dev, 3808 "Couldn't bring down link\n"); 3809 } 3810 if (old_down_cnt == sc->down_cnt) { 3811 /* wait for down irq */ 3812 DELAY(10 * sc->intr_coal_delay); 3813 } 3814 wmb(); 3815 if (old_down_cnt == sc->down_cnt) { 3816 device_printf(sc->dev, "never got down irq\n"); 3817 } 3818 } 3819 mxge_free_mbufs(sc); 3820 3821 return 0; 3822} 3823 3824static void 3825mxge_setup_cfg_space(mxge_softc_t *sc) 3826{ 3827 device_t dev = sc->dev; 3828 int reg; 3829 uint16_t lnk, pectl; 3830 3831 /* find the PCIe link width and set max read request to 4KB*/ 3832 if (pci_find_cap(dev, PCIY_EXPRESS, ®) == 0) { 3833 lnk = pci_read_config(dev, reg + 0x12, 2); 3834 sc->link_width = (lnk >> 4) & 0x3f; 3835 3836 if (sc->pectl == 0) { 3837 pectl = pci_read_config(dev, reg + 0x8, 2); 3838 pectl = (pectl & ~0x7000) | (5 << 12); 3839 pci_write_config(dev, reg + 0x8, pectl, 2); 3840 sc->pectl = pectl; 3841 } else { 3842 /* restore saved pectl after watchdog reset */ 3843 pci_write_config(dev, reg + 0x8, sc->pectl, 2); 3844 } 3845 } 3846 3847 /* Enable DMA and Memory space access */ 3848 pci_enable_busmaster(dev); 3849} 3850 3851static uint32_t 3852mxge_read_reboot(mxge_softc_t *sc) 3853{ 3854 device_t dev = sc->dev; 3855 uint32_t vs; 3856 3857 /* find the vendor specific offset */ 3858 if (pci_find_cap(dev, PCIY_VENDOR, &vs) != 0) { 3859 device_printf(sc->dev, 3860 "could not find vendor specific offset\n"); 3861 return (uint32_t)-1; 3862 } 3863 /* enable read32 mode */ 3864 pci_write_config(dev, vs + 0x10, 0x3, 1); 3865 /* tell NIC which register to read */ 3866 pci_write_config(dev, vs + 0x18, 0xfffffff0, 4); 3867 return (pci_read_config(dev, vs + 0x14, 4)); 3868} 3869 3870static void 3871mxge_watchdog_reset(mxge_softc_t *sc) 3872{ 3873 struct pci_devinfo *dinfo; 3874 struct mxge_slice_state *ss; 3875 int err, running, s, num_tx_slices = 1; 3876 uint32_t reboot; 3877 uint16_t cmd; 3878 3879 err = ENXIO; 3880 3881 device_printf(sc->dev, "Watchdog reset!\n"); 3882 3883 /* 3884 * check to see if the NIC rebooted. If it did, then all of 3885 * PCI config space has been reset, and things like the 3886 * busmaster bit will be zero. If this is the case, then we 3887 * must restore PCI config space before the NIC can be used 3888 * again 3889 */ 3890 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3891 if (cmd == 0xffff) { 3892 /* 3893 * maybe the watchdog caught the NIC rebooting; wait 3894 * up to 100ms for it to finish. If it does not come 3895 * back, then give up 3896 */ 3897 DELAY(1000*100); 3898 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 3899 if (cmd == 0xffff) { 3900 device_printf(sc->dev, "NIC disappeared!\n"); 3901 } 3902 } 3903 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 3904 /* print the reboot status */ 3905 reboot = mxge_read_reboot(sc); 3906 device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", 3907 reboot); 3908 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 3909 if (running) { 3910 3911 /* 3912 * quiesce NIC so that TX routines will not try to 3913 * xmit after restoration of BAR 3914 */ 3915 3916 /* Mark the link as down */ 3917 if (sc->link_state) { 3918 sc->link_state = 0; 3919 if_link_state_change(sc->ifp, 3920 LINK_STATE_DOWN); 3921 } 3922#ifdef IFNET_BUF_RING 3923 num_tx_slices = sc->num_slices; 3924#endif 3925 /* grab all TX locks to ensure no tx */ 3926 for (s = 0; s < num_tx_slices; s++) { 3927 ss = &sc->ss[s]; 3928 mtx_lock(&ss->tx.mtx); 3929 } 3930 mxge_close(sc, 1); 3931 } 3932 /* restore PCI configuration space */ 3933 dinfo = device_get_ivars(sc->dev); 3934 pci_cfg_restore(sc->dev, dinfo); 3935 3936 /* and redo any changes we made to our config space */ 3937 mxge_setup_cfg_space(sc); 3938 3939 /* reload f/w */ 3940 err = mxge_load_firmware(sc, 0); 3941 if (err) { 3942 device_printf(sc->dev, 3943 "Unable to re-load f/w\n"); 3944 } 3945 if (running) { 3946 if (!err) 3947 err = mxge_open(sc); 3948 /* release all TX locks */ 3949 for (s = 0; s < num_tx_slices; s++) { 3950 ss = &sc->ss[s]; 3951#ifdef IFNET_BUF_RING 3952 mxge_start_locked(ss); 3953#endif 3954 mtx_unlock(&ss->tx.mtx); 3955 } 3956 } 3957 sc->watchdog_resets++; 3958 } else { 3959 device_printf(sc->dev, 3960 "NIC did not reboot, not resetting\n"); 3961 err = 0; 3962 } 3963 if (err) { 3964 device_printf(sc->dev, "watchdog reset failed\n"); 3965 } else { 3966 if (sc->dying == 2) 3967 sc->dying = 0; 3968 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 3969 } 3970} 3971 3972static void 3973mxge_watchdog_task(void *arg, int pending) 3974{ 3975 mxge_softc_t *sc = arg; 3976 3977 3978 mtx_lock(&sc->driver_mtx); 3979 mxge_watchdog_reset(sc); 3980 mtx_unlock(&sc->driver_mtx); 3981} 3982 3983static void 3984mxge_warn_stuck(mxge_softc_t *sc, mxge_tx_ring_t *tx, int slice) 3985{ 3986 tx = &sc->ss[slice].tx; 3987 device_printf(sc->dev, "slice %d struck? ring state:\n", slice); 3988 device_printf(sc->dev, 3989 "tx.req=%d tx.done=%d, tx.queue_active=%d\n", 3990 tx->req, tx->done, tx->queue_active); 3991 device_printf(sc->dev, "tx.activate=%d tx.deactivate=%d\n", 3992 tx->activate, tx->deactivate); 3993 device_printf(sc->dev, "pkt_done=%d fw=%d\n", 3994 tx->pkt_done, 3995 be32toh(sc->ss->fw_stats->send_done_count)); 3996} 3997 3998static int 3999mxge_watchdog(mxge_softc_t *sc) 4000{ 4001 mxge_tx_ring_t *tx; 4002 uint32_t rx_pause = be32toh(sc->ss->fw_stats->dropped_pause); 4003 int i, err = 0; 4004 4005 /* see if we have outstanding transmits, which 4006 have been pending for more than mxge_ticks */ 4007 for (i = 0; 4008#ifdef IFNET_BUF_RING 4009 (i < sc->num_slices) && (err == 0); 4010#else 4011 (i < 1) && (err == 0); 4012#endif 4013 i++) { 4014 tx = &sc->ss[i].tx; 4015 if (tx->req != tx->done && 4016 tx->watchdog_req != tx->watchdog_done && 4017 tx->done == tx->watchdog_done) { 4018 /* check for pause blocking before resetting */ 4019 if (tx->watchdog_rx_pause == rx_pause) { 4020 mxge_warn_stuck(sc, tx, i); 4021 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4022 return (ENXIO); 4023 } 4024 else 4025 device_printf(sc->dev, "Flow control blocking " 4026 "xmits, check link partner\n"); 4027 } 4028 4029 tx->watchdog_req = tx->req; 4030 tx->watchdog_done = tx->done; 4031 tx->watchdog_rx_pause = rx_pause; 4032 } 4033 4034 if (sc->need_media_probe) 4035 mxge_media_probe(sc); 4036 return (err); 4037} 4038 4039static u_long 4040mxge_update_stats(mxge_softc_t *sc) 4041{ 4042 struct mxge_slice_state *ss; 4043 u_long pkts = 0; 4044 u_long ipackets = 0; 4045 u_long opackets = 0; 4046#ifdef IFNET_BUF_RING 4047 u_long obytes = 0; 4048 u_long omcasts = 0; 4049 u_long odrops = 0; 4050#endif 4051 u_long oerrors = 0; 4052 int slice; 4053 4054 for (slice = 0; slice < sc->num_slices; slice++) { 4055 ss = &sc->ss[slice]; 4056 ipackets += ss->ipackets; 4057 opackets += ss->opackets; 4058#ifdef IFNET_BUF_RING 4059 obytes += ss->obytes; 4060 omcasts += ss->omcasts; 4061 odrops += ss->tx.br->br_drops; 4062#endif 4063 oerrors += ss->oerrors; 4064 } 4065 pkts = (ipackets - sc->ifp->if_ipackets); 4066 pkts += (opackets - sc->ifp->if_opackets); 4067 sc->ifp->if_ipackets = ipackets; 4068 sc->ifp->if_opackets = opackets; 4069#ifdef IFNET_BUF_RING 4070 sc->ifp->if_obytes = obytes; 4071 sc->ifp->if_omcasts = omcasts; 4072 sc->ifp->if_snd.ifq_drops = odrops; 4073#endif 4074 sc->ifp->if_oerrors = oerrors; 4075 return pkts; 4076} 4077 4078static void 4079mxge_tick(void *arg) 4080{ 4081 mxge_softc_t *sc = arg; 4082 u_long pkts = 0; 4083 int err = 0; 4084 int running, ticks; 4085 uint16_t cmd; 4086 4087 ticks = mxge_ticks; 4088 running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; 4089 if (running) { 4090 /* aggregate stats from different slices */ 4091 pkts = mxge_update_stats(sc); 4092 if (!sc->watchdog_countdown) { 4093 err = mxge_watchdog(sc); 4094 sc->watchdog_countdown = 4; 4095 } 4096 sc->watchdog_countdown--; 4097 } 4098 if (pkts == 0) { 4099 /* ensure NIC did not suffer h/w fault while idle */ 4100 cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); 4101 if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { 4102 sc->dying = 2; 4103 taskqueue_enqueue(sc->tq, &sc->watchdog_task); 4104 err = ENXIO; 4105 } 4106 /* look less often if NIC is idle */ 4107 ticks *= 4; 4108 } 4109 4110 if (err == 0) 4111 callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); 4112 4113} 4114 4115static int 4116mxge_media_change(struct ifnet *ifp) 4117{ 4118 return EINVAL; 4119} 4120 4121static int 4122mxge_change_mtu(mxge_softc_t *sc, int mtu) 4123{ 4124 struct ifnet *ifp = sc->ifp; 4125 int real_mtu, old_mtu; 4126 int err = 0; 4127 4128 4129 real_mtu = mtu + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 4130 if ((real_mtu > sc->max_mtu) || real_mtu < 60) 4131 return EINVAL; 4132 mtx_lock(&sc->driver_mtx); 4133 old_mtu = ifp->if_mtu; 4134 ifp->if_mtu = mtu; 4135 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4136 mxge_close(sc, 0); 4137 err = mxge_open(sc); 4138 if (err != 0) { 4139 ifp->if_mtu = old_mtu; 4140 mxge_close(sc, 0); 4141 (void) mxge_open(sc); 4142 } 4143 } 4144 mtx_unlock(&sc->driver_mtx); 4145 return err; 4146} 4147 4148static void 4149mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) 4150{ 4151 mxge_softc_t *sc = ifp->if_softc; 4152 4153 4154 if (sc == NULL) 4155 return; 4156 ifmr->ifm_status = IFM_AVALID; 4157 ifmr->ifm_active = IFM_ETHER | IFM_FDX; 4158 ifmr->ifm_status |= sc->link_state ? IFM_ACTIVE : 0; 4159 ifmr->ifm_active |= sc->current_media; 4160} 4161 4162static int 4163mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data) 4164{ 4165 mxge_softc_t *sc = ifp->if_softc; 4166 struct ifreq *ifr = (struct ifreq *)data; 4167 int err, mask; 4168 4169 err = 0; 4170 switch (command) { 4171 case SIOCSIFMTU: 4172 err = mxge_change_mtu(sc, ifr->ifr_mtu); 4173 break; 4174 4175 case SIOCSIFFLAGS: 4176 mtx_lock(&sc->driver_mtx); 4177 if (sc->dying) { 4178 mtx_unlock(&sc->driver_mtx); 4179 return EINVAL; 4180 } 4181 if (ifp->if_flags & IFF_UP) { 4182 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { 4183 err = mxge_open(sc); 4184 } else { 4185 /* take care of promis can allmulti 4186 flag chages */ 4187 mxge_change_promisc(sc, 4188 ifp->if_flags & IFF_PROMISC); 4189 mxge_set_multicast_list(sc); 4190 } 4191 } else { 4192 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 4193 mxge_close(sc, 0); 4194 } 4195 } 4196 mtx_unlock(&sc->driver_mtx); 4197 break; 4198 4199 case SIOCADDMULTI: 4200 case SIOCDELMULTI: 4201 mtx_lock(&sc->driver_mtx); 4202 mxge_set_multicast_list(sc); 4203 mtx_unlock(&sc->driver_mtx); 4204 break; 4205 4206 case SIOCSIFCAP: 4207 mtx_lock(&sc->driver_mtx); 4208 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 4209 if (mask & IFCAP_TXCSUM) { 4210 if (IFCAP_TXCSUM & ifp->if_capenable) { 4211 ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); 4212 ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); 4213 } else { 4214 ifp->if_capenable |= IFCAP_TXCSUM; 4215 ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); 4216 } 4217 } else if (mask & IFCAP_RXCSUM) { 4218 if (IFCAP_RXCSUM & ifp->if_capenable) { 4219 ifp->if_capenable &= ~IFCAP_RXCSUM; 4220 } else { 4221 ifp->if_capenable |= IFCAP_RXCSUM; 4222 } 4223 } 4224 if (mask & IFCAP_TSO4) { 4225 if (IFCAP_TSO4 & ifp->if_capenable) { 4226 ifp->if_capenable &= ~IFCAP_TSO4; 4227 } else if (IFCAP_TXCSUM & ifp->if_capenable) { 4228 ifp->if_capenable |= IFCAP_TSO4; 4229 ifp->if_hwassist |= CSUM_TSO; 4230 } else { 4231 printf("mxge requires tx checksum offload" 4232 " be enabled to use TSO\n"); 4233 err = EINVAL; 4234 } 4235 } 4236#if IFCAP_TSO6 4237 if (mask & IFCAP_TXCSUM_IPV6) { 4238 if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4239 ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 4240 | IFCAP_TSO6); 4241 ifp->if_hwassist &= ~(CSUM_TCP_IPV6 4242 | CSUM_UDP); 4243 } else { 4244 ifp->if_capenable |= IFCAP_TXCSUM_IPV6; 4245 ifp->if_hwassist |= (CSUM_TCP_IPV6 4246 | CSUM_UDP_IPV6); 4247 } 4248 } else if (mask & IFCAP_RXCSUM_IPV6) { 4249 if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) { 4250 ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6; 4251 } else { 4252 ifp->if_capenable |= IFCAP_RXCSUM_IPV6; 4253 } 4254 } 4255 if (mask & IFCAP_TSO6) { 4256 if (IFCAP_TSO6 & ifp->if_capenable) { 4257 ifp->if_capenable &= ~IFCAP_TSO6; 4258 } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { 4259 ifp->if_capenable |= IFCAP_TSO6; 4260 ifp->if_hwassist |= CSUM_TSO; 4261 } else { 4262 printf("mxge requires tx checksum offload" 4263 " be enabled to use TSO\n"); 4264 err = EINVAL; 4265 } 4266 } 4267#endif /*IFCAP_TSO6 */ 4268 4269 if (mask & IFCAP_LRO) 4270 ifp->if_capenable ^= IFCAP_LRO; 4271 if (mask & IFCAP_VLAN_HWTAGGING) 4272 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 4273 if (mask & IFCAP_VLAN_HWTSO) 4274 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 4275 4276 if (!(ifp->if_capabilities & IFCAP_VLAN_HWTSO) || 4277 !(ifp->if_capenable & IFCAP_VLAN_HWTAGGING)) 4278 ifp->if_capenable &= ~IFCAP_VLAN_HWTSO; 4279 4280 mtx_unlock(&sc->driver_mtx); 4281 VLAN_CAPABILITIES(ifp); 4282 4283 break; 4284 4285 case SIOCGIFMEDIA: 4286 mtx_lock(&sc->driver_mtx); 4287 mxge_media_probe(sc); 4288 mtx_unlock(&sc->driver_mtx); 4289 err = ifmedia_ioctl(ifp, (struct ifreq *)data, 4290 &sc->media, command); 4291 break; 4292 4293 default: 4294 err = ether_ioctl(ifp, command, data); 4295 break; 4296 } 4297 return err; 4298} 4299 4300static void 4301mxge_fetch_tunables(mxge_softc_t *sc) 4302{ 4303 4304 TUNABLE_INT_FETCH("hw.mxge.max_slices", &mxge_max_slices); 4305 TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled", 4306 &mxge_flow_control); 4307 TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay", 4308 &mxge_intr_coal_delay); 4309 TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable", 4310 &mxge_nvidia_ecrc_enable); 4311 TUNABLE_INT_FETCH("hw.mxge.force_firmware", 4312 &mxge_force_firmware); 4313 TUNABLE_INT_FETCH("hw.mxge.deassert_wait", 4314 &mxge_deassert_wait); 4315 TUNABLE_INT_FETCH("hw.mxge.verbose", 4316 &mxge_verbose); 4317 TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks); 4318 TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc); 4319 TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type); 4320 TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type); 4321 TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu); 4322 TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle); 4323 4324 if (bootverbose) 4325 mxge_verbose = 1; 4326 if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000) 4327 mxge_intr_coal_delay = 30; 4328 if (mxge_ticks == 0) 4329 mxge_ticks = hz / 2; 4330 sc->pause = mxge_flow_control; 4331 if (mxge_rss_hash_type < MXGEFW_RSS_HASH_TYPE_IPV4 4332 || mxge_rss_hash_type > MXGEFW_RSS_HASH_TYPE_MAX) { 4333 mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT; 4334 } 4335 if (mxge_initial_mtu > ETHERMTU_JUMBO || 4336 mxge_initial_mtu < ETHER_MIN_LEN) 4337 mxge_initial_mtu = ETHERMTU_JUMBO; 4338 4339 if (mxge_throttle && mxge_throttle > MXGE_MAX_THROTTLE) 4340 mxge_throttle = MXGE_MAX_THROTTLE; 4341 if (mxge_throttle && mxge_throttle < MXGE_MIN_THROTTLE) 4342 mxge_throttle = MXGE_MIN_THROTTLE; 4343 sc->throttle = mxge_throttle; 4344} 4345 4346 4347static void 4348mxge_free_slices(mxge_softc_t *sc) 4349{ 4350 struct mxge_slice_state *ss; 4351 int i; 4352 4353 4354 if (sc->ss == NULL) 4355 return; 4356 4357 for (i = 0; i < sc->num_slices; i++) { 4358 ss = &sc->ss[i]; 4359 if (ss->fw_stats != NULL) { 4360 mxge_dma_free(&ss->fw_stats_dma); 4361 ss->fw_stats = NULL; 4362#ifdef IFNET_BUF_RING 4363 if (ss->tx.br != NULL) { 4364 drbr_free(ss->tx.br, M_DEVBUF); 4365 ss->tx.br = NULL; 4366 } 4367#endif 4368 mtx_destroy(&ss->tx.mtx); 4369 } 4370 if (ss->rx_done.entry != NULL) { 4371 mxge_dma_free(&ss->rx_done.dma); 4372 ss->rx_done.entry = NULL; 4373 } 4374 } 4375 free(sc->ss, M_DEVBUF); 4376 sc->ss = NULL; 4377} 4378 4379static int 4380mxge_alloc_slices(mxge_softc_t *sc) 4381{ 4382 mxge_cmd_t cmd; 4383 struct mxge_slice_state *ss; 4384 size_t bytes; 4385 int err, i, max_intr_slots; 4386 4387 err = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4388 if (err != 0) { 4389 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4390 return err; 4391 } 4392 sc->rx_ring_size = cmd.data0; 4393 max_intr_slots = 2 * (sc->rx_ring_size / sizeof (mcp_dma_addr_t)); 4394 4395 bytes = sizeof (*sc->ss) * sc->num_slices; 4396 sc->ss = malloc(bytes, M_DEVBUF, M_NOWAIT | M_ZERO); 4397 if (sc->ss == NULL) 4398 return (ENOMEM); 4399 for (i = 0; i < sc->num_slices; i++) { 4400 ss = &sc->ss[i]; 4401 4402 ss->sc = sc; 4403 4404 /* allocate per-slice rx interrupt queues */ 4405 4406 bytes = max_intr_slots * sizeof (*ss->rx_done.entry); 4407 err = mxge_dma_alloc(sc, &ss->rx_done.dma, bytes, 4096); 4408 if (err != 0) 4409 goto abort; 4410 ss->rx_done.entry = ss->rx_done.dma.addr; 4411 bzero(ss->rx_done.entry, bytes); 4412 4413 /* 4414 * allocate the per-slice firmware stats; stats 4415 * (including tx) are used used only on the first 4416 * slice for now 4417 */ 4418#ifndef IFNET_BUF_RING 4419 if (i > 0) 4420 continue; 4421#endif 4422 4423 bytes = sizeof (*ss->fw_stats); 4424 err = mxge_dma_alloc(sc, &ss->fw_stats_dma, 4425 sizeof (*ss->fw_stats), 64); 4426 if (err != 0) 4427 goto abort; 4428 ss->fw_stats = (mcp_irq_data_t *)ss->fw_stats_dma.addr; 4429 snprintf(ss->tx.mtx_name, sizeof(ss->tx.mtx_name), 4430 "%s:tx(%d)", device_get_nameunit(sc->dev), i); 4431 mtx_init(&ss->tx.mtx, ss->tx.mtx_name, NULL, MTX_DEF); 4432#ifdef IFNET_BUF_RING 4433 ss->tx.br = buf_ring_alloc(2048, M_DEVBUF, M_WAITOK, 4434 &ss->tx.mtx); 4435#endif 4436 } 4437 4438 return (0); 4439 4440abort: 4441 mxge_free_slices(sc); 4442 return (ENOMEM); 4443} 4444 4445static void 4446mxge_slice_probe(mxge_softc_t *sc) 4447{ 4448 mxge_cmd_t cmd; 4449 char *old_fw; 4450 int msix_cnt, status, max_intr_slots; 4451 4452 sc->num_slices = 1; 4453 /* 4454 * don't enable multiple slices if they are not enabled, 4455 * or if this is not an SMP system 4456 */ 4457 4458 if (mxge_max_slices == 0 || mxge_max_slices == 1 || mp_ncpus < 2) 4459 return; 4460 4461 /* see how many MSI-X interrupts are available */ 4462 msix_cnt = pci_msix_count(sc->dev); 4463 if (msix_cnt < 2) 4464 return; 4465 4466 /* now load the slice aware firmware see what it supports */ 4467 old_fw = sc->fw_name; 4468 if (old_fw == mxge_fw_aligned) 4469 sc->fw_name = mxge_fw_rss_aligned; 4470 else 4471 sc->fw_name = mxge_fw_rss_unaligned; 4472 status = mxge_load_firmware(sc, 0); 4473 if (status != 0) { 4474 device_printf(sc->dev, "Falling back to a single slice\n"); 4475 return; 4476 } 4477 4478 /* try to send a reset command to the card to see if it 4479 is alive */ 4480 memset(&cmd, 0, sizeof (cmd)); 4481 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 4482 if (status != 0) { 4483 device_printf(sc->dev, "failed reset\n"); 4484 goto abort_with_fw; 4485 } 4486 4487 /* get rx ring size */ 4488 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd); 4489 if (status != 0) { 4490 device_printf(sc->dev, "Cannot determine rx ring size\n"); 4491 goto abort_with_fw; 4492 } 4493 max_intr_slots = 2 * (cmd.data0 / sizeof (mcp_dma_addr_t)); 4494 4495 /* tell it the size of the interrupt queues */ 4496 cmd.data0 = max_intr_slots * sizeof (struct mcp_slot); 4497 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 4498 if (status != 0) { 4499 device_printf(sc->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); 4500 goto abort_with_fw; 4501 } 4502 4503 /* ask the maximum number of slices it supports */ 4504 status = mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd); 4505 if (status != 0) { 4506 device_printf(sc->dev, 4507 "failed MXGEFW_CMD_GET_MAX_RSS_QUEUES\n"); 4508 goto abort_with_fw; 4509 } 4510 sc->num_slices = cmd.data0; 4511 if (sc->num_slices > msix_cnt) 4512 sc->num_slices = msix_cnt; 4513 4514 if (mxge_max_slices == -1) { 4515 /* cap to number of CPUs in system */ 4516 if (sc->num_slices > mp_ncpus) 4517 sc->num_slices = mp_ncpus; 4518 } else { 4519 if (sc->num_slices > mxge_max_slices) 4520 sc->num_slices = mxge_max_slices; 4521 } 4522 /* make sure it is a power of two */ 4523 while (sc->num_slices & (sc->num_slices - 1)) 4524 sc->num_slices--; 4525 4526 if (mxge_verbose) 4527 device_printf(sc->dev, "using %d slices\n", 4528 sc->num_slices); 4529 4530 return; 4531 4532abort_with_fw: 4533 sc->fw_name = old_fw; 4534 (void) mxge_load_firmware(sc, 0); 4535} 4536 4537static int 4538mxge_add_msix_irqs(mxge_softc_t *sc) 4539{ 4540 size_t bytes; 4541 int count, err, i, rid; 4542 4543 rid = PCIR_BAR(2); 4544 sc->msix_table_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, 4545 &rid, RF_ACTIVE); 4546 4547 if (sc->msix_table_res == NULL) { 4548 device_printf(sc->dev, "couldn't alloc MSIX table res\n"); 4549 return ENXIO; 4550 } 4551 4552 count = sc->num_slices; 4553 err = pci_alloc_msix(sc->dev, &count); 4554 if (err != 0) { 4555 device_printf(sc->dev, "pci_alloc_msix: failed, wanted %d" 4556 "err = %d \n", sc->num_slices, err); 4557 goto abort_with_msix_table; 4558 } 4559 if (count < sc->num_slices) { 4560 device_printf(sc->dev, "pci_alloc_msix: need %d, got %d\n", 4561 count, sc->num_slices); 4562 device_printf(sc->dev, 4563 "Try setting hw.mxge.max_slices to %d\n", 4564 count); 4565 err = ENOSPC; 4566 goto abort_with_msix; 4567 } 4568 bytes = sizeof (*sc->msix_irq_res) * sc->num_slices; 4569 sc->msix_irq_res = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4570 if (sc->msix_irq_res == NULL) { 4571 err = ENOMEM; 4572 goto abort_with_msix; 4573 } 4574 4575 for (i = 0; i < sc->num_slices; i++) { 4576 rid = i + 1; 4577 sc->msix_irq_res[i] = bus_alloc_resource_any(sc->dev, 4578 SYS_RES_IRQ, 4579 &rid, RF_ACTIVE); 4580 if (sc->msix_irq_res[i] == NULL) { 4581 device_printf(sc->dev, "couldn't allocate IRQ res" 4582 " for message %d\n", i); 4583 err = ENXIO; 4584 goto abort_with_res; 4585 } 4586 } 4587 4588 bytes = sizeof (*sc->msix_ih) * sc->num_slices; 4589 sc->msix_ih = malloc(bytes, M_DEVBUF, M_NOWAIT|M_ZERO); 4590 4591 for (i = 0; i < sc->num_slices; i++) { 4592 err = bus_setup_intr(sc->dev, sc->msix_irq_res[i], 4593 INTR_TYPE_NET | INTR_MPSAFE, 4594#if __FreeBSD_version > 700030 4595 NULL, 4596#endif 4597 mxge_intr, &sc->ss[i], &sc->msix_ih[i]); 4598 if (err != 0) { 4599 device_printf(sc->dev, "couldn't setup intr for " 4600 "message %d\n", i); 4601 goto abort_with_intr; 4602 } 4603 bus_describe_intr(sc->dev, sc->msix_irq_res[i], 4604 sc->msix_ih[i], "s%d", i); 4605 } 4606 4607 if (mxge_verbose) { 4608 device_printf(sc->dev, "using %d msix IRQs:", 4609 sc->num_slices); 4610 for (i = 0; i < sc->num_slices; i++) 4611 printf(" %ld", rman_get_start(sc->msix_irq_res[i])); 4612 printf("\n"); 4613 } 4614 return (0); 4615 4616abort_with_intr: 4617 for (i = 0; i < sc->num_slices; i++) { 4618 if (sc->msix_ih[i] != NULL) { 4619 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4620 sc->msix_ih[i]); 4621 sc->msix_ih[i] = NULL; 4622 } 4623 } 4624 free(sc->msix_ih, M_DEVBUF); 4625 4626 4627abort_with_res: 4628 for (i = 0; i < sc->num_slices; i++) { 4629 rid = i + 1; 4630 if (sc->msix_irq_res[i] != NULL) 4631 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4632 sc->msix_irq_res[i]); 4633 sc->msix_irq_res[i] = NULL; 4634 } 4635 free(sc->msix_irq_res, M_DEVBUF); 4636 4637 4638abort_with_msix: 4639 pci_release_msi(sc->dev); 4640 4641abort_with_msix_table: 4642 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4643 sc->msix_table_res); 4644 4645 return err; 4646} 4647 4648static int 4649mxge_add_single_irq(mxge_softc_t *sc) 4650{ 4651 int count, err, rid; 4652 4653 count = pci_msi_count(sc->dev); 4654 if (count == 1 && pci_alloc_msi(sc->dev, &count) == 0) { 4655 rid = 1; 4656 } else { 4657 rid = 0; 4658 sc->legacy_irq = 1; 4659 } 4660 sc->irq_res = bus_alloc_resource(sc->dev, SYS_RES_IRQ, &rid, 0, ~0, 4661 1, RF_SHAREABLE | RF_ACTIVE); 4662 if (sc->irq_res == NULL) { 4663 device_printf(sc->dev, "could not alloc interrupt\n"); 4664 return ENXIO; 4665 } 4666 if (mxge_verbose) 4667 device_printf(sc->dev, "using %s irq %ld\n", 4668 sc->legacy_irq ? "INTx" : "MSI", 4669 rman_get_start(sc->irq_res)); 4670 err = bus_setup_intr(sc->dev, sc->irq_res, 4671 INTR_TYPE_NET | INTR_MPSAFE, 4672#if __FreeBSD_version > 700030 4673 NULL, 4674#endif 4675 mxge_intr, &sc->ss[0], &sc->ih); 4676 if (err != 0) { 4677 bus_release_resource(sc->dev, SYS_RES_IRQ, 4678 sc->legacy_irq ? 0 : 1, sc->irq_res); 4679 if (!sc->legacy_irq) 4680 pci_release_msi(sc->dev); 4681 } 4682 return err; 4683} 4684 4685static void 4686mxge_rem_msix_irqs(mxge_softc_t *sc) 4687{ 4688 int i, rid; 4689 4690 for (i = 0; i < sc->num_slices; i++) { 4691 if (sc->msix_ih[i] != NULL) { 4692 bus_teardown_intr(sc->dev, sc->msix_irq_res[i], 4693 sc->msix_ih[i]); 4694 sc->msix_ih[i] = NULL; 4695 } 4696 } 4697 free(sc->msix_ih, M_DEVBUF); 4698 4699 for (i = 0; i < sc->num_slices; i++) { 4700 rid = i + 1; 4701 if (sc->msix_irq_res[i] != NULL) 4702 bus_release_resource(sc->dev, SYS_RES_IRQ, rid, 4703 sc->msix_irq_res[i]); 4704 sc->msix_irq_res[i] = NULL; 4705 } 4706 free(sc->msix_irq_res, M_DEVBUF); 4707 4708 bus_release_resource(sc->dev, SYS_RES_MEMORY, PCIR_BAR(2), 4709 sc->msix_table_res); 4710 4711 pci_release_msi(sc->dev); 4712 return; 4713} 4714 4715static void 4716mxge_rem_single_irq(mxge_softc_t *sc) 4717{ 4718 bus_teardown_intr(sc->dev, sc->irq_res, sc->ih); 4719 bus_release_resource(sc->dev, SYS_RES_IRQ, 4720 sc->legacy_irq ? 0 : 1, sc->irq_res); 4721 if (!sc->legacy_irq) 4722 pci_release_msi(sc->dev); 4723} 4724 4725static void 4726mxge_rem_irq(mxge_softc_t *sc) 4727{ 4728 if (sc->num_slices > 1) 4729 mxge_rem_msix_irqs(sc); 4730 else 4731 mxge_rem_single_irq(sc); 4732} 4733 4734static int 4735mxge_add_irq(mxge_softc_t *sc) 4736{ 4737 int err; 4738 4739 if (sc->num_slices > 1) 4740 err = mxge_add_msix_irqs(sc); 4741 else 4742 err = mxge_add_single_irq(sc); 4743 4744 if (0 && err == 0 && sc->num_slices > 1) { 4745 mxge_rem_msix_irqs(sc); 4746 err = mxge_add_msix_irqs(sc); 4747 } 4748 return err; 4749} 4750 4751 4752static int 4753mxge_attach(device_t dev) 4754{ 4755 mxge_cmd_t cmd; 4756 mxge_softc_t *sc = device_get_softc(dev); 4757 struct ifnet *ifp; 4758 int err, rid; 4759 4760 sc->dev = dev; 4761 mxge_fetch_tunables(sc); 4762 4763 TASK_INIT(&sc->watchdog_task, 1, mxge_watchdog_task, sc); 4764 sc->tq = taskqueue_create("mxge_taskq", M_WAITOK, 4765 taskqueue_thread_enqueue, &sc->tq); 4766 if (sc->tq == NULL) { 4767 err = ENOMEM; 4768 goto abort_with_nothing; 4769 } 4770 4771 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 4772 1, /* alignment */ 4773 0, /* boundary */ 4774 BUS_SPACE_MAXADDR, /* low */ 4775 BUS_SPACE_MAXADDR, /* high */ 4776 NULL, NULL, /* filter */ 4777 65536 + 256, /* maxsize */ 4778 MXGE_MAX_SEND_DESC, /* num segs */ 4779 65536, /* maxsegsize */ 4780 0, /* flags */ 4781 NULL, NULL, /* lock */ 4782 &sc->parent_dmat); /* tag */ 4783 4784 if (err != 0) { 4785 device_printf(sc->dev, "Err %d allocating parent dmat\n", 4786 err); 4787 goto abort_with_tq; 4788 } 4789 4790 ifp = sc->ifp = if_alloc(IFT_ETHER); 4791 if (ifp == NULL) { 4792 device_printf(dev, "can not if_alloc()\n"); 4793 err = ENOSPC; 4794 goto abort_with_parent_dmat; 4795 } 4796 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 4797 4798 snprintf(sc->cmd_mtx_name, sizeof(sc->cmd_mtx_name), "%s:cmd", 4799 device_get_nameunit(dev)); 4800 mtx_init(&sc->cmd_mtx, sc->cmd_mtx_name, NULL, MTX_DEF); 4801 snprintf(sc->driver_mtx_name, sizeof(sc->driver_mtx_name), 4802 "%s:drv", device_get_nameunit(dev)); 4803 mtx_init(&sc->driver_mtx, sc->driver_mtx_name, 4804 MTX_NETWORK_LOCK, MTX_DEF); 4805 4806 callout_init_mtx(&sc->co_hdl, &sc->driver_mtx, 0); 4807 4808 mxge_setup_cfg_space(sc); 4809 4810 /* Map the board into the kernel */ 4811 rid = PCIR_BARS; 4812 sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0, 4813 ~0, 1, RF_ACTIVE); 4814 if (sc->mem_res == NULL) { 4815 device_printf(dev, "could not map memory\n"); 4816 err = ENXIO; 4817 goto abort_with_lock; 4818 } 4819 sc->sram = rman_get_virtual(sc->mem_res); 4820 sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100; 4821 if (sc->sram_size > rman_get_size(sc->mem_res)) { 4822 device_printf(dev, "impossible memory region size %ld\n", 4823 rman_get_size(sc->mem_res)); 4824 err = ENXIO; 4825 goto abort_with_mem_res; 4826 } 4827 4828 /* make NULL terminated copy of the EEPROM strings section of 4829 lanai SRAM */ 4830 bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE); 4831 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 4832 rman_get_bushandle(sc->mem_res), 4833 sc->sram_size - MXGE_EEPROM_STRINGS_SIZE, 4834 sc->eeprom_strings, 4835 MXGE_EEPROM_STRINGS_SIZE - 2); 4836 err = mxge_parse_strings(sc); 4837 if (err != 0) 4838 goto abort_with_mem_res; 4839 4840 /* Enable write combining for efficient use of PCIe bus */ 4841 mxge_enable_wc(sc); 4842 4843 /* Allocate the out of band dma memory */ 4844 err = mxge_dma_alloc(sc, &sc->cmd_dma, 4845 sizeof (mxge_cmd_t), 64); 4846 if (err != 0) 4847 goto abort_with_mem_res; 4848 sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr; 4849 err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64); 4850 if (err != 0) 4851 goto abort_with_cmd_dma; 4852 4853 err = mxge_dma_alloc(sc, &sc->dmabench_dma, 4096, 4096); 4854 if (err != 0) 4855 goto abort_with_zeropad_dma; 4856 4857 /* select & load the firmware */ 4858 err = mxge_select_firmware(sc); 4859 if (err != 0) 4860 goto abort_with_dmabench; 4861 sc->intr_coal_delay = mxge_intr_coal_delay; 4862 4863 mxge_slice_probe(sc); 4864 err = mxge_alloc_slices(sc); 4865 if (err != 0) 4866 goto abort_with_dmabench; 4867 4868 err = mxge_reset(sc, 0); 4869 if (err != 0) 4870 goto abort_with_slices; 4871 4872 err = mxge_alloc_rings(sc); 4873 if (err != 0) { 4874 device_printf(sc->dev, "failed to allocate rings\n"); 4875 goto abort_with_slices; 4876 } 4877 4878 err = mxge_add_irq(sc); 4879 if (err != 0) { 4880 device_printf(sc->dev, "failed to add irq\n"); 4881 goto abort_with_rings; 4882 } 4883 4884 if_initbaudrate(ifp, IF_Gbps(10)); 4885 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | 4886 IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 | 4887 IFCAP_RXCSUM_IPV6; 4888#if defined(INET) || defined(INET6) 4889 ifp->if_capabilities |= IFCAP_LRO; 4890#endif 4891 4892#ifdef MXGE_NEW_VLAN_API 4893 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 4894 4895 /* Only FW 1.4.32 and newer can do TSO over vlans */ 4896 if (sc->fw_ver_major == 1 && sc->fw_ver_minor == 4 && 4897 sc->fw_ver_tiny >= 32) 4898 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 4899#endif 4900 sc->max_mtu = mxge_max_mtu(sc); 4901 if (sc->max_mtu >= 9000) 4902 ifp->if_capabilities |= IFCAP_JUMBO_MTU; 4903 else 4904 device_printf(dev, "MTU limited to %d. Install " 4905 "latest firmware for 9000 byte jumbo support\n", 4906 sc->max_mtu - ETHER_HDR_LEN); 4907 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 4908 ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; 4909 /* check to see if f/w supports TSO for IPv6 */ 4910 if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { 4911 if (CSUM_TCP_IPV6) 4912 ifp->if_capabilities |= IFCAP_TSO6; 4913 sc->max_tso6_hlen = min(cmd.data0, 4914 sizeof (sc->ss[0].scratch)); 4915 } 4916 ifp->if_capenable = ifp->if_capabilities; 4917 if (sc->lro_cnt == 0) 4918 ifp->if_capenable &= ~IFCAP_LRO; 4919 ifp->if_init = mxge_init; 4920 ifp->if_softc = sc; 4921 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 4922 ifp->if_ioctl = mxge_ioctl; 4923 ifp->if_start = mxge_start; 4924 /* Initialise the ifmedia structure */ 4925 ifmedia_init(&sc->media, 0, mxge_media_change, 4926 mxge_media_status); 4927 mxge_media_init(sc); 4928 mxge_media_probe(sc); 4929 sc->dying = 0; 4930 ether_ifattach(ifp, sc->mac_addr); 4931 /* ether_ifattach sets mtu to ETHERMTU */ 4932 if (mxge_initial_mtu != ETHERMTU) 4933 mxge_change_mtu(sc, mxge_initial_mtu); 4934 4935 mxge_add_sysctls(sc); 4936#ifdef IFNET_BUF_RING 4937 ifp->if_transmit = mxge_transmit; 4938 ifp->if_qflush = mxge_qflush; 4939#endif 4940 taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq", 4941 device_get_nameunit(sc->dev)); 4942 callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); 4943 return 0; 4944 4945abort_with_rings: 4946 mxge_free_rings(sc); 4947abort_with_slices: 4948 mxge_free_slices(sc); 4949abort_with_dmabench: 4950 mxge_dma_free(&sc->dmabench_dma); 4951abort_with_zeropad_dma: 4952 mxge_dma_free(&sc->zeropad_dma); 4953abort_with_cmd_dma: 4954 mxge_dma_free(&sc->cmd_dma); 4955abort_with_mem_res: 4956 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 4957abort_with_lock: 4958 pci_disable_busmaster(dev); 4959 mtx_destroy(&sc->cmd_mtx); 4960 mtx_destroy(&sc->driver_mtx); 4961 if_free(ifp); 4962abort_with_parent_dmat: 4963 bus_dma_tag_destroy(sc->parent_dmat); 4964abort_with_tq: 4965 if (sc->tq != NULL) { 4966 taskqueue_drain(sc->tq, &sc->watchdog_task); 4967 taskqueue_free(sc->tq); 4968 sc->tq = NULL; 4969 } 4970abort_with_nothing: 4971 return err; 4972} 4973 4974static int 4975mxge_detach(device_t dev) 4976{ 4977 mxge_softc_t *sc = device_get_softc(dev); 4978 4979 if (mxge_vlans_active(sc)) { 4980 device_printf(sc->dev, 4981 "Detach vlans before removing module\n"); 4982 return EBUSY; 4983 } 4984 mtx_lock(&sc->driver_mtx); 4985 sc->dying = 1; 4986 if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) 4987 mxge_close(sc, 0); 4988 mtx_unlock(&sc->driver_mtx); 4989 ether_ifdetach(sc->ifp); 4990 if (sc->tq != NULL) { 4991 taskqueue_drain(sc->tq, &sc->watchdog_task); 4992 taskqueue_free(sc->tq); 4993 sc->tq = NULL; 4994 } 4995 callout_drain(&sc->co_hdl); 4996 ifmedia_removeall(&sc->media); 4997 mxge_dummy_rdma(sc, 0); 4998 mxge_rem_sysctls(sc); 4999 mxge_rem_irq(sc); 5000 mxge_free_rings(sc); 5001 mxge_free_slices(sc); 5002 mxge_dma_free(&sc->dmabench_dma); 5003 mxge_dma_free(&sc->zeropad_dma); 5004 mxge_dma_free(&sc->cmd_dma); 5005 bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res); 5006 pci_disable_busmaster(dev); 5007 mtx_destroy(&sc->cmd_mtx); 5008 mtx_destroy(&sc->driver_mtx); 5009 if_free(sc->ifp); 5010 bus_dma_tag_destroy(sc->parent_dmat); 5011 return 0; 5012} 5013 5014static int 5015mxge_shutdown(device_t dev) 5016{ 5017 return 0; 5018} 5019 5020/* 5021 This file uses Myri10GE driver indentation. 5022 5023 Local Variables: 5024 c-file-style:"linux" 5025 tab-width:8 5026 End: 5027*/ 5028