/* if_mxge.c revision 163467 */
/******************************************************************************

Copyright (c) 2006, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 3. Neither the name of the Myricom Inc, nor the names of its
    contributors may be used to endorse or promote products derived from
    this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
31 32***************************************************************************/ 33 34#include <sys/cdefs.h> 35__FBSDID("$FreeBSD: head/sys/dev/mxge/if_mxge.c 163467 2006-10-17 14:39:19Z gallatin $"); 36 37#include <sys/param.h> 38#include <sys/systm.h> 39#include <sys/linker.h> 40#include <sys/firmware.h> 41#include <sys/endian.h> 42#include <sys/sockio.h> 43#include <sys/mbuf.h> 44#include <sys/malloc.h> 45#include <sys/kdb.h> 46#include <sys/kernel.h> 47#include <sys/module.h> 48#include <sys/memrange.h> 49#include <sys/socket.h> 50#include <sys/sysctl.h> 51#include <sys/sx.h> 52 53#include <net/if.h> 54#include <net/if_arp.h> 55#include <net/ethernet.h> 56#include <net/if_dl.h> 57#include <net/if_media.h> 58 59#include <net/bpf.h> 60 61#include <net/if_types.h> 62#include <net/if_vlan_var.h> 63#include <net/zlib.h> 64 65#include <netinet/in_systm.h> 66#include <netinet/in.h> 67#include <netinet/ip.h> 68#include <netinet/tcp.h> 69 70#include <machine/bus.h> 71#include <machine/resource.h> 72#include <sys/bus.h> 73#include <sys/rman.h> 74 75#include <dev/pci/pcireg.h> 76#include <dev/pci/pcivar.h> 77 78#include <vm/vm.h> /* for pmap_mapdev() */ 79#include <vm/pmap.h> 80 81#include <dev/mxge/mxge_mcp.h> 82#include <dev/mxge/mcp_gen_header.h> 83#include <dev/mxge/if_mxge_var.h> 84 85/* tunable params */ 86static int mxge_nvidia_ecrc_enable = 1; 87static int mxge_max_intr_slots = 1024; 88static int mxge_intr_coal_delay = 30; 89static int mxge_deassert_wait = 1; 90static int mxge_flow_control = 1; 91static int mxge_verbose = 0; 92static char *mxge_fw_unaligned = "mxge_ethp_z8e"; 93static char *mxge_fw_aligned = "mxge_eth_z8e"; 94 95static int mxge_probe(device_t dev); 96static int mxge_attach(device_t dev); 97static int mxge_detach(device_t dev); 98static int mxge_shutdown(device_t dev); 99static void mxge_intr(void *arg); 100 101static device_method_t mxge_methods[] = 102{ 103 /* Device interface */ 104 DEVMETHOD(device_probe, mxge_probe), 105 
DEVMETHOD(device_attach, mxge_attach), 106 DEVMETHOD(device_detach, mxge_detach), 107 DEVMETHOD(device_shutdown, mxge_shutdown), 108 {0, 0} 109}; 110 111static driver_t mxge_driver = 112{ 113 "mxge", 114 mxge_methods, 115 sizeof(mxge_softc_t), 116}; 117 118static devclass_t mxge_devclass; 119 120/* Declare ourselves to be a child of the PCI bus.*/ 121DRIVER_MODULE(mxge, pci, mxge_driver, mxge_devclass, 0, 0); 122MODULE_DEPEND(mxge, firmware, 1, 1, 1); 123 124static int 125mxge_probe(device_t dev) 126{ 127 if ((pci_get_vendor(dev) == MXGE_PCI_VENDOR_MYRICOM) && 128 (pci_get_device(dev) == MXGE_PCI_DEVICE_Z8E)) { 129 device_set_desc(dev, "Myri10G-PCIE-8A"); 130 return 0; 131 } 132 return ENXIO; 133} 134 135static void 136mxge_enable_wc(mxge_softc_t *sc) 137{ 138 struct mem_range_desc mrdesc; 139 vm_paddr_t pa; 140 vm_offset_t len; 141 int err, action; 142 143 pa = rman_get_start(sc->mem_res); 144 len = rman_get_size(sc->mem_res); 145 mrdesc.mr_base = pa; 146 mrdesc.mr_len = len; 147 mrdesc.mr_flags = MDF_WRITECOMBINE; 148 action = MEMRANGE_SET_UPDATE; 149 strcpy((char *)&mrdesc.mr_owner, "mxge"); 150 err = mem_range_attr_set(&mrdesc, &action); 151 if (err != 0) { 152 device_printf(sc->dev, 153 "w/c failed for pa 0x%lx, len 0x%lx, err = %d\n", 154 (unsigned long)pa, (unsigned long)len, err); 155 } else { 156 sc->wc = 1; 157 } 158} 159 160 161/* callback to get our DMA address */ 162static void 163mxge_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, 164 int error) 165{ 166 if (error == 0) { 167 *(bus_addr_t *) arg = segs->ds_addr; 168 } 169} 170 171static int 172mxge_dma_alloc(mxge_softc_t *sc, mxge_dma_t *dma, size_t bytes, 173 bus_size_t alignment) 174{ 175 int err; 176 device_t dev = sc->dev; 177 178 /* allocate DMAable memory tags */ 179 err = bus_dma_tag_create(sc->parent_dmat, /* parent */ 180 alignment, /* alignment */ 181 4096, /* boundary */ 182 BUS_SPACE_MAXADDR, /* low */ 183 BUS_SPACE_MAXADDR, /* high */ 184 NULL, NULL, /* filter */ 185 
bytes, /* maxsize */ 186 1, /* num segs */ 187 4096, /* maxsegsize */ 188 BUS_DMA_COHERENT, /* flags */ 189 NULL, NULL, /* lock */ 190 &dma->dmat); /* tag */ 191 if (err != 0) { 192 device_printf(dev, "couldn't alloc tag (err = %d)\n", err); 193 return err; 194 } 195 196 /* allocate DMAable memory & map */ 197 err = bus_dmamem_alloc(dma->dmat, &dma->addr, 198 (BUS_DMA_WAITOK | BUS_DMA_COHERENT 199 | BUS_DMA_ZERO), &dma->map); 200 if (err != 0) { 201 device_printf(dev, "couldn't alloc mem (err = %d)\n", err); 202 goto abort_with_dmat; 203 } 204 205 /* load the memory */ 206 err = bus_dmamap_load(dma->dmat, dma->map, dma->addr, bytes, 207 mxge_dmamap_callback, 208 (void *)&dma->bus_addr, 0); 209 if (err != 0) { 210 device_printf(dev, "couldn't load map (err = %d)\n", err); 211 goto abort_with_mem; 212 } 213 return 0; 214 215abort_with_mem: 216 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 217abort_with_dmat: 218 (void)bus_dma_tag_destroy(dma->dmat); 219 return err; 220} 221 222 223static void 224mxge_dma_free(mxge_dma_t *dma) 225{ 226 bus_dmamap_unload(dma->dmat, dma->map); 227 bus_dmamem_free(dma->dmat, dma->addr, dma->map); 228 (void)bus_dma_tag_destroy(dma->dmat); 229} 230 231/* 232 * The eeprom strings on the lanaiX have the format 233 * SN=x\0 234 * MAC=x:x:x:x:x:x\0 235 * PC=text\0 236 */ 237 238static int 239mxge_parse_strings(mxge_softc_t *sc) 240{ 241#define MXGE_NEXT_STRING(p) while(ptr < limit && *ptr++) 242 243 char *ptr, *limit; 244 int i, found_mac; 245 246 ptr = sc->eeprom_strings; 247 limit = sc->eeprom_strings + MXGE_EEPROM_STRINGS_SIZE; 248 found_mac = 0; 249 while (ptr < limit && *ptr != '\0') { 250 if (memcmp(ptr, "MAC=", 4) == 0) { 251 ptr += 1; 252 sc->mac_addr_string = ptr; 253 for (i = 0; i < 6; i++) { 254 ptr += 3; 255 if ((ptr + 2) > limit) 256 goto abort; 257 sc->mac_addr[i] = strtoul(ptr, NULL, 16); 258 found_mac = 1; 259 } 260 } else if (memcmp(ptr, "PC=", 3) == 0) { 261 ptr += 3; 262 strncpy(sc->product_code_string, ptr, 263 sizeof 
(sc->product_code_string) - 1); 264 } else if (memcmp(ptr, "SN=", 3) == 0) { 265 ptr += 3; 266 strncpy(sc->serial_number_string, ptr, 267 sizeof (sc->serial_number_string) - 1); 268 } 269 MXGE_NEXT_STRING(ptr); 270 } 271 272 if (found_mac) 273 return 0; 274 275 abort: 276 device_printf(sc->dev, "failed to parse eeprom_strings\n"); 277 278 return ENXIO; 279} 280 281#if #cpu(i386) || defined __i386 || defined i386 || defined __i386__ || #cpu(x86_64) || defined __x86_64__ 282static int 283mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev) 284{ 285 uint32_t val; 286 unsigned long off; 287 char *va, *cfgptr; 288 uint16_t vendor_id, device_id; 289 uintptr_t bus, slot, func, ivend, idev; 290 uint32_t *ptr32; 291 292 /* XXXX 293 Test below is commented because it is believed that doing 294 config read/write beyond 0xff will access the config space 295 for the next larger function. Uncomment this and remove 296 the hacky pmap_mapdev() way of accessing config space when 297 FreeBSD grows support for extended pcie config space access 298 */ 299#if 0 300 /* See if we can, by some miracle, access the extended 301 config space */ 302 val = pci_read_config(pdev, 0x178, 4); 303 if (val != 0xffffffff) { 304 val |= 0x40; 305 pci_write_config(pdev, 0x178, val, 4); 306 return 0; 307 } 308#endif 309 /* Rather than using normal pci config space writes, we must 310 * map the Nvidia config space ourselves. This is because on 311 * opteron/nvidia class machine the 0xe000000 mapping is 312 * handled by the nvidia chipset, that means the internal PCI 313 * device (the on-chip northbridge), or the amd-8131 bridge 314 * and things behind them are not visible by this method. 
315 */ 316 317 BUS_READ_IVAR(device_get_parent(pdev), pdev, 318 PCI_IVAR_BUS, &bus); 319 BUS_READ_IVAR(device_get_parent(pdev), pdev, 320 PCI_IVAR_SLOT, &slot); 321 BUS_READ_IVAR(device_get_parent(pdev), pdev, 322 PCI_IVAR_FUNCTION, &func); 323 BUS_READ_IVAR(device_get_parent(pdev), pdev, 324 PCI_IVAR_VENDOR, &ivend); 325 BUS_READ_IVAR(device_get_parent(pdev), pdev, 326 PCI_IVAR_DEVICE, &idev); 327 328 off = 0xe0000000UL 329 + 0x00100000UL * (unsigned long)bus 330 + 0x00001000UL * (unsigned long)(func 331 + 8 * slot); 332 333 /* map it into the kernel */ 334 va = pmap_mapdev(trunc_page((vm_paddr_t)off), PAGE_SIZE); 335 336 337 if (va == NULL) { 338 device_printf(sc->dev, "pmap_kenter_temporary didn't\n"); 339 return EIO; 340 } 341 /* get a pointer to the config space mapped into the kernel */ 342 cfgptr = va + (off & PAGE_MASK); 343 344 /* make sure that we can really access it */ 345 vendor_id = *(uint16_t *)(cfgptr + PCIR_VENDOR); 346 device_id = *(uint16_t *)(cfgptr + PCIR_DEVICE); 347 if (! 
(vendor_id == ivend && device_id == idev)) { 348 device_printf(sc->dev, "mapping failed: 0x%x:0x%x\n", 349 vendor_id, device_id); 350 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 351 return EIO; 352 } 353 354 ptr32 = (uint32_t*)(cfgptr + 0x178); 355 val = *ptr32; 356 357 if (val == 0xffffffff) { 358 device_printf(sc->dev, "extended mapping failed\n"); 359 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 360 return EIO; 361 } 362 *ptr32 = val | 0x40; 363 pmap_unmapdev((vm_offset_t)va, PAGE_SIZE); 364 if (mxge_verbose) 365 device_printf(sc->dev, 366 "Enabled ECRC on upstream Nvidia bridge " 367 "at %d:%d:%d\n", 368 (int)bus, (int)slot, (int)func); 369 return 0; 370} 371#else 372static int 373mxge_enable_nvidia_ecrc(mxge_softc_t *sc, device_t pdev) 374{ 375 device_printf(sc->dev, 376 "Nforce 4 chipset on non-x86/amd64!?!?!\n"); 377 return ENXIO; 378} 379#endif 380/* 381 * The Lanai Z8E PCI-E interface achieves higher Read-DMA throughput 382 * when the PCI-E Completion packets are aligned on an 8-byte 383 * boundary. Some PCI-E chip sets always align Completion packets; on 384 * the ones that do not, the alignment can be enforced by enabling 385 * ECRC generation (if supported). 386 * 387 * When PCI-E Completion packets are not aligned, it is actually more 388 * efficient to limit Read-DMA transactions to 2KB, rather than 4KB. 389 * 390 * If the driver can neither enable ECRC nor verify that it has 391 * already been enabled, then it must use a firmware image which works 392 * around unaligned completion packets (ethp_z8e.dat), and it should 393 * also ensure that it never gives the device a Read-DMA which is 394 * larger than 2KB by setting the tx.boundary to 2KB. If ECRC is 395 * enabled, then the driver should use the aligned (eth_z8e.dat) 396 * firmware image, and set tx.boundary to 4KB. 
397 */ 398 399static void 400mxge_select_firmware(mxge_softc_t *sc) 401{ 402 int err, aligned = 0; 403 device_t pdev; 404 uint16_t pvend, pdid; 405 406 pdev = device_get_parent(device_get_parent(sc->dev)); 407 if (pdev == NULL) { 408 device_printf(sc->dev, "could not find parent?\n"); 409 goto abort; 410 } 411 pvend = pci_read_config(pdev, PCIR_VENDOR, 2); 412 pdid = pci_read_config(pdev, PCIR_DEVICE, 2); 413 414 /* see if we can enable ECRC's on an upstream 415 Nvidia bridge */ 416 if (mxge_nvidia_ecrc_enable && 417 (pvend == 0x10de && pdid == 0x005d)) { 418 err = mxge_enable_nvidia_ecrc(sc, pdev); 419 if (err == 0) { 420 aligned = 1; 421 if (mxge_verbose) 422 device_printf(sc->dev, 423 "Assuming aligned completions" 424 " (ECRC)\n"); 425 } 426 } 427 /* see if the upstream bridge is known to 428 provided aligned completions */ 429 if (/* HT2000 */ (pvend == 0x1166 && pdid == 0x0132) || 430 /* PLX */ (pvend == 0x10b5 && pdid == 0x8532) || 431 /* Intel */ (pvend == 0x8086 && 432 /* E5000 */(pdid >= 0x25f7 && pdid <= 0x25fa))) { 433 if (mxge_verbose) 434 device_printf(sc->dev, 435 "Assuming aligned completions " 436 "(0x%x:0x%x)\n", pvend, pdid); 437 } 438 439abort: 440 if (aligned) { 441 sc->fw_name = mxge_fw_aligned; 442 sc->tx.boundary = 4096; 443 } else { 444 sc->fw_name = mxge_fw_unaligned; 445 sc->tx.boundary = 2048; 446 } 447} 448 449union qualhack 450{ 451 const char *ro_char; 452 char *rw_char; 453}; 454 455static int 456mxge_validate_firmware(mxge_softc_t *sc, const mcp_gen_header_t *hdr) 457{ 458 int major, minor; 459 460 if (be32toh(hdr->mcp_type) != MCP_TYPE_ETH) { 461 device_printf(sc->dev, "Bad firmware type: 0x%x\n", 462 be32toh(hdr->mcp_type)); 463 return EIO; 464 } 465 466 /* save firmware version for sysctl */ 467 strncpy(sc->fw_version, hdr->version, sizeof (sc->fw_version)); 468 if (mxge_verbose) 469 device_printf(sc->dev, "firmware id: %s\n", hdr->version); 470 471 sscanf(sc->fw_version, "%d.%d", &major, &minor); 472 473 if (!(major == 
MXGEFW_VERSION_MAJOR 474 && minor == MXGEFW_VERSION_MINOR)) { 475 device_printf(sc->dev, "Found firmware version %s\n", 476 sc->fw_version); 477 device_printf(sc->dev, "Driver needs %d.%d\n", 478 MXGEFW_VERSION_MAJOR, MXGEFW_VERSION_MINOR); 479 return EINVAL; 480 } 481 return 0; 482 483} 484 485static int 486mxge_load_firmware_helper(mxge_softc_t *sc, uint32_t *limit) 487{ 488 struct firmware *fw; 489 const mcp_gen_header_t *hdr; 490 unsigned hdr_offset; 491 const char *fw_data; 492 union qualhack hack; 493 int status; 494 unsigned int i; 495 char dummy; 496 497 498 fw = firmware_get(sc->fw_name); 499 500 if (fw == NULL) { 501 device_printf(sc->dev, "Could not find firmware image %s\n", 502 sc->fw_name); 503 return ENOENT; 504 } 505 if (fw->datasize > *limit || 506 fw->datasize < MCP_HEADER_PTR_OFFSET + 4) { 507 device_printf(sc->dev, "Firmware image %s too large (%d/%d)\n", 508 sc->fw_name, (int)fw->datasize, (int) *limit); 509 status = ENOSPC; 510 goto abort_with_fw; 511 } 512 *limit = fw->datasize; 513 514 /* check id */ 515 fw_data = (const char *)fw->data; 516 hdr_offset = htobe32(*(const uint32_t *) 517 (fw_data + MCP_HEADER_PTR_OFFSET)); 518 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > fw->datasize) { 519 device_printf(sc->dev, "Bad firmware file"); 520 status = EIO; 521 goto abort_with_fw; 522 } 523 hdr = (const void*)(fw_data + hdr_offset); 524 525 status = mxge_validate_firmware(sc, hdr); 526 if (status != 0) 527 goto abort_with_fw; 528 529 hack.ro_char = fw_data; 530 /* Copy the inflated firmware to NIC SRAM. 
*/ 531 for (i = 0; i < *limit; i += 256) { 532 mxge_pio_copy(sc->sram + MXGE_FW_OFFSET + i, 533 hack.rw_char + i, 534 min(256U, (unsigned)(*limit - i))); 535 mb(); 536 dummy = *sc->sram; 537 mb(); 538 } 539 540 status = 0; 541abort_with_fw: 542 firmware_put(fw, FIRMWARE_UNLOAD); 543 return status; 544} 545 546/* 547 * Enable or disable periodic RDMAs from the host to make certain 548 * chipsets resend dropped PCIe messages 549 */ 550 551static void 552mxge_dummy_rdma(mxge_softc_t *sc, int enable) 553{ 554 char buf_bytes[72]; 555 volatile uint32_t *confirm; 556 volatile char *submit; 557 uint32_t *buf, dma_low, dma_high; 558 int i; 559 560 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 561 562 /* clear confirmation addr */ 563 confirm = (volatile uint32_t *)sc->cmd; 564 *confirm = 0; 565 mb(); 566 567 /* send an rdma command to the PCIe engine, and wait for the 568 response in the confirmation address. The firmware should 569 write a -1 there to indicate it is alive and well 570 */ 571 572 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 573 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 574 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 575 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 576 buf[2] = htobe32(0xffffffff); /* confirm data */ 577 dma_low = MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr); 578 dma_high = MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr); 579 buf[3] = htobe32(dma_high); /* dummy addr MSW */ 580 buf[4] = htobe32(dma_low); /* dummy addr LSW */ 581 buf[5] = htobe32(enable); /* enable? */ 582 583 584 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_DUMMY_RDMA); 585 586 mxge_pio_copy(submit, buf, 64); 587 mb(); 588 DELAY(1000); 589 mb(); 590 i = 0; 591 while (*confirm != 0xffffffff && i < 20) { 592 DELAY(1000); 593 i++; 594 } 595 if (*confirm != 0xffffffff) { 596 device_printf(sc->dev, "dummy rdma %s failed (%p = 0x%x)", 597 (enable ? 
"enable" : "disable"), confirm, 598 *confirm); 599 } 600 return; 601} 602 603static int 604mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data) 605{ 606 mcp_cmd_t *buf; 607 char buf_bytes[sizeof(*buf) + 8]; 608 volatile mcp_cmd_response_t *response = sc->cmd; 609 volatile char *cmd_addr = sc->sram + MXGEFW_ETH_CMD; 610 uint32_t dma_low, dma_high; 611 int sleep_total = 0; 612 613 /* ensure buf is aligned to 8 bytes */ 614 buf = (mcp_cmd_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 615 616 buf->data0 = htobe32(data->data0); 617 buf->data1 = htobe32(data->data1); 618 buf->data2 = htobe32(data->data2); 619 buf->cmd = htobe32(cmd); 620 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 621 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 622 623 buf->response_addr.low = htobe32(dma_low); 624 buf->response_addr.high = htobe32(dma_high); 625 mtx_lock(&sc->cmd_lock); 626 response->result = 0xffffffff; 627 mb(); 628 mxge_pio_copy((volatile void *)cmd_addr, buf, sizeof (*buf)); 629 630 /* wait up to 20ms */ 631 for (sleep_total = 0; sleep_total < 20; sleep_total++) { 632 bus_dmamap_sync(sc->cmd_dma.dmat, 633 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 634 mb(); 635 if (response->result != 0xffffffff) { 636 if (response->result == 0) { 637 data->data0 = be32toh(response->data); 638 mtx_unlock(&sc->cmd_lock); 639 return 0; 640 } else { 641 device_printf(sc->dev, 642 "mxge: command %d " 643 "failed, result = %d\n", 644 cmd, be32toh(response->result)); 645 mtx_unlock(&sc->cmd_lock); 646 return ENXIO; 647 } 648 } 649 DELAY(1000); 650 } 651 mtx_unlock(&sc->cmd_lock); 652 device_printf(sc->dev, "mxge: command %d timed out" 653 "result = %d\n", 654 cmd, be32toh(response->result)); 655 return EAGAIN; 656} 657 658static int 659mxge_adopt_running_firmware(mxge_softc_t *sc) 660{ 661 struct mcp_gen_header *hdr; 662 const size_t bytes = sizeof (struct mcp_gen_header); 663 size_t hdr_offset; 664 int status; 665 666 /* find running firmware header */ 667 hdr_offset = 
htobe32(*(volatile uint32_t *) 668 (sc->sram + MCP_HEADER_PTR_OFFSET)); 669 670 if ((hdr_offset & 3) || hdr_offset + sizeof(*hdr) > sc->sram_size) { 671 device_printf(sc->dev, 672 "Running firmware has bad header offset (%d)\n", 673 (int)hdr_offset); 674 return EIO; 675 } 676 677 /* copy header of running firmware from SRAM to host memory to 678 * validate firmware */ 679 hdr = malloc(bytes, M_DEVBUF, M_NOWAIT); 680 if (hdr == NULL) { 681 device_printf(sc->dev, "could not malloc firmware hdr\n"); 682 return ENOMEM; 683 } 684 bus_space_read_region_1(rman_get_bustag(sc->mem_res), 685 rman_get_bushandle(sc->mem_res), 686 hdr_offset, (char *)hdr, bytes); 687 status = mxge_validate_firmware(sc, hdr); 688 free(hdr, M_DEVBUF); 689 return status; 690} 691 692 693static int 694mxge_load_firmware(mxge_softc_t *sc) 695{ 696 volatile uint32_t *confirm; 697 volatile char *submit; 698 char buf_bytes[72]; 699 uint32_t *buf, size, dma_low, dma_high; 700 int status, i; 701 702 buf = (uint32_t *)((unsigned long)(buf_bytes + 7) & ~7UL); 703 704 size = sc->sram_size; 705 status = mxge_load_firmware_helper(sc, &size); 706 if (status) { 707 /* Try to use the currently running firmware, if 708 it is new enough */ 709 status = mxge_adopt_running_firmware(sc); 710 if (status) { 711 device_printf(sc->dev, 712 "failed to adopt running firmware\n"); 713 return status; 714 } 715 device_printf(sc->dev, 716 "Successfully adopted running firmware\n"); 717 if (sc->tx.boundary == 4096) { 718 device_printf(sc->dev, 719 "Using firmware currently running on NIC" 720 ". For optimal\n"); 721 device_printf(sc->dev, 722 "performance consider loading optimized " 723 "firmware\n"); 724 } 725 726 } 727 /* clear confirmation addr */ 728 confirm = (volatile uint32_t *)sc->cmd; 729 *confirm = 0; 730 mb(); 731 /* send a reload command to the bootstrap MCP, and wait for the 732 response in the confirmation address. 
The firmware should 733 write a -1 there to indicate it is alive and well 734 */ 735 736 dma_low = MXGE_LOWPART_TO_U32(sc->cmd_dma.bus_addr); 737 dma_high = MXGE_HIGHPART_TO_U32(sc->cmd_dma.bus_addr); 738 739 buf[0] = htobe32(dma_high); /* confirm addr MSW */ 740 buf[1] = htobe32(dma_low); /* confirm addr LSW */ 741 buf[2] = htobe32(0xffffffff); /* confirm data */ 742 743 /* FIX: All newest firmware should un-protect the bottom of 744 the sram before handoff. However, the very first interfaces 745 do not. Therefore the handoff copy must skip the first 8 bytes 746 */ 747 /* where the code starts*/ 748 buf[3] = htobe32(MXGE_FW_OFFSET + 8); 749 buf[4] = htobe32(size - 8); /* length of code */ 750 buf[5] = htobe32(8); /* where to copy to */ 751 buf[6] = htobe32(0); /* where to jump to */ 752 753 submit = (volatile char *)(sc->sram + MXGEFW_BOOT_HANDOFF); 754 mxge_pio_copy(submit, buf, 64); 755 mb(); 756 DELAY(1000); 757 mb(); 758 i = 0; 759 while (*confirm != 0xffffffff && i < 20) { 760 DELAY(1000*10); 761 i++; 762 bus_dmamap_sync(sc->cmd_dma.dmat, 763 sc->cmd_dma.map, BUS_DMASYNC_POSTREAD); 764 } 765 if (*confirm != 0xffffffff) { 766 device_printf(sc->dev,"handoff failed (%p = 0x%x)", 767 confirm, *confirm); 768 769 return ENXIO; 770 } 771 return 0; 772} 773 774static int 775mxge_update_mac_address(mxge_softc_t *sc) 776{ 777 mxge_cmd_t cmd; 778 uint8_t *addr = sc->mac_addr; 779 int status; 780 781 782 cmd.data0 = ((addr[0] << 24) | (addr[1] << 16) 783 | (addr[2] << 8) | addr[3]); 784 785 cmd.data1 = ((addr[4] << 8) | (addr[5])); 786 787 status = mxge_send_cmd(sc, MXGEFW_SET_MAC_ADDRESS, &cmd); 788 return status; 789} 790 791static int 792mxge_change_pause(mxge_softc_t *sc, int pause) 793{ 794 mxge_cmd_t cmd; 795 int status; 796 797 if (pause) 798 status = mxge_send_cmd(sc, MXGEFW_ENABLE_FLOW_CONTROL, 799 &cmd); 800 else 801 status = mxge_send_cmd(sc, MXGEFW_DISABLE_FLOW_CONTROL, 802 &cmd); 803 804 if (status) { 805 device_printf(sc->dev, "Failed to set flow control 
mode\n"); 806 return ENXIO; 807 } 808 sc->pause = pause; 809 return 0; 810} 811 812static void 813mxge_change_promisc(mxge_softc_t *sc, int promisc) 814{ 815 mxge_cmd_t cmd; 816 int status; 817 818 if (promisc) 819 status = mxge_send_cmd(sc, MXGEFW_ENABLE_PROMISC, 820 &cmd); 821 else 822 status = mxge_send_cmd(sc, MXGEFW_DISABLE_PROMISC, 823 &cmd); 824 825 if (status) { 826 device_printf(sc->dev, "Failed to set promisc mode\n"); 827 } 828} 829 830static void 831mxge_set_multicast_list(mxge_softc_t *sc) 832{ 833 mxge_cmd_t cmd; 834 struct ifmultiaddr *ifma; 835 struct ifnet *ifp = sc->ifp; 836 int err; 837 838 /* This firmware is known to not support multicast */ 839 if (!sc->fw_multicast_support) 840 return; 841 842 /* Disable multicast filtering while we play with the lists*/ 843 err = mxge_send_cmd(sc, MXGEFW_ENABLE_ALLMULTI, &cmd); 844 if (err != 0) { 845 device_printf(sc->dev, "Failed MXGEFW_ENABLE_ALLMULTI," 846 " error status: %d\n", err); 847 return; 848 } 849 850 851 if (ifp->if_flags & IFF_ALLMULTI) 852 /* request to disable multicast filtering, so quit here */ 853 return; 854 855 /* Flush all the filters */ 856 857 err = mxge_send_cmd(sc, MXGEFW_LEAVE_ALL_MULTICAST_GROUPS, &cmd); 858 if (err != 0) { 859 device_printf(sc->dev, 860 "Failed MXGEFW_LEAVE_ALL_MULTICAST_GROUPS" 861 ", error status: %d\n", err); 862 return; 863 } 864 865 /* Walk the multicast list, and add each address */ 866 867 IF_ADDR_LOCK(ifp); 868 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 869 if (ifma->ifma_addr->sa_family != AF_LINK) 870 continue; 871 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 872 &cmd.data0, 4); 873 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr) + 4, 874 &cmd.data1, 2); 875 cmd.data0 = htonl(cmd.data0); 876 cmd.data1 = htonl(cmd.data1); 877 err = mxge_send_cmd(sc, MXGEFW_JOIN_MULTICAST_GROUP, &cmd); 878 if (err != 0) { 879 device_printf(sc->dev, "Failed " 880 "MXGEFW_JOIN_MULTICAST_GROUP, error status:" 881 "%d\t", err); 882 /* abort, leaving 
multicast filtering off */ 883 IF_ADDR_UNLOCK(ifp); 884 return; 885 } 886 } 887 IF_ADDR_UNLOCK(ifp); 888 /* Enable multicast filtering */ 889 err = mxge_send_cmd(sc, MXGEFW_DISABLE_ALLMULTI, &cmd); 890 if (err != 0) { 891 device_printf(sc->dev, "Failed MXGEFW_DISABLE_ALLMULTI" 892 ", error status: %d\n", err); 893 } 894} 895 896 897static int 898mxge_reset(mxge_softc_t *sc) 899{ 900 901 mxge_cmd_t cmd; 902 mxge_dma_t dmabench_dma; 903 size_t bytes; 904 int status; 905 906 /* try to send a reset command to the card to see if it 907 is alive */ 908 memset(&cmd, 0, sizeof (cmd)); 909 status = mxge_send_cmd(sc, MXGEFW_CMD_RESET, &cmd); 910 if (status != 0) { 911 device_printf(sc->dev, "failed reset\n"); 912 return ENXIO; 913 } 914 915 mxge_dummy_rdma(sc, 1); 916 917 /* Now exchange information about interrupts */ 918 bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry);\ 919 memset(sc->rx_done.entry, 0, bytes); 920 cmd.data0 = (uint32_t)bytes; 921 status = mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd); 922 cmd.data0 = MXGE_LOWPART_TO_U32(sc->rx_done.dma.bus_addr); 923 cmd.data1 = MXGE_HIGHPART_TO_U32(sc->rx_done.dma.bus_addr); 924 status |= mxge_send_cmd(sc, MXGEFW_CMD_SET_INTRQ_DMA, &cmd); 925 926 status |= mxge_send_cmd(sc, 927 MXGEFW_CMD_GET_INTR_COAL_DELAY_OFFSET, &cmd); 928 929 930 sc->intr_coal_delay_ptr = (volatile uint32_t *)(sc->sram + cmd.data0); 931 932 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd); 933 sc->irq_claim = (volatile uint32_t *)(sc->sram + cmd.data0); 934 935 936 status |= mxge_send_cmd(sc, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, 937 &cmd); 938 sc->irq_deassert = (volatile uint32_t *)(sc->sram + cmd.data0); 939 if (status != 0) { 940 device_printf(sc->dev, "failed set interrupt parameters\n"); 941 return status; 942 } 943 944 945 *sc->intr_coal_delay_ptr = htobe32(sc->intr_coal_delay); 946 947 948 /* run a DMA benchmark */ 949 sc->read_dma = sc->write_dma = sc->read_write_dma = 0; 950 status = mxge_dma_alloc(sc, 
&dmabench_dma, 4096, 4096); 951 if (status) 952 goto dmabench_fail; 953 954 /* Read DMA */ 955 cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr); 956 cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr); 957 cmd.data2 = sc->tx.boundary * 0x10000; 958 959 status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd); 960 if (status != 0) 961 device_printf(sc->dev, "read dma benchmark failed\n"); 962 else 963 sc->read_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) / 964 (cmd.data0 & 0xffff); 965 966 /* Write DMA */ 967 cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr); 968 cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr); 969 cmd.data2 = sc->tx.boundary * 0x1; 970 status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd); 971 if (status != 0) 972 device_printf(sc->dev, "write dma benchmark failed\n"); 973 else 974 sc->write_dma = ((cmd.data0>>16) * sc->tx.boundary * 2) / 975 (cmd.data0 & 0xffff); 976 /* Read/Write DMA */ 977 cmd.data0 = MXGE_LOWPART_TO_U32(dmabench_dma.bus_addr); 978 cmd.data1 = MXGE_HIGHPART_TO_U32(dmabench_dma.bus_addr); 979 cmd.data2 = sc->tx.boundary * 0x10001; 980 status = mxge_send_cmd(sc, MXGEFW_DMA_TEST, &cmd); 981 if (status != 0) 982 device_printf(sc->dev, "read/write dma benchmark failed\n"); 983 else 984 sc->read_write_dma = 985 ((cmd.data0>>16) * sc->tx.boundary * 2 * 2) / 986 (cmd.data0 & 0xffff); 987 988 mxge_dma_free(&dmabench_dma); 989 990dmabench_fail: 991 /* reset mcp/driver shared state back to 0 */ 992 bzero(sc->rx_done.entry, bytes); 993 sc->rx_done.idx = 0; 994 sc->rx_done.cnt = 0; 995 sc->tx.req = 0; 996 sc->tx.done = 0; 997 sc->tx.pkt_done = 0; 998 sc->rx_big.cnt = 0; 999 sc->rx_small.cnt = 0; 1000 sc->rdma_tags_available = 15; 1001 status = mxge_update_mac_address(sc); 1002 mxge_change_promisc(sc, 0); 1003 mxge_change_pause(sc, sc->pause); 1004 mxge_set_multicast_list(sc); 1005 return status; 1006} 1007 1008static int 1009mxge_change_intr_coal(SYSCTL_HANDLER_ARGS) 1010{ 1011 mxge_softc_t *sc; 1012 unsigned int 
intr_coal_delay; 1013 int err; 1014 1015 sc = arg1; 1016 intr_coal_delay = sc->intr_coal_delay; 1017 err = sysctl_handle_int(oidp, &intr_coal_delay, arg2, req); 1018 if (err != 0) { 1019 return err; 1020 } 1021 if (intr_coal_delay == sc->intr_coal_delay) 1022 return 0; 1023 1024 if (intr_coal_delay == 0 || intr_coal_delay > 1000*1000) 1025 return EINVAL; 1026 1027 sx_xlock(&sc->driver_lock); 1028 *sc->intr_coal_delay_ptr = htobe32(intr_coal_delay); 1029 sc->intr_coal_delay = intr_coal_delay; 1030 1031 sx_xunlock(&sc->driver_lock); 1032 return err; 1033} 1034 1035static int 1036mxge_change_flow_control(SYSCTL_HANDLER_ARGS) 1037{ 1038 mxge_softc_t *sc; 1039 unsigned int enabled; 1040 int err; 1041 1042 sc = arg1; 1043 enabled = sc->pause; 1044 err = sysctl_handle_int(oidp, &enabled, arg2, req); 1045 if (err != 0) { 1046 return err; 1047 } 1048 if (enabled == sc->pause) 1049 return 0; 1050 1051 sx_xlock(&sc->driver_lock); 1052 err = mxge_change_pause(sc, enabled); 1053 sx_xunlock(&sc->driver_lock); 1054 return err; 1055} 1056 1057static int 1058mxge_handle_be32(SYSCTL_HANDLER_ARGS) 1059{ 1060 int err; 1061 1062 if (arg1 == NULL) 1063 return EFAULT; 1064 arg2 = be32toh(*(int *)arg1); 1065 arg1 = NULL; 1066 err = sysctl_handle_int(oidp, arg1, arg2, req); 1067 1068 return err; 1069} 1070 1071static void 1072mxge_add_sysctls(mxge_softc_t *sc) 1073{ 1074 struct sysctl_ctx_list *ctx; 1075 struct sysctl_oid_list *children; 1076 mcp_irq_data_t *fw; 1077 1078 ctx = device_get_sysctl_ctx(sc->dev); 1079 children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)); 1080 fw = sc->fw_stats; 1081 1082 /* random information */ 1083 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1084 "firmware_version", 1085 CTLFLAG_RD, &sc->fw_version, 1086 0, "firmware version"); 1087 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1088 "serial_number", 1089 CTLFLAG_RD, &sc->serial_number_string, 1090 0, "serial number"); 1091 SYSCTL_ADD_STRING(ctx, children, OID_AUTO, 1092 "product_code", 1093 CTLFLAG_RD, 
&sc->product_code_string, 1094 0, "product_code"); 1095 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1096 "tx_boundary", 1097 CTLFLAG_RD, &sc->tx.boundary, 1098 0, "tx_boundary"); 1099 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1100 "write_combine", 1101 CTLFLAG_RD, &sc->wc, 1102 0, "write combining PIO?"); 1103 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1104 "read_dma_MBs", 1105 CTLFLAG_RD, &sc->read_dma, 1106 0, "DMA Read speed in MB/s"); 1107 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1108 "write_dma_MBs", 1109 CTLFLAG_RD, &sc->write_dma, 1110 0, "DMA Write speed in MB/s"); 1111 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1112 "read_write_dma_MBs", 1113 CTLFLAG_RD, &sc->read_write_dma, 1114 0, "DMA concurrent Read/Write speed in MB/s"); 1115 1116 1117 /* performance related tunables */ 1118 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1119 "intr_coal_delay", 1120 CTLTYPE_INT|CTLFLAG_RW, sc, 1121 0, mxge_change_intr_coal, 1122 "I", "interrupt coalescing delay in usecs"); 1123 1124 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1125 "flow_control_enabled", 1126 CTLTYPE_INT|CTLFLAG_RW, sc, 1127 0, mxge_change_flow_control, 1128 "I", "interrupt coalescing delay in usecs"); 1129 1130 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1131 "deassert_wait", 1132 CTLFLAG_RW, &mxge_deassert_wait, 1133 0, "Wait for IRQ line to go low in ihandler"); 1134 1135 /* stats block from firmware is in network byte order. 
1136 Need to swap it */ 1137 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1138 "link_up", 1139 CTLTYPE_INT|CTLFLAG_RD, &fw->link_up, 1140 0, mxge_handle_be32, 1141 "I", "link up"); 1142 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1143 "rdma_tags_available", 1144 CTLTYPE_INT|CTLFLAG_RD, &fw->rdma_tags_available, 1145 0, mxge_handle_be32, 1146 "I", "rdma_tags_available"); 1147 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1148 "dropped_link_overflow", 1149 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_link_overflow, 1150 0, mxge_handle_be32, 1151 "I", "dropped_link_overflow"); 1152 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1153 "dropped_link_error_or_filtered", 1154 CTLTYPE_INT|CTLFLAG_RD, 1155 &fw->dropped_link_error_or_filtered, 1156 0, mxge_handle_be32, 1157 "I", "dropped_link_error_or_filtered"); 1158 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1159 "dropped_multicast_filtered", 1160 CTLTYPE_INT|CTLFLAG_RD, 1161 &fw->dropped_multicast_filtered, 1162 0, mxge_handle_be32, 1163 "I", "dropped_multicast_filtered"); 1164 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1165 "dropped_runt", 1166 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_runt, 1167 0, mxge_handle_be32, 1168 "I", "dropped_runt"); 1169 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1170 "dropped_overrun", 1171 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_overrun, 1172 0, mxge_handle_be32, 1173 "I", "dropped_overrun"); 1174 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1175 "dropped_no_small_buffer", 1176 CTLTYPE_INT|CTLFLAG_RD, 1177 &fw->dropped_no_small_buffer, 1178 0, mxge_handle_be32, 1179 "I", "dropped_no_small_buffer"); 1180 SYSCTL_ADD_PROC(ctx, children, OID_AUTO, 1181 "dropped_no_big_buffer", 1182 CTLTYPE_INT|CTLFLAG_RD, &fw->dropped_no_big_buffer, 1183 0, mxge_handle_be32, 1184 "I", "dropped_no_big_buffer"); 1185 1186 /* host counters exported for debugging */ 1187 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1188 "rx_small_cnt", 1189 CTLFLAG_RD, &sc->rx_small.cnt, 1190 0, "rx_small_cnt"); 1191 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1192 "rx_big_cnt", 1193 
CTLFLAG_RD, &sc->rx_big.cnt, 1194 0, "rx_small_cnt"); 1195 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1196 "tx_req", 1197 CTLFLAG_RD, &sc->tx.req, 1198 0, "tx_req"); 1199 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1200 "tx_done", 1201 CTLFLAG_RD, &sc->tx.done, 1202 0, "tx_done"); 1203 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1204 "tx_pkt_done", 1205 CTLFLAG_RD, &sc->tx.pkt_done, 1206 0, "tx_done"); 1207 1208 /* verbose printing? */ 1209 SYSCTL_ADD_INT(ctx, children, OID_AUTO, 1210 "verbose", 1211 CTLFLAG_RW, &mxge_verbose, 1212 0, "verbose printing"); 1213 1214} 1215 1216/* copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1217 backwards one at a time and handle ring wraps */ 1218 1219static inline void 1220mxge_submit_req_backwards(mxge_tx_buf_t *tx, 1221 mcp_kreq_ether_send_t *src, int cnt) 1222{ 1223 int idx, starting_slot; 1224 starting_slot = tx->req; 1225 while (cnt > 1) { 1226 cnt--; 1227 idx = (starting_slot + cnt) & tx->mask; 1228 mxge_pio_copy(&tx->lanai[idx], 1229 &src[cnt], sizeof(*src)); 1230 mb(); 1231 } 1232} 1233 1234/* 1235 * copy an array of mcp_kreq_ether_send_t's to the mcp. Copy 1236 * at most 32 bytes at a time, so as to avoid involving the software 1237 * pio handler in the nic. 
 * We re-write the first segment's flags
 * to mark them valid only after writing the entire chain
 */

static inline void
mxge_submit_req(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src,
		int cnt)
{
	int idx, i;
	uint32_t *src_ints;
	volatile uint32_t *dst_ints;
	mcp_kreq_ether_send_t *srcp;
	volatile mcp_kreq_ether_send_t *dstp, *dst;
	uint8_t last_flags;

	idx = tx->req & tx->mask;

	/* clear the valid flags on the first request so the NIC
	   ignores the chain until it is completely written */
	last_flags = src->flags;
	src->flags = 0;
	mb();
	dst = dstp = &tx->lanai[idx];
	srcp = src;

	if ((idx + cnt) < tx->mask) {
		/* no ring wrap: copy forwards, two 16-byte requests
		   (32 bytes) per burst */
		for (i = 0; i < (cnt - 1); i += 2) {
			mxge_pio_copy(dstp, srcp, 2 * sizeof(*src));
			mb(); /* force write every 32 bytes */
			srcp += 2;
			dstp += 2;
		}
	} else {
		/* submit all but the first request, and ensure
		   that it is submitted below */
		mxge_submit_req_backwards(tx, src, cnt);
		i = 0;
	}
	if (i < cnt) {
		/* submit the first request */
		mxge_pio_copy(dstp, srcp, sizeof(*src));
		mb(); /* barrier before setting valid flag */
	}

	/* re-write the last 32-bits with the valid flags */
	src->flags = last_flags;
	src_ints = (uint32_t *)src;
	src_ints += 3;
	dst_ints = (volatile uint32_t *)dst;
	dst_ints += 3;
	*dst_ints = *src_ints;
	tx->req += cnt;
	mb();
}

/* Submit a chain of send requests through the write-combining FIFO,
   64 bytes (4 requests) per burst */
static inline void
mxge_submit_req_wc(mxge_tx_buf_t *tx, mcp_kreq_ether_send_t *src, int cnt)
{
	tx->req += cnt;
	mb();
	while (cnt >= 4) {
		mxge_pio_copy((volatile char *)tx->wc_fifo, src, 64);
		mb();
		src += 4;
		cnt -= 4;
	}
	if (cnt > 0) {
		/* pad it to 64 bytes.  The src is 64 bytes bigger than it
		   needs to be so that we don't overrun it */
		mxge_pio_copy(tx->wc_fifo + MXGEFW_ETH_SEND_OFFSET(cnt), src, 64);
		mb();
	}
}

/* Build and submit the send descriptor chain for a TSO frame.  The
   mbuf has already been DMA-mapped into sc->tx.seg_list by
   mxge_encap(); busdma_seg_cnt is the number of segments there. */
static void
mxge_encap_tso(mxge_softc_t *sc, struct mbuf *m, int busdma_seg_cnt)
{
	mxge_tx_buf_t *tx;
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct ether_header *eh;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t low, high_swapped;
	int len, seglen, cum_len, cum_len_next;
	int next_is_first, chop, cnt, rdma_count, small;
	uint16_t pseudo_hdr_offset, cksum_offset, mss;
	uint8_t flags, flags_next;
	static int once;	/* rate-limits the error printf below */

	mss = m->m_pkthdr.tso_segsz;

	/* negative cum_len signifies to the
	 * send loop that we are still in the
	 * header portion of the TSO packet.
	 */

	/* ensure we have the ethernet, IP and TCP
	   header together in the first mbuf, copy
	   it to a scratch buffer if not */
	if (__predict_false(m->m_len < sizeof (*eh)
			    + sizeof (*ip))) {
		m_copydata(m, 0, sizeof (*eh) + sizeof (*ip),
			   sc->scratch);
		eh = (struct ether_header *)sc->scratch;
	} else {
		eh = mtod(m, struct ether_header *);
	}
	ip = (struct ip *) (eh + 1);
	if (__predict_false(m->m_len < sizeof (*eh) + (ip->ip_hl << 2)
			    + sizeof (*tcp))) {
		m_copydata(m, 0, sizeof (*eh) + (ip->ip_hl << 2)
			   + sizeof (*tcp), sc->scratch);
		eh = (struct ether_header *) sc->scratch;
		ip = (struct ip *) (eh + 1);
	}

	tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2));
	cum_len = -(sizeof (*eh) + ((ip->ip_hl + tcp->th_off) << 2));

	/* TSO implies checksum offload on this hardware */
	cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
	flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST;


	/* for TSO, pseudo_hdr_offset holds mss.
	 * The firmware figures out where to put
	 * the checksum by parsing the header. */
	pseudo_hdr_offset = htobe16(mss);

	tx = &sc->tx;
	req = tx->req_list;
	seg = tx->seg_list;
	cnt = 0;
	rdma_count = 0;
	/* "rdma_count" is the number of RDMAs belonging to the
	 * current packet BEFORE the current send request. For
	 * non-TSO packets, this is equal to "count".
	 * For TSO packets, rdma_count needs to be reset
	 * to 0 after a segment cut.
	 *
	 * The rdma_count field of the send request is
	 * the number of RDMAs of the packet starting at
	 * that request. For TSO send requests with one ore more cuts
	 * in the middle, this is the number of RDMAs starting
	 * after the last cut in the request. All previous
	 * segments before the last cut implicitly have 1 RDMA.
	 *
	 * Since the number of RDMAs is not known beforehand,
	 * it must be filled-in retroactively - after each
	 * segmentation cut or at the end of the entire packet.
	 */

	while (busdma_seg_cnt) {
		/* Break the busdma segment up into pieces*/
		low = MXGE_LOWPART_TO_U32(seg->ds_addr);
		high_swapped = htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		len = seglen = seg->ds_len;

		while (len) {
			flags_next = flags & ~MXGEFW_FLAGS_FIRST;
			cum_len_next = cum_len + seglen;
			/* retroactively patch the RDMA count of the
			   request that started the current run */
			(req-rdma_count)->rdma_count = rdma_count + 1;
			if (__predict_true(cum_len >= 0)) {
				/* payload */
				chop = (cum_len_next > mss);
				cum_len_next = cum_len_next % mss;
				next_is_first = (cum_len_next == 0);
				flags |= chop * MXGEFW_FLAGS_TSO_CHOP;
				flags_next |= next_is_first *
					MXGEFW_FLAGS_FIRST;
				rdma_count |= -(chop | next_is_first);
				rdma_count += chop & !next_is_first;
			} else if (cum_len_next >= 0) {
				/* header ends */
				rdma_count = -1;
				cum_len_next = 0;
				seglen = -cum_len;
				small = (mss <= MXGEFW_SEND_SMALL_SIZE);
				flags_next = MXGEFW_FLAGS_TSO_PLD |
					MXGEFW_FLAGS_FIRST |
					(small * MXGEFW_FLAGS_SMALL);
			}

			req->addr_high = high_swapped;
			req->addr_low = htobe32(low);
			req->pseudo_hdr_offset = pseudo_hdr_offset;
			req->pad = 0;
			req->rdma_count = 1;
			req->length = htobe16(seglen);
			req->cksum_offset = cksum_offset;
			req->flags = flags | ((cum_len & 1) *
					      MXGEFW_FLAGS_ALIGN_ODD);
			low += seglen;
			len -= seglen;
			cum_len = cum_len_next;
			flags = flags_next;
			req++;
			cnt++;
			rdma_count++;
			if (__predict_false(cksum_offset > seglen))
				cksum_offset -= seglen;
			else
				cksum_offset = 0;
			if (__predict_false(cnt > MXGE_MAX_SEND_DESC))
				goto drop;
		}
		busdma_seg_cnt--;
		seg++;
	}
	/* patch the RDMA count of the final run */
	(req-rdma_count)->rdma_count = rdma_count;

	/* walk backwards, marking requests TSO_LAST until the start of
	   the last TCP segment is reached */
	do {
		req--;
		req->flags |= MXGEFW_FLAGS_TSO_LAST;
	} while (!(req->flags & (MXGEFW_FLAGS_TSO_CHOP | MXGEFW_FLAGS_FIRST)));

	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	sc->ifp->if_oerrors++;
	if (!once) {
		printf("MXGE_MAX_SEND_DESC exceeded via TSO!\n");
		printf("mss = %d, %ld!\n", mss, (long)seg - (long)tx->seg_list);
		once = 1;
	}
	return;

}

/* DMA-map an outgoing mbuf chain and hand it to the NIC as a list of
   send requests.  TSO frames are diverted to mxge_encap_tso().
   Called with the tx lock held; consumes the mbuf on error. */
static void
mxge_encap(mxge_softc_t *sc, struct mbuf *m)
{
	mcp_kreq_ether_send_t *req;
	bus_dma_segment_t *seg;
	struct mbuf *m_tmp;
	struct ifnet *ifp;
	mxge_tx_buf_t *tx;
	struct ether_header *eh;
	struct ip *ip;
	int cnt, cum_len, err, i, idx, odd_flag;
	uint16_t pseudo_hdr_offset;
	uint8_t flags, cksum_offset;



	ifp = sc->ifp;
	tx = &sc->tx;

	/* (try to) map the frame for DMA */
	idx = tx->req & tx->mask;
	err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map,
				      m, tx->seg_list, &cnt,
				      BUS_DMA_NOWAIT);
	if (err == EFBIG) {
		/* Too many segments in the chain.  Try
		   to defrag */
		m_tmp = m_defrag(m, M_NOWAIT);
		if (m_tmp == NULL) {
			goto drop;
		}
		m = m_tmp;
		err = bus_dmamap_load_mbuf_sg(tx->dmat,
					      tx->info[idx].map,
					      m, tx->seg_list, &cnt,
					      BUS_DMA_NOWAIT);
	}
	if (err != 0) {
		device_printf(sc->dev, "bus_dmamap_load_mbuf_sg returned %d"
			      " packet len = %d\n", err, m->m_pkthdr.len);
		goto drop;
	}
	bus_dmamap_sync(tx->dmat, tx->info[idx].map,
			BUS_DMASYNC_PREWRITE);
	tx->info[idx].m = m;


	/* TSO is different enough, we handle it in another routine */
	if (m->m_pkthdr.csum_flags & (CSUM_TSO)) {
		mxge_encap_tso(sc, m, cnt);
		return;
	}

	req = tx->req_list;
	cksum_offset = 0;
	pseudo_hdr_offset = 0;
	flags = MXGEFW_FLAGS_NO_TSO;

	/* checksum offloading? */
	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) {
		/* ensure ip header is in first mbuf, copy
		   it to a scratch buffer if not */
		if (__predict_false(m->m_len < sizeof (*eh)
				    + sizeof (*ip))) {
			m_copydata(m, 0, sizeof (*eh) + sizeof (*ip),
				   sc->scratch);
			eh = (struct ether_header *)sc->scratch;
		} else {
			eh = mtod(m, struct ether_header *);
		}
		ip = (struct ip *) (eh + 1);
		cksum_offset = sizeof(*eh) + (ip->ip_hl << 2);
		pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data;
		pseudo_hdr_offset = htobe16(pseudo_hdr_offset);
		req->cksum_offset = cksum_offset;
		flags |= MXGEFW_FLAGS_CKSUM;
		odd_flag = MXGEFW_FLAGS_ALIGN_ODD;
	} else {
		odd_flag = 0;
	}
	if (m->m_pkthdr.len < MXGEFW_SEND_SMALL_SIZE)
		flags |= MXGEFW_FLAGS_SMALL;

	/* convert segments into a request list */
	cum_len = 0;
	seg = tx->seg_list;
	req->flags = MXGEFW_FLAGS_FIRST;
	for (i = 0; i < cnt; i++) {
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(seg->ds_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(seg->ds_addr));
		req->length = htobe16(seg->ds_len);
		req->cksum_offset = cksum_offset;
		if (cksum_offset > seg->ds_len)
			cksum_offset -= seg->ds_len;
		else
			cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cum_len += seg->ds_len;
		seg++;
		req++;
		req->flags = 0;
	}
	req--;
	/* pad runts to 60 bytes */
	if (cum_len < 60) {
		req++;
		req->addr_low =
			htobe32(MXGE_LOWPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->addr_high =
			htobe32(MXGE_HIGHPART_TO_U32(sc->zeropad_dma.bus_addr));
		req->length = htobe16(60 - cum_len);
		req->cksum_offset = 0;
		req->pseudo_hdr_offset = pseudo_hdr_offset;
		req->pad = 0; /* complete solid 16-byte block */
		req->rdma_count = 1;
		req->flags |= flags | ((cum_len & 1) * odd_flag);
		cnt++;
	}

	tx->req_list[0].rdma_count = cnt;
#if 0
	/* print what the firmware will see */
	for (i = 0; i < cnt; i++) {
		printf("%d: addr: 0x%x 0x%x len:%d pso%d,"
		    "cso:%d, flags:0x%x, rdma:%d\n",
		    i, (int)ntohl(tx->req_list[i].addr_high),
		    (int)ntohl(tx->req_list[i].addr_low),
		    (int)ntohs(tx->req_list[i].length),
		    (int)ntohs(tx->req_list[i].pseudo_hdr_offset),
		    tx->req_list[i].cksum_offset, tx->req_list[i].flags,
		    tx->req_list[i].rdma_count);
	}
	printf("--------------\n");
#endif
	tx->info[((cnt - 1) + tx->req) & tx->mask].flag = 1;
	if (tx->wc_fifo == NULL)
		mxge_submit_req(tx, tx->req_list, cnt);
	else
		mxge_submit_req_wc(tx, tx->req_list, cnt);
	return;

drop:
	m_freem(m);
	ifp->if_oerrors++;
	return;
}




/* Drain the interface send queue while transmit ring slots remain.
   Called with the tx lock held. */
static inline void
mxge_start_locked(mxge_softc_t *sc)
{
	struct mbuf *m;
	struct ifnet *ifp;

	ifp = sc->ifp;
	while ((sc->tx.mask - (sc->tx.req -
sc->tx.done))
	       > MXGE_MAX_SEND_DESC) {

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			return;
		}
		/* let BPF see it */
		BPF_MTAP(ifp, m);

		/* give it to the nic */
		mxge_encap(sc, m);
	}
	/* ran out of transmit slots */
	sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE;
}

/* ifnet if_start entry point: take the tx lock and drain the send
   queue */
static void
mxge_start(struct ifnet *ifp)
{
	mxge_softc_t *sc = ifp->if_softc;


	mtx_lock(&sc->tx_lock);
	mxge_start_locked(sc);
	mtx_unlock(&sc->tx_lock);
}

/*
 * copy an array of mcp_kreq_ether_recv_t's to the mcp.  Copy
 * at most 32 bytes at a time, so as to avoid involving the software
 * pio handler in the nic.   We re-write the first segment's low
 * DMA address to mark it valid only after we write the entire chunk
 * in a burst
 */
static inline void
mxge_submit_8rx(volatile mcp_kreq_ether_recv_t *dst,
		mcp_kreq_ether_recv_t *src)
{
	uint32_t low;

	low = src->addr_low;
	src->addr_low = 0xffffffff;	/* invalid address masks slot 0 */
	mxge_pio_copy(dst, src, 8 * sizeof (*src));
	mb();
	dst->addr_low = low;
	mb();
}

/* Allocate and DMA-map a small (MHLEN) receive mbuf for ring slot
   idx; every 8th slot flushes the batch of 8 shadow entries to the
   NIC.  Returns 0 or ENOBUFS / a busdma error. */
static int
mxge_get_buf_small(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_small;
	int cnt, err;

	m = m_gethdr(M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = MHLEN;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	/* submitted even on failure so the firmware always sees a
	   full batch of 8 */
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
	}
	return err;
}

/* Same as mxge_get_buf_small() but allocates a big (cluster-sized)
   receive buffer of sc->big_bytes */
static int
mxge_get_buf_big(mxge_softc_t *sc, bus_dmamap_t map, int idx)
{
	bus_dma_segment_t seg;
	struct mbuf *m;
	mxge_rx_buf_t *rx = &sc->rx_big;
	int cnt, err;

	m = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, sc->big_bytes);
	if (m == NULL) {
		rx->alloc_fail++;
		err = ENOBUFS;
		goto done;
	}
	m->m_len = sc->big_bytes;
	err = bus_dmamap_load_mbuf_sg(rx->dmat, map, m,
				      &seg, &cnt, BUS_DMA_NOWAIT);
	if (err != 0) {
		m_free(m);
		goto done;
	}
	rx->info[idx].m = m;
	rx->shadow[idx].addr_low =
		htobe32(MXGE_LOWPART_TO_U32(seg.ds_addr));
	rx->shadow[idx].addr_high =
		htobe32(MXGE_HIGHPART_TO_U32(seg.ds_addr));

done:
	if ((idx & 7) == 7) {
		if (rx->wc_fifo == NULL)
			mxge_submit_8rx(&rx->lanai[idx - 7],
					&rx->shadow[idx - 7]);
		else {
			mb();
			mxge_pio_copy(rx->wc_fifo, &rx->shadow[idx - 7], 64);
		}
	}
	return err;
}

/* If the frame is IPv4 TCP or UDP, record the firmware-computed
   partial checksum in the mbuf header for the stack to use */
static inline void
mxge_rx_csum(struct mbuf *m, int csum)
{
	struct ether_header *eh;
	struct ip *ip;

	eh = mtod(m, struct ether_header *);
	if (__predict_true(eh->ether_type == htons(ETHERTYPE_IP))) {
		ip = (struct ip *)(eh + 1);
		if (__predict_true(ip->ip_p == IPPROTO_TCP ||
				   ip->ip_p == IPPROTO_UDP)) {
			m->m_pkthdr.csum_data = csum;
			m->m_pkthdr.csum_flags = CSUM_DATA_VALID;
		}
	}
}

/* Receive one frame spanning one or more big-buffer slots, chain the
   mbufs together, refill the slots and pass the frame up the stack */
static inline void
mxge_rx_done_big(mxge_softc_t *sc, int len, int csum)
{
	struct ifnet *ifp;
	struct mbuf *m = 0; 		/* -Wunitialized */
	struct mbuf *m_prev = 0;	/* -Wunitialized */
	struct mbuf *m_head = 0;
	bus_dmamap_t old_map;
	mxge_rx_buf_t *rx;
	int idx;


	rx = &sc->rx_big;
	ifp = sc->ifp;
	while (len > 0) {
		idx = rx->cnt & rx->mask;
                rx->cnt++;
		/* save a
pointer to the received mbuf */
		m = rx->info[idx].m;
		/* try to replace the received mbuf */
		if (mxge_get_buf_big(sc, rx->extra_map, idx)) {
			goto drop;
		}
		/* unmap the received buffer */
		old_map = rx->info[idx].map;
		bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
		bus_dmamap_unload(rx->dmat, old_map);

		/* swap the bus_dmamap_t's */
		rx->info[idx].map = rx->extra_map;
		rx->extra_map = old_map;

		/* chain multiple segments together */
		if (!m_head) {
			m_head = m;
			/* mcp implicitly skips 1st MXGEFW_PAD bytes so
			 * that packet is properly aligned */
			m->m_data += MXGEFW_PAD;
			m->m_pkthdr.len = len;
			m->m_len = sc->big_bytes - MXGEFW_PAD;
		} else {
			m->m_len = sc->big_bytes;
			m->m_flags &= ~M_PKTHDR;
			m_prev->m_next = m;
		}
		len -= m->m_len;
		m_prev = m;
	}

	/* trim trailing garbage from the last mbuf in the chain.  If
	 * there is any garbage, len will be negative */
	m->m_len += len;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m_head, csum);

	/* pass the frame up the stack */
	m_head->m_pkthdr.rcvif = ifp;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m_head);
	return;

drop:
	/* drop the frame -- the old mbuf(s) are re-cycled by running
	   every slot through the allocator */
	if (m_head) {
		len -= sc->big_bytes;
		m_freem(m_head);
	} else {
		len -= (sc->big_bytes + MXGEFW_PAD);
	}
	while ((int)len > 0) {
		idx = rx->cnt & rx->mask;
		rx->cnt++;
		m = rx->info[idx].m;
		if (0 == (mxge_get_buf_big(sc, rx->extra_map, idx))) {
			m_freem(m);
			/* unmap the received buffer */
			old_map = rx->info[idx].map;
			bus_dmamap_sync(rx->dmat, old_map,
					BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rx->dmat, old_map);

			/* swap the bus_dmamap_t's */
			rx->info[idx].map = rx->extra_map;
			rx->extra_map = old_map;
		}
		len -= sc->big_bytes;
	}

	ifp->if_ierrors++;

}

/* Receive a frame that fits in one small (MHLEN) buffer, refill the
   slot and pass the frame up the stack.  On refill failure the frame
   is dropped and the old mbuf re-used. */
static inline void
mxge_rx_done_small(mxge_softc_t *sc, uint32_t len, uint32_t csum)
{
	struct ifnet *ifp;
	struct mbuf *m;
	mxge_rx_buf_t *rx;
	bus_dmamap_t old_map;
	int idx;

	ifp = sc->ifp;
	rx = &sc->rx_small;
	idx = rx->cnt & rx->mask;
	rx->cnt++;
	/* save a pointer to the received mbuf */
	m = rx->info[idx].m;
	/* try to replace the received mbuf */
	if (mxge_get_buf_small(sc, rx->extra_map, idx)) {
		/* drop the frame -- the old mbuf is re-cycled */
		ifp->if_ierrors++;
		return;
	}

	/* unmap the received buffer */
	old_map = rx->info[idx].map;
	bus_dmamap_sync(rx->dmat, old_map, BUS_DMASYNC_POSTREAD);
	bus_dmamap_unload(rx->dmat, old_map);

	/* swap the bus_dmamap_t's */
	rx->info[idx].map = rx->extra_map;
	rx->extra_map = old_map;

	/* mcp implicitly skips 1st 2 bytes so that packet is properly
	 * aligned */
	m->m_data += MXGEFW_PAD;

	/* if the checksum is valid, mark it in the mbuf header */
	if (sc->csum_flag)
		mxge_rx_csum(m, csum);

	/* pass the frame up the stack */
	m->m_pkthdr.rcvif = ifp;
	m->m_len = m->m_pkthdr.len = len;
	ifp->if_ipackets++;
	(*ifp->if_input)(ifp, m);
}

/* Drain the receive-completion ring, dispatching each entry to the
   small or big receive handler by length */
static inline void
mxge_clean_rx_done(mxge_softc_t *sc)
{
	mxge_rx_done_t *rx_done = &sc->rx_done;
	int limit = 0;
	uint16_t length;
	uint16_t checksum;


	while (rx_done->entry[rx_done->idx].length != 0) {
		length = ntohs(rx_done->entry[rx_done->idx].length);
		/* zero the entry so the firmware can re-use it */
		rx_done->entry[rx_done->idx].length = 0;
		checksum = ntohs(rx_done->entry[rx_done->idx].checksum);
		if (length <= (MHLEN - MXGEFW_PAD))
			mxge_rx_done_small(sc, length, checksum);
		else
			mxge_rx_done_big(sc, length, checksum);
		rx_done->cnt++;
1938 rx_done->idx = rx_done->cnt & (mxge_max_intr_slots - 1); 1939 1940 /* limit potential for livelock */ 1941 if (__predict_false(++limit > 2 * mxge_max_intr_slots)) 1942 break; 1943 1944 } 1945} 1946 1947 1948static inline void 1949mxge_tx_done(mxge_softc_t *sc, uint32_t mcp_idx) 1950{ 1951 struct ifnet *ifp; 1952 mxge_tx_buf_t *tx; 1953 struct mbuf *m; 1954 bus_dmamap_t map; 1955 int idx, limit; 1956 1957 limit = 0; 1958 tx = &sc->tx; 1959 ifp = sc->ifp; 1960 while (tx->pkt_done != mcp_idx) { 1961 idx = tx->done & tx->mask; 1962 tx->done++; 1963 m = tx->info[idx].m; 1964 /* mbuf and DMA map only attached to the first 1965 segment per-mbuf */ 1966 if (m != NULL) { 1967 ifp->if_opackets++; 1968 tx->info[idx].m = NULL; 1969 map = tx->info[idx].map; 1970 bus_dmamap_unload(tx->dmat, map); 1971 m_freem(m); 1972 } 1973 if (tx->info[idx].flag) { 1974 tx->info[idx].flag = 0; 1975 tx->pkt_done++; 1976 } 1977 /* limit potential for livelock by only handling 1978 2 full tx rings per call */ 1979 if (__predict_false(++limit > 2 * tx->mask)) 1980 break; 1981 } 1982 1983 /* If we have space, clear IFF_OACTIVE to tell the stack that 1984 its OK to send packets */ 1985 1986 if (ifp->if_drv_flags & IFF_DRV_OACTIVE && 1987 tx->req - tx->done < (tx->mask + 1)/4) { 1988 mtx_lock(&sc->tx_lock); 1989 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; 1990 mxge_start_locked(sc); 1991 mtx_unlock(&sc->tx_lock); 1992 } 1993} 1994 1995static void 1996mxge_intr(void *arg) 1997{ 1998 mxge_softc_t *sc = arg; 1999 mcp_irq_data_t *stats = sc->fw_stats; 2000 mxge_tx_buf_t *tx = &sc->tx; 2001 mxge_rx_done_t *rx_done = &sc->rx_done; 2002 uint32_t send_done_count; 2003 uint8_t valid; 2004 2005 2006 /* make sure the DMA has finished */ 2007 if (!stats->valid) { 2008 return; 2009 } 2010 valid = stats->valid; 2011 2012 /* lower legacy IRQ */ 2013 *sc->irq_deassert = 0; 2014 mb(); 2015 if (!mxge_deassert_wait) 2016 /* don't wait for conf. 
that irq is low */ 2017 stats->valid = 0; 2018 do { 2019 /* check for transmit completes and receives */ 2020 send_done_count = be32toh(stats->send_done_count); 2021 while ((send_done_count != tx->pkt_done) || 2022 (rx_done->entry[rx_done->idx].length != 0)) { 2023 mxge_tx_done(sc, (int)send_done_count); 2024 mxge_clean_rx_done(sc); 2025 send_done_count = be32toh(stats->send_done_count); 2026 } 2027 } while (*((volatile uint8_t *) &stats->valid)); 2028 2029 if (__predict_false(stats->stats_updated)) { 2030 if (sc->link_state != stats->link_up) { 2031 sc->link_state = stats->link_up; 2032 if (sc->link_state) { 2033 if_link_state_change(sc->ifp, LINK_STATE_UP); 2034 if (mxge_verbose) 2035 device_printf(sc->dev, "link up\n"); 2036 } else { 2037 if_link_state_change(sc->ifp, LINK_STATE_DOWN); 2038 if (mxge_verbose) 2039 device_printf(sc->dev, "link down\n"); 2040 } 2041 } 2042 if (sc->rdma_tags_available != 2043 be32toh(sc->fw_stats->rdma_tags_available)) { 2044 sc->rdma_tags_available = 2045 be32toh(sc->fw_stats->rdma_tags_available); 2046 device_printf(sc->dev, "RDMA timed out! 
%d tags " 2047 "left\n", sc->rdma_tags_available); 2048 } 2049 sc->down_cnt += stats->link_down; 2050 } 2051 2052 /* check to see if we have rx token to pass back */ 2053 if (valid & 0x1) 2054 *sc->irq_claim = be32toh(3); 2055 *(sc->irq_claim + 1) = be32toh(3); 2056} 2057 2058static void 2059mxge_watchdog(struct ifnet *ifp) 2060{ 2061 printf("%s called\n", __FUNCTION__); 2062} 2063 2064static void 2065mxge_init(void *arg) 2066{ 2067} 2068 2069 2070 2071static void 2072mxge_free_mbufs(mxge_softc_t *sc) 2073{ 2074 int i; 2075 2076 for (i = 0; i <= sc->rx_big.mask; i++) { 2077 if (sc->rx_big.info[i].m == NULL) 2078 continue; 2079 bus_dmamap_unload(sc->rx_big.dmat, 2080 sc->rx_big.info[i].map); 2081 m_freem(sc->rx_big.info[i].m); 2082 sc->rx_big.info[i].m = NULL; 2083 } 2084 2085 for (i = 0; i <= sc->rx_big.mask; i++) { 2086 if (sc->rx_big.info[i].m == NULL) 2087 continue; 2088 bus_dmamap_unload(sc->rx_big.dmat, 2089 sc->rx_big.info[i].map); 2090 m_freem(sc->rx_big.info[i].m); 2091 sc->rx_big.info[i].m = NULL; 2092 } 2093 2094 for (i = 0; i <= sc->tx.mask; i++) { 2095 if (sc->tx.info[i].m == NULL) 2096 continue; 2097 bus_dmamap_unload(sc->tx.dmat, 2098 sc->tx.info[i].map); 2099 m_freem(sc->tx.info[i].m); 2100 sc->tx.info[i].m = NULL; 2101 } 2102} 2103 2104static void 2105mxge_free_rings(mxge_softc_t *sc) 2106{ 2107 int i; 2108 2109 if (sc->tx.req_bytes != NULL) 2110 free(sc->tx.req_bytes, M_DEVBUF); 2111 if (sc->tx.seg_list != NULL) 2112 free(sc->tx.seg_list, M_DEVBUF); 2113 if (sc->rx_small.shadow != NULL) 2114 free(sc->rx_small.shadow, M_DEVBUF); 2115 if (sc->rx_big.shadow != NULL) 2116 free(sc->rx_big.shadow, M_DEVBUF); 2117 if (sc->tx.info != NULL) { 2118 for (i = 0; i <= sc->tx.mask; i++) { 2119 if (sc->tx.info[i].map != NULL) 2120 bus_dmamap_destroy(sc->tx.dmat, 2121 sc->tx.info[i].map); 2122 } 2123 free(sc->tx.info, M_DEVBUF); 2124 } 2125 if (sc->rx_small.info != NULL) { 2126 for (i = 0; i <= sc->rx_small.mask; i++) { 2127 if (sc->rx_small.info[i].map != NULL) 
				bus_dmamap_destroy(sc->rx_small.dmat,
						   sc->rx_small.info[i].map);
		}
		free(sc->rx_small.info, M_DEVBUF);
	}
	if (sc->rx_big.info != NULL) {
		for (i = 0; i <= sc->rx_big.mask; i++) {
			if (sc->rx_big.info[i].map != NULL)
				bus_dmamap_destroy(sc->rx_big.dmat,
						   sc->rx_big.info[i].map);
		}
		free(sc->rx_big.info, M_DEVBUF);
	}
	if (sc->rx_big.extra_map != NULL)
		bus_dmamap_destroy(sc->rx_big.dmat,
				   sc->rx_big.extra_map);
	if (sc->rx_small.extra_map != NULL)
		bus_dmamap_destroy(sc->rx_small.dmat,
				   sc->rx_small.extra_map);
	if (sc->tx.dmat != NULL)
		bus_dma_tag_destroy(sc->tx.dmat);
	if (sc->rx_small.dmat != NULL)
		bus_dma_tag_destroy(sc->rx_small.dmat);
	if (sc->rx_big.dmat != NULL)
		bus_dma_tag_destroy(sc->rx_big.dmat);
}

/* Query ring sizes from the firmware, then allocate the tx request
   copy block, busdma segment list, rx shadow rings, per-slot host
   info arrays, DMA tags and per-slot DMA maps.  On failure all
   partial allocations are released via mxge_free_rings(). */
static int
mxge_alloc_rings(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int tx_ring_size, rx_ring_size;
	int tx_ring_entries, rx_ring_entries;
	int i, err;
	unsigned long bytes;

	/* get ring sizes */
	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd);
	tx_ring_size = cmd.data0;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd);
	if (err != 0) {
		device_printf(sc->dev, "Cannot determine ring sizes\n");
		goto abort_with_nothing;
	}

	rx_ring_size = cmd.data0;

	tx_ring_entries = tx_ring_size / sizeof (mcp_kreq_ether_send_t);
	rx_ring_entries = rx_ring_size / sizeof (mcp_dma_addr_t);
	sc->ifp->if_snd.ifq_drv_maxlen = sc->ifp->if_snd.ifq_maxlen;
	IFQ_SET_MAXLEN(&sc->ifp->if_snd, tx_ring_entries - 1);
	IFQ_SET_READY(&sc->ifp->if_snd);

	/* ring sizes are powers of two, so entries-1 works as a mask */
	sc->tx.mask = tx_ring_entries - 1;
	sc->rx_small.mask = sc->rx_big.mask = rx_ring_entries - 1;

	err = ENOMEM;

	/* allocate the tx request copy block */
	bytes = 8 +
		sizeof (*sc->tx.req_list) * (MXGE_MAX_SEND_DESC + 4);
	sc->tx.req_bytes = malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.req_bytes == NULL)
		goto abort_with_nothing;
	/* ensure req_list entries are aligned to 8 bytes */
	sc->tx.req_list = (mcp_kreq_ether_send_t *)
		((unsigned long)(sc->tx.req_bytes + 7) & ~7UL);

	/* allocate the tx busdma segment list */
	bytes = sizeof (*sc->tx.seg_list) * MXGE_MAX_SEND_DESC;
	sc->tx.seg_list = (bus_dma_segment_t *)
		malloc(bytes, M_DEVBUF, M_WAITOK);
	if (sc->tx.seg_list == NULL)
		goto abort_with_alloc;

	/* allocate the rx shadow rings */
	bytes = rx_ring_entries * sizeof (*sc->rx_small.shadow);
	sc->rx_small.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.shadow == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.shadow);
	sc->rx_big.shadow = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.shadow == NULL)
		goto abort_with_alloc;

	/* allocate the host info rings */
	bytes = tx_ring_entries * sizeof (*sc->tx.info);
	sc->tx.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->tx.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_small.info);
	sc->rx_small.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_small.info == NULL)
		goto abort_with_alloc;

	bytes = rx_ring_entries * sizeof (*sc->rx_big.info);
	sc->rx_big.info = malloc(bytes, M_DEVBUF, M_ZERO|M_WAITOK);
	if (sc->rx_big.info == NULL)
		goto abort_with_alloc;

	/* allocate the busdma resources */
	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 sc->tx.boundary,	/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC/2,	/* num segs */
				 sc->tx.boundary,	/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->tx.dmat);		/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating tx dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 MHLEN,			/* maxsize */
				 1,			/* num segs */
				 MHLEN,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_small.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_small dmat\n",
			      err);
		goto abort_with_alloc;
	}

	err = bus_dma_tag_create(sc->parent_dmat,	/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 4096,			/* maxsize */
				 1,			/* num segs */
				 4096,			/* maxsegsize */
				 BUS_DMA_ALLOCNOW,	/* flags */
				 NULL, NULL,		/* lock */
				 &sc->rx_big.dmat);	/* tag */
	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating rx_big dmat\n",
			      err);
		goto abort_with_alloc;
	}

	/* now use these tags to setup dmamaps for each slot
	   in each ring */
	for (i = 0; i <= sc->tx.mask; i++) {
		err = bus_dmamap_create(sc->tx.dmat, 0,
					&sc->tx.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d tx dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	for (i = 0; i <= sc->rx_small.mask; i++) {
		err = bus_dmamap_create(sc->rx_small.dmat, 0,
					&sc->rx_small.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_small dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_small.dmat, 0,
				&sc->rx_small.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_small dmamap\n",
			      err);
		goto abort_with_alloc;
	}

	for (i = 0; i <= sc->rx_big.mask; i++) {
		err = bus_dmamap_create(sc->rx_big.dmat, 0,
					&sc->rx_big.info[i].map);
		if (err != 0) {
			device_printf(sc->dev, "Err %d rx_big dmamap\n",
				      err);
			goto abort_with_alloc;
		}
	}
	err = bus_dmamap_create(sc->rx_big.dmat, 0,
				&sc->rx_big.extra_map);
	if (err != 0) {
		device_printf(sc->dev, "Err %d extra rx_big dmamap\n",
			      err);
		goto abort_with_alloc;
	}
	return 0;

abort_with_alloc:
	mxge_free_rings(sc);

abort_with_nothing:
	return err;
}

/* Bring the interface up: reset the NIC, size the big receive
   buffers for the MTU, allocate rings, hook up the interrupt and
   fetch the lanai ring pointers from the firmware */
static int
mxge_open(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int i, err;
	bus_dmamap_t map;
	bus_addr_t bus;


	/* Copy the MAC address in case it was overridden */
	bcopy(IF_LLADDR(sc->ifp), sc->mac_addr, ETHER_ADDR_LEN);

	err = mxge_reset(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to reset\n");
		return EIO;
	}

	if (MCLBYTES >=
	    sc->ifp->if_mtu + ETHER_HDR_LEN + MXGEFW_PAD)
		sc->big_bytes = MCLBYTES;
	else
		sc->big_bytes = MJUMPAGESIZE;

	err = mxge_alloc_rings(sc);
	if (err != 0) {
		device_printf(sc->dev, "failed to allocate rings\n");
		return err;
	}

	err = bus_setup_intr(sc->dev, sc->irq_res,
			     INTR_TYPE_NET | INTR_MPSAFE,
			     mxge_intr, sc, &sc->ih);
	if (err != 0) {
		goto abort_with_rings;
	}

	/* get the lanai pointers to the send and receive rings */

	err = mxge_send_cmd(sc, MXGEFW_CMD_GET_SEND_OFFSET, &cmd);
	sc->tx.lanai =
		(volatile mcp_kreq_ether_send_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc,
			     MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd);
	sc->rx_small.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);
	err |= mxge_send_cmd(sc, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd);
	sc->rx_big.lanai =
		(volatile mcp_kreq_ether_recv_t *)(sc->sram + cmd.data0);

	if (err != 0) {
		device_printf(sc->dev,
			      "failed to get ring sizes or locations\n");
		err = EIO;
		goto abort_with_irq;
	}

	/* if write combining was enabled at attach, point the rings'
	   doorbell FIFOs at the WC-mapped SRAM windows */
	if (sc->wc) {
		sc->tx.wc_fifo = sc->sram + MXGEFW_ETH_SEND_4;
		sc->rx_small.wc_fifo = sc->sram + MXGEFW_ETH_RECV_SMALL;
		sc->rx_big.wc_fifo = sc->sram + MXGEFW_ETH_RECV_BIG;
	} else {
		sc->tx.wc_fifo = 0;
		sc->rx_small.wc_fifo = 0;
		sc->rx_big.wc_fifo = 0;
	}


	/* stock receive rings */
	for (i = 0; i <= sc->rx_small.mask; i++) {
		map = sc->rx_small.info[i].map;
		err = mxge_get_buf_small(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d smalls\n",
				      i, sc->rx_small.mask + 1);
			goto abort;
		}
	}
	for (i = 0; i <= sc->rx_big.mask; i++) {
		map = sc->rx_big.info[i].map;
		err = mxge_get_buf_big(sc, map, i);
		if (err) {
			device_printf(sc->dev, "alloced %d/%d bigs\n",
				      i, sc->rx_big.mask + 1);
			goto abort;
		}
	}

	/* Give the firmware the mtu and the big and small buffer
	   sizes.  The firmware wants the big buf size to be a power
	   of two.  Luckily, FreeBSD's clusters are powers of two */
	cmd.data0 = sc->ifp->if_mtu + ETHER_HDR_LEN;
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_MTU, &cmd);
	cmd.data0 = MHLEN - MXGEFW_PAD;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_SMALL_BUFFER_SIZE,
			     &cmd);
	cmd.data0 = sc->big_bytes;
	err |= mxge_send_cmd(sc, MXGEFW_CMD_SET_BIG_BUFFER_SIZE, &cmd);

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Now give him the pointer to the stats block */
	cmd.data0 = MXGE_LOWPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data1 = MXGE_HIGHPART_TO_U32(sc->fw_stats_dma.bus_addr);
	cmd.data2 = sizeof(struct mcp_irq_data);
	err = mxge_send_cmd(sc, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd);

	if (err != 0) {
		/* older firmware: fall back to the obsolete stats DMA
		   command, which only tracks send_done_count */
		bus = sc->fw_stats_dma.bus_addr;
		bus += offsetof(struct mcp_irq_data, send_done_count);
		cmd.data0 = MXGE_LOWPART_TO_U32(bus);
		cmd.data1 = MXGE_HIGHPART_TO_U32(bus);
		err = mxge_send_cmd(sc,
				    MXGEFW_CMD_SET_STATS_DMA_OBSOLETE,
				    &cmd);
		/* Firmware cannot support multicast without STATS_DMA_V2 */
		sc->fw_multicast_support = 0;
	} else {
		sc->fw_multicast_support = 1;
	}

	if (err != 0) {
		device_printf(sc->dev, "failed to setup params\n");
		goto abort;
	}

	/* Finally, start the firmware running */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_UP, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring up link\n");
		goto abort;
	}
	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	return 0;


abort:
	mxge_free_mbufs(sc);
abort_with_irq:
	bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
abort_with_rings:
	mxge_free_rings(sc);
	return err;
}

/*
 * Bring the interface down: tell the firmware to stop, wait (up to 1s)
 * for the "down" interrupt to bump sc->down_cnt, then tear down the
 * interrupt handler and free ring resources.  Always returns 0.
 */
static int
mxge_close(mxge_softc_t *sc)
{
	mxge_cmd_t cmd;
	int err, old_down_cnt;

	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	old_down_cnt = sc->down_cnt;
	mb();		/* order the flag/counter reads vs. the command */
	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
	if (err) {
		device_printf(sc->dev, "Couldn't bring down link\n");
	}
	if (old_down_cnt == sc->down_cnt) {
		/* wait for down irq */
		(void)tsleep(&sc->down_cnt, PWAIT, "down mxge", hz);
	}
	if (old_down_cnt == sc->down_cnt) {
		device_printf(sc->dev, "never got down irq\n");
	}
	if (sc->ih != NULL)
		bus_teardown_intr(sc->dev, sc->irq_res, sc->ih);
	mxge_free_mbufs(sc);
	mxge_free_rings(sc);
	return 0;
}


/* Media changes are not supported; the link is fixed 10GbE. */
static int
mxge_media_change(struct ifnet *ifp)
{
	return EINVAL;
}

/*
 * Change the MTU.  If the interface is running, it is closed and
 * reopened with the new MTU; on reopen failure the old MTU is
 * restored and the interface reopened with it.
 */
static int
mxge_change_mtu(mxge_softc_t *sc, int mtu)
{
	struct ifnet *ifp = sc->ifp;
	int real_mtu, old_mtu;
	int err = 0;


	real_mtu = mtu + ETHER_HDR_LEN;
	if ((real_mtu > MXGE_MAX_ETHER_MTU) ||
	    real_mtu < 60)
		return EINVAL;
	sx_xlock(&sc->driver_lock);
	old_mtu = ifp->if_mtu;
	ifp->if_mtu = mtu;
	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		mxge_close(sc);
		err = mxge_open(sc);
		if (err != 0) {
			/* reopen failed; fall back to the old MTU */
			ifp->if_mtu = old_mtu;
			mxge_close(sc);
			(void) mxge_open(sc);
		}
	}
	sx_xunlock(&sc->driver_lock);
	return err;
}

/* Report link state from the firmware's DMA'd stats block. */
static void
mxge_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	mxge_softc_t *sc = ifp->if_softc;


	if (sc == NULL)
		return;
	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_status |= sc->fw_stats->link_up ? IFM_ACTIVE : 0;
	ifmr->ifm_active = IFM_AUTO | IFM_ETHER;
	ifmr->ifm_active |= sc->fw_stats->link_up ? IFM_FDX : 0;
}

/* ifnet ioctl entry point; driver_lock serializes state changes. */
static int
mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	mxge_softc_t *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int err, mask;

	err = 0;
	switch (command) {
	case SIOCSIFADDR:
	case SIOCGIFADDR:
		err = ether_ioctl(ifp, command, data);
		break;

	case SIOCSIFMTU:
		err = mxge_change_mtu(sc, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		sx_xlock(&sc->driver_lock);
		if (ifp->if_flags & IFF_UP) {
			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
				err = mxge_open(sc);
			else {
				/* take care of promisc and allmulti
				   flag changes */
				mxge_change_promisc(sc,
						    ifp->if_flags & IFF_PROMISC);
				mxge_set_multicast_list(sc);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				mxge_close(sc);
		}
		sx_xunlock(&sc->driver_lock);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		sx_xlock(&sc->driver_lock);
		mxge_set_multicast_list(sc);
		sx_xunlock(&sc->driver_lock);
		break;

	case SIOCSIFCAP:
		sx_xlock(&sc->driver_lock);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
		/* NOTE(review): because of the "else if" below, a single
		   SIOCSIFCAP that toggles both TXCSUM and RXCSUM will
		   silently skip the RXCSUM change — confirm intended */
		if (mask & IFCAP_TXCSUM) {
			if (IFCAP_TXCSUM & ifp->if_capenable) {
				/* disabling tx csum also disables TSO,
				   which depends on it */
				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
						      | CSUM_TSO);
			} else {
				ifp->if_capenable |= IFCAP_TXCSUM;
				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP);
			}
		} else if (mask & IFCAP_RXCSUM) {
			if (IFCAP_RXCSUM & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_RXCSUM;
				sc->csum_flag = 0;
			} else {
				ifp->if_capenable |= IFCAP_RXCSUM;
				sc->csum_flag = 1;
			}
		}
		if (mask & IFCAP_TSO4) {
			if (IFCAP_TSO4 & ifp->if_capenable) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_TSO;
			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
				ifp->if_capenable |= IFCAP_TSO4;
				ifp->if_hwassist |= CSUM_TSO;
			} else {
				/* TSO requires tx checksum offload */
				printf("mxge requires tx checksum offload"
				       " be enabled to use TSO\n");
				err = EINVAL;
			}
		}
		sx_xunlock(&sc->driver_lock);
		break;

	case SIOCGIFMEDIA:
		err = ifmedia_ioctl(ifp, (struct ifreq *)data,
				    &sc->media, command);
		break;

	default:
		err = ENOTTY;
	}
	return err;
}

/*
 * Pull the hw.mxge.* loader tunables into the module-level knobs and
 * sanity-check the interrupt coalescing delay (0..10000 us, default 30).
 */
static void
mxge_fetch_tunables(mxge_softc_t *sc)
{

	TUNABLE_INT_FETCH("hw.mxge.flow_control_enabled",
			  &mxge_flow_control);
	TUNABLE_INT_FETCH("hw.mxge.intr_coal_delay",
			  &mxge_intr_coal_delay);
	TUNABLE_INT_FETCH("hw.mxge.nvidia_ecrc_enable",
			  &mxge_nvidia_ecrc_enable);
	TUNABLE_INT_FETCH("hw.mxge.deassert_wait",
			  &mxge_deassert_wait);
	TUNABLE_INT_FETCH("hw.mxge.verbose",
			  &mxge_verbose);

	if (bootverbose)
		mxge_verbose = 1;
	if (mxge_intr_coal_delay < 0 || mxge_intr_coal_delay > 10*1000)
		mxge_intr_coal_delay = 30;
	sc->pause = mxge_flow_control;
}

/*
 * Device attach: create the parent DMA tag, allocate the ifnet,
 * map the board, load firmware, and register with the network stack.
 * Unwinds via the abort_with_* labels on failure.
 */
static int
mxge_attach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);
	struct ifnet *ifp;
	size_t bytes;
	int rid, err;
	uint16_t cmd;

	sc->dev = dev;
	mxge_fetch_tunables(sc);

	/* parent tag from which all per-ring tags are derived */
	err = bus_dma_tag_create(NULL,			/* parent */
				 1,			/* alignment */
				 4096,			/* boundary */
				 BUS_SPACE_MAXADDR,	/* low */
				 BUS_SPACE_MAXADDR,	/* high */
				 NULL, NULL,		/* filter */
				 65536 + 256,		/* maxsize */
				 MXGE_MAX_SEND_DESC,	/* num segs */
				 4096,			/* maxsegsize */
				 0,			/* flags */
				 NULL, NULL,		/* lock */
				 &sc->parent_dmat);	/* tag */

	if (err != 0) {
		device_printf(sc->dev, "Err %d allocating parent dmat\n",
			      err);
		goto abort_with_nothing;
	}

	ifp = sc->ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not if_alloc()\n");
		err = ENOSPC;
		goto abort_with_parent_dmat;
	}
	mtx_init(&sc->cmd_lock, NULL,
		 MTX_NETWORK_LOCK, MTX_DEF);
	mtx_init(&sc->tx_lock, device_get_nameunit(dev),
		 MTX_NETWORK_LOCK, MTX_DEF);
	sx_init(&sc->driver_lock, device_get_nameunit(dev));

	/* Enable DMA and Memory space access */
	pci_enable_busmaster(dev);
	cmd = pci_read_config(dev, PCIR_COMMAND, 2);
	cmd |= PCIM_CMD_MEMEN;
	pci_write_config(dev, PCIR_COMMAND, cmd, 2);

	/* Map the board into the kernel */
	rid = PCIR_BARS;
	sc->mem_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, 0,
					 ~0, 1, RF_ACTIVE);
	if (sc->mem_res == NULL) {
		device_printf(dev, "could not map memory\n");
		err = ENXIO;
		goto abort_with_lock;
	}
	sc->sram = rman_get_virtual(sc->mem_res);
	/* 2MB BAR minus the firmware/boot regions and a 0x100 pad */
	sc->sram_size = 2*1024*1024 - (2*(48*1024)+(32*1024)) - 0x100;
	if (sc->sram_size > rman_get_size(sc->mem_res)) {
		device_printf(dev, "impossible memory region size %ld\n",
			      rman_get_size(sc->mem_res));
		err = ENXIO;
		goto abort_with_mem_res;
	}

	/* make NULL terminated copy of the EEPROM strings section of
	   lanai SRAM */
	bzero(sc->eeprom_strings, MXGE_EEPROM_STRINGS_SIZE);
	bus_space_read_region_1(rman_get_bustag(sc->mem_res),
				rman_get_bushandle(sc->mem_res),
				sc->sram_size - MXGE_EEPROM_STRINGS_SIZE,
				sc->eeprom_strings,
				MXGE_EEPROM_STRINGS_SIZE - 2);
	err = mxge_parse_strings(sc);
	if (err != 0)
		goto abort_with_mem_res;

	/* Enable write combining for efficient use of PCIe bus */
	mxge_enable_wc(sc);

	/* Allocate the out of band dma memory */
	err = mxge_dma_alloc(sc, &sc->cmd_dma,
			     sizeof (mxge_cmd_t), 64);
	if (err != 0)
		goto abort_with_mem_res;
	sc->cmd = (mcp_cmd_response_t *) sc->cmd_dma.addr;
	/* 64-byte zero pad the firmware DMAs short tails into */
	err = mxge_dma_alloc(sc, &sc->zeropad_dma, 64, 64);
	if (err != 0)
		goto abort_with_cmd_dma;

	err = mxge_dma_alloc(sc, &sc->fw_stats_dma,
			     sizeof (*sc->fw_stats), 64);
if (err != 0) 2774 goto abort_with_zeropad_dma; 2775 sc->fw_stats = (mcp_irq_data_t *)sc->fw_stats_dma.addr; 2776 2777 2778 /* allocate interrupt queues */ 2779 bytes = mxge_max_intr_slots * sizeof (*sc->rx_done.entry); 2780 err = mxge_dma_alloc(sc, &sc->rx_done.dma, bytes, 4096); 2781 if (err != 0) 2782 goto abort_with_fw_stats; 2783 sc->rx_done.entry = sc->rx_done.dma.addr; 2784 bzero(sc->rx_done.entry, bytes); 2785 /* Add our ithread */ 2786 rid = 0; 2787 sc->irq_res = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 2788 1, RF_SHAREABLE | RF_ACTIVE); 2789 if (sc->irq_res == NULL) { 2790 device_printf(dev, "could not alloc interrupt\n"); 2791 goto abort_with_rx_done; 2792 } 2793 2794 /* load the firmware */ 2795 mxge_select_firmware(sc); 2796 2797 err = mxge_load_firmware(sc); 2798 if (err != 0) 2799 goto abort_with_irq_res; 2800 sc->intr_coal_delay = mxge_intr_coal_delay; 2801 err = mxge_reset(sc); 2802 if (err != 0) 2803 goto abort_with_irq_res; 2804 2805 /* hook into the network stack */ 2806 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 2807 ifp->if_baudrate = 100000000; 2808 ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4; 2809 ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; 2810 ifp->if_capenable = ifp->if_capabilities; 2811 sc->csum_flag = 1; 2812 ifp->if_init = mxge_init; 2813 ifp->if_softc = sc; 2814 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 2815 ifp->if_ioctl = mxge_ioctl; 2816 ifp->if_start = mxge_start; 2817 ifp->if_watchdog = mxge_watchdog; 2818 ether_ifattach(ifp, sc->mac_addr); 2819 /* ether_ifattach sets mtu to 1500 */ 2820 ifp->if_mtu = MXGE_MAX_ETHER_MTU - ETHER_HDR_LEN; 2821 2822 /* Initialise the ifmedia structure */ 2823 ifmedia_init(&sc->media, 0, mxge_media_change, 2824 mxge_media_status); 2825 ifmedia_add(&sc->media, IFM_ETHER|IFM_AUTO, 0, NULL); 2826 mxge_add_sysctls(sc); 2827 return 0; 2828 2829abort_with_irq_res: 2830 bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res); 
abort_with_rx_done:
	sc->rx_done.entry = NULL;
	mxge_dma_free(&sc->rx_done.dma);
abort_with_fw_stats:
	mxge_dma_free(&sc->fw_stats_dma);
abort_with_zeropad_dma:
	mxge_dma_free(&sc->zeropad_dma);
abort_with_cmd_dma:
	mxge_dma_free(&sc->cmd_dma);
abort_with_mem_res:
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
abort_with_lock:
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_lock);
	mtx_destroy(&sc->tx_lock);
	sx_destroy(&sc->driver_lock);
	if_free(ifp);
abort_with_parent_dmat:
	bus_dma_tag_destroy(sc->parent_dmat);

abort_with_nothing:
	return err;
}

/*
 * Device detach: close if running, unhook from the network stack,
 * and release every resource acquired in mxge_attach (reverse order).
 */
static int
mxge_detach(device_t dev)
{
	mxge_softc_t *sc = device_get_softc(dev);

	sx_xlock(&sc->driver_lock);
	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
		mxge_close(sc);
	sx_xunlock(&sc->driver_lock);
	ether_ifdetach(sc->ifp);
	/* stop the firmware's dummy RDMA engine before freeing DMA memory */
	mxge_dummy_rdma(sc, 0);
	bus_release_resource(dev, SYS_RES_IRQ, 0, sc->irq_res);
	sc->rx_done.entry = NULL;
	mxge_dma_free(&sc->rx_done.dma);
	mxge_dma_free(&sc->fw_stats_dma);
	mxge_dma_free(&sc->zeropad_dma);
	mxge_dma_free(&sc->cmd_dma);
	bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BARS, sc->mem_res);
	pci_disable_busmaster(dev);
	mtx_destroy(&sc->cmd_lock);
	mtx_destroy(&sc->tx_lock);
	sx_destroy(&sc->driver_lock);
	if_free(sc->ifp);
	bus_dma_tag_destroy(sc->parent_dmat);
	return 0;
}

/* Nothing to do at shutdown; the NIC is quiesced by detach/close. */
static int
mxge_shutdown(device_t dev)
{
	return 0;
}

/*
  This file uses Myri10GE driver indentation.

  Local Variables:
  c-file-style:"linux"
  tab-width:8
  End:
*/